{ Tokens Library. Copyright (C) 1996, Earl F. Glynn. All Rights Reserved. Converted from C++ Tokens unit, December 1996 Customized for the tiOPF2 by G.Geldenhuys, March 2007 Note: The diagram explaining the Finite State Machine used for this parser can be found at: Docs/diagrams/tiTokenLibrary_Diagram.png Sample Usage: FieldSpec := TTokens.Create(FieldSpecLine, ', ', '"', '"', '\', tsMultipleSeparatorsBetweenTokens); try FieldType := UpperCase(FieldSpec.Token(2)); finally FieldSpec.Free; end; } unit gfx_tokenlibrary; {$mode objfpc}{$H+} interface type TTokenSeparator = (tsSingleSeparatorBetweenTokens, tsMultipleSeparatorsBetweenTokens); TTokens = class(TObject) private FOriginalString: string; FCount: integer; FTokenString: string; // Separator-stripped string with tokens separated by NULLs public constructor Create(const OriginalString: string; const Separators: string; const LeftMark: char; const RightMark: char; const Escape: char; const SeparatorBetweenTokens: TTokenSeparator = tsMultipleSeparatorsBetweenTokens); overload; destructor Destroy; override; function Token(const index: integer): string; function TokenCount: integer; end; implementation const NULL = #$00; type TFiniteStates = (fsSkipSeparatorsState, fsAcceptSingleWordTokenState, fsAcceptMultiWordTokenState, fsEscapeState); constructor TTokens.Create(const OriginalString: string; const Separators: string; const LeftMark: char; const RightMark: char; const Escape: char; const SeparatorBetweenTokens: TTokenSeparator); var c: char; i: integer; IgnoreNextSeparator: boolean; state: TFiniteStates; begin inherited Create; FOriginalString := OriginalString; FTokenString := ''; FCount := 0; // The following "flag" is somewhat of a kludge to allow a single // separator to follow the closing RightMark of a Multiword Token // when SeparatorBetweenTokens = tsSingleSeparatorBetweenTokens IgnoreNextSeparator := False; // Initial state of finite state machine that recognizes tokens state := fsSkipSeparatorsState; for i := 1 to Length(FOriginalString) do begin c := FOriginalString[i]; case state of fsSkipSeparatorsState: // Do nothing if character is separator if Pos(c, Separators) > 0 then begin // For cases like multiple comma-delimited fields, treat each // separator as end of a token, e.g, "x,,,y" would be 4 tokens if SeparatorBetweenTokens = tsSingleSeparatorBetweenTokens then begin if IgnoreNextSeparator then IgnoreNextSeparator := False else begin Inc(FCount); FTokenString := FTokenString + NULL; end; end; end else if c = LeftMark then begin state := fsAcceptMultiWordTokenState; Inc(FCount); end else begin state := fsAcceptSingleWordTokenState; Inc(FCount); FTokenString := FTokenString + c; end; fsAcceptSingleWordTokenState: if Pos(c, Separators) = 0 then FTokenString := FTokenString + c // not a separator else begin // separator FTokenString := FTokenString + NULL; state := fsSkipSeparatorsState; end; fsAcceptMultiWordTokenState: if c = RightMark then begin FTokenString := FTokenString + NULL; state := fsSkipSeparatorsState; IgnoreNextSeparator := True; end else if c = Escape then state := fsEscapeState else FTokenString := FTokenString + c; fsEscapeState: begin FTokenString := FTokenString + c; state := fsAcceptMultiWordTokenState; end end; { case } end; { for } end; destructor TTokens.Destroy; begin inherited Destroy; end; function TTokens.Token(const index: integer): string; var c: char; found: integer; i: integer; begin Result := ''; found := 1; i := 1; while (i <= length(FTokenString)) and (found <= index) do begin c := FTokenString[i]; if c = NULL then Inc(found) else if (found = index) then Result := Result + c; Inc(i); end; end; function TTokens.TokenCount: integer; begin Result := FCount; end; end.