{
Tokens Library.
Copyright (C) 1996, Earl F. Glynn. All Rights Reserved.
Converted from C++ Tokens unit, December 1996
Customized for the tiOPF2 by G.Geldenhuys, March 2007
Note:
The diagram explaining the Finite State Machine used for this parser
can be found at: Docs/diagrams/tiTokenLibrary_Diagram.png
Sample Usage:
FieldSpec := TTokens.Create(FieldSpecLine, ', ', '"', '"', '\', tsMultipleSeparatorsBetweenTokens);
try
FieldType := UpperCase(FieldSpec.Token(2));
finally
FieldSpec.Free;
end;
}
unit gfx_tokenlibrary;

{$mode objfpc}{$H+}

interface

type
  // Controls how consecutive separator characters are interpreted:
  //   tsSingleSeparatorBetweenTokens    - every separator ends a token, so a
  //                                       run of separators yields empty tokens
  //                                       (e.g. "x,,,y" is 4 tokens)
  //   tsMultipleSeparatorsBetweenTokens - a run of separators counts as a
  //                                       single break between tokens
  TTokenSeparator = (tsSingleSeparatorBetweenTokens,
    tsMultipleSeparatorsBetweenTokens);

  { Splits a string into tokens at construction time.  Tokens are normally
    delimited by any character in Separators; a token wrapped between LeftMark
    and RightMark may itself contain separator characters, and Escape inside
    such a token makes the following character literal.  See the unit header
    for sample usage. }
  TTokens = class(TObject)
  private
    FOriginalString: string;  // the unmodified input string
    FCount: integer;          // number of tokens recognized during Create
    FTokenString: string;     // Separator-stripped string with tokens separated by NULLs
  public
    // Parses OriginalString immediately; results are available via
    // Token() and TokenCount afterwards.
    constructor Create(const OriginalString: string; const Separators: string;
      const LeftMark: char; const RightMark: char; const Escape: char;
      const SeparatorBetweenTokens: TTokenSeparator = tsMultipleSeparatorsBetweenTokens); overload;
    destructor Destroy; override;
    // Returns the index-th token (1-based), or '' when index is out of range.
    function Token(const index: integer): string;
    // Returns the number of tokens found in OriginalString.
    function TokenCount: integer;
  end;
implementation

const
  NULL = #$00;  // delimiter placed between tokens inside FTokenString

type
  // States of the finite state machine driving TTokens.Create
  // (diagram: Docs/diagrams/tiTokenLibrary_Diagram.png, per unit header).
  TFiniteStates = (fsSkipSeparatorsState,         // between tokens, consuming separators
    fsAcceptSingleWordTokenState,                 // inside an unquoted token
    fsAcceptMultiWordTokenState,                  // inside a LeftMark..RightMark token
    fsEscapeState);                               // just saw Escape inside a multi-word token
{ Tokenizes OriginalString with a four-state finite state machine.
  The recognized tokens are appended to FTokenString separated by NULL (#0)
  characters, and FCount is incremented once per token.

  Parameters:
    OriginalString         - the text to split.
    Separators             - any character in this string ends an unquoted token.
    LeftMark / RightMark   - open/close a multi-word token, inside which
                             separator characters are kept literally.
    Escape                 - inside a multi-word token, makes the next
                             character literal (so RightMark/Escape can appear
                             in a token's text).
    SeparatorBetweenTokens - see TTokenSeparator.

  Note: the final token may not be NULL-terminated in FTokenString (the loop
  simply ends at the end of the input); Token() tolerates this because it
  scans only to the end of FTokenString. }
constructor TTokens.Create(const OriginalString: string; const Separators: string;
  const LeftMark: char; const RightMark: char; const Escape: char;
  const SeparatorBetweenTokens: TTokenSeparator);
var
  c: char;                       // current input character
  i: integer;                    // index into FOriginalString
  IgnoreNextSeparator: boolean;  // see kludge note below
  state: TFiniteStates;          // current FSM state
begin
  inherited Create;
  FOriginalString := OriginalString;
  FTokenString := '';
  FCount := 0;
  // The following "flag" is somewhat of a kludge to allow a single
  // separator to follow the closing RightMark of a Multiword Token
  // when SeparatorBetweenTokens = tsSingleSeparatorBetweenTokens.
  // NOTE(review): the flag is only cleared when a separator is seen in
  // fsSkipSeparatorsState; if a non-separator immediately follows a closing
  // RightMark, it stays set until the NEXT separator run — confirm this
  // edge case is intended before relying on it.
  IgnoreNextSeparator := False;
  // Initial state of finite state machine that recognizes tokens
  state := fsSkipSeparatorsState;
  for i := 1 to Length(FOriginalString) do
  begin
    c := FOriginalString[i];
    case state of
      fsSkipSeparatorsState:
        // Do nothing if character is separator
        if Pos(c, Separators) > 0 then
        begin
          // For cases like multiple comma-delimited fields, treat each
          // separator as end of a token, e.g, "x,,,y" would be 4 tokens
          if SeparatorBetweenTokens = tsSingleSeparatorBetweenTokens then
          begin
            if IgnoreNextSeparator then
              IgnoreNextSeparator := False
            else
            begin
              // Record an empty token for this extra separator.
              Inc(FCount);
              FTokenString := FTokenString + NULL;
            end;
          end;
        end
        else if c = LeftMark then
        begin
          // Opening quote: start a multi-word token (the mark itself is
          // not stored in FTokenString).
          state := fsAcceptMultiWordTokenState;
          Inc(FCount);
        end
        else
        begin
          // Any other character starts an ordinary token.
          state := fsAcceptSingleWordTokenState;
          Inc(FCount);
          FTokenString := FTokenString + c;
        end;
      fsAcceptSingleWordTokenState:
        if Pos(c, Separators) = 0 then
          FTokenString := FTokenString + c // not a separator
        else
        begin // separator: terminate the token and go back to skipping
          FTokenString := FTokenString + NULL;
          state := fsSkipSeparatorsState;
        end;
      fsAcceptMultiWordTokenState:
        if c = RightMark then
        begin
          // Closing quote: terminate the token; allow one following
          // separator to be swallowed in single-separator mode.
          FTokenString := FTokenString + NULL;
          state := fsSkipSeparatorsState;
          IgnoreNextSeparator := True;
        end
        else if c = Escape then
          state := fsEscapeState
        else
          FTokenString := FTokenString + c;
      fsEscapeState:
        begin
          // Take the escaped character literally and resume the
          // multi-word token.
          FTokenString := FTokenString + c;
          state := fsAcceptMultiWordTokenState;
        end
    end; { case }
  end; { for }
end;
{ No explicit resources to release; delegates to the inherited destructor. }
destructor TTokens.Destroy;
begin
  inherited;
end;
{ Returns the token at the given 1-based position, or an empty string when
  index is out of range.  Walks FTokenString, counting NULL delimiters until
  the requested token is reached, then gathers its characters. }
function TTokens.Token(const index: integer): string;
var
  scan: integer;     // current position within FTokenString
  current: integer;  // 1-based number of the token currently being scanned
begin
  Result := '';
  current := 1;
  for scan := 1 to Length(FTokenString) do
  begin
    // Once we have passed the requested token there is nothing left to do.
    if current > index then
      Break;
    if FTokenString[scan] = NULL then
      Inc(current)
    else if current = index then
      Result := Result + FTokenString[scan];
  end;
end;
{ Returns the number of tokens recognized when the instance was constructed. }
function TTokens.TokenCount: integer;
begin
  Result := FCount;
end;
end.