{
Tokens Library.
Copyright (C) 1996, Earl F. Glynn. All Rights Reserved.
Converted from C++ Tokens unit, December 1996
Customized for the tiOPF2 by G.Geldenhuys, March 2007
Note:
The diagram explaining the Finite State Machine used for this parser
can be found at: Docs/diagrams/tiTokenLibrary_Diagram.png
Sample Usage:
FieldSpec := TTokens.Create(FieldSpecLine, ', ', '"', '"', '\', tsMultipleSeparatorsBetweenTokens);
try
FieldType := UpperCase(FieldSpec.Token(2));
finally
FieldSpec.Free;
end;
}
unit gfx_tokenlibrary;

{$mode objfpc}{$H+}

interface

type
  // Controls how consecutive separator characters are interpreted:
  //   tsSingleSeparatorBetweenTokens    - every separator ends a token, so a
  //                                       run of separators yields empty tokens
  //                                       (e.g. "x,,,y" is 4 tokens)
  //   tsMultipleSeparatorsBetweenTokens - a run of separators counts as a
  //                                       single break between tokens
  TTokenSeparator = (tsSingleSeparatorBetweenTokens,
    tsMultipleSeparatorsBetweenTokens);

  { Splits a string into tokens at construction time.  Tokens are normally
    delimited by any character in Separators; a token wrapped between LeftMark
    and RightMark may itself contain separator characters, and Escape inside
    such a token makes the following character literal.  See the unit header
    for sample usage. }
  TTokens = class(TObject)
  private
    FOriginalString: string;  // the unmodified input string
    FCount: integer;          // number of tokens recognized during Create
    FTokenString: string;     // Separator-stripped string with tokens separated by NULLs
  public
    // Parses OriginalString immediately; results are available via
    // Token() and TokenCount afterwards.
    constructor Create(const OriginalString: string; const Separators: string;
      const LeftMark: char; const RightMark: char; const Escape: char;
      const SeparatorBetweenTokens: TTokenSeparator = tsMultipleSeparatorsBetweenTokens); overload;
    destructor Destroy; override;
    // Returns the index-th token (1-based), or '' when index is out of range.
    function Token(const index: integer): string;
    // Returns the number of tokens found in OriginalString.
    function TokenCount: integer;
  end;
implementation

const
  NULL = #$00;  // delimiter placed between tokens inside FTokenString

type
  // States of the finite state machine driving TTokens.Create
  // (diagram: Docs/diagrams/tiTokenLibrary_Diagram.png, per unit header).
  TFiniteStates = (fsSkipSeparatorsState,         // between tokens, consuming separators
    fsAcceptSingleWordTokenState,                 // inside an unquoted token
    fsAcceptMultiWordTokenState,                  // inside a LeftMark..RightMark token
    fsEscapeState);                               // just saw Escape inside a multi-word token
{ Tokenizes OriginalString with a four-state finite state machine.
  The recognized tokens are appended to FTokenString separated by NULL (#0)
  characters, and FCount is incremented once per token.

  Parameters:
    OriginalString         - the text to split.
    Separators             - any character in this string ends an unquoted token.
    LeftMark / RightMark   - open/close a multi-word token, inside which
                             separator characters are kept literally.
    Escape                 - inside a multi-word token, makes the next
                             character literal (so RightMark/Escape can appear
                             in a token's text).
    SeparatorBetweenTokens - see TTokenSeparator.

  Note: the final token may not be NULL-terminated in FTokenString (the loop
  simply ends at the end of the input); Token() tolerates this because it
  scans only to the end of FTokenString. }
constructor TTokens.Create(const OriginalString: string; const Separators: string;
  const LeftMark: char; const RightMark: char; const Escape: char;
  const SeparatorBetweenTokens: TTokenSeparator);
var
  c: char;                       // current input character
  i: integer;                    // index into FOriginalString
  IgnoreNextSeparator: boolean;  // see kludge note below
  state: TFiniteStates;          // current FSM state
begin
  inherited Create;
  FOriginalString := OriginalString;
  FTokenString := '';
  FCount := 0;
  // The following "flag" is somewhat of a kludge to allow a single
  // separator to follow the closing RightMark of a Multiword Token
  // when SeparatorBetweenTokens = tsSingleSeparatorBetweenTokens.
  // NOTE(review): the flag is only cleared when a separator is seen in
  // fsSkipSeparatorsState; if a non-separator immediately follows a closing
  // RightMark, it stays set until the NEXT separator run — confirm this
  // edge case is intended before relying on it.
  IgnoreNextSeparator := False;
  // Initial state of finite state machine that recognizes tokens
  state := fsSkipSeparatorsState;
  for i := 1 to Length(FOriginalString) do
  begin
    c := FOriginalString[i];
    case state of
      fsSkipSeparatorsState:
        // Do nothing if character is separator
        if Pos(c, Separators) > 0 then
        begin
          // For cases like multiple comma-delimited fields, treat each
          // separator as end of a token, e.g, "x,,,y" would be 4 tokens
          if SeparatorBetweenTokens = tsSingleSeparatorBetweenTokens then
          begin
            if IgnoreNextSeparator then
              IgnoreNextSeparator := False
            else
            begin
              // Record an empty token for this extra separator.
              Inc(FCount);
              FTokenString := FTokenString + NULL;
            end;
          end;
        end
        else if c = LeftMark then
        begin
          // Opening quote: start a multi-word token (the mark itself is
          // not stored in FTokenString).
          state := fsAcceptMultiWordTokenState;
          Inc(FCount);
        end
        else
        begin
          // Any other character starts an ordinary token.
          state := fsAcceptSingleWordTokenState;
          Inc(FCount);
          FTokenString := FTokenString + c;
        end;
      fsAcceptSingleWordTokenState:
        if Pos(c, Separators) = 0 then
          FTokenString := FTokenString + c // not a separator
        else
        begin // separator: terminate the token and go back to skipping
          FTokenString := FTokenString + NULL;
          state := fsSkipSeparatorsState;
        end;
      fsAcceptMultiWordTokenState:
        if c = RightMark then
        begin
          // Closing quote: terminate the token; allow one following
          // separator to be swallowed in single-separator mode.
          FTokenString := FTokenString + NULL;
          state := fsSkipSeparatorsState;
          IgnoreNextSeparator := True;
        end
        else if c = Escape then
          state := fsEscapeState
        else
          FTokenString := FTokenString + c;
      fsEscapeState:
        begin
          // Take the escaped character literally and resume the
          // multi-word token.
          FTokenString := FTokenString + c;
          state := fsAcceptMultiWordTokenState;
        end
    end; { case }
  end; { for }
end;
{ No explicit resources to release; delegates to the inherited destructor. }
destructor TTokens.Destroy;
begin
  inherited;
end;
{ Returns the token at the given 1-based position, or an empty string when
  index is out of range.  Walks FTokenString, counting NULL delimiters until
  the requested token is reached, then gathers its characters. }
function TTokens.Token(const index: integer): string;
var
  scan: integer;     // current position within FTokenString
  current: integer;  // 1-based number of the token currently being scanned
begin
  Result := '';
  current := 1;
  for scan := 1 to Length(FTokenString) do
  begin
    // Once we have passed the requested token there is nothing left to do.
    if current > index then
      Break;
    if FTokenString[scan] = NULL then
      Inc(current)
    else if current = index then
      Result := Result + FTokenString[scan];
  end;
end;
{ Returns the number of tokens recognized when the instance was constructed. }
function TTokens.TokenCount: integer;
begin
  Result := FCount;
end;
end.