summaryrefslogtreecommitdiff
path: root/src/corelib/fpg_csvparser.pas
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/fpg_csvparser.pas')
-rw-r--r--src/corelib/fpg_csvparser.pas320
1 files changed, 320 insertions, 0 deletions
diff --git a/src/corelib/fpg_csvparser.pas b/src/corelib/fpg_csvparser.pas
new file mode 100644
index 00000000..f5c0d0ed
--- /dev/null
+++ b/src/corelib/fpg_csvparser.pas
@@ -0,0 +1,320 @@
+{
+ fpGUI - Free Pascal GUI Toolkit
+
+ Copyright (C) 2006 - 2014 See the file AUTHORS.txt, included in this
+ distribution, for details of the copyright.
+
+ See the file COPYING.modifiedLGPL, included in this distribution,
+ for details about redistributing fpGUI.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ Description:
+ Uses a Finite State Machine to parse CSV files.
+ Graeme Geldenhuys <graemeg@gmail.com>
+
+ This unit shows how one could use the State Design Pattern to implement a
+ FSM (Finite State Machine) to create a CSV Parser. It handles invalid
+ CSV as well and will raise an appropriate exception. In the State pattern,
+ each of the states becomes a subclass of the base class. Each subclass must
+ implement the abstract method which will handle the input character and
+ decide on the next state.
+}
+
+unit fpg_CSVParser;
+
+{$mode objfpc}{$H+}
+
+interface
+
+uses
+ Classes;
+
+type
+ { forward declarations }
+ TCSVParser = class;
+ TParserStateClass = class of TCSVParserState;
+
+
+ { Abstract State object }
+ TCSVParserState = class(TObject)
+ private
+ FParser: TCSVParser;
+ procedure ChangeState(NewState: TParserStateClass);
+ procedure AddCharToCurrField(Ch: char);
+ procedure AddCurrFieldToList;
+ public
+ constructor Create(AParser: TCSVParser);
+ { Must be implemented in the concrete classes to handle the input character
+ and decide on the next state. }
+ procedure ProcessChar(Ch: AnsiChar; Pos: integer); virtual; abstract;
+ end;
+
+
+ { A concrete state object - used when starting a new field }
+ TCSVParserFieldStartState = class(TCSVParserState)
+ public
+ procedure ProcessChar(Ch: AnsiChar; Pos: integer); override;
+ end;
+
+
+ { A concrete state object - used while scanning a field }
+ TCSVParserScanFieldState = class(TCSVParserState)
+ public
+ procedure ProcessChar(Ch: AnsiChar; Pos: integer); override;
+ end;
+
+
+ { A concrete state object - used while scanning double quoted fields }
+ TCSVParserScanQuotedState = class(TCSVParserState)
+ public
+ procedure ProcessChar(Ch: AnsiChar; Pos: integer); override;
+ end;
+
+
+ { A concrete state object - used when found the ending double quote }
+ TCSVParserEndQuotedState = class(TCSVParserState)
+ public
+ procedure ProcessChar(Ch: AnsiChar; Pos: integer); override;
+ end;
+
+
+ { A concrete state object - some error occured / invalid CSV structure }
+ TCSVParserGotErrorState = class(TCSVParserState)
+ public
+ procedure ProcessChar(Ch: AnsiChar; Pos: integer); override;
+ end;
+
+
+ { The actual state machine - CSV parser }
+ TCSVParser = class(TObject)
+ private
+ FCurrentLine: string;
+ FState: TCSVParserState;
+ { Cache state objects for greater performance. This comes in handy when
+ parsing a large CSV file. For smaller files you might want to create them
+ on the fly. }
+ FFieldStartState: TCSVParserFieldStartState;
+ FScanFieldState: TCSVParserScanFieldState;
+ FScanQuotedState: TCSVParserScanQuotedState;
+ FEndQuotedState: TCSVParserEndQuotedState;
+ FGotErrorState: TCSVParserGotErrorState;
+ { Fields used during parsing }
+ FCurrField: string;
+ FFieldList: TStrings;
+ function GetState: TParserStateClass;
+ procedure SetState(const Value: TParserStateClass);
+ protected
+ procedure AddCharToCurrField(Ch: char);
+ procedure AddCurrFieldToList;
+ { An example of Self Encapsulating Field refactoring }
+ property State: TParserStateClass read GetState write SetState;
+ public
+ constructor Create;
+ destructor Destroy; override;
+ { prodecure to call, to start the parsing process }
+ procedure ExtractFields(const S: string; const pFieldList: TStrings);
+ property CurrentLine: string read FCurrentLine;
+ end;
+
+
+// global singleton function
+function gCSVParser: TCSVParser;
+
+
+implementation
+
+uses
+ SysUtils;
+
+var
+ uCSVParser: TCSVParser;
+
+
+// Lazy mans singleton
+function gCSVParser: TCSVParser;
+begin
+ if uCSVParser = nil then
+ uCSVParser := TCSVParser.Create;
+ Result := uCSVParser;
+end;
+
+{ TCSVParser }
+
+constructor TCSVParser.Create;
+begin
+ inherited Create;
+ FCurrentLine := '';
+ FFieldStartState := TCSVParserFieldStartState.Create(Self);
+ FScanFieldState := TCSVParserScanFieldState.Create(Self);
+ FScanQuotedState := TCSVParserScanQuotedState.Create(Self);
+ FEndQuotedState := TCSVParserEndQuotedState.Create(Self);
+ FGotErrorState := TCSVParserGotErrorState.Create(Self);
+end;
+
+destructor TCSVParser.Destroy;
+begin
+ FFieldStartState.Free;
+ FScanFieldState.Free;
+ FScanQuotedState.Free;
+ FEndQuotedState.Free;
+ FGotErrorState.Free;
+ inherited;
+end;
+
+function TCSVParser.GetState: TParserStateClass;
+begin
+ Result := TParserStateClass(FState.ClassType);
+end;
+
+procedure TCSVParser.SetState(const Value: TParserStateClass);
+begin
+ if Value = TCSVParserFieldStartState then
+ FState := FFieldStartState
+ else if Value = TCSVParserScanFieldState then
+ FState := FScanFieldState
+ else if Value = TCSVParserScanQuotedState then
+ FState := FScanQuotedState
+ else if Value = TCSVParserEndQuotedState then
+ FState := FEndQuotedState
+ else if Value = TCSVParserGotErrorState then
+ FState := FGotErrorState;
+end;
+
+procedure TCSVParser.ExtractFields(const S: string; const pFieldList: TStrings);
+var
+ i: integer;
+ Ch: AnsiChar;
+begin
+ FCurrentLine := S;
+ FFieldList := pFieldList;
+ Assert(Assigned(FFieldList), 'FieldList not assigned');
+ { Initialize by clearing the string list, and starting in FieldStart state }
+ FFieldList.Clear;
+ State := TCSVParserFieldStartState;
+ FCurrField := '';
+
+ { Read through all the characters in the string }
+ for i := 1 to Length(s) do
+ begin
+ { Get the next character }
+ Ch := s[i];
+ FState.ProcessChar(Ch, i);
+ end;
+
+ { If we are in the ScanQuoted or GotError state at the end of the string,
+ there was a problem with a closing quote. You can add the second if test
+ for an extra failsafe! }
+ if (State = TCSVParserScanQuotedState) then
+ // or (State = TCSVParserGotErrorState) then
+ raise Exception.Create('Missing closing quote');
+
+ { If the current field is not empty, add it to the list }
+ if (FCurrField <> '') then
+ AddCurrFieldToList;
+end;
+
+procedure TCSVParser.AddCharToCurrField(Ch: char);
+begin
+ FCurrField := FCurrField + Ch;
+end;
+
+procedure TCSVParser.AddCurrFieldToList;
+begin
+ FFieldList.Add(FCurrField);
+ // Clear the field in preparation for collecting the next one
+ FCurrField := '';
+end;
+
+{ TCSVParserState }
+
+constructor TCSVParserState.Create(AParser: TCSVParser);
+begin
+ inherited Create;
+ FParser := AParser;
+end;
+
+procedure TCSVParserState.ChangeState(NewState: TParserStateClass);
+begin
+ FParser.State := NewState;
+end;
+
+procedure TCSVParserState.AddCharToCurrField(Ch: char);
+begin
+ FParser.AddCharToCurrField(Ch);
+end;
+
+procedure TCSVParserState.AddCurrFieldToList;
+begin
+ FParser.AddCurrFieldToList;
+end;
+
+{ TCSVParserFieldStartState }
+
+procedure TCSVParserFieldStartState.ProcessChar(Ch: AnsiChar; Pos: integer);
+begin
+ case Ch of
+ '"': ChangeState(TCSVParserScanQuotedState);
+ ',': AddCurrFieldToList;
+ else
+ AddCharToCurrField(Ch);
+ ChangeState(TCSVParserScanFieldState);
+ end;
+end;
+
+{ TCSVParserScanFieldState }
+
+procedure TCSVParserScanFieldState.ProcessChar(Ch: AnsiChar; Pos: integer);
+begin
+ if (Ch = ',') then
+ begin
+ AddCurrFieldToList;
+ ChangeState(TCSVParserFieldStartState);
+ end
+ else
+ AddCharToCurrField(Ch);
+end;
+
+{ TCSVParserScanQuotedState }
+
+procedure TCSVParserScanQuotedState.ProcessChar(Ch: AnsiChar; Pos: integer);
+begin
+ if (Ch = '"') then
+ ChangeState(TCSVParserEndQuotedState)
+ else
+ AddCharToCurrField(Ch);
+end;
+
+{ TCSVParserEndQuotedState }
+
+procedure TCSVParserEndQuotedState.ProcessChar(Ch: AnsiChar; Pos: integer);
+begin
+ if (Ch = ',') then
+ begin
+ AddCurrFieldToList;
+ ChangeState(TCSVParserFieldStartState);
+ end
+ else
+ ChangeState(TCSVParserGotErrorState);
+end;
+
+{ TCSVParserGotErrorState }
+
+procedure TCSVParserGotErrorState.ProcessChar(Ch: AnsiChar; Pos: integer);
+begin
+ raise Exception.Create(Format('Error in line at position %d: ' + #10 +
+ '<%s>', [Pos, FParser.CurrentLine]));
+end;
+
+
+initialization
+ uCSVParser := nil;
+
+finalization
+ if uCSVParser <> nil then
+ uCSVParser.Free;
+
+end.
+