From d756e9abeb89b01bd0aae1fd25c700826ba96d30 Mon Sep 17 00:00:00 2001 From: Graeme Geldenhuys Date: Mon, 25 Feb 2013 14:23:15 +0000 Subject: regex: safer/proper alignment based on FPC_REQUIRES_PROPER_ALIGNMENT define --- examples/apps/ide/src/synregexpr.pas | 75 ++++++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 24 deletions(-) diff --git a/examples/apps/ide/src/synregexpr.pas b/examples/apps/ide/src/synregexpr.pas index 636e2253..e0cad0e4 100644 --- a/examples/apps/ide/src/synregexpr.pas +++ b/examples/apps/ide/src/synregexpr.pas @@ -77,7 +77,9 @@ interface {$ENDIF} {$DEFINE ComplexBraces} // support braces in complex cases {$IFNDEF UniCode} // the option applicable only for non-UniCode mode + {$IFNDEF FPC_REQUIRES_PROPER_ALIGNMENT} //sets have to be aligned {$DEFINE UseSetOfChar} // Significant optimization by using set of char + {$ENDIF} {$ENDIF} {$IFDEF UseSetOfChar} {$DEFINE UseFirstCharSet} // Fast skip between matches for r.e. that starts with determined set of chars @@ -120,8 +122,15 @@ type const REOpSz = SizeOf (TREOp) div SizeOf (REChar); // size of p-code in RegExprString units - RENextOffSz = SizeOf (TRENextOff) div SizeOf (REChar); // size of Next 'pointer' -"- + {$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT} + // add space for aligning pointer + // -1 is the correct max size but also needed for InsertOperator that needs a multiple of pointer size + RENextOffSz = (2 * SizeOf (TRENextOff) div SizeOf (REChar))-1; + REBracesArgSz = (2 * SizeOf (TREBracesArg) div SizeOf (REChar)); // add space for aligning pointer + {$ELSE} + RENextOffSz = (SizeOf (TRENextOff) div SizeOf (REChar)); // size of Next 'pointer' -"- REBracesArgSz = SizeOf (TREBracesArg) div SizeOf (REChar); // size of BRACES arguments -"- + {$ENDIF} type TRegExprInvertCaseFunction = function (const Ch : REChar) : REChar @@ -650,6 +659,24 @@ const XIgnoredChars = [' ', #9, #$d, #$a]; {$ENDIF} +function AlignToPtr(const p: Pointer): Pointer; { returns Align(p, SizeOf(Pointer)) when FPC_REQUIRES_PROPER_ALIGNMENT is defined, otherwise p unchanged; wrap every PRENextOff access in this helper } +begin +{$IFDEF 
FPC_REQUIRES_PROPER_ALIGNMENT} + Result := Align(p, SizeOf(Pointer)); +{$ELSE} + Result := p; +{$ENDIF} +end; + +function AlignToInt(const p: Pointer): Pointer; { returns Align(p, SizeOf(integer)) when FPC_REQUIRES_PROPER_ALIGNMENT is defined, otherwise p unchanged; wrap every PREBracesArg access in this helper so reads and writes resolve to the same address } +begin +{$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT} + Result := Align(p, SizeOf(integer)); +{$ELSE} + Result := p; +{$ENDIF} +end; + {=============================================================} {=================== WideString functions ====================} {=============================================================} @@ -1454,7 +1481,7 @@ procedure TRegExpr.Tail (p : PRegExprChar; val : PRegExprChar); UNTIL false; // Set Next 'pointer' if val < scan - then PRENextOff (scan + REOpSz)^ := - (scan - val) //###0.948 + then PRENextOff (AlignToPtr(scan + REOpSz))^ := - (scan - val) //###0.948 // work around PWideChar subtraction bug (Delphi uses // shr after subtraction to calculate widechar distance %-( ) // so, if difference is negative we have .. the "feature" :( @@ -1462,7 +1489,7 @@ procedure TRegExpr.Tail (p : PRegExprChar; val : PRegExprChar); // "P – Q computes the difference between the address given // by P (the higher address) and the address given by Q (the // lower address)" - Delphi help quotation. 
- else PRENextOff (scan + REOpSz)^ := val - scan; //###0.933 + else PRENextOff (AlignToPtr(scan + REOpSz))^ := val - scan; //###0.933 end; { of procedure TRegExpr.Tail --------------------------------------------------------------} @@ -1483,7 +1510,7 @@ function TRegExpr.EmitNode (op : TREOp) : PRegExprChar; //###0.933 if Result <> @regdummy then begin PREOp (regcode)^ := op; inc (regcode, REOpSz); - PRENextOff (regcode)^ := 0; // Next "pointer" := nil + PRENextOff (AlignToPtr(regcode))^ := 0; // Next "pointer" := nil inc (regcode, RENextOffSz); {$IFDEF DebugSynRegExpr} if regcode-programm>regsize then @@ -1526,8 +1553,8 @@ procedure TRegExpr.InsertOperator (op : TREOp; opnd : PRegExprChar; sz : integer {$IFDEF DebugSynRegExpr} if regcode-programm>regsize then raise Exception.Create('TRegExpr.InsertOperator buffer overrun'); - if (opndregsize) then - raise Exception.Create('TRegExpr.InsertOperator invalid opnd'); +// if (opndregsize) then + // raise Exception.Create('TRegExpr.InsertOperator invalid opnd'); {$ENDIF} dst := regcode; while src > opnd do begin @@ -1876,11 +1903,11 @@ function TRegExpr.ParsePiece (out flagp : integer) : PRegExprChar; if regcode <> @regdummy then begin off := (Result + REOpSz + RENextOffSz) - (regcode - REOpSz - RENextOffSz); // back to Atom after LOOPENTRY - PREBracesArg (regcode)^ := ABracesMin; + PREBracesArg (AlignToInt(regcode))^ := ABracesMin; inc (regcode, REBracesArgSz); - PREBracesArg (regcode)^ := ABracesMax; + PREBracesArg (AlignToInt(regcode))^ := ABracesMax; inc (regcode, REBracesArgSz); - PRENextOff (regcode)^ := off; + PRENextOff (AlignToPtr(regcode))^ := off; inc (regcode, RENextOffSz); {$IFDEF DebugSynRegExpr} if regcode-programm>regsize then @@ -1902,8 +1929,8 @@ function TRegExpr.ParsePiece (out flagp : integer) : PRegExprChar; else TheOp := BRACES; InsertOperator (TheOp, Result, REOpSz + RENextOffSz + REBracesArgSz * 2); if regcode <> @regdummy then begin - PREBracesArg (Result + REOpSz + RENextOffSz)^ := ABracesMin; 
- PREBracesArg (Result + REOpSz + RENextOffSz + REBracesArgSz)^ := ABracesMax; + PREBracesArg (AlignToInt(Result + REOpSz + RENextOffSz))^ := ABracesMin; + PREBracesArg (AlignToInt(Result + REOpSz + RENextOffSz + REBracesArgSz))^ := ABracesMax; end; end; @@ -2782,7 +2809,7 @@ function TRegExpr.regnext (p : PRegExprChar) : PRegExprChar; Result := nil; EXIT; end; - offset := PRENextOff (p + REOpSz)^; //###0.933 inlined NEXT + offset := PRENextOff (AlignToPtr(p + REOpSz))^; //###0.933 inlined NEXT if offset = 0 then Result := nil else Result := p + offset; @@ -2815,7 +2842,7 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean; scan := prog; while scan <> nil do begin - len := PRENextOff (scan + 1)^; //###0.932 inlined regnext + len := PRENextOff (AlignToPtr(scan + 1))^; //###0.932 inlined regnext if len = 0 then next := nil else next := scan + len; @@ -3104,9 +3131,9 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean; Error (reeLoopWithoutEntry); EXIT; end; - opnd := scan + PRENextOff (scan + REOpSz + RENextOffSz + 2 * REBracesArgSz)^; - BracesMin := PREBracesArg (scan + REOpSz + RENextOffSz)^; - BracesMax := PREBracesArg (scan + REOpSz + RENextOffSz + REBracesArgSz)^; + opnd := scan + PRENextOff (AlignToPtr(scan + REOpSz + RENextOffSz + 2 * REBracesArgSz))^; + BracesMin := PREBracesArg (AlignToInt(scan + REOpSz + RENextOffSz))^; + BracesMax := PREBracesArg (AlignToInt(scan + REOpSz + RENextOffSz + REBracesArgSz))^; { AlignToInt, not AlignToPtr: must resolve to the address ParsePiece wrote ABracesMax to } save := reginput; if LoopStack [LoopStackIdx] >= BracesMin then begin // Min alredy matched - we can work if scan^ = LOOP then begin @@ -3170,8 +3197,8 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean; else if (scan^ = PLUS) or (scan^ = PLUSNG) then BracesMin := 1 // PLUS else begin // BRACES - BracesMin := PREBracesArg (scan + REOpSz + RENextOffSz)^; - BracesMax := PREBracesArg (scan + REOpSz + RENextOffSz + REBracesArgSz)^; + BracesMin := PREBracesArg (AlignToInt(scan + REOpSz + RENextOffSz))^; { AlignToInt: same helper ParsePiece uses when storing ABracesMin } + BracesMax := 
PREBracesArg (AlignToInt(scan + REOpSz + RENextOffSz + REBracesArgSz))^; { AlignToInt: same helper ParsePiece uses when storing ABracesMax } end; save := reginput; opnd := scan + REOpSz + RENextOffSz; @@ -3351,8 +3378,8 @@ procedure TRegExpr.FillFirstCharSet (prog : PRegExprChar); EXIT; end; LOOP, LOOPNG: begin //###0.940 - opnd := scan + PRENextOff (scan + REOpSz + RENextOffSz + REBracesArgSz * 2)^; - min_cnt := PREBracesArg (scan + REOpSz + RENextOffSz)^; + opnd := scan + PRENextOff (AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz * 2))^; + min_cnt := PREBracesArg (AlignToInt(scan + REOpSz + RENextOffSz))^; { AlignToInt: TREBracesArg cells are written through AlignToInt in ParsePiece } FillFirstCharSet (opnd); if min_cnt = 0 then FillFirstCharSet (next); @@ -3367,7 +3394,7 @@ procedure TRegExpr.FillFirstCharSet (prog : PRegExprChar); end; BRACES, BRACESNG: begin //###0.940 opnd := scan + REOpSz + RENextOffSz + REBracesArgSz * 2; - min_cnt := PREBracesArg (scan + REOpSz + RENextOffSz)^; // BRACES + min_cnt := PREBracesArg (AlignToInt(scan + REOpSz + RENextOffSz))^; { AlignToInt: TREBracesArg cells are written through AlignToInt in ParsePiece } // BRACES FillFirstCharSet (opnd); if min_cnt > 0 then EXIT; @@ -4045,14 +4072,14 @@ function TRegExpr.Dump : RegExprString; {$ENDIF} if (op = BRACES) or (op = BRACESNG) then begin //###0.941 // show min/max argument of BRACES operator - Result := Result + Format ('{%d,%d}', [PREBracesArg (s)^, PREBracesArg (s + REBracesArgSz)^]); + Result := Result + Format ('{%d,%d}', [PREBracesArg (AlignToInt(s))^, PREBracesArg (AlignToInt(s + REBracesArgSz))^]); inc (s, REBracesArgSz * 2); end; {$IFDEF ComplexBraces} if (op = LOOP) or (op = LOOPNG) then begin //###0.940 Result := Result + Format (' -> (%d) {%d,%d}', [ - (s - programm - (REOpSz + RENextOffSz)) + PRENextOff (s + 2 * REBracesArgSz)^, - PREBracesArg (s)^, PREBracesArg (s + REBracesArgSz)^]); + (s - programm - (REOpSz + RENextOffSz)) + PRENextOff (AlignToPtr(s + 2 * REBracesArgSz))^, + PREBracesArg (AlignToInt(s))^, PREBracesArg (AlignToInt(s + REBracesArgSz))^]); inc (s, 2 * REBracesArgSz + RENextOffSz); end; {$ENDIF} -- cgit v1.2.3-70-g09d2