unit AG.PascalTokenizer; interface uses {$IFDEF FPC}fgl,SysUtils,Classes{$ELSE}System.Generics.Collections,System.SysUtils,System.Classes{$ENDIF}; {$IFDEF FPC} {$mode Delphi} {$ENDIF} type {$IFDEF FPC}TFpcList=specialize TFPGList;{$ENDIF} TAGTokenizerPos = record x, y: integer; end; TAGToken = record Text: string; &begin, &end: TAGTokenizerPos; ended: boolean; {$IFNDEF FPC}constructor Create(Text: string; &begin, &end: TAGTokenizerPos;ended:boolean);{$ENDIF} end; TAGPasTokenizer = class strict protected s: TStrings; y: integer; x: integer; function DoReadable():boolean; function IsReadable():boolean; function NextReadable():boolean; procedure SkipSpaces(); function GetPos():TAGTokenizerPos; procedure SetPos(pos:TAGTokenizerPos); public ended: boolean; function GetNext(): TAGToken; // procedure read_next(); constructor Create(input:TStrings); property Pos:TAGTokenizerPos read GetPos write SetPos; end; {PasTokenizerStack = class private stack: TStack; // _pop procedure _get_with_comments(); procedure _get_without_comments(); public procedure push(s: string); procedure pop(); procedure read_last(); procedure is_ended(); end;} function IsComment(s: string): boolean; function IsName(s: string): boolean; function IsString(s: string): boolean; implementation {$IFNDEF FPC} constructor TAGToken.Create(Text: string; &begin, &end: TAGTokenizerPos; ended: boolean); begin Self.Text := Text; Self.&begin := &begin; Self.&end := &end; Self.ended := ended; end; {$ENDIF} const SYMS1 = '()[]/|\@#=><:;,.$+-*^'; SPACES = #12#10#13#9#11' '; NO_NAME_SYMS = SYMS1 + SPACES + '{}'; CHARS_ID0 = '&abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'; CHARS_ID = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'; fix = {$IFDEF NEXTGEN}-1{$ELSE}0{$ENDIF}; var SYMS2:{$IFDEF FPC}TFpcList{$ELSE}TList{$ENDIF}; // array[0..8]of string=(); function IsComment(s:string):boolean; begin Result:=(s.startswith('{') or s.startswith('(*') or s.startswith('//')); end; function IsName(s:string):boolean; var i: integer; begin if length(s) <= 0 then Exit(False); if s = '&' then Exit(False); if not CHARS_ID0.Contains(s[1 + fix]) then Exit(False); for i := 1 to length(s) do begin if not CHARS_ID.Contains(s[i]) then Exit(False); end; Result:=True; end; function IsString(s: string):boolean; begin Result:=s.StartsWith(#39); end; function TAGPasTokenizer.DoReadable(): boolean; begin if not IsReadable() then begin if (y + 1 = s.Count) then ended := True else begin inc(y); x := 1+Fix; while s[y]='' do begin if y + 1 = s.Count then begin ended := True; break; end; inc(y); end; end; Exit(True); end else Exit(False); end; function TAGPasTokenizer.IsReadable(): boolean; begin Exit(length(s[y])+1+Fix > x); end; function TAGPasTokenizer.NextReadable(): boolean; begin inc(x); Result := DoReadable(); end; procedure TAGPasTokenizer.SkipSpaces(); begin DoReadable(); if not ended then begin while SPACES.Contains(s[y][x]) do NextReadable(); end; end; function TAGPasTokenizer.GetPos(): TAGTokenizerPos; begin Result.x := x; Result.y := y; end; procedure TAGPasTokenizer.SetPos(pos:TAGTokenizerPos); begin y:=Pos.x; x:=Pos.y; ended:=False; DoReadable(); end; function TAGPasTokenizer.GetNext(): TAGToken; var l,i,last_i0:integer; ml,ss,line:string; now_sym,next_sym:char; f{$IFDEF FPC},ff{$ENDIF}:boolean; begin_pos:TAGTokenizerPos; begin ml := ''; ss := ''; f := True; begin_pos := GetPos(); while f and not ended do begin line := s[y]; now_sym := line[x]; l := length(line); if x<>l+Fix then next_sym := line[x + 1] else next_sym := #0; if ml='' then begin if now_sym = '/' then begin if next_sym = '/' then begin for i:=x to l+Fix do ss:=ss+line[i]; x := l+Fix; break; end; end else if now_sym = '{' then begin ml := '}'; ss := now_sym; last_i0 := y; end else if now_sym = '(' then begin if next_sym = '*' then begin ml := ')'; inc(x); last_i0 := y; ss := now_sym + next_sym; end else begin ss := '('; inc(x); break; end; end else begin if SYMS1.Contains(now_sym) then begin ss := now_sym; inc(x); {$IFDEF FPC} ff:=false; for i:=0 to SYMS2.Count-1 do if SYMS2[i]=now_sym+next_sym then begin ff:=true; break; end; if ff then {$ELSE} if SYMS2.Contains(now_sym+next_sym) then {$ENDIF} begin inc(x); ss := ss + next_sym; end; break; end else if now_sym = #39 then begin ss := #39; inc(x); if next_sym <> '' then begin ss := ss + next_sym; while line[x] <> #39 do begin inc(x); if not IsReadable() then begin dec(x); break; end; ss := ss + line[x]; end; inc(x); end; break; end else begin while not NO_NAME_SYMS.Contains(line[x]) do begin ss := ss + line[x]; inc(x); if not IsReadable() then break; end; break; end; end; end else begin while last_i0 <> y do begin ss := ss + #10; inc(last_i0); end; ss:=ss+now_sym; if now_sym = ml then if ml = '}' then begin inc(x); break; end else if (x <> 0) and (line[x - 1] = '*') then begin inc(x); break; end; end; NextReadable(); end; {$IFDEF FPC} Result.Text:=ss; Result.&begin:=begin_pos; Result.&end:=GetPos; Result.ended:=ended; {$ELSE} Result := TAGToken.Create(ss, begin_pos, GetPos, ended); {$ENDIF} SkipSpaces; end; constructor TAGPasTokenizer.Create(input:TStrings); begin s:=input; y:=0; x:=1+fix; ended:=False; SkipSpaces; end; initialization SYMS2 := {$IFDEF FPC}TFpcList{$ELSE}TList{$ENDIF}.Create(); SYMS2.Add('>='); SYMS2.Add('<='); SYMS2.Add('<>'); SYMS2.Add(':='); SYMS2.Add('..'); SYMS2.Add('-='); SYMS2.Add('+='); SYMS2.Add('/='); SYMS2.Add('*='); SYMS2.Add('**'); SYMS2.Add('><'); SYMS2.Add('(.'); SYMS2.Add('.)'); SYMS2.Add('<<'); SYMS2.Add('>>'); end.