Stabilize code base (needs improvement, but should work for now)

This commit is contained in:
2018-10-28 15:27:24 +02:00
parent 57fe45f4ec
commit c9584c15ec
5 changed files with 584 additions and 385 deletions

View File

@@ -3,216 +3,245 @@ unit AG.PascalTokeniser;
interface
uses
System.Generics.Collections,
System.Generics.Collections, System.SysUtils,
System.Classes;
function is_comment(s:string);
function is_name(s:string);
function is_string(s:string);
const
SYMS1 = ['(',')','[',']','/','|','\','@','#','=','>','<',':',';',',','.','$','+','-','*'];
SYMS2 = ['>=','<=','<>',':=','..','-=','+=','/=','*='];
SPACES = [#12,#10,#13,#9,#11,' '];
NO_NAME_SYMS = SYMS1 + SPACES + ['{','}'];
CHARS_ID0 = '&abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_';
CHARS_ID = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_';
type
PasTokenizer=class
private
s:TStrings;
y:integer;
x:integer;
ended:boolean;
procedure _do_readable();
procedure _is_readable();
procedure _next_readable();
procedure _skip_spaces();
procedure _get_pos();
procedure _set_pos(i0:integer; i1:integer);
public
procedure get_next();
procedure read_next();
procedure is_ended();
// Cursor position inside the tokenized text: y is the line index and
// x is the character index within that line.
TTokenizerPos = record
x, y: integer;
end;
PasTokenizerStack=class
private
stack:TStack<integer>;
// _pop
procedure _get_with_comments();
procedure _get_without_comments();
public
procedure push(s:string);
procedure pop();
procedure read_last();
procedure is_ended();
// One token handed out by TPasTokenizer.get_next.
TToken = record
// Raw text of the token.
Text: string;
// Cursor positions captured before (&begin) and after (&end) the token was read.
&begin, &end: TTokenizerPos;
// Value of the tokenizer's "ended" flag at the moment the token was created.
ended: boolean;
// Fills all four fields from the arguments of the same name.
constructor Create(Text: string; &begin, &end: TTokenizerPos;
ended: boolean);
end;
// Tokenizer that walks a list of text lines with a (line, column) cursor
// and returns one token per get_next call.
TPasTokenizer = class
private
// Lines being tokenized; assigned from the constructor argument (not copied).
s: TStrings;
// Index of the current line in s.
y: integer;
// Index of the current character inside s[y].
x: integer;
// Set to True when the cursor cannot advance past the last line.
ended: boolean;
// Moves the cursor to the next non-empty line when x is past the end of s[y];
// returns True when the cursor was not readable on entry.
function _do_readable(): boolean;
// True while x still addresses a character of s[y].
function _is_readable(): boolean;
// Advances x by one and then calls _do_readable.
function _next_readable(): boolean;
// Advances the cursor over whitespace characters.
procedure _skip_spaces();
// Returns the current cursor as a TTokenizerPos.
function _get_pos(): TTokenizerPos;
// Sets the cursor to line i0, column i1 and clears "ended".
procedure _set_pos(i0: integer; i1: integer);
public
// Reads the token at the cursor and leaves the cursor at the start of the next one.
function get_next(): TToken;
// procedure read_next();
// procedure is_ended();
// Starts tokenizing "input" from its first line, skipping leading whitespace.
constructor Create(input:TStrings);
end;
{PasTokenizerStack = class
private
stack: TStack<integer>;
// _pop
procedure _get_with_comments();
procedure _get_without_comments();
public
procedure push(s: string);
procedure pop();
procedure read_last();
procedure is_ended();
end;}
function is_comment(s: string): boolean;
function is_name(s: string): boolean;
function is_string(s: string): boolean;
implementation
function is_comment(s:string);
// Builds a token record by copying every argument into the field of the
// same name. Self is spelled out because the parameters shadow the fields.
constructor TToken.Create(Text: string; &begin, &end: TTokenizerPos;
  ended: boolean);
begin
  Self.ended := ended;
  Self.&end := &end;
  Self.&begin := &begin;
  Self.Text := Text;
end;
const
// Characters that form a one-character symbol token.
SYMS1 = '()[]/|\@#=><:;,.$+-*';
// Whitespace characters skipped between tokens (FF, LF, CR, TAB, VT, space).
SPACES = #12#10#13#9#11' ';
// Characters that end a name-like token: symbols, whitespace and braces.
NO_NAME_SYMS = SYMS1 + SPACES + '{}';
// Characters is_name accepts as the first character of a name.
CHARS_ID0 = '&abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_';
// Characters is_name accepts inside a name.
CHARS_ID = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_';
// Offset added to 1-based string indexes: -1 when NEXTGEN is defined, otherwise 0.
// NOTE(review): presumably compensates for zero-based strings on NEXTGEN compilers - confirm.
fix = {$IFDEF NEXTGEN}-1{$ELSE}0{$ENDIF};
var
// Two-character symbol tokens; filled in the unit's initialization section.
SYMS2: TList<String>; // array[0..8]of string=();
// Reports whether token text s is a comment token.
//
// TPasTokenizer.get_next keeps the opening delimiter at the start of every
// comment it emits ('//' line comments, '{ ... }' blocks, '(* ... *)' blocks),
// so the check is a prefix test. An empty string is not a comment.
// Previously the body was empty and the function result was undefined.
//
// NOTE(review): compiler directives such as '{$IFDEF X}' also start with '{'
// and are therefore reported as comments - confirm that callers want this.
function is_comment(s: string): boolean;
begin
  Result := s.StartsWith('//') or s.StartsWith('{') or s.StartsWith('(*');
end;
// Reports whether s is a valid name (identifier).
//
// Rules:
//   * the empty string and a lone '&' are rejected;
//   * the first character must be in CHARS_ID0 (letters, '_' or the '&' prefix);
//   * every following character must be in CHARS_ID (letters, digits, '_').
//
// All indexes go through "fix" so the function works with both 1-based and
// zero-based (NEXTGEN) strings.
function is_name(s: string): boolean;
var
  i: integer;
begin
  if (s = '') or (s = '&') then
    Exit(False);
  if not CHARS_ID0.Contains(s[1 + fix]) then
    Exit(False);
  // Start at the second character: the first one was validated against
  // CHARS_ID0 above and may legitimately be '&', which CHARS_ID lacks.
  for i := 2 + fix to length(s) + fix do
    if not CHARS_ID.Contains(s[i]) then
      Exit(False);
  // Every character passed; without this the result was left undefined.
  Result := True;
end;
// Reports whether token text s is a string literal token.
//
// TPasTokenizer.get_next starts every string token with the opening
// apostrophe (#39), so the check is a prefix test. An empty string is not
// a string literal. Previously the body was empty and the function result
// was undefined.
function is_string(s: string): boolean;
begin
  Result := s.StartsWith(#39);
end;
// Makes sure the cursor (y, x) addresses a readable character.
//
// When x has run past the end of line y, the cursor moves to the first
// character of the next non-empty line. If there is no such line, "ended"
// is set instead.
//
// Returns True when the cursor was not readable on entry (it was moved, or
// the end of input was reached) and False when nothing had to be done.
function TPasTokenizer._do_readable(): boolean;
begin
  // No line at index y at all (empty input, or a position past the last
  // line): there is nothing to read, and s[y] would raise a list index error.
  if y >= s.Count then
  begin
    ended := True;
    Exit(True);
  end;

  if _is_readable() then
    Exit(False);

  if y + 1 = s.Count then
    // Already on the last line: nothing left to advance to.
    ended := True
  else
  begin
    inc(y);
    x := 1 + fix;
    // Skip empty lines; stop with "ended" if the last line is empty too.
    while s[y] = '' do
    begin
      if y + 1 = s.Count then
      begin
        ended := True;
        break;
      end;
      inc(y);
    end;
  end;
  Result := True;
end;
// True while column x still addresses a character of line y.
// "fix" shifts the last valid index for zero-based strings.
function TPasTokenizer._is_readable(): boolean;
begin
  Result := x <= length(s[y]) + fix;
end;
// Steps the cursor one character forward, then lets _do_readable wrap it
// onto the next line if needed. Returns whatever _do_readable reports.
function TPasTokenizer._next_readable(): boolean;
begin
  x := x + 1;
  Exit(_do_readable());
end;
// Advances the cursor past any whitespace (see SPACES), crossing line
// boundaries as needed, and stops at the first non-space character or at
// the end of input.
procedure TPasTokenizer._skip_spaces();
begin
  _do_readable();
  // "ended" must be re-checked on every step: once the end of input is
  // reached the cursor no longer addresses a character, so s[y][x] would
  // index past the line (or into an empty last line).
  while (not ended) and SPACES.Contains(s[y][x]) do
    _next_readable();
end;
// Snapshot of the current cursor as a TTokenizerPos record.
function TPasTokenizer._get_pos(): TTokenizerPos;
begin
  Result.y := Self.y;
  Result.x := Self.x;
end;
// Repositions the cursor: i0 is the line index, i1 the character index.
// Clears "ended" and then normalises the position through _do_readable.
procedure TPasTokenizer._set_pos(i0: integer; i1: integer);
begin
  ended := False;
  x := i1;
  y := i0;
  _do_readable();
end;
class function PasTokenizer.get_next();
function TPasTokenizer.get_next(): TToken;
var
begin_pos:integer;
l:integer;
last_i0:integer;
m1:string = '';
ss:string = '';
line:string;
now_sym:char;
next_sym:char;
f:boolean = True;
str_changed:boolean = True;
l,i,last_i0:integer;
ml,ss,line:string;
now_sym,next_sym:char;
f,str_changed:boolean;
begin_pos:TTokenizerPos;
begin
begin_pos = _get_pos();
ml := '';
ss := '';
f := True;
str_changed := True;
begin_pos := _get_pos();
while f and not ended do
begin
line = s[y];
now_sym = line[x];
l = length(line);
if x+1 <> 1 then
begin
next_sym = line[x+1];
end else begin
next_sym = '';
end;
if m1 = '' then
line := s[y];
now_sym := line[x];
l := length(line);
if x<>l+Fix then
next_sym := line[x + 1]
else
next_sym := #0;
if ml='' then
begin
if now_sym = '/' then
begin
if next_sym = '/' then
begin
ss = line[x];
x = 1;
for i:=x to l+Fix do
ss:=ss+line[i];
x := l+Fix;
break;
end;
end
else if now_sym = '{' then
begin
m1 = '}';
ss = [now_sym];
last_i0 = y;
ml := '}';
ss := now_sym;
last_i0 := y;
end
else if now_sym = '(' then
begin
if next_sym = '*' then
begin
ml = ')';
ml := ')';
inc(x);
last_i0 = y;
ss = [now_sym+next_sym];
end
last_i0 := y;
ss := now_sym + next_sym;
end
else
begin
ss = '(';
ss := '(';
inc(x);
break;
end;
end
else
begin
if now_sym in SYMS1 then
if SYMS1.Contains(now_sym) then
begin
ss = now_sym;
ss := now_sym;
inc(x);
if now_sym + next_sym in SYMS2 then
if SYMS2.Contains(now_sym + next_sym) then
begin
inc(x);
ss = ss + next_sym;
ss := ss + next_sym;
end;
break;
end
else if now_sym = '' {TODO "'"} then
else if now_sym = #39 then
begin
ss = '' {TODO "'"};
ss := #39;
inc(x);
if next_sym <> '' then
begin
ss = ss + next_sym;
while line[x] <> '' {TODO "'"} do
ss := ss + next_sym;
while line[x] <> #39 do
begin
inc(x);
if not _is_readable() then
@@ -220,7 +249,7 @@ begin
dec(x);
break;
end;
ss = ss + line[x];
ss := ss + line[x];
end;
inc(x);
end;
@@ -228,11 +257,12 @@ begin
end
else
begin
while not (line[x] in NO_NAME_SYMS) then
while not NO_NAME_SYMS.Contains(line[x]) do
begin
ss = ss + line[x]
ss := ss + line[x];
inc(x);
if not _is_readable() then break;
if not _is_readable() then
break;
end;
break;
end;
@@ -242,12 +272,51 @@ begin
begin
while last_i0 <> y do
begin
{TODO ss.append('')}
ss := ss + #10;
inc(last_i0);
end;
//TODO
if ss[length(ss) + fix] = #10 then
begin
ss[length(ss) + fix] := now_sym;
ss := ss + #10;
end;
if now_sym = ml then
if ml = '}' then
begin
inc(x);
break;
end
else if (x <> 0) and (line[x - 1] = '*') then
begin
inc(x);
break;
end;
end;
//TODO
_next_readable();
end;
Result := TToken.Create(ss, begin_pos, _get_pos, ended);
_skip_spaces;
end;
// Creates a tokenizer over "input" (the list is referenced, not copied).
// The cursor starts at the first character of the first line and is then
// moved past any leading whitespace.
constructor TPasTokenizer.Create(input: TStrings);
begin
  ended := False;
  x := 1 + fix;
  y := 0;
  s := input;
  _skip_spaces;
end;
initialization
  // Two-character symbol tokens recognised by the tokenizer.
  SYMS2 := TList<string>.Create();
  SYMS2.AddRange(['>=', '<=', '<>', ':=', '..', '-=', '+=', '/=', '*=']);

finalization
  // Release the list created above; it was previously never freed.
  SYMS2.Free;
end.