Work on pascal version of tokenizer

This commit is contained in:
2018-08-27 01:25:33 +03:00
parent b227c23ce0
commit da59396578

View File

@@ -2,241 +2,186 @@ unit AG.PascalTokeniser;
interface interface
uses
System.Generics.Collections,
System.Classes;
// Token classification helpers. Each takes the text of one token and
// reports whether it belongs to the given category.
// A Pascal function declaration must state its result type; all three
// predicates yield a Boolean (is_name already exits with False/True-style
// values in its implementation).
function is_comment(s:string):boolean;
function is_name(s:string):boolean;
function is_string(s:string):boolean;
const
  // Single-character symbol tokens. Pascal has no backslash escapes, so a
  // backslash is written as the one-character literal '\'.
  SYMS1 = ['(',')','[',']','/','|','\','@','#','=','>','<',':',';',',','.','$','+','-','*'];
  // Two-character operator tokens. Sets can only hold ordinal values, so the
  // strings are stored in a typed constant array instead of a set.
  SYMS2: array[0..8] of string = ('>=','<=','<>',':=','..','-=','+=','/=','*=');
  // Whitespace characters: form feed, line feed, carriage return, tab,
  // vertical tab and space (control characters use the #nn notation).
  SPACES = [#12,#10,#13,#9,#11,' '];
  // Characters that terminate a name token.
  NO_NAME_SYMS = SYMS1 + SPACES + ['{','}'];
  // Characters allowed as the first character of an identifier.
  // Declared as a set so that `ch in CHARS_ID0` is a valid membership test.
  CHARS_ID0 = ['&','a'..'z','A'..'Z','_'];
  // Characters allowed in the remaining positions of an identifier.
  CHARS_ID = ['a'..'z','A'..'Z','0'..'9','_'];
type type
TPasTokenizer=class PasTokenizer=class
(*class PasTokenizer(): private
def __init__(self, s): s:TStrings;
self.s, self.y, self.x, self.ended = s, 0, 0, False y:integer;
self._skip_spaces() x:integer;
ended:boolean;
def _do_readable(self): procedure _do_readable();
if not self._is_readable(): procedure _is_readable();
if self.y+1 == len(self.s): procedure _next_readable();
self.ended = True procedure _skip_spaces();
else: procedure _get_pos();
self.y+=1 procedure _set_pos(i0:integer; i1:integer);
self.x=0 public
while not self.s[self.y]: procedure get_next();
if self.y+1 == len(self.s): procedure read_next();
self.ended = True procedure is_ended();
break end;
self.y+=1 PasTokenizerStack=class
return True private
else: stack:TStack<integer>;
return False // _pop
procedure _get_with_comments();
def _is_readable(self): procedure _get_without_comments();
return len(self.s[self.y])>self.x public
procedure push(s:string);
def _next_readable(self): procedure pop();
self.x+=1 procedure read_last();
return self._do_readable() procedure is_ended();
def _skip_spaces(self):
self._do_readable()
if not self.ended:
while self.s[self.y][self.x] in SPACES:
self._next_readable()
def _get_pos(self):
return self.y, self.x
def _set_pos(self, i0, i1):
self.y, self.x, self.ended = i0, i1, False
self._do_readable()
def get_next(self):
begin_pos = self._get_pos()
ml, ss, f = '', '', True
str_changed = False
while f and not self.ended:
line = self.s[self.y]
now_sym = line[self.x]
l = len(line)
if self.x+1 != l:
next_sym = line[self.x+1]
else:
next_sym = ''
if ml == '':
if now_sym == '/':
if next_sym == '/':
ss = line[self.x:]
self.x = l
break
elif now_sym == '{':
ml = '}'
ss=[now_sym]
last_i0 = self.y
elif now_sym == '(':
if next_sym == '*':
ml = ')'
self.x+=1
last_i0 = self.y
ss = [now_sym+next_sym]
else:
ss = '('
self.x+=1
break
else:
if now_sym in SYMS1:
ss = now_sym
self.x+=1
if now_sym + next_sym in SYMS2:
self.x+=1
ss = ss + next_sym
break
elif now_sym=="'":
ss="'"
self.x+=1
if next_sym!='':
ss = ss + next_sym
while line[self.x]!="'":
self.x+=1
if not self._is_readable():
self.x-=1
break
ss = ss + line[self.x]
self.x+=1
break
else:
while not(line[self.x] in NO_NAME_SYMS):
ss=ss+line[self.x]
self.x+=1
if not self._is_readable():
break
break
else:
while last_i0!=self.y:
ss.append('')
last_i0+=1
ss[-1] = ss[-1] + now_sym
if now_sym==ml:
if ml=='}':
self.x+=1
break
elif self.x!=0:
if line[self.x-1]=='*':
self.x+=1
break
self._next_readable()
if len(ss)==1:
ss=ss[0]
ss=(ss,begin_pos,self._get_pos(),self.ended)
self._skip_spaces()
return ss
def read_next(self):
i0, i1 = self._get_pos()
z = self.get_next()
self._set_pos(i0, i1)
return z
def is_ended(self):
return self.ended*)
end; end;
implementation implementation
(* function is_comment(s:string);
import queue, threading begin
// TODO
end;
SYMS1 = ['(',')','[',']','/','|','\\','@','#','=','>','<',':',';',',','.','$','+','-','*'] function is_name(s:string);
SYMS2 = ['>=','<=','<>',':=','..','-=','+=','/=','*='] var
SPACES = ['\f','\n','\r','\t','\v',' '] i:integer;
NO_NAME_SYMS = SYMS1 + SPACES + ['{','}'] begin
CHARS_ID0 = '&abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_' if length(s) <= 0 then Exit(False);
CHARS_ID = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_' if s = '&' then Exit(False);
if not (s[0] in CHARS_ID0) then Exit(False);
for i := 1 to length(s) do
begin
if not (s[i] in CHARS_ID) then
Exit(False);
end;
end;
def is_comment(s): function is_string(s);
if type(s) is list: begin
return True // TODO
else: end;
return s.startswith('{') or s.startswith('(*') or s.startswith('//')
def is_name(s): class function PasTokenizer._do_readable();
if len(s)<=0: begin
return False if not _is_readable() then
if s=='&': begin
return False if (y+1 = Length(s)) then
if not (s[0] in CHARS_ID0): begin
return False ended = True;
for i in s[1:]: end
if not (i in CHARS_ID): else begin
return False inc(y);
return True x = 0;
while not s[y] = '' do
begin
if y+1 = length(s) then
begin
ended = True;
break;
end;
inc(y);
end;
end;
Exit(True);
end else Exit(False);
end;
def is_string(s): class function PasTokenizer._is_readable();
return s.startswith("'") begin
Exit(length(s[y]) > x);
end;
// Moves the cursor one column to the right and then lets _do_readable
// normalise the position; the value produced by _do_readable is returned.
// This is an instance method (it changes the field x), so it must not be a
// class method, and a function needs an explicit result type.
// NOTE(review): the class declaration still lists _next_readable and
// _do_readable as procedures - they need to become Boolean functions there
// as well.
function PasTokenizer._next_readable():boolean;
begin
  inc(x);
  Exit(_do_readable());
end;
// Advances the cursor past any run of whitespace characters (SPACES),
// stopping at the first other character or when the input is exhausted.
// This is an instance method that returns nothing, hence a plain procedure.
procedure PasTokenizer._skip_spaces();
begin
  _do_readable();
  // `ended` has to be re-checked on every step: once the last line is used
  // up, x points one past its final character and s[y][x] must not be read.
  // NOTE(review): s[y][x] assumes zero-based string indexing - confirm.
  while (not ended) and (s[y][x] in SPACES) do
    _next_readable();
end;
class PasTokenizerStack(): class function PasTokenizer._get_pos();
def __init__(self, s, comments=True): begin
self.main = PasTokenizer(s) Exit(y, x);
self.stack = [] end;
if comments:
self._pop = self._get_with_comments
else:
self._pop = self._get_without_comments
def _get_with_comments(self): class function PasTokenizer._set_pos(i0:integer; i1:integer);
return self.main.get_next() begin
y = i0;
def _get_without_comments(self): x = i1;
while True: ended = False;
s = self.main.get_next() _do_readable();
if not is_comment(s[0]): end;
return s
if s[3]:
return ('',(0,0),(0,0),True)
def push(self, s):
self.stack.append(s)
def pop(self):
if self.stack:
return self.stack.pop()
else:
return self._pop()
def read_last(self):
if not self.stack:
self.stack.append(self._pop())
return self.stack[-1]
def is_ended(self):
return self.stack and self.main.is_ended()
class PasTokenizerParallelStack(PasTokenizerStack):
def __init__(self, s, comments = True, qlong = 1000):
super(PasTokenizerParallelStack,self).__init__(s, comments)
self.queue = queue.Queue(qlong)
th = threading.Thread(target = self._work, args = (self,))
th.start()
def _get_with_comments(self):
s = self.queue.get()
return s
def _get_without_comments(self):
while True:
s = self.queue.get()
if not is_comment(s[0]):
return s
if s[3]:
return ('',(0,0),(0,0),True)
def _work(self,s):
while not self.main.is_ended():
self.queue.put(self.main.get_next())
self.queue.put(('',(0,0),(0,0),True))
def is_ended(self):
return self.stack and self.main.is_ended()and self.queue.empty()
def stop(self):
self.main.ended = True
self.queue.task_done()
*)
// Reads the next token starting at the current cursor position.
// Work in progress: only '//' line comments and the opening of a '{' comment
// are handled so far; the remaining branches are still marked TODO and the
// collected text (ss) is not yet returned to the caller.
// Declared as a procedure to match the class declaration and because the
// body does not produce a result yet.
procedure PasTokenizer.get_next();
var
  begin_pos:integer;
  l:integer;
  last_i0:integer;
  m1:string;
  ss:string;
  line:string;
  now_sym:char;
  next_sym:char;
  f:boolean;
begin
  // Local variables cannot carry initialisers in Delphi, so set them here.
  m1 := '';
  ss := '';
  f := True;
  // NOTE(review): _get_pos is meant to yield both y and x; begin_pos can only
  // hold one integer - needs a record type once _get_pos is finished.
  begin_pos := _get_pos();
  while f and not ended do
  begin
    line := s[y];
    // NOTE(review): x is a zero-based column; line[x] assumes zero-based
    // string indexing - confirm.
    now_sym := line[x];
    l := length(line);
    // One character of look-ahead; #0 means "no next character on this line".
    if x+1 <> l then
    begin
      next_sym := line[x+1];
    end else begin
      next_sym := #0;
    end;
    if m1 = '' then
    begin
      if now_sym = '/' then
      begin
        if next_sym = '/' then
        begin
          // Line comment: the token is the rest of the line. Copy counts
          // from 1 while x counts from 0, hence x+1.
          ss := Copy(line, x+1, l-x);
          x := l;
          break;
        end;
      end
      else if now_sym = '{' then
      begin
        // Start of a brace comment: remember the closing delimiter and the
        // line on which the comment started.
        m1 := '}';
        ss := now_sym;
        last_i0 := y;
      end
      else if now_sym = '(' then
      begin
        // TODO
      end;
      // TODO
    end;
    // Step to the next character; without this the loop never terminates
    // for any character that does not hit a break above.
    _next_readable();
  end;
end;
end. end.