Files
AGPascalTokenizer/AG.PascalTokeniser.pas
2018-08-26 22:05:46 +03:00

244 lines
6.8 KiB
ObjectPascal

unit AG.PascalTokeniser;
interface
implementation
(*
#
# PyPascalTokenizer
# Author: Artem Gavrilov (@Artem3213212)
# License: MPL 2.0
#
import queue, threading
# Single-character punctuation tokens. Each of them also terminates a word,
# because the list is folded into NO_NAME_SYMS below.
# The caret (pointer type / dereference symbol in Pascal) is appended at the
# end so that the positions of the pre-existing entries do not change.
SYMS1 = [
    '(', ')', '[', ']', '/', '|', '\\', '@', '#', '=',
    '>', '<', ':', ';', ',', '.', '$', '+', '-', '*',
    '^',
]
# Two-character operators; the tokenizer tries these before falling back to
# the single-character token that starts them.
SYMS2 = ['>=', '<=', '<>', ':=', '..', '-=', '+=', '/=', '*=']
# Characters skipped between tokens.
SPACES = ['\f', '\n', '\r', '\t', '\v', ' ']
# Every character that ends a word (identifier, number, keyword).
NO_NAME_SYMS = SYMS1 + SPACES + ['{', '}']
# Characters allowed at the start of an identifier (ampersand included).
CHARS_ID0 = '&abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
# Characters allowed after the first character of an identifier.
CHARS_ID = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
def is_comment(s):
    """Tell whether a token text is a comment.

    A list is always treated as a comment: the tokenizer returns comments
    that span several lines as a list with one string per line.  A string is
    a comment when it starts with one of the three comment openers.
    """
    if isinstance(s, list):
        return True
    # startswith accepts a tuple, so one call covers all three openers.
    return s.startswith(('{', '(*', '//'))
def is_name(s):
    """Tell whether a token text is an identifier.

    The first character must be in CHARS_ID0 and every following character
    in CHARS_ID.  The empty string and a lone ampersand are rejected.
    Anything that is not a string (for example the list used for a
    multi-line comment) is rejected explicitly rather than through the
    substring semantics of the ``in`` operator.
    """
    if not isinstance(s, str) or not s or s == '&':
        return False
    if s[0] not in CHARS_ID0:
        return False
    return all(ch in CHARS_ID for ch in s[1:])
def is_string(s):
    """Tell whether a token text is a string literal.

    A string literal token starts with a single quote.  Tokens that are not
    strings (multi-line comments are returned as lists) yield False instead
    of raising AttributeError.
    """
    return isinstance(s, str) and s.startswith("'")
class PasTokenizer():
    """Cursor-based tokenizer over Pascal source given as a sequence of lines.

    ``s`` is indexed first by line and then by column, so it is expected to
    be a list of strings.  The cursor is ``(self.y, self.x)`` (line, column)
    and ``self.ended`` becomes True once the cursor has run past the last
    readable character.

    Every token is returned as ``(text, begin_pos, end_pos, ended)``.
    ``text`` is a string, except for a comment spanning several lines, which
    is a list holding one string per line.
    """

    def __init__(self, s):
        self.s, self.y, self.x, self.ended = s, 0, 0, False
        self._skip_spaces()

    def _do_readable(self):
        """Put the cursor on a readable character if it is not on one.

        Returns False when the cursor already was readable.  Otherwise the
        cursor moves to the start of the next non-empty line, or ``ended``
        is set when there is no such line, and True is returned.
        """
        if self._is_readable():
            return False
        last = len(self.s) - 1
        # ``>=`` also covers an empty input, where ``last`` is -1.
        if self.y >= last:
            self.ended = True
        else:
            self.y += 1
            self.x = 0
            while not self.s[self.y]:
                if self.y == last:
                    self.ended = True
                    break
                self.y += 1
        return True

    def _is_readable(self):
        """Return True when the cursor points at an existing character."""
        return self.y < len(self.s) and self.x < len(self.s[self.y])

    def _next_readable(self):
        """Advance one column, then normalise the cursor."""
        self.x += 1
        return self._do_readable()

    def _skip_spaces(self):
        """Move the cursor past any run of characters listed in SPACES."""
        self._do_readable()
        # ``ended`` is re-checked on every step: whitespace at the very end
        # of the input leaves the cursor one past the last character.
        while not self.ended and self.s[self.y][self.x] in SPACES:
            self._next_readable()

    def _get_pos(self):
        """Return the cursor as ``(line, column)``."""
        return self.y, self.x

    def _set_pos(self, i0, i1):
        """Move the cursor to line ``i0``, column ``i1`` and clear ``ended``."""
        self.y, self.x, self.ended = i0, i1, False
        self._do_readable()

    def get_next(self):
        """Consume and return the next token.

        Returns ``(text, begin_pos, end_pos, ended)`` where ``ended`` is the
        state of the input right after the token, before the whitespace that
        follows it is skipped.  Once the input is exhausted the text is the
        empty string.
        """
        begin_pos = self._get_pos()
        ml = ''            # closing character of the comment being read
        ss = ''            # token text collected so far
        last_i0 = self.y   # line of the last entry of a comment list
        open_star = None   # position of the star of a paren-star opener
        while not self.ended:
            line = self.s[self.y]
            l = len(line)
            now_sym = line[self.x]
            next_sym = line[self.x + 1] if self.x + 1 < l else ''
            if ml == '':
                if now_sym == '/' and next_sym == '/':
                    # Line comment: the rest of the line is the token.
                    ss = line[self.x:]
                    self.x = l
                    break
                elif now_sym == '{':
                    ml = '}'
                    ss = [now_sym]
                    last_i0 = self.y
                elif now_sym == '(' and next_sym == '*':
                    ml = ')'
                    self.x += 1
                    last_i0 = self.y
                    open_star = (self.y, self.x)
                    ss = [now_sym + next_sym]
                elif now_sym == '(':
                    ss = '('
                    self.x += 1
                    break
                elif now_sym in SYMS1:
                    # A slash that does not start a line comment lands here
                    # too, so division and '/=' are emitted as tokens.
                    ss = now_sym
                    self.x += 1
                    if now_sym + next_sym in SYMS2:
                        ss += next_sym
                        self.x += 1
                    break
                elif now_sym == "'":
                    # String literal: up to the next quote on the same line,
                    # or to the end of the line when it is not terminated.
                    close = line.find("'", self.x + 1)
                    stop = l if close == -1 else close + 1
                    ss = line[self.x:stop]
                    self.x = stop
                    break
                else:
                    # Word: runs to the next separator or quote.
                    stop = self.x
                    while (stop < l and line[stop] != "'"
                            and line[stop] not in NO_NAME_SYMS):
                        stop += 1
                    if stop == self.x:
                        # A separator no branch above claimed (a stray
                        # closing brace).  Emit it alone so the cursor
                        # always advances.
                        stop += 1
                    ss = line[self.x:stop]
                    self.x = stop
                    break
            else:
                # Inside a comment: add one list entry per line passed.
                while last_i0 != self.y:
                    ss.append('')
                    last_i0 += 1
                ss[-1] += now_sym
                if now_sym == ml:
                    if ml == '}':
                        self.x += 1
                        break
                    # A paren-star comment closes on a parenthesis preceded
                    # by a star, but the star of the opener does not count.
                    if (self.x != 0 and line[self.x - 1] == '*'
                            and (self.y, self.x - 1) != open_star):
                        self.x += 1
                        break
            self._next_readable()
        # A comment that stayed on one line is returned as a plain string.
        if isinstance(ss, list) and len(ss) == 1:
            ss = ss[0]
        token = (ss, begin_pos, self._get_pos(), self.ended)
        self._skip_spaces()
        return token

    def read_next(self):
        """Return the next token without consuming it."""
        i0, i1 = self._get_pos()
        z = self.get_next()
        self._set_pos(i0, i1)
        return z

    def is_ended(self):
        """Return True once the whole input has been consumed."""
        return self.ended
class PasTokenizerStack():
    """Token source with push-back on top of a PasTokenizer.

    Tokens handed to ``push`` are returned by ``pop`` in last-in, first-out
    order before any new token is read from the tokenizer.  With
    ``comments=False`` comment tokens are skipped when reading.
    """

    def __init__(self, s, comments=True):
        self.main = PasTokenizer(s)
        self.stack = []
        # Bind the reader once so that pop() needs no branch per call.
        if comments:
            self._pop = self._get_with_comments
        else:
            self._pop = self._get_without_comments

    def _get_with_comments(self):
        """Read the next token, comments included."""
        return self.main.get_next()

    def _get_without_comments(self):
        """Read the next token that is not a comment.

        When a comment token reports the end of the input, an empty
        end-of-input token is returned instead.
        """
        while True:
            s = self.main.get_next()
            if not is_comment(s[0]):
                return s
            if s[3]:
                return ('', (0, 0), (0, 0), True)

    def push(self, s):
        """Put a token back; it is the next one returned by pop()."""
        self.stack.append(s)

    def pop(self):
        """Return the most recently pushed token, or read a new one."""
        if self.stack:
            return self.stack.pop()
        return self._pop()

    def read_last(self):
        """Return the next token without consuming it."""
        if not self.stack:
            self.stack.append(self._pop())
        return self.stack[-1]

    def is_ended(self):
        """Return True when nothing is left to pop.

        That requires both an empty push-back stack and an exhausted
        tokenizer; the result is always a bool.
        """
        return not self.stack and self.main.is_ended()
class PasTokenizerParallelStack(PasTokenizerStack):
    """PasTokenizerStack whose tokenizer runs in a background thread.

    The worker thread pushes tokens into a bounded queue of ``qlong``
    entries and finishes with one end marker, ``('', (0, 0), (0, 0), True)``.
    Reads take tokens from that queue.  Once the end marker has been read,
    or stop() has been called, every further read returns the end marker
    instead of blocking.
    """

    def __init__(self, s, comments = True, qlong = 1000):
        super(PasTokenizerParallelStack,self).__init__(s, comments)
        self.queue = queue.Queue(qlong)
        # The end marker is recognised by identity, so a regular token that
        # happens to compare equal is not mistaken for it.
        self._end_token = ('', (0, 0), (0, 0), True)
        self._finished = False
        self._stop_requested = threading.Event()
        self._thread = threading.Thread(target = self._work, args = (self,))
        # Daemon thread: a consumer that never drains the queue must not
        # keep the interpreter alive.
        self._thread.daemon = True
        self._thread.start()

    def _fetch(self):
        """Take one item from the queue; never blocks after the end."""
        if self._finished:
            return self._end_token
        s = self.queue.get()
        if s is self._end_token:
            self._finished = True
        return s

    def _put(self, item):
        """Queue ``item``; return False if stop() was requested first."""
        while not self._stop_requested.is_set():
            try:
                # The timeout only bounds how long a stop request can go
                # unnoticed while the queue is full.
                self.queue.put(item, timeout = 0.1)
            except queue.Full:
                continue
            return True
        return False

    def _get_with_comments(self):
        """Read the next queued token, comments included."""
        return self._fetch()

    def _get_without_comments(self):
        """Read the next queued token that is not a comment.

        When a comment token reports the end of the input, an empty
        end-of-input token is returned instead.
        """
        while True:
            s = self._fetch()
            if not is_comment(s[0]):
                return s
            if s[3]:
                return ('',(0,0),(0,0),True)

    def _work(self, s = None):
        """Worker body: tokenize everything, then queue the end marker.

        ``s`` is unused; it is kept because the thread is started with the
        instance as an extra argument.
        """
        while not self.main.is_ended():
            if not self._put(self.main.get_next()):
                return
        self._put(self._end_token)

    def is_ended(self):
        """Return True when nothing is left to pop.

        The end is reported only after the end marker was read (or stop()
        was called), so a token the worker has produced but not yet queued
        cannot be missed.
        """
        return not self.stack and self._finished

    def stop(self):
        """Ask the worker to quit and make all further reads non-blocking.

        Queue.task_done() is not called here: it does not release a
        producer blocked on a full queue and raises ValueError when called
        more often than items were queued.
        """
        self._stop_requested.set()
        self.main.ended = True
        self._finished = True
*)
end.