Init
This commit is contained in:
243
AG.PascalTokeniser.pas
Normal file
243
AG.PascalTokeniser.pas
Normal file
@@ -0,0 +1,243 @@
|
||||
unit AG.PascalTokeniser;
|
||||
|
||||
interface
|
||||
|
||||
implementation
|
||||
|
||||
(*
|
||||
#
|
||||
# PyPascalTokenizer
|
||||
# Author: Artem Gavrilov (@Artem3213212)
|
||||
# License: MPL 2.0
|
||||
#
|
||||
|
||||
import queue, threading
|
||||
|
||||
SYMS1 = ['(',')','[',']','/','|','\\','@','#','=','>','<',':',';',',','.','$','+','-','*']
|
||||
SYMS2 = ['>=','<=','<>',':=','..','-=','+=','/=','*=']
|
||||
SPACES = ['\f','\n','\r','\t','\v',' ']
|
||||
NO_NAME_SYMS = SYMS1 + SPACES + ['{','}']
|
||||
CHARS_ID0 = '&abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
|
||||
CHARS_ID = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
|
||||
|
||||
def is_comment(s):
|
||||
if type(s) is list:
|
||||
return True
|
||||
else:
|
||||
return s.startswith('{') or s.startswith('(*') or s.startswith('//')
|
||||
|
||||
def is_name(s):
|
||||
if len(s)<=0:
|
||||
return False
|
||||
if s=='&':
|
||||
return False
|
||||
if not (s[0] in CHARS_ID0):
|
||||
return False
|
||||
for i in s[1:]:
|
||||
if not (i in CHARS_ID):
|
||||
return False
|
||||
return True
|
||||
|
||||
def is_string(s):
|
||||
return s.startswith("'")
|
||||
|
||||
class PasTokenizer():
|
||||
def __init__(self, s):
|
||||
self.s, self.y, self.x, self.ended = s, 0, 0, False
|
||||
self._skip_spaces()
|
||||
|
||||
def _do_readable(self):
|
||||
if not self._is_readable():
|
||||
if self.y+1 == len(self.s):
|
||||
self.ended = True
|
||||
else:
|
||||
self.y+=1
|
||||
self.x=0
|
||||
while not self.s[self.y]:
|
||||
if self.y+1 == len(self.s):
|
||||
self.ended = True
|
||||
break
|
||||
self.y+=1
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def _is_readable(self):
|
||||
return len(self.s[self.y])>self.x
|
||||
|
||||
def _next_readable(self):
|
||||
self.x+=1
|
||||
return self._do_readable()
|
||||
|
||||
def _skip_spaces(self):
|
||||
self._do_readable()
|
||||
if not self.ended:
|
||||
while self.s[self.y][self.x] in SPACES:
|
||||
self._next_readable()
|
||||
|
||||
def _get_pos(self):
|
||||
return self.y, self.x
|
||||
|
||||
def _set_pos(self, i0, i1):
|
||||
self.y, self.x, self.ended = i0, i1, False
|
||||
self._do_readable()
|
||||
|
||||
def get_next(self):
|
||||
begin_pos = self._get_pos()
|
||||
ml, ss, f = '', '', True
|
||||
str_changed = False
|
||||
while f and not self.ended:
|
||||
line = self.s[self.y]
|
||||
now_sym = line[self.x]
|
||||
l = len(line)
|
||||
if self.x+1 != l:
|
||||
next_sym = line[self.x+1]
|
||||
else:
|
||||
next_sym = ''
|
||||
if ml == '':
|
||||
if now_sym == '/':
|
||||
if next_sym == '/':
|
||||
ss = line[self.x:]
|
||||
self.x = l
|
||||
break
|
||||
elif now_sym == '{':
|
||||
ml = '}'
|
||||
ss=[now_sym]
|
||||
last_i0 = self.y
|
||||
elif now_sym == '(':
|
||||
if next_sym == '*':
|
||||
ml = ')'
|
||||
self.x+=1
|
||||
last_i0 = self.y
|
||||
ss = [now_sym+next_sym]
|
||||
else:
|
||||
ss = '('
|
||||
self.x+=1
|
||||
break
|
||||
else:
|
||||
if now_sym in SYMS1:
|
||||
ss = now_sym
|
||||
self.x+=1
|
||||
if now_sym + next_sym in SYMS2:
|
||||
self.x+=1
|
||||
ss = ss + next_sym
|
||||
break
|
||||
elif now_sym=="'":
|
||||
ss="'"
|
||||
self.x+=1
|
||||
if next_sym!='':
|
||||
ss = ss + next_sym
|
||||
while line[self.x]!="'":
|
||||
self.x+=1
|
||||
if not self._is_readable():
|
||||
self.x-=1
|
||||
break
|
||||
ss = ss + line[self.x]
|
||||
self.x+=1
|
||||
break
|
||||
else:
|
||||
while not(line[self.x] in NO_NAME_SYMS):
|
||||
ss=ss+line[self.x]
|
||||
self.x+=1
|
||||
if not self._is_readable():
|
||||
break
|
||||
break
|
||||
else:
|
||||
while last_i0!=self.y:
|
||||
ss.append('')
|
||||
last_i0+=1
|
||||
ss[-1] = ss[-1] + now_sym
|
||||
if now_sym==ml:
|
||||
if ml=='}':
|
||||
self.x+=1
|
||||
break
|
||||
elif self.x!=0:
|
||||
if line[self.x-1]=='*':
|
||||
self.x+=1
|
||||
break
|
||||
self._next_readable()
|
||||
if len(ss)==1:
|
||||
ss=ss[0]
|
||||
ss=(ss,begin_pos,self._get_pos(),self.ended)
|
||||
self._skip_spaces()
|
||||
return ss
|
||||
|
||||
def read_next(self):
|
||||
i0, i1 = self._get_pos()
|
||||
z = self.get_next()
|
||||
self._set_pos(i0, i1)
|
||||
return z
|
||||
|
||||
def is_ended(self):
|
||||
return self.ended
|
||||
|
||||
class PasTokenizerStack():
|
||||
def __init__(self, s, comments=True):
|
||||
self.main = PasTokenizer(s)
|
||||
self.stack = []
|
||||
if comments:
|
||||
self._pop = self._get_with_comments
|
||||
else:
|
||||
self._pop = self._get_without_comments
|
||||
|
||||
def _get_with_comments(self):
|
||||
return self.main.get_next()
|
||||
|
||||
def _get_without_comments(self):
|
||||
while True:
|
||||
s = self.main.get_next()
|
||||
if not is_comment(s[0]):
|
||||
return s
|
||||
if s[3]:
|
||||
return ('',(0,0),(0,0),True)
|
||||
|
||||
def push(self, s):
|
||||
self.stack.append(s)
|
||||
|
||||
def pop(self):
|
||||
if self.stack:
|
||||
return self.stack.pop()
|
||||
else:
|
||||
return self._pop()
|
||||
|
||||
def read_last(self):
|
||||
if not self.stack:
|
||||
self.stack.append(self._pop())
|
||||
return self.stack[-1]
|
||||
|
||||
def is_ended(self):
|
||||
return self.stack and self.main.is_ended()
|
||||
|
||||
class PasTokenizerParallelStack(PasTokenizerStack):
|
||||
def __init__(self, s, comments = True, qlong = 1000):
|
||||
super(PasTokenizerParallelStack,self).__init__(s, comments)
|
||||
self.queue = queue.Queue(qlong)
|
||||
th = threading.Thread(target = self._work, args = (self,))
|
||||
th.start()
|
||||
|
||||
def _get_with_comments(self):
|
||||
s = self.queue.get()
|
||||
return s
|
||||
|
||||
def _get_without_comments(self):
|
||||
while True:
|
||||
s = self.queue.get()
|
||||
if not is_comment(s[0]):
|
||||
return s
|
||||
if s[3]:
|
||||
return ('',(0,0),(0,0),True)
|
||||
|
||||
def _work(self,s):
|
||||
while not self.main.is_ended():
|
||||
self.queue.put(self.main.get_next())
|
||||
self.queue.put(('',(0,0),(0,0),True))
|
||||
|
||||
def is_ended(self):
|
||||
return self.stack and self.main.is_ended()and self.queue.empty()
|
||||
|
||||
def stop(self):
|
||||
self.main.ended = True
|
||||
self.queue.task_done()
|
||||
*)
|
||||
|
||||
end.
|
||||
Reference in New Issue
Block a user