57 lines
1.2 KiB
Python
57 lines
1.2 KiB
Python
|
from collections import deque
|
||
|
|
||
|
# Characters that may appear inside a word token (ASCII letters only).
base_chars = (
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
)

# Characters that are skipped when they appear between tokens.
whitespace_chars = " \t"

# Characters that terminate a word token: both parentheses plus the
# whitespace characters above.
end_of_token_chars = "()" + whitespace_chars
|
||
|
|
||
|
class UnexpectedCharacterException(Exception):
    """Raised when the tokenizer reads a character it cannot handle.

    Parameters:
        msg: Human-readable description of the problem.
        offset: Stream offset reported by the raiser.
        char: The offending character.
        *args: Extra positional arguments forwarded unchanged to
            ``Exception`` (they end up in ``self.args``).
    """

    def __init__(self, msg, offset, char, *args):
        # Only the extra arguments go to the base class, so ``self.args``
        # keeps the value existing callers already see.
        Exception.__init__(self, *args)
        self._msg = msg
        self._offset = offset
        self._char = char

    @property
    def msg(self):
        """The description passed to the constructor."""
        return self._msg

    @property
    def offset(self):
        """The stream offset passed to the constructor."""
        return self._offset

    @property
    def char(self):
        """The offending character passed to the constructor."""
        return self._char

    def __str__(self):
        # Without this override str(exc) is built from ``self.args`` alone,
        # which is empty in the common case, so tracebacks would show no
        # detail about what went wrong or where.
        return "%s (offset %s, character %r)" % (
            self._msg,
            self._offset,
            self._char,
        )
|
||
|
|
||
|
|
||
|
class TokenStream(object):
    """Split a character stream into parenthesis and word tokens.

    A token is either a single parenthesis or a maximal run of characters
    from ``base_chars``. Characters in ``whitespace_chars`` separate tokens
    and are otherwise ignored. Any other character is an error.

    Parameters:
        file_: A file-like object providing ``read(1)`` and ``seek(0, 0)``.
            It is rewound to the start when the stream is created.
    """

    def __init__(self, file_):
        self._file = file_
        # Tokenizing always starts at the beginning of the stream.
        self._file.seek(0, 0)
        # Number of characters handed out so far, net of pushbacks.
        self._offset = 0
        # Most recent character returned by _getc(); what _ungetc() restores.
        self._last = ""
        # Character waiting to be re-read, or "" when there is none.
        self._pushed_back = ""

    def _getc(self):
        """Return the next character, or an empty value at end of input."""
        if self._pushed_back:
            c = self._pushed_back
            self._pushed_back = ""
        else:
            c = self._file.read(1)
        if c:
            self._offset += 1
            self._last = c
        return c

    def _ungetc(self):
        """Push the most recently read character back onto the stream.

        The character is kept in a one-slot buffer instead of seeking the
        underlying file to a computed position: text streams only define
        seeking to 0 or to a value returned by ``tell()``.

        Raises:
            RuntimeError: If there is no character to push back, or one is
                already pending (only a single level is supported).
        """
        if self._pushed_back or not self._last:
            raise RuntimeError("only one character of pushback is supported")
        self._pushed_back = self._last
        self._offset -= 1

    def get_token(self):
        """Return the next token from the stream.

        Returns:
            ``"("`` or ``")"`` for a parenthesis, the word for a run of
            ``base_chars`` characters, or ``""`` when the end of input is
            reached with no word pending.

        Raises:
            UnexpectedCharacterException: If a character is read that is in
                neither ``base_chars`` nor ``end_of_token_chars``. The
                reported offset is the number of characters consumed so
                far, including the offending one.
        """
        chars = []
        while True:
            c = self._getc()
            if not c:
                # EOF: return whatever has been collected (possibly "").
                break
            if c in base_chars:
                chars.append(c)
                continue
            if c not in end_of_token_chars:
                raise UnexpectedCharacterException("Unexpected character while tokenizing", self._offset, c)
            if chars:
                # The delimiter ends the current word; leave it for the
                # next call so a parenthesis is not lost.
                self._ungetc()
                break
            if c in whitespace_chars:
                # Whitespace between tokens is ignored.
                continue
            # A parenthesis outside of a word is a token on its own.
            return c
        return "".join(chars)
|
||
|
|
||
|
|
||
|
|