# scientific-programming-exer.../exam/ex06/tokenio.py

from collections import deque

base_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
end_of_token_chars = "() \t"
whitespace_chars = " \t"


class UnexpectedCharacterException(Exception):
    """Raised when the stream yields a character outside the token alphabet."""

    def __init__(self, msg, offset, char, *args):
        # Pass the message on to Exception so str(exc) is not empty.
        Exception.__init__(self, msg, *args)
        self._msg = msg
        self._offset = offset
        self._char = char


class TokenStream(object):
    """Pulls word and parenthesis tokens from a seekable text file."""

    def __init__(self, file_):
        self._file = file_
        self._file.seek(0, 0)
        self._offset = 0

    def _getc(self):
        # Read a single character and keep track of the current offset.
        c = self._file.read(1)
        if c:
            self._offset += 1
        return c

    def _ungetc(self):
        # Push the last character back by seeking one position backwards.
        self._file.seek(self._offset - 1, 0)
        self._offset -= 1

    def get_token(self):
        result = deque()
        while True:
            c = self._getc()
            if not c:
                # EOF.
                break
            if c in base_chars:
                result.append(c)
                continue
            if c in end_of_token_chars:
                if not result:
                    # We are not inside a token.
                    if c in whitespace_chars:
                        # Some whitespace. Ignore it.
                        continue
                    # A parenthesis.
                    return c
                # End of token.
                self._ungetc()
                break
            raise UnexpectedCharacterException(
                "Unexpected character while tokenizing", self._offset, c)
        return "".join(result)