from collections import deque

# Characters that may appear inside a token (ASCII letters only — digits and
# underscores are deliberately not accepted by this tokenizer).
base_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Characters that terminate a token: parentheses and whitespace.
end_of_token_chars = "() \t"
# Subset of end_of_token_chars that is skipped entirely between tokens.
whitespace_chars = " \t"


class UnexpectedCharacterException(Exception):
    """Raised when the tokenizer encounters a character it cannot classify.

    Carries the human-readable message, the 1-based stream offset just past
    the offending character, and the character itself.
    """

    def __init__(self, msg, offset, char, *args):
        # Pass msg to the base Exception so str(e) is informative.
        # (The original called Exception.__init__(self, *args) only, which
        # left str(e) empty.)
        super().__init__(msg, *args)
        self._msg = msg
        self._offset = offset
        self._char = char


class TokenStream:
    """Pull tokens one at a time from a seekable text file object.

    A token is either a maximal run of characters from ``base_chars`` or a
    single parenthesis. Whitespace between tokens is skipped. At end of
    stream, :meth:`get_token` returns the empty string.
    """

    def __init__(self, file_):
        """Wrap *file_* (must support read/seek) and rewind it to the start."""
        self._file = file_
        self._file.seek(0, 0)
        # Number of characters consumed so far; kept in sync with the file
        # position so _ungetc can seek back by exactly one character.
        self._offset = 0

    def _getc(self):
        """Read and return one character, or '' at EOF; track the offset."""
        c = self._file.read(1)
        if c:
            self._offset += 1
        return c

    def _ungetc(self):
        """Push the most recently read character back onto the stream."""
        self._file.seek(self._offset - 1, 0)
        self._offset -= 1

    def get_token(self):
        """Return the next token.

        Returns:
            A run of ``base_chars`` characters, a single ``"("`` or ``")"``,
            or ``""`` at end of stream.

        Raises:
            UnexpectedCharacterException: on any character that is neither a
                token character nor an end-of-token character.
        """
        result = deque()
        while True:
            c = self._getc()
            if not c:
                # EOF: return whatever (possibly nothing) we accumulated.
                break
            if c in base_chars:
                result.append(c)
                continue
            if c in end_of_token_chars:
                if not result:
                    # Not inside a token yet.
                    if c in whitespace_chars:
                        # Whitespace between tokens: ignore it.
                        continue
                    # A lone parenthesis is itself a token.
                    return c
                # The delimiter ends the current token; push it back so the
                # next call sees it.
                self._ungetc()
                break
            raise UnexpectedCharacterException(
                "Unexpected character while tokenizing", self._offset, c
            )
        return "".join(result)