from collections import deque
|
|
|
|
# Characters considered insignificant whitespace between tokens.
WHITESPACE = set(" \t\n\r")

# Any of these characters ends the word token currently being read.
TOKENS_END_OF_WORD = WHITESPACE.union(";:,.[]")

# After one of these tokens the following "\n" is emitted as its own token.
TOKENS_EXPECT_NEWLINE = set(":]")
|
|
|
|
|
|
class Tokenizer:
    """Iterator that splits characters pulled from *context* into tokens.

    The context must provide ``getc()`` returning the next character
    (a falsy value — e.g. ``""`` — at end of input) and ``ungetc(c)``
    pushing one character back onto the stream.
    """

    def __init__(self, context):
        # Character source providing getc()/ungetc().
        self._context = context
        # True after a token from TOKENS_EXPECT_NEWLINE was emitted: the
        # next "\n" is then returned as its own token instead of skipped.
        self._expect_newline = False
        # True after a word token was terminated by whitespace: exactly one
        # whitespace character is then returned as its own token.
        self._expect_whitespace = False

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next token.

        Raises:
            StopIteration: when the input is exhausted.
        """
        current_token = deque()

        while True:
            c = self._context.getc()

            # Sometimes we need the explicit newline.
            if self._expect_newline and c == "\n":
                self._expect_newline = False
                return c

            # Skip multiple whitespaces.
            if c in WHITESPACE and not current_token:
                if self._expect_whitespace:
                    self._expect_whitespace = False
                    return c
                continue

            if c in TOKENS_END_OF_WORD:
                if current_token:
                    # Push the terminator back so it becomes the next
                    # token; whitespace terminators are re-emitted once.
                    self._context.ungetc(c)
                    if c in WHITESPACE:
                        self._expect_whitespace = True
                    return "".join(current_token)
                else:
                    if c in TOKENS_EXPECT_NEWLINE:
                        self._expect_newline = True
                    return c

            if not c:
                # End of input: flush a pending word first so the final
                # token is not silently dropped (bug fix — the original
                # raised StopIteration here even with buffered characters).
                if current_token:
                    return "".join(current_token)
                raise StopIteration

            current_token.append(c)
|
|
|
|
|