# Parsers for spoken-number tokens ("twenty", "hundred", ...) and operator words.
from collections import deque
|
|
from abc import ABCMeta, abstractmethod
|
|
|
|
from finite_state_machine import FiniteStateMachine
|
|
|
|
# Word -> value lookup tables for spoken-number parsing.
# Keys are the exact (lowercase) subtokens the tokenizer recognizes.
BASE_NUMBER_TOKENS = {"one": 1, "two": 2, "three": 3, "four": 4, "five": 5,
                      "six": 6, "seven": 7, "eight": 8, "nine": 9}

# Tens words. The historical misspellings ("fourty", "secenty", "eigthy")
# are kept so existing input keeps parsing; the correct English spellings
# are added alongside them so standard input parses as well.
DECI_NUMBER_TOKENS = {"twenty": 20, "thirty": 30,
                      "fourty": 40, "forty": 40,
                      "fifty": 50, "sixty": 60,
                      "secenty": 70, "seventy": 70,
                      "eigthy": 80, "eighty": 80,
                      "ninety": 90}

TEEN_NUMBER_TOKENS = {"ten": 10, "eleven": 11, "twelve": 12, "thirteen": 13,
                      "fourteen": 14, "fifteen": 15, "sixteen": 16,
                      "seventeen": 17, "eighteen": 18, "nineteen": 19}

HUNDRED_NUMBER_TOKENS = {"hundred": 100}

ZERO_NUMBER_TOKENS = {"zero": 0, "null": 0}

# Operator word -> binary function ("through" is division).
OPERATOR_TOKENS = {"plus": lambda x, y: x + y,
                   "minus": lambda x, y: x - y,
                   "times": lambda x, y: x * y,
                   "through": lambda x, y: x / y}

# Transition table for the FiniteStateMachine used by NumberTokenParser.
# States: 0 start; 1 complete number; 2 tens word seen (unit may follow);
# 3 unit seen (may be followed by "hundred"); 4 "hundred" seen.
# NOTE(review): state 4 only accepts tens words, so e.g. "one hundred five"
# is rejected — confirm this is the intended grammar.
transitions = {
    0: {tuple(ZERO_NUMBER_TOKENS) + tuple(TEEN_NUMBER_TOKENS): 1,
        tuple(DECI_NUMBER_TOKENS): 2,
        tuple(BASE_NUMBER_TOKENS): 3},
    2: {tuple(BASE_NUMBER_TOKENS): 1},
    3: {tuple(HUNDRED_NUMBER_TOKENS): 4},
    4: {tuple(DECI_NUMBER_TOKENS): 2},
}

valid_states = [1, 2, 3, 4]
default_transition = -1
|
|
|
|
|
|
|
|
class TokenParsingException(Exception):
    """Raised when a token cannot be parsed into a number or operator."""
|
|
class SubtokenizingException(TokenParsingException):
    """Raised when part of a token matches no known subtoken."""
|
|
|
|
|
|
class TokenParser(metaclass=ABCMeta):
    """Abstract base for parsers that turn a single word token into a value."""

    @classmethod
    def can_parse(cls, token):
        """Return True when *token* parses cleanly, False on TokenParsingException."""
        try:
            cls(token).parse()
        except TokenParsingException:
            return False
        return True

    @abstractmethod
    def parse(self):
        """Parse the stored token; raise TokenParsingException when it is invalid."""
|
|
|
|
class NumberTokenParser(TokenParser):
    """Parses a single number word (e.g. "twentythree") into its integer value.

    The word is split into known subtokens greedily from left to right and
    the resulting sequence is validated by a finite state machine before the
    value is computed.
    """

    def __init__(self, token):
        self._token = token.lower()
        self._token_length = len(token)
        self._finite_state_machine = FiniteStateMachine(0, valid_states, default_transition, transitions)

    def get_token_of_class_or_none(self, offset, token_class):
        """Return the subtoken from *token_class* that occurs at *offset*, or None."""
        for candidate in token_class:
            end = offset + len(candidate)
            if end > self._token_length:
                # Candidate would run past the end of the token.
                continue
            if self._token[offset:end] == candidate:
                return candidate
        return None

    def get_next_token_part(self, offset):
        """Return the first subtoken matching at *offset*, or None.

        Longer-word classes (teens before tens before units) are searched
        first so e.g. "seventeen" is not split as "seven" + dangling text.
        """
        search_order = (ZERO_NUMBER_TOKENS,
                        HUNDRED_NUMBER_TOKENS,
                        TEEN_NUMBER_TOKENS,
                        DECI_NUMBER_TOKENS,
                        BASE_NUMBER_TOKENS)
        for token_class in search_order:
            match = self.get_token_of_class_or_none(offset, token_class)
            if match:
                return match
        return None

    def subtokenize(self):
        """Split the token into known subtokens.

        Raises SubtokenizingException when unmatched characters remain.
        """
        parts = []
        offset = 0
        subtoken = self.get_next_token_part(offset)
        while subtoken is not None:
            offset += len(subtoken)
            parts.append(subtoken)
            subtoken = self.get_next_token_part(offset)
        if offset != self._token_length:
            raise SubtokenizingException("part of the token is dangling: '{}'".format(self._token[offset:]))
        return parts

    def parse(self):
        """Return the integer value of the token.

        Raises TokenParsingException when the subtoken sequence is not a
        valid number according to the state machine.
        """
        parts = self.subtokenize()

        if not self._finite_state_machine.check(parts):
            raise TokenParsingException("token '{}' is invalid".format(self._token))

        # Accumulate additively; a "hundred" subtoken multiplies whatever
        # has been accumulated so far (so "two hundred" -> 2 * 100).
        value = 0
        for part in parts:
            for table in (BASE_NUMBER_TOKENS, TEEN_NUMBER_TOKENS, DECI_NUMBER_TOKENS):
                if part in table:
                    value += table[part]
            if part in HUNDRED_NUMBER_TOKENS:
                value *= HUNDRED_NUMBER_TOKENS[part]

        return value
|
|
|
|
|
|
|
|
class OperatorTokenParser(TokenParser):
    """Parses an operator word ("plus", "minus", ...) into a binary function."""

    def __init__(self, token):
        self._token = token.lower()

    def parse(self):
        """Return the two-argument callable for the stored operator word.

        Raises TokenParsingException when the word is not a known operator.
        """
        if self._token not in OPERATOR_TOKENS:
            raise TokenParsingException("token '{}' is not an operator".format(self._token))
        return OPERATOR_TOKENS[self._token]
|
|
|
|
|
|
|
|
|
|
|