"""Parsers turning spelled-out number words and operator words into values."""

from collections import deque
from abc import ABCMeta, abstractmethod

from finite_state_machine import FiniteStateMachine

# Word -> value tables for the sub-tokens a spelled-out number may contain.
# NOTE: the misspelled variants ("fourty", "secenty", "eigthy") were the only
# spellings recognised historically; they are kept as deprecated aliases next
# to the correct English spellings for backward compatibility.
BASE_NUMBER_TOKENS = {"one": 1, "two": 2, "three": 3, "four": 4, "five": 5,
                      "six": 6, "seven": 7, "eight": 8, "nine": 9}
DECI_NUMBER_TOKENS = {"twenty": 20, "thirty": 30,
                      "forty": 40, "fourty": 40,
                      "fifty": 50, "sixty": 60,
                      "seventy": 70, "secenty": 70,
                      "eighty": 80, "eigthy": 80,
                      "ninety": 90}
TEEN_NUMBER_TOKENS = {"ten": 10, "eleven": 11, "twelve": 12, "thirteen": 13,
                      "fourteen": 14, "fifteen": 15, "sixteen": 16,
                      "seventeen": 17, "eighteen": 18, "nineteen": 19}
HUNDRED_NUMBER_TOKENS = {"hundred": 100}
ZERO_NUMBER_TOKENS = {"zero": 0, "null": 0}

# Word -> binary function for the supported arithmetic operators.
OPERATOR_TOKENS = {"plus": lambda x, y: x + y,
                   "minus": lambda x, y: x - y,
                   "times": lambda x, y: x * y,
                   "through": lambda x, y: x / y}

# Transition table for the FiniteStateMachine validating sub-token order.
# States: 0 = start, 1 = complete number, 2 = after a tens word,
# 3 = after a units word, 4 = after "hundred".
transitions = {
    0: {tuple(ZERO_NUMBER_TOKENS) + tuple(TEEN_NUMBER_TOKENS): 1,
        tuple(DECI_NUMBER_TOKENS): 2,
        tuple(BASE_NUMBER_TOKENS): 3},
    2: {tuple(BASE_NUMBER_TOKENS): 1},
    3: {tuple(HUNDRED_NUMBER_TOKENS): 4},
    # After "hundred" a tens word may follow ("onehundredtwenty...").
    # Fix: a plain units or teen word is also valid there
    # ("onehundredfive", "onehundredten") and was previously rejected.
    4: {tuple(DECI_NUMBER_TOKENS): 2,
        tuple(TEEN_NUMBER_TOKENS): 1,
        tuple(BASE_NUMBER_TOKENS): 1},
}
valid_states = [1, 2, 3, 4]
default_transition = -1


class TokenParsingException(Exception):
    """Raised when a token cannot be parsed."""


class SubtokenizingException(TokenParsingException):
    """Raised when part of a number token matches no known sub-token."""


class TokenParser(metaclass=ABCMeta):
    """Abstract base class for single-token parsers."""

    @classmethod
    def can_parse(cls, token):
        """Return True if *token* can be parsed by this parser class."""
        try:
            cls(token).parse()
            return True
        except TokenParsingException:
            return False

    @abstractmethod
    def parse(self):
        """Parse the wrapped token and return its value."""


class NumberTokenParser(TokenParser):
    """Parse a spelled-out number such as "onehundredtwentythree" to an int."""

    def __init__(self, token):
        self._token = token.lower()
        # Measure the lower-cased token: all matching runs on self._token,
        # and str.lower() may change the length of exotic Unicode input.
        self._token_length = len(self._token)
        self._finite_state_machine = FiniteStateMachine(
            0, valid_states, default_transition, transitions)

    def get_token_of_class_or_none(self, offset, token_class):
        """Return the sub-token from *token_class* found at *offset*, or None."""
        for token in token_class:
            if len(token) + offset > self._token_length:
                continue  # sub-token would run past the end of the input
            if self._token[offset: offset + len(token)] == token:
                return token
        return None

    def get_next_token_part(self, offset):
        """Return the first sub-token matching at *offset*, or None.

        Teen tokens are tried before tens and units so that e.g.
        "seventeen" is not cut short as "seven".
        """
        token_classes = [ZERO_NUMBER_TOKENS, HUNDRED_NUMBER_TOKENS,
                         TEEN_NUMBER_TOKENS, DECI_NUMBER_TOKENS,
                         BASE_NUMBER_TOKENS]
        for token_class in token_classes:
            result = self.get_token_of_class_or_none(offset, token_class)
            if result:
                return result
        return None

    def subtokenize(self):
        """Split the token into its number sub-tokens, left to right.

        Raises:
            SubtokenizingException: if a remainder of the token matches
                no known sub-token.
        """
        token_parts = deque()
        offset = 0
        while True:
            subtoken = self.get_next_token_part(offset)
            if subtoken is None:
                if offset != self._token_length:
                    raise SubtokenizingException(
                        "part of the token is dangling: '{}'".format(self._token[offset:]))
                return list(token_parts)
            offset += len(subtoken)
            token_parts.append(subtoken)

    def parse(self):
        """Return the integer value of the token.

        Raises:
            TokenParsingException: if the sub-token sequence is not a
                valid number according to the state machine.
        """
        token_parts = self.subtokenize()
        if not self._finite_state_machine.check(token_parts):
            raise TokenParsingException("token '{}' is invalid".format(self._token))

        # Accumulate left to right; "hundred" multiplies what was read so far
        # (e.g. "one" "hundred" "twenty" "three" -> ((1*100)+20)+3).
        result = 0
        for subtoken in token_parts:
            if subtoken in BASE_NUMBER_TOKENS:
                result += BASE_NUMBER_TOKENS[subtoken]
            elif subtoken in TEEN_NUMBER_TOKENS:
                result += TEEN_NUMBER_TOKENS[subtoken]
            elif subtoken in DECI_NUMBER_TOKENS:
                result += DECI_NUMBER_TOKENS[subtoken]
            elif subtoken in HUNDRED_NUMBER_TOKENS:
                result *= HUNDRED_NUMBER_TOKENS[subtoken]
        return result


class OperatorTokenParser(TokenParser):
    """Parse an operator word ("plus", "minus", ...) to a binary function."""

    def __init__(self, token):
        self._token = token.lower()

    def parse(self):
        """Return the two-argument function for the operator token.

        Raises:
            TokenParsingException: if the token is not a known operator.
        """
        if self._token not in OPERATOR_TOKENS:
            raise TokenParsingException(
                "token '{}' is not an operator".format(self._token))
        return OPERATOR_TOKENS[self._token]