# Parsers for spoken-number tokens ("twenty", "hundred", ...) and operator words.
from collections import deque
|
|
from abc import ABCMeta, abstractmethod
|
|
|
|
from finite_state_machine import FiniteStateMachine
|
|
|
|
# Word -> value lookup tables for spoken-number parsing.
# Keys are the exact (lowercase) subtokens the tokenizer recognizes.
BASE_NUMBER_TOKENS = {"one": 1, "two": 2, "three": 3, "four": 4, "five": 5,
                      "six": 6, "seven": 7, "eight": 8, "nine": 9}

# Tens words. The historical misspellings ("fourty", "secenty", "eigthy")
# are kept so existing input keeps parsing; the correct English spellings
# are added alongside them so standard input parses as well.
DECI_NUMBER_TOKENS = {"twenty": 20, "thirty": 30,
                      "fourty": 40, "forty": 40,
                      "fifty": 50, "sixty": 60,
                      "secenty": 70, "seventy": 70,
                      "eigthy": 80, "eighty": 80,
                      "ninety": 90}

TEEN_NUMBER_TOKENS = {"ten": 10, "eleven": 11, "twelve": 12, "thirteen": 13,
                      "fourteen": 14, "fifteen": 15, "sixteen": 16,
                      "seventeen": 17, "eighteen": 18, "nineteen": 19}

HUNDRED_NUMBER_TOKENS = {"hundred": 100}

ZERO_NUMBER_TOKENS = {"zero": 0, "null": 0}

# Operator word -> binary function ("through" is division).
OPERATOR_TOKENS = {"plus": lambda x, y: x + y,
                   "minus": lambda x, y: x - y,
                   "times": lambda x, y: x * y,
                   "through": lambda x, y: x / y}

# Transition table for the FiniteStateMachine used by NumberTokenParser.
# States: 0 start; 1 complete number; 2 tens word seen (unit may follow);
# 3 unit seen (may be followed by "hundred"); 4 "hundred" seen.
# NOTE(review): state 4 only accepts tens words, so e.g. "one hundred five"
# is rejected — confirm this is the intended grammar.
transitions = {
    0: {tuple(ZERO_NUMBER_TOKENS) + tuple(TEEN_NUMBER_TOKENS): 1,
        tuple(DECI_NUMBER_TOKENS): 2,
        tuple(BASE_NUMBER_TOKENS): 3},
    2: {tuple(BASE_NUMBER_TOKENS): 1},
    3: {tuple(HUNDRED_NUMBER_TOKENS): 4},
    4: {tuple(DECI_NUMBER_TOKENS): 2},
}

valid_states = [1, 2, 3, 4]
default_transition = -1
|
|
|
|
|
|
|
|
class TokenParsingException(Exception):
    """Raised when a token cannot be parsed into a number or operator."""
|
|
class SubtokenizingException(TokenParsingException):
    """Raised when part of a token matches no known subtoken."""
|
|
|
|
|
|
class TokenParser(metaclass=ABCMeta):
    """Abstract base for parsers that turn a single word token into a value."""

    @classmethod
    def can_parse(cls, token):
        """Return True when *token* parses cleanly, False on TokenParsingException."""
        try:
            cls(token).parse()
        except TokenParsingException:
            return False
        return True

    @abstractmethod
    def parse(self):
        """Parse the stored token; raise TokenParsingException when it is invalid."""
|
|
|
|
class NumberTokenParser(TokenParser):
    """Parses a single number word (e.g. "twentythree") into its integer value.

    The word is split into known subtokens greedily from left to right and
    the resulting sequence is validated by a finite state machine before the
    value is computed.
    """

    def __init__(self, token):
        self._token = token.lower()
        self._token_length = len(token)
        self._finite_state_machine = FiniteStateMachine(0, valid_states, default_transition, transitions)

    def get_token_of_class_or_none(self, offset, token_class):
        """Return the subtoken from *token_class* that occurs at *offset*, or None."""
        for candidate in token_class:
            end = offset + len(candidate)
            if end > self._token_length:
                # Candidate would run past the end of the token.
                continue
            if self._token[offset:end] == candidate:
                return candidate
        return None

    def get_next_token_part(self, offset):
        """Return the first subtoken matching at *offset*, or None.

        Longer-word classes (teens before tens before units) are searched
        first so e.g. "seventeen" is not split as "seven" + dangling text.
        """
        search_order = (ZERO_NUMBER_TOKENS,
                        HUNDRED_NUMBER_TOKENS,
                        TEEN_NUMBER_TOKENS,
                        DECI_NUMBER_TOKENS,
                        BASE_NUMBER_TOKENS)
        for token_class in search_order:
            match = self.get_token_of_class_or_none(offset, token_class)
            if match:
                return match
        return None

    def subtokenize(self):
        """Split the token into known subtokens.

        Raises SubtokenizingException when unmatched characters remain.
        """
        parts = []
        offset = 0
        subtoken = self.get_next_token_part(offset)
        while subtoken is not None:
            offset += len(subtoken)
            parts.append(subtoken)
            subtoken = self.get_next_token_part(offset)
        if offset != self._token_length:
            raise SubtokenizingException("part of the token is dangling: '{}'".format(self._token[offset:]))
        return parts

    def parse(self):
        """Return the integer value of the token.

        Raises TokenParsingException when the subtoken sequence is not a
        valid number according to the state machine.
        """
        parts = self.subtokenize()

        if not self._finite_state_machine.check(parts):
            raise TokenParsingException("token '{}' is invalid".format(self._token))

        # Accumulate additively; a "hundred" subtoken multiplies whatever
        # has been accumulated so far (so "two hundred" -> 2 * 100).
        value = 0
        for part in parts:
            for table in (BASE_NUMBER_TOKENS, TEEN_NUMBER_TOKENS, DECI_NUMBER_TOKENS):
                if part in table:
                    value += table[part]
            if part in HUNDRED_NUMBER_TOKENS:
                value *= HUNDRED_NUMBER_TOKENS[part]

        return value
|
|
|
|
|
|
|
|
class OperatorTokenParser(TokenParser):
    """Parses an operator word ("plus", "minus", ...) into a binary function."""

    def __init__(self, token):
        self._token = token.lower()

    def parse(self):
        """Return the two-argument callable for the stored operator word.

        Raises TokenParsingException when the word is not a known operator.
        """
        if self._token not in OPERATOR_TOKENS:
            raise TokenParsingException("token '{}' is not an operator".format(self._token))
        return OPERATOR_TOKENS[self._token]
|
|
|
|
|
|
|
|
|
|
|