added exam exercise 06
This commit is contained in:
parent
4e2e417e5b
commit
166414197b
36
exam/ex06/finite_state_machine.py
Normal file
36
exam/ex06/finite_state_machine.py
Normal file
|
@ -0,0 +1,36 @@
|
|||
|
||||
|
||||
class FiniteStateMachine(object):
    """
    A deterministic finite state machine over sequences of words.

    ``transitions`` maps a state to a dict whose keys are *tuples of words*;
    every word in a tuple leads to the same target state. Unknown words (or
    an unknown current state) send the machine to ``default``, which acts as
    a trap/error state.
    """

    def __init__(self, start: int, valid: list, default: int, transitions: dict):
        self._start = start
        self._valid = valid

        # Flatten {state: {(word, ...): target}} into {state: {word: target}}
        # so that each transition lookup is a single O(1) dict access.
        self._transitions = dict()
        for state, trans in transitions.items():
            self._transitions[state] = dict()
            for words, target in trans.items():
                for word in words:
                    self._transitions[state][word] = target

        self._default = default
        self._state = start

    def reset(self):
        """Return the machine to its start state."""
        self._state = self._start

    def make_transition(self, word):
        """
        Consume one word and advance the machine.

        Falls to the default state when the word (or the current state)
        has no transition; once trapped in a transition-less default state
        the machine stays there.
        """
        if(not self._state in self._transitions):
            self._state = self._default
        if(not self._state in self._transitions):
            # BUGFIX: the default state may be a trap with no outgoing
            # transitions. The original code indexed
            # self._transitions[self._state] anyway and raised KeyError on
            # the second word after an invalid transition. Stay trapped.
            return
        if(not word in self._transitions[self._state]):
            self._state = self._default
            return

        self._state = self._transitions[self._state][word]

    def check(self, sequence):
        """
        Run the whole word sequence and report whether the machine ends in
        a valid (accepting) state. The machine is reset afterwards so the
        instance can be reused.
        """
        for word in sequence:
            self.make_transition(word)
        is_valid = self._state in self._valid
        self.reset()
        return is_valid
|
||||
|
16
exam/ex06/main.py
Normal file
16
exam/ex06/main.py
Normal file
|
@ -0,0 +1,16 @@
|
|||
from io import StringIO
|
||||
|
||||
from parser import Parser
|
||||
from tokenio import TokenStream
|
||||
from tokens import NumberTokenParser
|
||||
|
||||
# Sample expressions exercising numbers, all four operators and parentheses.
texts = [
    "one plus one",
    "one plus two",
    "thirtytwo plus eleven",
    "four times four",
    "(eight plus eleven) times two",
    "twohundred through eleven",
]

# Evaluate each sample and show "<expression> = <result>".
for text in texts:
    print(text, "=", Parser(TokenStream(StringIO(text))).parse())
|
||||
|
91
exam/ex06/parser.py
Normal file
91
exam/ex06/parser.py
Normal file
|
@ -0,0 +1,91 @@
|
|||
from collections import deque
|
||||
|
||||
from tokens import NumberTokenParser, OperatorTokenParser
|
||||
|
||||
class ParsingException(Exception):
    """Raised when the token stream cannot be parsed as an expression."""
|
||||
|
||||
class Parser(object):
    """
    Evaluator for word-based arithmetic expressions.

    Tokens are read from a token stream and evaluated strictly left to
    right (no operator precedence). Parentheses group sub-expressions,
    which are evaluated first and fed back into the enclosing expression.
    """

    def __init__(self, token_stream):
        self._token_stream = token_stream
        # Stack of partially built expression lists, one per open "(".
        self._stack = deque()
        # The expression list currently being filled.
        self._current_list = deque()

    def parse(self):
        """
        Consume the whole token stream and return the numeric result.

        Raises ParsingException on malformed input: a misplaced number,
        operator or parenthesis, an unknown token, or unbalanced
        parentheses.
        """
        # state == 0: an operand (number or "(") is expected.
        # state == 1: an operator (or ")") is expected.
        state = 0
        while True:
            token = self._token_stream.get_token()
            if(token == "("):
                if(state == 1):
                    raise ParsingException(
                            "expected operator, not parenthesis: {} (near '{}')".format(
                                self._token_stream._offset
                                , token))

                self._stack.append(self._current_list)
                # BUGFIX: open a fresh list for the sub-expression. The
                # original kept appending into the parent list, so
                # parentheses had no grouping effect at all.
                self._current_list = deque()
                continue

            if(NumberTokenParser.can_parse(token)):
                if(state == 1):
                    raise ParsingException(
                            "expected operator, not number: {} (near '{}')".format(
                                self._token_stream._offset
                                , token))
                self._current_list.append(NumberTokenParser(token).parse())
                state = 1
                continue

            if(OperatorTokenParser.can_parse(token)):
                if(state != 1):
                    raise ParsingException(
                            "expected number or parenthesis, not operator: {} (near '{}')".format(
                                self._token_stream._offset
                                , token))

                self._current_list.append(OperatorTokenParser(token).parse())
                state = 0
                continue

            if(token == ")"):
                if(not self._stack):
                    # BUGFIX: a stray ")" used to raise a bare IndexError
                    # from deque.pop(); report it as a parse error instead.
                    raise ParsingException(
                            "unbalanced closing parenthesis: {} (near '{}')".format(
                                self._token_stream._offset
                                , token))
                # A closed group counts as a completed operand.
                state = 1

                result = self.execute_branch(self._current_list)
                self._current_list = self._stack.pop()
                # BUGFIX: the sub-expression's value was computed and then
                # silently discarded; append it to the enclosing expression.
                self._current_list.append(result)
                continue

            if(not token):
                # Empty token signals end of input.
                if(self._stack):
                    raise ParsingException("unexpected EOF while parsing")
                return self.execute_branch(self._current_list)

            raise ParsingException("unknown token: {} (near '{}')".format(self._token_stream._offset, token))

    def execute_branch(self, branch):
        """
        Fold a flat [number, operator, number, operator, ...] sequence
        left to right and return the result (None for an empty branch).
        """
        result = None
        current_operator = None

        for element in branch:
            if(result is None):
                # First operand seeds the accumulator.
                result = element
                continue

            if(not isinstance(element, (float, int, complex))):
                # Anything non-numeric is an operator callable.
                current_operator = element
                continue

            if(current_operator):
                result = current_operator(result, element)
                current_operator = None
        return result
|
||||
|
||||
|
||||
|
||||
|
56
exam/ex06/tokenio.py
Normal file
56
exam/ex06/tokenio.py
Normal file
|
@ -0,0 +1,56 @@
|
|||
from collections import deque
|
||||
|
||||
# Characters that may appear inside a word token (ASCII letters only).
base_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Characters that terminate the current token: parentheses and whitespace.
end_of_token_chars = "() \t"
# Subset of end_of_token_chars that is skipped rather than returned as a token.
whitespace_chars = " \t"
|
||||
|
||||
class UnexpectedCharacterException(Exception):
    """
    Raised by TokenStream when a character outside the token alphabet
    (letters, parentheses, whitespace) is read.

    Stored context:
        _msg    -- human-readable description
        _offset -- stream offset just after the offending character
        _char   -- the offending character itself
    """

    def __init__(self, msg, offset, char, *args):
        # BUGFIX: forward a formatted message to Exception so that
        # str(exc) is informative; the original passed nothing and
        # str(exc) was always the empty string.
        Exception.__init__(self, "{} at offset {}: {!r}".format(msg, offset, char), *args)
        self._msg = msg
        self._offset = offset
        self._char = char
|
||||
|
||||
|
||||
class TokenStream(object):
    """Splits a seekable text file into word tokens and single parentheses."""

    def __init__(self, file_):
        self._file = file_
        self._file.seek(0, 0)
        # Number of characters consumed so far; also the seek position.
        self._offset = 0

    def _getc(self):
        # Read one character, tracking the offset; returns "" at EOF.
        char = self._file.read(1)
        if char:
            self._offset += 1
        return char

    def _ungetc(self):
        # Push the last character back by seeking one position backwards.
        self._offset -= 1
        self._file.seek(self._offset, 0)

    def get_token(self):
        """
        Return the next token: a word, "(" or ")", or "" at end of input.

        Raises UnexpectedCharacterException for characters outside the
        token alphabet.
        """
        letters = []
        while True:
            char = self._getc()
            if not char:
                # EOF terminates the current (possibly empty) token.
                break
            if char in base_chars:
                letters.append(char)
                continue
            if char not in end_of_token_chars:
                raise UnexpectedCharacterException("Unexpected character while tokenizing", self._offset, char)
            if letters:
                # The terminator belongs to the next token; push it back.
                self._ungetc()
                break
            if char in whitespace_chars:
                # Leading whitespace between tokens is skipped.
                continue
            # A lone parenthesis is a token of its own.
            return char
        return "".join(letters)
|
||||
|
||||
|
||||
|
155
exam/ex06/tokens.py
Normal file
155
exam/ex06/tokens.py
Normal file
|
@ -0,0 +1,155 @@
|
|||
from collections import deque
|
||||
from abc import ABCMeta, abstractmethod
|
||||
|
||||
from finite_state_machine import FiniteStateMachine
|
||||
|
||||
# Word -> value tables for the number grammar.
# Units (1-9).
BASE_NUMBER_TOKENS = {"one": 1, "two": 2, "three": 3, "four": 4, "five": 5,
                      "six": 6, "seven": 7, "eight": 8, "nine": 9}
# Tens (20-90).
# BUGFIX: "secenty" -> "seventy" and "eigthy" -> "eighty"; the misspelled
# words could never match real input. "fourty" is kept for backwards
# compatibility and the standard spelling "forty" added alongside it.
DECI_NUMBER_TOKENS = {"twenty": 20, "thirty": 30, "forty": 40, "fourty": 40,
                      "fifty": 50, "sixty": 60, "seventy": 70, "eighty": 80,
                      "ninety": 90}
# Ten through nineteen (complete numbers on their own).
TEEN_NUMBER_TOKENS = {"ten": 10, "eleven": 11, "twelve": 12, "thirteen": 13,
                      "fourteen": 14, "fifteen": 15, "sixteen": 16,
                      "seventeen": 17, "eighteen": 18, "nineteen": 19}
# Multiplier word.
HUNDRED_NUMBER_TOKENS = {"hundred": 100}
ZERO_NUMBER_TOKENS = {"zero": 0, "null": 0}

# Binary operators, mapped to their implementations.
OPERATOR_TOKENS = {"plus": lambda x, y: x + y,
                   "minus": lambda x, y: x - y,
                   "times": lambda x, y: x * y,
                   "through": lambda x, y: x / y}

# Finite-state-machine description for valid sub-word sequences:
#   0: start
#   1: complete number (zero/teen, tens+unit, or unit/teen after "hundred")
#   2: tens read (a unit may still follow)
#   3: unit read ("hundred" may follow)
#   4: "hundred" read (tens, teens or units may follow)
transitions = {
    0: {tuple(ZERO_NUMBER_TOKENS) + tuple(TEEN_NUMBER_TOKENS): 1,
        tuple(DECI_NUMBER_TOKENS): 2,
        tuple(BASE_NUMBER_TOKENS): 3},
    2: {tuple(BASE_NUMBER_TOKENS): 1},
    3: {tuple(HUNDRED_NUMBER_TOKENS): 4},
    # Generalization (backward compatible): the original only accepted tens
    # after "hundred", rejecting e.g. "onehundredeleven" / "twohundredfive".
    4: {tuple(DECI_NUMBER_TOKENS): 2,
        tuple(TEEN_NUMBER_TOKENS) + tuple(BASE_NUMBER_TOKENS): 1},
}
# States in which the sub-word sequence forms a complete, valid number.
valid_states = [1, 2, 3, 4]
# Trap state used for any invalid transition.
default_transition = -1
|
||||
|
||||
|
||||
|
||||
class TokenParsingException(Exception):
    """Base error: a token could not be parsed into a value."""


class SubtokenizingException(TokenParsingException):
    """A number token contains characters matching no known sub-word."""
|
||||
|
||||
|
||||
class TokenParser(metaclass = ABCMeta):
    """Common interface for token parsers (numbers, operators)."""

    @classmethod
    def can_parse(cls, token):
        """Probe whether this parser accepts the token, without raising."""
        try:
            cls(token).parse()
        except TokenParsingException:
            return False
        return True

    @abstractmethod
    def parse(self):
        """Convert the token to its value; raise TokenParsingException on failure."""
|
||||
|
||||
class NumberTokenParser(TokenParser):
    """
    Parses a number word such as "twohundredtwentyone" into an int.

    The token is greedily split into known sub-words (zero words, "hundred",
    teens, tens, units), the resulting sequence is validated with a finite
    state machine, and the value is accumulated left to right.
    """
    def __init__(self, token):
        # Matching is case-insensitive; lowering does not change the length.
        self._token = token.lower()
        self._token_length = len(token)

        self._finite_state_machine = FiniteStateMachine(0, valid_states, default_transition, transitions)

    def get_token_of_class_or_none(self, offset, token_class):
        # Return the first sub-word from token_class that matches
        # self._token at the given offset, or None if none matches.
        for token in token_class:
            if(len(token) + offset > self._token_length):
                # Sub-word would not fit into the remaining characters.
                continue

            if(self._token[offset: offset + len(token)] == token):
                return token
        return None

    def get_next_token_part(self, offset):
        # Probe the sub-word classes in a fixed order and return the first
        # match at offset, or None.
        # NOTE(review): the order matters — teens are probed before units so
        # that e.g. "seventeen" is not consumed as "seven"; confirm when
        # adding new word classes.
        token_classes = [ZERO_NUMBER_TOKENS
                , HUNDRED_NUMBER_TOKENS
                , TEEN_NUMBER_TOKENS
                , DECI_NUMBER_TOKENS
                , BASE_NUMBER_TOKENS]

        result = None
        for token_class in token_classes:
            result = self.get_token_of_class_or_none(offset, token_class)
            if(result):
                break

        return result

    def subtokenize(self):
        # Split self._token into a list of sub-words covering the whole
        # token; raise SubtokenizingException if unmatched characters remain.
        token_parts = deque()

        offset = 0
        while(True):
            subtoken = self.get_next_token_part(offset)
            if(subtoken is None):
                if(offset != self._token_length):
                    raise SubtokenizingException("part of the token is dangling: '{}'".format(self._token[offset:]))
                return list(token_parts)

            offset += len(subtoken)
            token_parts.append(subtoken)

    def parse(self):
        # Return the integer value of the token; raise TokenParsingException
        # if the sub-word sequence is not accepted by the state machine.
        token_parts = self.subtokenize()

        if(not self._finite_state_machine.check(token_parts)):
            raise TokenParsingException("token '{}' is invalid".format(self._token))


        # This is ugly but it works.
        # Units/teens/tens add to the running total; "hundred" multiplies
        # the total accumulated so far (e.g. two -> 2, hundred -> 200).
        result = 0
        for subtoken in token_parts:
            if(subtoken in BASE_NUMBER_TOKENS):
                result += BASE_NUMBER_TOKENS[subtoken]
            if(subtoken in TEEN_NUMBER_TOKENS):
                result += TEEN_NUMBER_TOKENS[subtoken]
            if(subtoken in DECI_NUMBER_TOKENS):
                result += DECI_NUMBER_TOKENS[subtoken]

            if(subtoken in HUNDRED_NUMBER_TOKENS):
                result *= HUNDRED_NUMBER_TOKENS[subtoken]


        return result
|
||||
|
||||
|
||||
|
||||
class OperatorTokenParser(TokenParser):
    """Parses an operator word ("plus", "minus", ...) into a binary callable."""

    def __init__(self, token):
        # Matching is case-insensitive.
        self._token = token.lower()

    def parse(self):
        """Return the operator's implementation; raise TokenParsingException
        for words that are not operators."""
        if(self._token not in OPERATOR_TOKENS):
            raise TokenParsingException("token '{}' is not an operator".format(self._token))
        return OPERATOR_TOKENS[self._token]
|
||||
|
||||
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user