added exam exercise 06

master
Daniel Knüttel 2019-02-25 13:05:55 +01:00
parent 4e2e417e5b
commit 166414197b
5 changed files with 354 additions and 0 deletions

View File

@ -0,0 +1,36 @@
class FiniteStateMachine(object):
    """Deterministic finite state machine over hashable "words".

    Parameters
    ----------
    start:
        State the machine is in initially and after ``reset()``.
    valid:
        Collection of accepting states.
    default:
        State entered whenever no transition exists for the current
        state/word combination.
    transitions:
        Mapping ``{state: {(word, ...): target_state}}``. Every word of a key
        tuple leads from ``state`` to ``target_state``.
    """

    def __init__(self, start: int, valid: list, default: int, transitions: dict):
        self._start = start
        self._valid = valid
        self._default = default
        self._state = start
        # Flatten the word tuples so a lookup is transitions[state][word].
        self._transitions = {
            state: {
                word: target
                for words, target in trans.items()
                for word in words
            }
            for state, trans in transitions.items()
        }

    def reset(self):
        """Put the machine back into its start state."""
        self._state = self._start

    def make_transition(self, word):
        """Consume ``word`` and move to the next state.

        If the current state has no outgoing transitions at all, or none for
        ``word``, the machine moves to the default state. The former case used
        to fall through to a lookup on the default state and raised
        ``KeyError`` when that state had no transition table.
        """
        outgoing = self._transitions.get(self._state)
        if outgoing is None or word not in outgoing:
            self._state = self._default
            return
        self._state = outgoing[word]

    def check(self, sequence):
        """Return True if ``sequence`` ends in a valid state.

        The machine is reset afterwards, so ``check`` can be called repeatedly.
        """
        for word in sequence:
            self.make_transition(word)
        is_valid = self._state in self._valid
        self.reset()
        return is_valid

16
exam/ex06/main.py 100644
View File

@ -0,0 +1,16 @@
from io import StringIO
from parser import Parser
from tokenio import TokenStream
from tokens import NumberTokenParser
# Sample expressions that are evaluated and printed when the script runs.
texts = [
    "one plus one",
    "one plus two",
    "thirtytwo plus eleven",
    "four times four",
    "(eight plus eleven) times two",
    "twohundred through eleven",
]

for text in texts:
    # Each expression gets its own in-memory stream, tokenizer and parser.
    token_stream = TokenStream(StringIO(text))
    value = Parser(token_stream).parse()
    print(text, "=", value)

View File

@ -0,0 +1,91 @@
from collections import deque
from tokens import NumberTokenParser, OperatorTokenParser
class ParsingException(Exception):
    """Raised by Parser.parse when the token sequence is not a valid expression."""
class Parser(object):
    """Evaluate an arithmetic expression read from a token stream.

    Tokens are number words, operator words and parentheses. Operators are
    applied strictly from left to right (there is no operator precedence);
    a parenthesised group is evaluated on its own and then used as a single
    operand of the enclosing expression.
    """

    def __init__(self, token_stream):
        """Remember the token source.

        ``token_stream`` must offer ``get_token()``, returning the next token
        or an empty string at the end of the input, and an ``_offset``
        attribute that is quoted in error messages.
        """
        self._token_stream = token_stream
        # Unfinished enclosing expressions, one entry per open parenthesis.
        self._stack = deque()
        # Operands and operator callables of the expression being read.
        self._current_list = deque()

    def _fail(self, message, token):
        """Raise a ParsingException carrying the stream offset and the token."""
        raise ParsingException(
            "{}: {} (near '{}')".format(
                message, self._token_stream._offset, token))

    def parse(self):
        """Consume the whole token stream and return the computed value.

        Returns None for empty input. Raises ParsingException on unknown
        tokens, misplaced operands/operators, unbalanced parentheses, empty
        groups and expressions that end with an operator.
        """
        # 0: an operand (number or "(") is expected next,
        # 1: an operator (or ")" / end of input) is expected next.
        state = 0
        while True:
            token = self._token_stream.get_token()

            if token == "(":
                if state == 1:
                    self._fail("expected operator, not parenthesis", token)
                # Park the enclosing expression and start a fresh one for the
                # group; sharing one list would flatten the group away.
                self._stack.append(self._current_list)
                self._current_list = deque()
                continue

            if NumberTokenParser.can_parse(token):
                if state == 1:
                    self._fail("expected operator, not number", token)
                self._current_list.append(NumberTokenParser(token).parse())
                state = 1
                continue

            if OperatorTokenParser.can_parse(token):
                if state != 1:
                    self._fail(
                        "expected number or parenthesis, not operator", token)
                self._current_list.append(OperatorTokenParser(token).parse())
                state = 0
                continue

            if token == ")":
                if not self._stack:
                    self._fail("unexpected closing parenthesis", token)
                if state != 1:
                    # Covers "()" and groups ending with an operator.
                    self._fail(
                        "expected number or parenthesis, "
                        "not closing parenthesis", token)
                # The value of the group becomes one operand of its parent.
                result = self.execute_branch(self._current_list)
                self._current_list = self._stack.pop()
                self._current_list.append(result)
                state = 1
                continue

            if not token:
                dangling_operator = state == 0 and self._current_list
                if self._stack or dangling_operator:
                    raise ParsingException("unexpected EOF while parsing")
                return self.execute_branch(self._current_list)

            self._fail("unknown token", token)

    def execute_branch(self, branch):
        """Fold ``branch`` (operands and binary callables) from left to right.

        The first element seeds the result. Every following non-numeric
        element is remembered as the pending operator and applied to the
        running result and the next number. Returns None for an empty branch.
        """
        result = None
        current_operator = None
        for element in branch:
            if result is None:
                result = element
                continue
            if not isinstance(element, (float, int, complex)):
                # Anything that is not a number is an operator callable.
                current_operator = element
                continue
            if current_operator:
                result = current_operator(result, element)
                current_operator = None
        return result

View File

@ -0,0 +1,56 @@
from collections import deque
# Characters a word token may consist of: ASCII letters only.
base_chars = "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Blanks that separate tokens; they are skipped between tokens.
whitespace_chars = " \t"
# Characters that terminate a word token: parentheses and blanks.
end_of_token_chars = "()" + whitespace_chars
class UnexpectedCharacterException(Exception):
    """Raised when the tokenizer reads a character it cannot handle.

    The description, the stream offset and the offending character are kept
    in ``_msg``, ``_offset`` and ``_char``. ``args`` still only holds the
    extra positional arguments, as before.
    """

    def __init__(self, msg, offset, char, *args):
        Exception.__init__(self, *args)
        self._msg = msg
        self._offset = offset
        self._char = char

    def __str__(self):
        # Without this the stored details never show up in a traceback,
        # because they are not passed on to Exception.__init__.
        return "{} (offset {}, character {!r})".format(
            self._msg, self._offset, self._char)
class TokenStream(object):
    """Split a seekable text file object into word and parenthesis tokens."""

    def __init__(self, file_):
        """Wrap ``file_`` and rewind it to its beginning."""
        self._file = file_
        self._file.seek(0, 0)
        # Number of characters consumed so far.
        self._offset = 0

    def _getc(self):
        """Read one character; returns an empty string at end of input."""
        char = self._file.read(1)
        if char:
            self._offset += 1
        return char

    def _ungetc(self):
        """Step back one character so it is read again by the next _getc()."""
        previous = self._offset - 1
        self._file.seek(previous, 0)
        self._offset = previous

    def get_token(self):
        """Return the next token.

        A token is either a run of letters or a single parenthesis. Blanks
        between tokens are skipped. An empty string signals the end of the
        input. Any other character raises UnexpectedCharacterException.
        """
        letters = []
        while True:
            char = self._getc()
            if not char:
                # EOF: hand out whatever has been collected (possibly nothing).
                break
            if char in base_chars:
                letters.append(char)
                continue
            if char not in end_of_token_chars:
                raise UnexpectedCharacterException("Unexpected character while tokenizing", self._offset, char)
            if letters:
                # The delimiter ends the current word; leave it in the stream
                # so that the next call sees it.
                self._ungetc()
                break
            if char not in whitespace_chars:
                # A parenthesis outside of a word is a token of its own.
                return char
            # Otherwise: whitespace between tokens, simply skipped.
        return "".join(letters)

155
exam/ex06/tokens.py 100644
View File

@ -0,0 +1,155 @@
from collections import deque
from abc import ABCMeta, abstractmethod
from finite_state_machine import FiniteStateMachine
# Number words, grouped by the role they play inside a compound number word
# such as "twohundredthirtytwo".
BASE_NUMBER_TOKENS = {
    "one": 1,
    "two": 2,
    "three": 3,
    "four": 4,
    "five": 5,
    "six": 6,
    "seven": 7,
    "eight": 8,
    "nine": 9,
}
DECI_NUMBER_TOKENS = {
    "twenty": 20,
    "thirty": 30,
    "forty": 40,
    # Misspelling accepted by earlier versions; kept so old input still works.
    "fourty": 40,
    "fifty": 50,
    "sixty": 60,
    "seventy": 70,
    "eighty": 80,
    "ninety": 90,
}
TEEN_NUMBER_TOKENS = {
    "ten": 10,
    "eleven": 11,
    "twelve": 12,
    "thirteen": 13,
    "fourteen": 14,
    "fifteen": 15,
    "sixteen": 16,
    "seventeen": 17,
    "eighteen": 18,
    "nineteen": 19,
}
HUNDRED_NUMBER_TOKENS = {"hundred": 100}
ZERO_NUMBER_TOKENS = {
    "zero": 0,
    "null": 0,
}

# Operator words mapped to the binary function they stand for.
OPERATOR_TOKENS = {
    "plus": lambda x, y: x + y,
    "minus": lambda x, y: x - y,
    "times": lambda x, y: x * y,
    "through": lambda x, y: x / y,
}

# Grammar of a compound number word as {state: {words: next_state}}.
#   0: start
#   1: number complete, nothing may follow
#   2: after a multiple of ten, a single digit may follow
#   3: after a leading digit, "hundred" may follow
#   4: after "hundred", the remainder below one hundred may follow
transitions = {
    0: {
        tuple(ZERO_NUMBER_TOKENS) + tuple(TEEN_NUMBER_TOKENS): 1,
        tuple(DECI_NUMBER_TOKENS): 2,
        tuple(BASE_NUMBER_TOKENS): 3,
    },
    2: {tuple(BASE_NUMBER_TOKENS): 1},
    3: {tuple(HUNDRED_NUMBER_TOKENS): 4},
    4: {
        tuple(DECI_NUMBER_TOKENS): 2,
        # "twohundredone" / "twohundredeleven": digit or teen after "hundred".
        tuple(BASE_NUMBER_TOKENS) + tuple(TEEN_NUMBER_TOKENS): 1,
    },
}
valid_states = [1, 2, 3, 4]
# State entered when no transition matches; it is not a valid state.
default_transition = -1
class TokenParsingException(Exception):
    """Raised when a token cannot be interpreted by a token parser."""
class SubtokenizingException(TokenParsingException):
    """Raised when a token cannot be split completely into known sub-tokens."""
class TokenParser(metaclass=ABCMeta):
    """Common interface of all token parsers.

    Subclasses are constructed from a single token and implement ``parse()``.
    """

    @classmethod
    def can_parse(cls, token):
        """Return True if ``cls(token).parse()`` succeeds.

        Only TokenParsingException counts as "cannot parse"; every other
        exception is passed on to the caller.
        """
        try:
            cls(token).parse()
        except TokenParsingException:
            return False
        else:
            return True

    @abstractmethod
    def parse(self):
        """Return the value represented by the token."""
class NumberTokenParser(TokenParser):
    """Turn a compound number word such as "thirtytwo" into its integer value."""

    def __init__(self, token):
        self._token = token.lower()
        # Measure the normalised token: lower-casing can change the length of
        # some Unicode strings, and all offsets below refer to self._token.
        self._token_length = len(self._token)
        self._finite_state_machine = FiniteStateMachine(0, valid_states, default_transition, transitions)

    def get_token_of_class_or_none(self, offset, token_class):
        """Return the first word of ``token_class`` found at ``offset``, else None."""
        for token in token_class:
            if self._token.startswith(token, offset):
                return token
        return None

    def get_next_token_part(self, offset):
        """Return the number word starting at ``offset`` or None.

        The classes are tried in a fixed order so that longer words win over
        the shorter words they start with ("fourteen" before "four").
        """
        token_classes = (
            ZERO_NUMBER_TOKENS,
            HUNDRED_NUMBER_TOKENS,
            TEEN_NUMBER_TOKENS,
            DECI_NUMBER_TOKENS,
            BASE_NUMBER_TOKENS,
        )
        for token_class in token_classes:
            subtoken = self.get_token_of_class_or_none(offset, token_class)
            if subtoken:
                return subtoken
        return None

    def subtokenize(self):
        """Split the token into a list of number words.

        Raises SubtokenizingException if some part of the token is not a
        known number word.
        """
        token_parts = []
        offset = 0
        while True:
            subtoken = self.get_next_token_part(offset)
            if subtoken is None:
                if offset != self._token_length:
                    raise SubtokenizingException("part of the token is dangling: '{}'".format(self._token[offset:]))
                return token_parts
            offset += len(subtoken)
            token_parts.append(subtoken)

    def parse(self):
        """Return the integer value of the token.

        Raises SubtokenizingException if the token cannot be split into number
        words and TokenParsingException if the words do not form a number the
        state machine accepts.
        """
        token_parts = self.subtokenize()
        if not self._finite_state_machine.check(token_parts):
            raise TokenParsingException("token '{}' is invalid".format(self._token))

        # The state machine has already validated the order of the words, so
        # it is enough to add them up; "hundred" scales what came before it.
        additive_classes = (
            BASE_NUMBER_TOKENS,
            TEEN_NUMBER_TOKENS,
            DECI_NUMBER_TOKENS,
        )
        result = 0
        for subtoken in token_parts:
            if subtoken in HUNDRED_NUMBER_TOKENS:
                result *= HUNDRED_NUMBER_TOKENS[subtoken]
                continue
            for token_class in additive_classes:
                if subtoken in token_class:
                    result += token_class[subtoken]
                    break
        return result
class OperatorTokenParser(TokenParser):
    """Map an operator word to the binary function it stands for."""

    def __init__(self, token):
        # Operator words are matched case-insensitively.
        self._token = token.lower()

    def parse(self):
        """Return the callable registered for the token in OPERATOR_TOKENS.

        Raises TokenParsingException if the token is not an operator word.
        """
        if self._token in OPERATOR_TOKENS:
            return OPERATOR_TOKENS[self._token]
        raise TokenParsingException("token '{}' is not an operator".format(self._token))