187 lines
4.8 KiB
Python
187 lines
4.8 KiB
Python
|
from collections import deque
|
||
|
import string
|
||
|
from io import StringIO
|
||
|
|
||
|
# Character classes recognised by the parser.  Each constant is a string whose
# individual characters are the members of the class, so membership is tested
# with ``character in TOKEN_...``.
TOKEN_NUMERIC_LITERAL = string.digits  # "0123456789"
TOKEN_DOT = "."
TOKEN_PARENTHESIS_OPEN = "("
TOKEN_PARENTHESIS_CLOSE = ")"
TOKEN_BINARY_OPERATOR = "+-*/"
# A leading minus sign that introduces a signed numeric literal.
TOKEN_SIGN = "-"
TOKEN_WHITESPACE = string.whitespace
|
||
|
|
||
|
# States of the expression state machine.
STATE_BEGIN_EXPRESSION = 0  # nothing consumed yet; an operand is expected
STATE_INSIDE_TERM = 1  # an operand was just consumed
STATE_EXPECT_NEXT_ARGUMENT = 2  # a binary operator was just consumed
# NOTE: the value 3 is not assigned to any state in this file.
STATE_PARENTHESIS_CLOSED = 4  # a closing parenthesis ended the expression
|
||
|
|
||
|
class CharacterDevice(object):
    """Character-at-a-time reader with push-back and position tracking.

    Wraps a text file-like object and exposes ``getc`` / ``ungetc``.  Pushed
    back characters are kept in an internal stack instead of rewinding the
    underlying stream with ``seek``, so the device works regardless of the
    stream's starting position and does not require a seekable stream.

    Attributes:
        line: zero-based index of the current line.
        character: stack of per-line column counters; ``character[-1]`` is
            the number of characters consumed on the current line.
    """

    def __init__(self, file_):
        """Wrap *file_*, an object providing ``read(1)`` that returns text."""
        self._file = file_
        # Number of characters currently consumed (reads minus push-backs).
        self._offset = 0
        # Characters handed back via ungetc(), most recent last.
        self._pushback = []
        self.character = deque([0])
        self.line = 0

    def getc(self):
        """Return the next character, or ``""`` at end of input.

        Reaching end of input does not advance the position counters.
        """
        if self._pushback:
            character = self._pushback.pop()
        else:
            character = self._file.read(1)

        if not character:
            # EOF: nothing was consumed, so leave the position untouched.
            return character

        self._offset += 1
        if character == "\n":
            self.line += 1
            # Start a fresh column counter for the new line; the previous
            # line's counter stays on the stack so ungetc() can restore it.
            self.character.append(0)
        else:
            self.character[-1] += 1
        return character

    def ungetc(self, c):
        """Push *c* back so that the next ``getc`` returns it again.

        *c* must be the character most recently returned by ``getc``.
        Passing the empty EOF marker is a no-op.
        """
        if not c:
            return
        if c == "\n":
            self.line -= 1
            # Drop the new line's counter, exposing the previous line's.
            self.character.pop()
        else:
            self.character[-1] -= 1
        self._offset -= 1
        self._pushback.append(c)
|
||
|
|
||
|
class ParsingError(Exception):
    """Raised when the input is not a well-formed arithmetic expression."""
|
||
|
|
||
|
class InternalError(Exception):
    """Raised when the parser itself is used inconsistently (a logic error)."""
|
||
|
|
||
|
class ArithmeticParser(object):
    """Parser for arithmetic expressions made of numbers, ``+-*/`` and parentheses.

    The input is read character by character through a ``CharacterDevice``.
    ``parse`` returns the expression as a flat list of tokens: numeric
    literals and operators are strings, and every parenthesised
    sub-expression is a nested list of the same shape.

    An instance is meant to parse its input once; after a ``ParsingError``
    the internal state is not reset.
    """

    def __init__(self, file_):
        """Create a parser reading from the text file-like object *file_*."""
        self._file = CharacterDevice(file_)
        # Expressions suspended while a parenthesised sub-expression is parsed.
        self._stack = deque()
        # Tokens of the expression currently being built.
        self._current_expr = deque()

    def parse_numeric_literal(self, carry=""):
        """Consume a numeric literal and append it to the current expression.

        Args:
            carry: text already consumed by the caller that belongs to the
                literal (the sign character); it becomes the literal's prefix.

        Raises:
            ParsingError: if the literal contains a second dot, or contains
                no digit at all (for example a lone ``-``).
        """
        parts = [carry]
        seen_dot = False
        while True:
            character = self._file.getc()
            if not character:
                break
            if character in TOKEN_NUMERIC_LITERAL:
                parts.append(character)
            elif character in TOKEN_DOT:
                if seen_dot:
                    raise self.build_parsing_error("Unexpected dot while parsing literal")
                seen_dot = True
                parts.append(character)
            else:
                break

        # Hand the terminating character (or the empty EOF marker) back so
        # the caller reads it again.
        self._file.ungetc(character)

        literal = "".join(parts)
        # A sign and/or dot without any digit is not a number.
        if not any(ch in TOKEN_NUMERIC_LITERAL for ch in literal):
            raise self.build_parsing_error(
                "Expected digits in numeric literal, got '{}'".format(literal))
        self._current_expr.append(literal)

    def parse_parenthesis(self):
        """Consume ``( expression )`` and append it as a nested list.

        Raises:
            InternalError: if the next character is not an opening parenthesis.
            ParsingError: if the input ends before the closing parenthesis, or
                the sub-expression is malformed.
        """
        opening = self._file.getc()
        # The explicit emptiness test matters: "" is "in" every string.
        if not opening or opening not in TOKEN_PARENTHESIS_OPEN:
            raise InternalError("parse_parenthesis did not recieve an opening parenthesis")

        # Suspend the enclosing expression and collect the nested one.
        self._stack.append(self._current_expr)
        self._current_expr = deque()
        self.parse_expression()

        closing = self._file.getc()
        if not closing:
            raise self.build_parsing_error("Unexpected EOF while parsing")
        if closing not in TOKEN_PARENTHESIS_CLOSE:
            raise self.build_parsing_error("Expected closing parenthesis")

        nested = self._current_expr
        self._current_expr = self._stack.pop()
        self._current_expr.append(list(nested))

    def _parse_operand(self, peek):
        """Parse one operand whose first character *peek* was already read.

        An operand is an unsigned literal, a signed literal, or a
        parenthesised sub-expression.

        Raises:
            ParsingError: if *peek* cannot start an operand.
        """
        if peek in TOKEN_NUMERIC_LITERAL:
            self._file.ungetc(peek)
            self.parse_numeric_literal()
        elif peek in TOKEN_SIGN:
            # The sign is already consumed; pass it on as the literal prefix.
            self.parse_numeric_literal(carry=peek)
        elif peek in TOKEN_PARENTHESIS_OPEN:
            self._file.ungetc(peek)
            self.parse_parenthesis()
        else:
            raise self.build_parsing_error("Unexpected token: '{}'".format(peek))

    def parse_expression(self):
        """Parse operands and binary operators until EOF or a closing parenthesis.

        A closing parenthesis is pushed back, not consumed, so that
        ``parse_parenthesis`` can read it.

        Returns:
            The final state: ``STATE_BEGIN_EXPRESSION`` (nothing parsed),
            ``STATE_INSIDE_TERM`` (ended after an operand) or
            ``STATE_PARENTHESIS_CLOSED`` (stopped at a closing parenthesis).

        Raises:
            ParsingError: on an unexpected character, or if the input ends
                right after a binary operator.
        """
        state = STATE_BEGIN_EXPRESSION
        while True:
            peek = self._file.getc()
            if not peek:
                break
            if peek in TOKEN_WHITESPACE:
                continue

            if state == STATE_INSIDE_TERM:
                if peek in TOKEN_BINARY_OPERATOR:
                    self._current_expr.append(peek)
                    state = STATE_EXPECT_NEXT_ARGUMENT
                    continue
                if peek in TOKEN_PARENTHESIS_CLOSE:
                    state = STATE_PARENTHESIS_CLOSED
                    self._file.ungetc(peek)
                    break
                raise self.build_parsing_error(
                    "Unexpected token: '{}', expected binary operator".format(peek))

            # STATE_BEGIN_EXPRESSION and STATE_EXPECT_NEXT_ARGUMENT both
            # require an operand next.
            self._parse_operand(peek)
            state = STATE_INSIDE_TERM

        if state not in (STATE_BEGIN_EXPRESSION,
                         STATE_INSIDE_TERM,
                         STATE_PARENTHESIS_CLOSED):
            raise self.build_parsing_error("Unexpected EOF while parsing")
        return state

    def parse(self):
        """Parse the whole input and return the expression as a list.

        Returns:
            A list of tokens; parenthesised sub-expressions are nested lists.
            Empty input yields an empty list.

        Raises:
            ParsingError: if the input is malformed, including a closing
                parenthesis without a matching opening one.
        """
        state = self.parse_expression()
        if state == STATE_PARENTHESIS_CLOSED:
            # parse_expression stopped at a ")" that nobody opened.
            raise self.build_parsing_error("Unexpected closing parenthesis")
        if state not in (STATE_BEGIN_EXPRESSION, STATE_INSIDE_TERM):
            raise self.build_parsing_error("Unexpected EOF while parsing")
        return list(self._current_expr)

    def build_parsing_error(self, msg):
        """Return (not raise) a ``ParsingError`` for *msg* with the current position.

        The line number is reported as tracked by the character device
        (zero-based); the column is the count for the current line only.
        """
        return ParsingError("Error: Line {}, Character {}: {}".format(
            self._file.line, self._file.character[-1], msg))
|
||
|
|
||
|
|
||
|
def check_expression(string):
    """Return True if *string* parses as an arithmetic expression, else False.

    Only ``ParsingError`` is translated into ``False``; any other exception
    raised by the parser propagates to the caller.
    """
    source = StringIO(string)
    candidate = ArithmeticParser(source)
    try:
        candidate.parse()
    except ParsingError:
        return False
    return True
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
    # Demo: show the parsed structure of one expression ...
    print(ArithmeticParser(StringIO("((3.2+4)*(5)+7)")).parse())

    # ... then the validity verdict for a few samples ...
    for sample in (
        "((3+4)*(5)+7)",
        "(-3+4)*(5/7)",
        "(-3+4)*(5/(7+5))",
        "(-3.3+4)*(5/(7+5)",
    ):
        print(check_expression(sample))

    # ... and finally parse an input with an unclosed parenthesis directly;
    # the ParsingError is not caught here.
    print(ArithmeticParser(StringIO("((3+4)*(5)+7")).parse())
|