312 lines
9.1 KiB
Python
312 lines
9.1 KiB
Python
from collections import deque
|
|
|
|
from .context import FileContext
|
|
from .tokenize import Tokenizer, WHITESPACE
|
|
from .opcodes import make_opcodes
|
|
from .util import can_be_mark, can_convert_to_int, autoint, int16_2_bytes
|
|
from .directives import SetDirective
|
|
|
|
class ParsingError(Exception):
    """Raised when the assembly input cannot be parsed.

    The assembler in this module raises it for unexpected tokens, invalid
    addresses, redefined or undefined marks and a premature end of input.
    """
|
|
|
|
class Assembler(object):
    """
    This class is used for generating bytecode from a file containing assembly.

    Also required is the memory definition of the interpreter given as a dict::

        {
            "registers": <register_count>,
            "memory": <number of memory words>,
            "program_memory": <number of words available for the program>
        }

    And the definition of the commands. This is a list of dicts::

        [
            {
                "mnemonic": <mnemonic>,
                "args": [
                    ("register"|"memory"|"program_memory"|"direct_input"), ...
                ]
            }
        ]

    The method ``parse`` will parse the input file and ``bindump`` will dump the binary
    bytecode into a file.

    Addresses are zero-based, so for a definition with ``n`` registers (or
    words) the valid indices are ``0 .. n - 1``.
    """

    def __init__(self, file_, memory_definition, command_definition, custom_directives=None):
        """
        Prepare the assembler for parsing ``file_``.

        :param file_: input handed to ``FileContext``.
        :param memory_definition: dict with the keys ``"registers"``,
            ``"memory"`` and ``"program_memory"`` (see the class docstring).
        :param command_definition: list of command dicts (see the class
            docstring).
        :param custom_directives: optional mapping from directive name to an
            object providing ``parse(assembler, tokenizer)`` that returns the
            words to emit. Entries override the built-in ``"set"`` directive
            when they use the same name. ``None`` means no custom directives.
        """
        self._file_context = FileContext(file_)
        self._code_objects = deque()
        self._memory_definition = memory_definition
        self._command_definition = command_definition
        self._word_count = 0
        # mark name -> {"target": word index or -1 while undefined,
        #               "target_line": line of the definition,
        #               "references": [lines that use the mark]}
        self._marks = {}

        self._opcodes = make_opcodes([cmd["mnemonic"] for cmd in command_definition])
        self._commands_by_mnemonic = {cmd["mnemonic"]: cmd for cmd in command_definition}
        self._tokenizer = Tokenizer(self._file_context)

        self._directives = {"set": SetDirective()}
        if custom_directives is not None:
            self._directives.update(custom_directives)

    def parse(self):
        """
        Parse the whole input and resolve all mark references.

        After a successful call ``self._code_objects`` is a list of words in
        which every mark name has been replaced by its target.

        :raises ParsingError: on an unexpected token, on a premature end of
            input, or when a referenced mark is never defined.
        """
        try:
            for token in self._tokenizer:

                # Comments: skip everything up to (and including) the newline.
                if token == ";":
                    while token != "\n":
                        token = next(self._tokenizer)
                    continue

                # Commands
                if token in self._commands_by_mnemonic:
                    self.parse_command(token)
                    continue

                # Directives
                if token == ".":
                    self.parse_directive()
                    continue

                # The default is trying to parse a mark
                if not can_be_mark(token):
                    self.raise_unexpected_token(
                        token, "comment, command, directive or mark", token)
                self.parse_mark(token)
        except StopIteration:
            # One of the nested ``next`` calls ran out of tokens in the
            # middle of a statement.
            raise ParsingError("Unexpected EOF while parsing.")

        for mark, mark_data in self._marks.items():
            if mark_data["target"] < 0:
                raise ParsingError("Mark {} undefined. Referenced in lines: {}".format(
                    mark, mark_data["references"]))

        # Forward references were stored by name; substitute their targets.
        self._code_objects = [
            self._marks[c]["target"] if c in self._marks else c
            for c in self._code_objects
        ]

    def bindump(self, file_):
        """
        Write the program to ``file_``: first the number of words, then every
        word, each encoded with ``int16_2_bytes``.

        :param file_: object with a ``write`` method accepting the result of
            ``int16_2_bytes``.
        :raises ValueError: if the number of words does not fit into 16 bits.
        """
        # FIXME:
        # Make this work for tons of data.
        # Or is that necessary?
        # TODO:
        # Figure out whether/what improvements are necessary here
        word_total = len(self._code_objects)
        # Validate before encoding, so an oversized program is reported here
        # and never reaches int16_2_bytes.
        if word_total.bit_length() > 16:
            raise ValueError("Program size excceeds 2^16.")
        file_.write(int16_2_bytes(word_total))
        for word in self._code_objects:
            file_.write(int16_2_bytes(word))

    def parse_mark(self, token):
        """
        Parse a mark definition ``<token>:`` followed by a newline and record
        the current word count as its target.

        :param token: the mark name (already consumed from the tokenizer).
        :raises ParsingError: if ``':'`` or the newline is missing, or if the
            mark has already been defined.
        """
        should_be_colon = next(self._tokenizer)
        if should_be_colon != ":":
            self.raise_unexpected_token(token, "':'", should_be_colon)

        should_be_newline = next(self._tokenizer)
        if should_be_newline != "\n":
            self.raise_unexpected_token(token + ":", "'\\n'", should_be_newline)

        if token in self._marks:
            # The mark is known: either it was only referenced so far
            # (target == -1) or this is a redefinition.
            if self._marks[token]["target"] != -1:
                raise ParsingError("Error in line {} column {} mark already defined: '{}'. Previous was in line {}.".format(
                    self._file_context._line
                    , self._file_context._column
                    , token
                    , self._marks[token]["target_line"]))
            self._marks[token]["target"] = self._word_count
            self._marks[token]["target_line"] = self._file_context._line
        else:
            self._marks[token] = {
                "target": self._word_count
                , "target_line": self._file_context._line
                , "references": []
            }

    def parse_directive(self):
        """
        Parse a directive; the leading ``'.'`` has already been consumed.

        The directive object's ``parse(assembler, tokenizer)`` result is
        appended to the program and counted in the word count.

        :raises ParsingError: on an unknown directive name, a missing
            whitespace after the name or a missing trailing newline.
        """
        should_be_name = next(self._tokenizer)
        if should_be_name not in self._directives:
            self.raise_unexpected_token(".", "directive name", should_be_name)

        should_be_whitespace = next(self._tokenizer)
        if should_be_whitespace not in WHITESPACE:
            self.raise_unexpected_token(should_be_name, "' '", should_be_whitespace)

        words = self._directives[should_be_name].parse(self, self._tokenizer)

        self._word_count += len(words)
        self._code_objects.extend(words)

        should_be_newline = next(self._tokenizer)
        if should_be_newline != "\n":
            self.raise_unexpected_token(".", "newline", should_be_newline)

    def parse_command(self, cmd):
        """
        Parse the arguments of command ``cmd`` and emit its words.

        The first argument (the "small argument") is OR-ed into the opcode
        word; every further argument is emitted as a word of its own.

        :param cmd: mnemonic of the command (already consumed).
        :raises ParsingError: on malformed argument lists, invalid arguments,
            or when the first argument is a mark that is not defined yet.
        """
        arg_definitions = self._commands_by_mnemonic[cmd]["args"]

        # We have no arguments
        if not arg_definitions:
            self._code_objects.append(self._opcodes[cmd])
            self._word_count += 1
            token = next(self._tokenizer)
            if token != "\n":
                self.raise_unexpected_token(cmd, "newline", token)
            return

        # Small argument must be treated separately
        should_be_whitespace = next(self._tokenizer)
        if (should_be_whitespace not in WHITESPACE
                or should_be_whitespace == "\n"):
            self.raise_unexpected_token(cmd, "' '", should_be_whitespace)

        should_be_an_argument = next(self._tokenizer)
        small_argument = self.check_and_convert_argument(
            cmd, should_be_an_argument, arg_definitions[0])

        # An undefined mark comes back as its name (a string). It cannot be
        # merged into the opcode word, so report it instead of crashing on
        # the bitwise operation below.
        if small_argument in self._marks:
            raise ParsingError(
                "Error in line {} column {} after '{}': mark '{}' is used as "
                "first argument before it is defined".format(
                    self._file_context._line
                    , self._file_context._column
                    , cmd
                    , small_argument))

        self._word_count += 1
        # NOTE:
        # The Small Argument is stored within the first word (!)
        self._code_objects.append(self._opcodes[cmd] | (small_argument & 0xffff))

        # All the 16bit arguments
        for arg_definition in arg_definitions[1:]:
            should_be_comma = next(self._tokenizer)
            if should_be_comma != ",":
                self.raise_unexpected_token(cmd, "','", should_be_comma)

            self._word_count += 1
            self._code_objects.append(
                self.check_and_convert_argument(
                    cmd, next(self._tokenizer), arg_definition))

        should_be_newline = next(self._tokenizer)
        if should_be_newline != "\n":
            self.raise_unexpected_token(cmd, "newline", should_be_newline)

    def raise_unexpected_token(self, after, expected, got):
        """
        Raise a ``ParsingError`` describing an unexpected token at the
        current position of the file context.

        :param after: the text that preceded the offending token.
        :param expected: description of what was expected.
        :param got: the token that was found instead.
        """
        raise ParsingError("Error in line {} column {} after '{}': expected {}, got '{}'".format(
            self._file_context._line
            , self._file_context._column
            , after
            , expected
            , got))

    def raise_invalid_address(self, after, memtype, maxval, got):
        """
        Raise a ``ParsingError`` describing an out-of-range address at the
        current position of the file context.

        :param after: the text that preceded the offending value.
        :param memtype: name of the address space the value was meant for.
        :param maxval: the largest valid value.
        :param got: the value that was found.
        """
        raise ParsingError("Error in line {} column {} after '{}': value {} is invalid for {} (max is {})".format(
            self._file_context._line
            , self._file_context._column
            , after
            , got
            , memtype
            , maxval))

    def _check_address_range(self, cmd, memtype, definition_key, address):
        """Return ``address`` if it lies in ``0 .. size - 1``, else raise ``ParsingError``."""
        size = self._memory_definition[definition_key]
        if address < 0 or address >= size:
            self.raise_invalid_address(cmd, memtype, size - 1, address)
        return address

    def _reference_mark(self, mark):
        """
        Record a reference to ``mark`` in the current line.

        Returns the target if the mark is already defined; otherwise the mark
        name itself, which ``parse`` substitutes once all marks are known.
        """
        line = self._file_context._line
        if mark in self._marks:
            mark_data = self._marks[mark]
            mark_data["references"].append(line)
            if mark_data["target"] >= 0:
                # The target is already known. Insert it now.
                return mark_data["target"]
            return mark

        # The mark has not appeared before; its target is unknown.
        self._marks[mark] = {
            "target": -1
            , "target_line": 0
            , "references": [line]
        }
        return mark

    def check_and_convert_argument(self, cmd, argument, argument_definition):
        """
        Validate the token ``argument`` against ``argument_definition`` and
        convert it to the value that is stored in the program.

        :param cmd: mnemonic of the command being parsed (for error messages).
        :param argument: the argument token.
        :param argument_definition: ``"register"``, ``"memory"``,
            ``"program_memory"``; anything else is treated as direct input.
        :returns: an integer, or the mark name if the argument is a mark
            whose target is not known yet.
        :raises ParsingError: if the token is malformed or out of range.
        """
        if argument_definition == "register":
            if not argument.startswith("r"):
                self.raise_unexpected_token(cmd, "register name", argument)
            # Convert outside of the error path so the ParsingError is not
            # raised from inside the ``except`` block.
            try:
                register_offset = int(argument[1:])
            except ValueError:
                register_offset = None
            if register_offset is None:
                self.raise_unexpected_token(cmd, "register name", argument)
            return self._check_address_range(
                cmd, "register", "registers", register_offset)

        if argument_definition == "memory":
            if not can_convert_to_int(argument):
                self.raise_unexpected_token(cmd, "integer address", argument)
            return self._check_address_range(
                cmd, "memory", "memory", autoint(argument))

        if argument_definition == "program_memory":
            # Non-integer Argument.
            if not can_convert_to_int(argument):
                # Just nonsense.
                if not can_be_mark(argument):
                    self.raise_unexpected_token(cmd, "integer address or mark", argument)
                return self._reference_mark(argument)

            # Integer argument.
            return self._check_address_range(
                cmd, "program_memory", "program_memory", autoint(argument))

        # This is direct input (default).

        # Integer
        if can_convert_to_int(argument):
            return autoint(argument)

        # This is nonsense.
        if not can_be_mark(argument):
            self.raise_unexpected_token(cmd, "integer, char or mark", argument)

        # It is a Mark.
        return self._reference_mark(argument)
|
|
|