from collections import deque

from .context import FileContext
from .tokenize import Tokenizer, WHITESPACE
from .opcodes import make_opcodes
from .util import can_be_mark, can_convert_to_int, autoint, int16_2_bytes
from .directives import SetDirective


class ParsingError(Exception):
    """Raised when the assembly source cannot be parsed."""


class Assembler(object):
    """
    This class is used for generating bytecode from a file containing
    assembly.

    Also required is the memory definition of the interpreter given as a
    dict. Each value is used as the largest accepted number for arguments
    of the corresponding kind::

        {
            "registers": <upper bound for register numbers>,
            "memory": <upper bound for memory addresses>,
            "program_memory": <upper bound for program memory addresses>
        }

    And the definition of the commands. This is a list of dicts::

        [
            {
                "mnemonic": <name of the command>,
                "args": [
                    ("register"|"memory"|"program_memory"|"direct_input"),
                    ...
                ]
            }
        ]

    Any argument kind other than ``"register"``, ``"memory"`` and
    ``"program_memory"`` is treated as direct input.

    ``custom_directives`` is a mapping from directive name to directive
    object; it is merged over the built-in ``"set"`` directive.

    The method ``parse`` will parse the input file and ``bindump`` will
    dump the binary bytecode into a file.
    """

    def __init__(self, file_, memory_definition, command_definition, custom_directives):
        self._file_context = FileContext(file_)
        # Words of the program. May contain mark names as placeholders
        # until ``parse`` has resolved them.
        self._code_objects = deque()
        self._memory_definition = memory_definition
        self._command_definition = command_definition
        # Number of words emitted so far; this is the target of the next mark.
        self._word_count = 0
        # mark name -> {"target": word index or -1 while undefined,
        #               "target_line": line of the definition,
        #               "references": lines in which the mark is used}
        self._marks = {}
        self._opcodes = make_opcodes([cmd["mnemonic"] for cmd in command_definition])
        self._commands_by_mnemonic = {cmd["mnemonic"]: cmd for cmd in command_definition}
        self._tokenizer = Tokenizer(self._file_context)
        self._directives = {"set": SetDirective()}
        self._directives.update(custom_directives)

    def parse(self):
        """
        Parse the whole input and resolve all marks.

        Afterwards ``self._code_objects`` is a list in which every mark
        placeholder has been replaced by the mark's target.

        Raises ``ParsingError`` on unexpected tokens, on an unexpected end
        of input and on marks that are referenced but never defined.
        """
        try:
            for token in self._tokenizer:
                # Comments: skip everything up to the end of the line.
                if token == ";":
                    while token != "\n":
                        token = next(self._tokenizer)
                    continue
                # Commands
                if token in self._commands_by_mnemonic:
                    self.parse_command(token)
                    continue
                # Directives
                if token == ".":
                    self.parse_directive()
                    continue
                # The default is trying to parse a mark
                if not can_be_mark(token):
                    self.raise_unexpected_token(
                        token, "comment, command, directive or mark", token)
                self.parse_mark(token)
        except StopIteration:
            # One of the explicit next() calls ran out of tokens.
            raise ParsingError("Unexpected EOF while parsing.")

        for mark, mark_data in self._marks.items():
            if mark_data["target"] < 0:
                raise ParsingError("Mark {} undefined. Referenced in lines: {}".format(
                    mark, mark_data["references"]))

        # Replace the placeholders left behind by forward references.
        self._code_objects = [
            self._marks[c]["target"] if c in self._marks else c
            for c in self._code_objects
        ]

    def bindump(self, file_):
        """
        Write the program to ``file_``: first the number of words, then
        every word, each converted with ``int16_2_bytes``.

        Raises ``ValueError`` if the number of words does not fit in 16 bits.
        """
        # FIXME:
        # Make this work for tons of data.
        # Or is that necessary?
        # TODO:
        # Figure out whether/what improvements are necessary here
        word_total = len(self._code_objects)
        # Check the size before converting it, so an oversized program is
        # always reported by this error.
        if word_total.bit_length() > 16:
            raise ValueError("Program size excceeds 2^16.")
        file_.write(int16_2_bytes(word_total))
        for word in self._code_objects:
            file_.write(int16_2_bytes(word))

    def parse_mark(self, token):
        """
        Parse the rest of a mark definition (``:`` followed by a newline)
        and record the current word count as the target of mark ``token``.

        Raises ``ParsingError`` if the syntax is wrong or if the mark has
        already been defined.
        """
        should_be_colon = next(self._tokenizer)
        if should_be_colon != ":":
            self.raise_unexpected_token(token, "':'", should_be_colon)
        should_be_newline = next(self._tokenizer)
        if should_be_newline != "\n":
            self.raise_unexpected_token(token + ":", "'\\n'", should_be_newline)

        if token in self._marks:
            # A target of -1 means the mark was only referenced so far.
            if self._marks[token]["target"] != -1:
                raise ParsingError(
                    "Error in line {} column {} mark already defined: '{}'. Previous was in line {}.".format(
                        self._file_context._line,
                        self._file_context._column,
                        token,
                        self._marks[token]["target_line"]))
            self._marks[token]["target"] = self._word_count
            self._marks[token]["target_line"] = self._file_context._line
        else:
            self._marks[token] = {
                "target": self._word_count,
                "target_line": self._file_context._line,
                "references": [],
            }

    def parse_directive(self):
        """
        Parse a directive; the leading ``.`` has already been consumed.

        The directive object's ``parse`` method is called with this
        assembler and the tokenizer; the words it returns are appended to
        the program.
        """
        should_be_name = next(self._tokenizer)
        if should_be_name not in self._directives:
            self.raise_unexpected_token(".", "directive name", should_be_name)

        should_be_whitespace = next(self._tokenizer)
        if should_be_whitespace not in WHITESPACE:
            self.raise_unexpected_token(should_be_name, "' '", should_be_whitespace)

        words = self._directives[should_be_name].parse(self, self._tokenizer)
        self._word_count += len(words)
        self._code_objects.extend(words)

        should_be_newline = next(self._tokenizer)
        if should_be_newline != "\n":
            self.raise_unexpected_token(".", "newline", should_be_newline)

    def parse_command(self, cmd):
        """
        Parse the arguments of command ``cmd`` and emit its words.

        The first argument is combined with the opcode into the first
        word; every further argument is emitted as a word of its own.

        Raises ``ParsingError`` on malformed input.
        """
        argument_definitions = self._commands_by_mnemonic[cmd]["args"]

        # We have no arguments
        if not argument_definitions:
            self._code_objects.append(self._opcodes[cmd])
            self._word_count += 1
            token = next(self._tokenizer)
            if token != "\n":
                self.raise_unexpected_token(cmd, "newline", token)
            return

        # Small argument must be treated separately
        should_be_whitespace = next(self._tokenizer)
        if should_be_whitespace not in WHITESPACE or should_be_whitespace == "\n":
            self.raise_unexpected_token(cmd, "' '", should_be_whitespace)

        should_be_an_argument = next(self._tokenizer)
        argument = self.check_and_convert_argument(
            cmd, should_be_an_argument, argument_definitions[0])

        # An unresolved mark comes back as its name. It cannot be combined
        # with the opcode, so report it instead of failing with a TypeError.
        if argument in self._marks:
            raise ParsingError(
                "Error in line {} column {} after '{}': mark '{}' must be defined before it is used as first argument".format(
                    self._file_context._line,
                    self._file_context._column,
                    cmd,
                    argument))

        self._word_count += 1
        # NOTE:
        # The Small Argument is stored within the first word (!)
        self._code_objects.append(self._opcodes[cmd] | (argument & 0xffff))

        # All the 16bit arguments
        for argument_definition in argument_definitions[1:]:
            should_be_comma = next(self._tokenizer)
            if should_be_comma != ",":
                self.raise_unexpected_token(cmd, "','", should_be_comma)
            self._word_count += 1
            self._code_objects.append(
                self.check_and_convert_argument(
                    cmd, next(self._tokenizer), argument_definition))

        should_be_newline = next(self._tokenizer)
        if should_be_newline != "\n":
            self.raise_unexpected_token(cmd, "newline", should_be_newline)

    def raise_unexpected_token(self, after, expected, got):
        """Raise a ``ParsingError`` describing an unexpected token at the current position."""
        raise ParsingError("Error in line {} column {} after '{}': expected {}, got '{}'".format(
            self._file_context._line,
            self._file_context._column,
            after,
            expected,
            got))

    def raise_invalid_address(self, after, memtype, maxval, got):
        """Raise a ``ParsingError`` describing an out-of-range value at the current position."""
        raise ParsingError(
            "Error in line {} column {} after '{}': value {} is invalid for {} (max is {})".format(
                self._file_context._line,
                self._file_context._column,
                after,
                got,
                memtype,
                maxval))

    def _reference_mark(self, mark):
        """
        Record a reference to ``mark`` in the current line.

        Returns the mark's target if it is already known, otherwise the
        mark name itself as a placeholder that ``parse`` resolves later.
        """
        line = self._file_context._line
        mark_data = self._marks.get(mark)
        # The Mark has not appeared before.
        if mark_data is None:
            self._marks[mark] = {
                "target": -1,
                "target_line": 0,
                "references": [line],
            }
            return mark
        mark_data["references"].append(line)
        # The target is already known. Insert it now.
        if mark_data["target"] != -1:
            return mark_data["target"]
        # The target is unknown.
        return mark

    def check_and_convert_argument(self, cmd, argument, argument_definition):
        """
        Validate the token ``argument`` against ``argument_definition`` and
        convert it.

        Returns an integer, or the mark name when the argument is a mark
        whose target is not known yet.

        Raises ``ParsingError`` if the token has the wrong form or its
        value is out of range.
        """
        if argument_definition == "register":
            if not argument.startswith("r"):
                self.raise_unexpected_token(cmd, "register name", argument)
            try:
                register_offset = int(argument[1:])
            except ValueError:
                self.raise_unexpected_token(cmd, "register name", argument)
            if (register_offset > self._memory_definition["registers"]
                    or register_offset < 0):
                self.raise_invalid_address(
                    cmd,
                    "register",
                    self._memory_definition["registers"],
                    register_offset)
            return register_offset

        if argument_definition == "memory":
            if not can_convert_to_int(argument):
                self.raise_unexpected_token(cmd, "integer address", argument)
            argument = autoint(argument)
            if argument < 0 or argument > self._memory_definition["memory"]:
                self.raise_invalid_address(
                    cmd,
                    "memory",
                    self._memory_definition["memory"],
                    argument)
            return argument

        if argument_definition == "program_memory":
            # Non-integer Argument.
            if not can_convert_to_int(argument):
                # Just nonsense.
                if not can_be_mark(argument):
                    self.raise_unexpected_token(cmd, "integer address or mark", argument)
                return self._reference_mark(argument)
            # Integer argument.
            argument = autoint(argument)
            if argument < 0 or argument > self._memory_definition["program_memory"]:
                self.raise_invalid_address(
                    cmd,
                    "program_memory",
                    self._memory_definition["program_memory"],
                    argument)
            return argument

        # This is direct input (default).
        # Integer
        if can_convert_to_int(argument):
            return autoint(argument)
        # This is nonsense.
        if not can_be_mark(argument):
            self.raise_unexpected_token(cmd, "integer, char or mark", argument)
        # It is a Mark.
        return self._reference_mark(argument)