From f7cb729e4ddf54d007bfbdf4c9cc24aeacc79a5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= Date: Tue, 9 Oct 2018 15:55:42 +0200 Subject: [PATCH] fixed some bugs in the assembler --- assembler/assembler/assembler.py | 64 ++++++++++++++++-------- assembler/assembler/directives.py | 2 + assembler/test/test_011_tokenize.py | 17 +++++++ assembler/test/test_020_basic_parsing.py | 52 +++++++++++++++++-- 4 files changed, 112 insertions(+), 23 deletions(-) diff --git a/assembler/assembler/assembler.py b/assembler/assembler/assembler.py index 53f556c..f4dbbe6 100644 --- a/assembler/assembler/assembler.py +++ b/assembler/assembler/assembler.py @@ -108,24 +108,20 @@ class Assembler(object): if(token in self._marks): if(self._marks[token]["target"] != -1): raise ParsingError("Error in line {} column {} mark already defined: '{}'. Previous was in line {}.".format( - self._line + self._file_context._line , self._column , token , self._marks[token]["target_line"])) self._marks[token]["target"] = self._word_count - self._marks[token]["target_line"] = self._line + self._marks[token]["target_line"] = self._file_context._line else: self._marks[token] = { "target": self._word_count - , "target_line": self._line + , "target_line": self._file_context._line , "references": [] } - should_be_newline = next(self._tokenizer) - if(should_be_newline != "\n"): - self.raise_unexpected_token(token, "newline", should_be_newline) - def parse_directive(self): should_be_name = next(self._tokenizer) @@ -133,6 +129,10 @@ class Assembler(object): if(not should_be_name in self._directives): self.raise_unexpected_token(".", "directive name", should_be_name) + should_be_whitespace = next(self._tokenizer) + if(not should_be_whitespace in WHITESPACE): + self.raise_unexpected_token(should_be_name, "' '", should_be_whitespace) + words = self._directives[should_be_name].parse(self, self._tokenizer) self._word_count += len(words) @@ -146,16 +146,16 @@ class Assembler(object): def parse_command(self, cmd): - - self._code_objects.append(self._opcodes[cmd]) - self._word_count += 1 - + # We have no arguments if(not self._commands_by_mnemonic[cmd]["args"]): + self._code_objects.append(self._opcodes[cmd]) + self._word_count += 1 token = next(self._tokenizer) if(token != "\n"): self.raise_unexpected_token(cmd, "newline", token) return + # Small argument must be treated separately should_be_whitespace = next(self._tokenizer) if(should_be_whitespace not in WHITESPACE or should_be_whitespace == "\n"): @@ -166,9 +166,12 @@ class Assembler(object): , should_be_an_argument , self._commands_by_mnemonic[cmd]["args"][0]) self._word_count += 1 - self._code_objects.append(argument) + # NOTE: + # The Small Argument is stored within the first word (!) + self._code_objects.append(self._opcodes[cmd] | (argument & 0xffff)) + # All the 16bit arguments for argument in self._commands_by_mnemonic[cmd]["args"][1:]: should_be_comma = next(self._tokenizer) if(should_be_comma != ","): @@ -239,22 +242,31 @@ class Assembler(object): return argument if(argument_definition == "program_memory"): + # Non-integer Argument. if(not can_convert_to_int(argument)): + # Just nonsense. if(not can_be_mark(argument)): self.raise_unexpected_token(cmd, "integer address or mark", argument) + # The Mark has appeared before. if(argument in self._marks): - self._marks[argument]["references"].append(self._line) + # Add this line to the references. + self._marks[argument]["references"].append(self._file_context._line) + # The target is already known. Insert it now. if(self._marks[argument]["target"] != -1): return self._marks[argument]["target"] + # The target is unknown. return argument + # The Mark has not appeared before. self._marks[argument] = { "target": -1 , "target_line": 0 - , "references": [self._line] + , "references": [self._file_context._line] } + # Target is obviously unknown. return argument + # Integer argument. argument = autoint(argument) if(argument < 0 or argument > self._memory_definition["program_memory"]): @@ -264,17 +276,29 @@ class Assembler(object): , argument) return argument + # This is direct input (default). + # Integer if(can_convert_to_int(argument)): return autoint(argument) + # This is nonsense. if(not can_be_mark(argument)): self.raise_unexpected_token(cmd, "integer, char or mark", argument) - if(argument in self._marks and self._marks[argument] >= 0): - return self._marks[argument] - self._marks[argument] = -1 + + # It is a Mark. + if(argument in self._marks): + if(self._marks[argument]["target"] >= 0): + self._marks[argument]["references"].append(self._file_context._line) + return self._marks[argument]["target"] + self._marks[argument]["references"].append(self._file_context._line) + return argument + + self._marks[argument] = { + "target": -1 + , "target_line": 0 + , "references": [self._file_context._line] + } + return argument - - - diff --git a/assembler/assembler/directives.py b/assembler/assembler/directives.py index 24b6c99..a03383d 100644 --- a/assembler/assembler/directives.py +++ b/assembler/assembler/directives.py @@ -5,6 +5,8 @@ Directives for explicitly modifying the program memory. from abc import ABC, abstractmethod from collections import deque +from .util import can_convert_to_int, autoint + class AbstractDirective(ABC): @abstractmethod def parse(self, assembler, tokenizer): diff --git a/assembler/test/test_011_tokenize.py b/assembler/test/test_011_tokenize.py index 2a2955c..590d13f 100644 --- a/assembler/test/test_011_tokenize.py +++ b/assembler/test/test_011_tokenize.py @@ -52,3 +52,20 @@ def test_tokenize_3(): , "string", ":", "\n" , ".", "set", " ", "[", "'h'", ",", "'e'", ",", "'l'", ",", "'l'", ",", "'o'", "]", "\n" ] + +def test_tokenize_4(): + data = ''' + ldi r0, 0xfefe + test_mark: + ldi r1, 0xefef + ''' + data = StringIO(data) + tokenizer = Tokenizer(FileContext(data)) + + result = list(tokenizer) + + assert result == [ + "ldi", " ", "r0", ",", "0xfefe", "\n" + , "test_mark", ":", "\n" + , "ldi", " ", "r1", ",", "0xefef", "\n" + ] diff --git a/assembler/test/test_020_basic_parsing.py b/assembler/test/test_020_basic_parsing.py index 9e89eb9..0b1a569 100644 --- a/assembler/test/test_020_basic_parsing.py +++ b/assembler/test/test_020_basic_parsing.py @@ -17,6 +17,52 @@ def test_commands(basic_machine_definition): assembler = Assembler(data, memory_definition, command_defintion, {}) assembler.parse() - assert assembler._code_objects == [32704, 0, 0xfe - , 32704, 1, 0xfe - , 40896, 0, 1] + assert assembler._code_objects == [32704 | 0, 0xfe + , 32704 | 1, 0xfe + , 40896 | 0, 1] + +def test_mark(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi r0, test_mark + ldi r1, 0xfe + test_mark: + add r0, r1 + ''' + ) + assembler = Assembler(data, memory_definition, command_defintion, {}) + assembler.parse() + + + assert assembler._code_objects == [32704 | 0, 4 + , 32704 | 1, 0xfe + , 40896 | 0, 1] + + +def test_set_directive(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi r0, test_mark + ldi r1, 0xfe + test_mark: + .set [0xfe, 0xef, + 10, 20, + 'a', 'b', + '\\n', 0b10] + ''' + ) + assembler = Assembler(data, memory_definition, command_defintion, {}) + assembler.parse() + + + assert assembler._code_objects == [32704 | 0, 4 + , 32704 | 1, 0xfe + , 0xfe, 0xef + , 10, 20 + , ord("a"), ord("b") + , ord("\n"), 0b10] +