fixed some bugs in the assembler

This commit is contained in:
Daniel Knüttel 2018-10-09 15:55:42 +02:00
parent c5bc1c92d1
commit f7cb729e4d
4 changed files with 112 additions and 23 deletions

View File

@ -108,24 +108,20 @@ class Assembler(object):
if(token in self._marks):
if(self._marks[token]["target"] != -1):
raise ParsingError("Error in line {} column {} mark already defined: '{}'. Previous was in line {}.".format(
self._line
self._file_context._line
, self._column
, token
, self._marks[token]["target_line"]))
self._marks[token]["target"] = self._word_count
self._marks[token]["target_line"] = self._line
self._marks[token]["target_line"] = self._file_context._line
else:
self._marks[token] = {
"target": self._word_count
, "target_line": self._line
, "target_line": self._file_context._line
, "references": []
}
should_be_newline = next(self._tokenizer)
if(should_be_newline != "\n"):
self.raise_unexpected_token(token, "newline", should_be_newline)
def parse_directive(self):
should_be_name = next(self._tokenizer)
@ -133,6 +129,10 @@ class Assembler(object):
if(not should_be_name in self._directives):
self.raise_unexpected_token(".", "directive name", should_be_name)
should_be_whitespace = next(self._tokenizer)
if(not should_be_whitespace in WHITESPACE):
self.raise_unexpected_token(should_be_name, "' '", should_be_whitespace)
words = self._directives[should_be_name].parse(self, self._tokenizer)
self._word_count += len(words)
@ -146,16 +146,16 @@ class Assembler(object):
def parse_command(self, cmd):
self._code_objects.append(self._opcodes[cmd])
self._word_count += 1
# We have no arguments
if(not self._commands_by_mnemonic[cmd]["args"]):
self._code_objects.append(self._opcodes[cmd])
self._word_count += 1
token = next(self._tokenizer)
if(token != "\n"):
self.raise_unexpected_token(cmd, "newline", token)
return
# Small argument must be treated separately
should_be_whitespace = next(self._tokenizer)
if(should_be_whitespace not in WHITESPACE
or should_be_whitespace == "\n"):
@ -166,9 +166,12 @@ class Assembler(object):
, should_be_an_argument
, self._commands_by_mnemonic[cmd]["args"][0])
self._word_count += 1
self._code_objects.append(argument)
# NOTE:
# The Small Argument is stored within the first word (!)
self._code_objects.append(self._opcodes[cmd] | (argument & 0xffff))
# All the 16bit arguments
for argument in self._commands_by_mnemonic[cmd]["args"][1:]:
should_be_comma = next(self._tokenizer)
if(should_be_comma != ","):
@ -239,22 +242,31 @@ class Assembler(object):
return argument
if(argument_definition == "program_memory"):
# Non-integer Argument.
if(not can_convert_to_int(argument)):
# Just nonsense.
if(not can_be_mark(argument)):
self.raise_unexpected_token(cmd, "integer address or mark", argument)
# The Mark has appeared before.
if(argument in self._marks):
self._marks[argument]["references"].append(self._line)
# Add this line to the references.
self._marks[argument]["references"].append(self._file_context._line)
# The target is already known. Insert it now.
if(self._marks[argument]["target"] != -1):
return self._marks[argument]["target"]
# The target is unknown.
return argument
# The Mark has not appeared before.
self._marks[argument] = {
"target": -1
, "target_line": 0
, "references": [self._line]
, "references": [self._file_context._line]
}
# Target is obviously unknown.
return argument
# Integer argument.
argument = autoint(argument)
if(argument < 0 or argument > self._memory_definition["program_memory"]):
@ -264,17 +276,29 @@ class Assembler(object):
, argument)
return argument
# This is direct input (default).
# Integer
if(can_convert_to_int(argument)):
return autoint(argument)
# This is nonsense.
if(not can_be_mark(argument)):
self.raise_unexpected_token(cmd, "integer, char or mark", argument)
if(argument in self._marks and self._marks[argument] >= 0):
return self._marks[argument]
self._marks[argument] = -1
# It is a Mark.
if(argument in self._marks):
if(self._marks[argument]["target"] >= 0):
self._marks[argument]["references"].append(self._file_context._line)
return self._marks[argument]["target"]
self._marks[argument]["references"].append(self._file_context._line)
return argument
self._marks[argument] = {
"target": -1
, "target_line": 0
, "references": [self._file_context._line]
}
return argument

View File

@ -5,6 +5,8 @@ Directives for explicitly modifying the program memory.
from abc import ABC, abstractmethod
from collections import deque
from .util import can_convert_to_int, autoint
class AbstractDirective(ABC):
@abstractmethod
def parse(self, assembler, tokenizer):

View File

@ -52,3 +52,20 @@ def test_tokenize_3():
, "string", ":", "\n"
, ".", "set", " ", "[", "'h'", ",", "'e'", ",", "'l'", ",", "'l'", ",", "'o'", "]", "\n"
]
def test_tokenize_4():
    """Tokenize two ``ldi`` commands with a mark definition between them.

    The expected token list pins three details of the tokenizer output:
    no token is produced for the leading blank line of the input, the
    space following each comma is not reported as a token, and the mark
    is split into its name, a ``":"`` and a newline.
    """
    source = StringIO('''
ldi r0, 0xfefe
test_mark:
ldi r1, 0xefef
''')
    tokens = list(Tokenizer(FileContext(source)))

    expected_tokens = [
        "ldi", " ", "r0", ",", "0xfefe", "\n",
        "test_mark", ":", "\n",
        "ldi", " ", "r1", ",", "0xefef", "\n",
    ]
    assert tokens == expected_tokens

View File

@ -17,6 +17,52 @@ def test_commands(basic_machine_definition):
assembler = Assembler(data, memory_definition, command_defintion, {})
assembler.parse()
assert assembler._code_objects == [32704, 0, 0xfe
, 32704, 1, 0xfe
, 40896, 0, 1]
assert assembler._code_objects == [32704 | 0, 0xfe
, 32704 | 1, 0xfe
, 40896 | 0, 1]
def test_mark(basic_machine_definition):
    """Assemble a program that references a mark before it is defined.

    ``test_mark`` is used as the second argument of the first ``ldi`` and
    only defined two commands later.  In the expected output the reference
    is replaced by ``4``: the two ``ldi`` commands ahead of the mark account
    for four words in the expected list.  The first argument of every
    command is expected to be OR-ed into the opcode word.
    """
    memory_layout, commands = basic_machine_definition
    program = StringIO('''
ldi r0, test_mark
ldi r1, 0xfe
test_mark:
add r0, r1
''')

    assembler = Assembler(program, memory_layout, commands, {})
    assembler.parse()

    expected_words = [
        32704 | 0, 4,       # ldi r0, test_mark
        32704 | 1, 0xfe,    # ldi r1, 0xfe
        40896 | 0, 1,       # add r0, r1
    ]
    assert assembler._code_objects == expected_words
def test_set_directive(basic_machine_definition):
    """Assemble a ``.set`` directive that follows a mark definition.

    The directive's bracketed list spans several input lines and mixes
    hexadecimal, decimal, binary and character literals (including the
    escaped ``'\\n'``).  The expected output holds one word per list entry,
    with characters given as their ``ord`` value, placed after the words of
    the two preceding ``ldi`` commands.  The reference to ``test_mark`` is
    expected to appear as ``4``.
    """
    memory_layout, commands = basic_machine_definition
    program = StringIO('''
ldi r0, test_mark
ldi r1, 0xfe
test_mark:
.set [0xfe, 0xef,
10, 20,
'a', 'b',
'\\n', 0b10]
''')

    assembler = Assembler(program, memory_layout, commands, {})
    assembler.parse()

    expected_words = [
        32704 | 0, 4,           # ldi r0, test_mark
        32704 | 1, 0xfe,        # ldi r1, 0xfe
        0xfe, 0xef,             # .set payload starts here
        10, 20,
        ord("a"), ord("b"),
        ord("\n"), 0b10,
    ]
    assert assembler._code_objects == expected_words