From 481c4ff5ba6ca301229160e7d4b8de40dec20288 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= <daniel.knuettel@daknuett.eu>
Date: Tue, 9 Oct 2018 11:43:27 +0200
Subject: [PATCH 1/9] added language and bytecode definition

---
 bytecode.pdf | 244 ++++++++++++++
 bytecode.rst |  46 +++
 language.pdf | 881 +++++++++++++++++++++++++++++++++++++++++++++++++++
 language.rst | 180 +++++++++++
 4 files changed, 1351 insertions(+)
 create mode 100644 bytecode.pdf
 create mode 100644 bytecode.rst
 create mode 100644 language.pdf
 create mode 100644 language.rst

diff --git a/bytecode.pdf b/bytecode.pdf
new file mode 100644
index 0000000..bdd4dda
--- /dev/null
+++ b/bytecode.pdf
@@ -0,0 +1,244 @@
+%PDF-1.4
+%“Ś‹ž ReportLab Generated PDF document http://www.reportlab.com
+1 0 obj
+<< /F1 2 0 R /F2 3 0 R /F3 10 0 R >>
+endobj
+2 0 obj
+<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
+endobj
+3 0 obj
+<< /BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font >>
+endobj
+4 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 13 0 R /XYZ 62.69291 636.0236 0 ] /Rect [ 62.69291 687.0236 178.2829 699.0236 ] /Subtype /Link /Type /Annot >>
+endobj
+5 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 13 0 R /XYZ 62.69291 636.0236 0 ] /Rect [ 527.0227 687.7736 532.5827 699.7736 ] /Subtype /Link /Type /Annot >>
+endobj
+6 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 13 0 R /XYZ 62.69291 525.0236 0 ] /Rect [ 62.69291 669.0236 197.7229 681.0236 ] /Subtype /Link /Type /Annot >>
+endobj
+7 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 13 0 R /XYZ 62.69291 525.0236 0 ] /Rect [ 527.0227 669.7736 532.5827 681.7736 ] /Subtype /Link /Type /Annot >>
+endobj
+8 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 13 0 R /XYZ 62.69291 414.0236 0 ] /Rect [ 62.69291 651.0236 213.8229 663.0236 ] /Subtype /Link /Type /Annot >>
+endobj
+9 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 13 0 R /XYZ 62.69291 414.0236 0 ] /Rect [ 527.0227 651.7736 532.5827 663.7736 ] /Subtype /Link /Type /Annot >>
+endobj
+10 0 obj
+<< /BaseFont /Courier /Encoding /WinAnsiEncoding /Name /F3 /Subtype /Type1 /Type /Font >>
+endobj
+11 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 13 0 R /XYZ 62.69291 528.5236 0 ] /Rect [ 515.3527 333.0236 532.1177 345.0236 ] /Subtype /Link /Type /Annot >>
+endobj
+12 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 13 0 R /XYZ 62.69291 528.5236 0 ] /Rect [ 62.69291 321.0236 168.2829 333.0236 ] /Subtype /Link /Type /Annot >>
+endobj
+13 0 obj
+<< /Annots [ 4 0 R 5 0 R 6 0 R 7 0 R 8 0 R 9 0 R 11 0 R 12 0 R ] /Contents 21 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 20 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 
+  /Trans <<  >> /Type /Page >>
+endobj
+14 0 obj
+<< /Outlines 16 0 R /PageLabels 22 0 R /PageMode /UseNone /Pages 20 0 R /Type /Catalog >>
+endobj
+15 0 obj
+<< /Author () /CreationDate (D:20181006214730+00'00') /Creator (\(unspecified\)) /Keywords () /ModDate (D:20181006214730+00'00') /Producer (ReportLab PDF Library - www.reportlab.com) 
+  /Subject (\(unspecified\)) /Title (BCI Bytecode) /Trapped /False >>
+endobj
+16 0 obj
+<< /Count 3 /First 17 0 R /Last 19 0 R /Type /Outlines >>
+endobj
+17 0 obj
+<< /Dest [ 13 0 R /XYZ 62.69291 636.0236 0 ] /Next 18 0 R /Parent 16 0 R /Title (Assembly and Bytecode) >>
+endobj
+18 0 obj
+<< /Dest [ 13 0 R /XYZ 62.69291 525.0236 0 ] /Next 19 0 R /Parent 16 0 R /Prev 17 0 R /Title (The Dynamic Instruction Set) >>
+endobj
+19 0 obj
+<< /Dest [ 13 0 R /XYZ 62.69291 414.0236 0 ] /Parent 16 0 R /Prev 18 0 R /Title (Byte Code Interpreter Definition) >>
+endobj
+20 0 obj
+<< /Count 1 /Kids [ 13 0 R ] /Type /Pages >>
+endobj
+21 0 obj
+<< /Length 3790 >>
+stream
+1 0 0 1 0 0 cm  BT /F1 12 Tf 14.4 TL ET
+q
+1 0 0 1 62.69291 741.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 4 Tm /F2 20 Tf 24 TL 169.9349 0 Td (BCI Bytecode) Tj T* -169.9349 0 Td ET
+Q
+Q
+q
+1 0 0 1 62.69291 708.0236 cm
+q
+BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (Contents) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 648.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+q
+1 0 0 1 0 39 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F2 10 Tf 0 0 .501961 rg (Assembly and Bytecode) Tj T* ET
+Q
+Q
+q
+1 0 0 1 397.8898 39 cm
+q
+0 0 .501961 rg
+0 0 .501961 RG
+BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL 66.44 0 Td (1) Tj T* -66.44 0 Td ET
+Q
+Q
+q
+1 0 0 1 0 21 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F2 10 Tf 0 0 .501961 rg (The Dynamic Instruction Set) Tj T* ET
+Q
+Q
+q
+1 0 0 1 397.8898 21 cm
+q
+0 0 .501961 rg
+0 0 .501961 RG
+BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL 66.44 0 Td (1) Tj T* -66.44 0 Td ET
+Q
+Q
+q
+1 0 0 1 0 3 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F2 10 Tf 0 0 .501961 rg (Byte Code Interpreter Definition) Tj T* ET
+Q
+Q
+q
+1 0 0 1 397.8898 3 cm
+q
+0 0 .501961 rg
+0 0 .501961 RG
+BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL 66.44 0 Td (1) Tj T* -66.44 0 Td ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 615.0236 cm
+q
+BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (Assembly and Bytecode) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 585.0236 cm
+q
+BT 1 0 0 1 0 14 Tm 3.309982 Tw 12 TL /F1 10 Tf 0 0 0 rg (Unlike machine code \(and other bytecode\) BCI bytecode has dynamic opcodes. This means that) Tj T* 0 Tw (bytecode is ) Tj /F2 10 Tf (not ) Tj /F1 10 Tf (necessarily portable.) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 567.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (This makes sense since the BCI instruction set can be extended for applications.) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 537.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL .742619 Tw (If one wants to share code that should run on any BCI it should be shared as assembly. The assembler) Tj T* 0 Tw (will then use the local interpreter definition and generate suiting bytecode.) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 504.0236 cm
+q
+BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (The Dynamic Instruction Set) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 474.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL 1.093876 Tw (The BCI comes with a set of prepared instructions. These are complete and provide a way to do basic) Tj T* 0 Tw (operations like routines, loops and branching.) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 444.0236 cm
+q
+BT 1 0 0 1 0 14 Tm .743672 Tw 12 TL /F1 10 Tf 0 0 0 rg (The methods are organized in a binary tree internally. To build the tree in a comfortable way there is an) Tj T* 0 Tw (autoinserter that can insert up to ) Tj /F3 10 Tf 0 0 0 rg (1023 ) Tj /F1 10 Tf 0 0 0 rg (methods into the tree.) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 426.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (The autoinserter creates the opcode basing on the order of the method that he inserts.) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 393.0236 cm
+q
+BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (Byte Code Interpreter Definition) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 351.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 26 Tm /F1 10 Tf 12 TL 1.843555 Tw (A Bytecode Interpreter Definition consists of two mayor parts: The memory definition that defines the) Tj T* 0 Tw 1.896098 Tw (number of data registers \(up to 63\), the number of memory words \(up to 65535\) and the number of) Tj T* 0 Tw (program memory words \(up to 65535\).) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 321.0236 cm
+q
+BT 1 0 0 1 0 14 Tm .464985 Tw 12 TL /F1 10 Tf 0 0 0 rg (The second part defines the commands. The definition contains bot the order of the commands \(see ) Tj 0 0 .501961 rg (The) Tj T* 0 Tw (Dynamic Instruction Set) Tj 0 0 0 rg (\) and the required arguments.) Tj T* ET
+Q
+Q
+ 
+endstream
+endobj
+22 0 obj
+<< /Nums [ 0 23 0 R ] >>
+endobj
+23 0 obj
+<< /S /D /St 1 >>
+endobj
+xref
+0 24
+0000000000 65535 f
+0000000075 00000 n
+0000000130 00000 n
+0000000240 00000 n
+0000000355 00000 n
+0000000526 00000 n
+0000000697 00000 n
+0000000868 00000 n
+0000001039 00000 n
+0000001210 00000 n
+0000001381 00000 n
+0000001490 00000 n
+0000001662 00000 n
+0000001834 00000 n
+0000002106 00000 n
+0000002215 00000 n
+0000002489 00000 n
+0000002566 00000 n
+0000002692 00000 n
+0000002837 00000 n
+0000002974 00000 n
+0000003038 00000 n
+0000006885 00000 n
+0000006929 00000 n
+trailer
+<< /ID 
+ % ReportLab generated PDF document -- digest (http://www.reportlab.com)
+ [(\003\236V\37247z\240'\2312!\276\204\362\214) (\003\236V\37247z\240'\2312!\276\204\362\214)]
+ /Info 15 0 R /Root 14 0 R /Size 24 >>
+startxref
+6966
+%%EOF
diff --git a/bytecode.rst b/bytecode.rst
new file mode 100644
index 0000000..ea2f634
--- /dev/null
+++ b/bytecode.rst
@@ -0,0 +1,46 @@
+BCI Bytecode 
+*************
+
+.. contents::
+
+Assembly and Bytecode
+=====================
+
+Unlike machine code (and other bytecode) BCI bytecode has
+dynamic opcodes. This means that bytecode is **not**
+necessarily portable. 
+
+This makes sense since the BCI instruction set can be
+extended for applications.
+
+If one wants to share code that should run on any BCI it
+should be shared as assembly. The assembler will then use
+the local interpreter definition and generate suitable
+bytecode.
+
+The Dynamic Instruction Set
+===========================
+
+The BCI comes with a set of prepared instructions. These are
+complete and provide a way to do basic operations like
+routines, loops and branching.
+
+The methods are organized in a binary tree internally. To
+build the tree in a comfortable way there is an autoinserter
+that can insert up to ``1023`` methods into the tree.
+
+The autoinserter creates each opcode based on the order in
+which the methods are inserted.
+
+Byte Code Interpreter Definition
+================================
+
+A Bytecode Interpreter Definition consists of two major
+parts: The memory definition that defines the number of data
+registers (up to 63), the number of memory words (up to
+65535) and the number of program memory words (up to 65535).
+
+The second part defines the commands. The definition
+contains both the order of the commands (see `The Dynamic
+Instruction Set`_) and the required arguments.
+
diff --git a/language.pdf b/language.pdf
new file mode 100644
index 0000000..0c9f798
--- /dev/null
+++ b/language.pdf
@@ -0,0 +1,881 @@
+%PDF-1.4
+%“Ś‹ž ReportLab Generated PDF document http://www.reportlab.com
+1 0 obj
+<< /F1 2 0 R /F2 3 0 R /F3 14 0 R >>
+endobj
+2 0 obj
+<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
+endobj
+3 0 obj
+<< /BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font >>
+endobj
+4 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 15 0 R /XYZ 62.69291 600.0236 0 ] /Rect [ 62.69291 687.0236 299.9529 699.0236 ] /Subtype /Link /Type /Annot >>
+endobj
+5 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 15 0 R /XYZ 62.69291 600.0236 0 ] /Rect [ 527.0227 687.7736 532.5827 699.7736 ] /Subtype /Link /Type /Annot >>
+endobj
+6 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 15 0 R /XYZ 62.69291 224.2236 0 ] /Rect [ 62.69291 669.0236 154.3629 681.0236 ] /Subtype /Link /Type /Annot >>
+endobj
+7 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 15 0 R /XYZ 62.69291 224.2236 0 ] /Rect [ 527.0227 669.7736 532.5827 681.7736 ] /Subtype /Link /Type /Annot >>
+endobj
+8 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 16 0 R /XYZ 62.69291 380.0236 0 ] /Rect [ 62.69291 651.0236 114.3629 663.0236 ] /Subtype /Link /Type /Annot >>
+endobj
+9 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 16 0 R /XYZ 62.69291 380.0236 0 ] /Rect [ 527.0227 651.7736 532.5827 663.7736 ] /Subtype /Link /Type /Annot >>
+endobj
+10 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 16 0 R /XYZ 62.69291 329.0236 0 ] /Rect [ 62.69291 633.0236 91.59291 645.0236 ] /Subtype /Link /Type /Annot >>
+endobj
+11 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 16 0 R /XYZ 62.69291 329.0236 0 ] /Rect [ 527.0227 633.7736 532.5827 645.7736 ] /Subtype /Link /Type /Annot >>
+endobj
+12 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 18 0 R /XYZ 62.69291 765.0236 0 ] /Rect [ 62.69291 615.0236 118.2529 627.0236 ] /Subtype /Link /Type /Annot >>
+endobj
+13 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 18 0 R /XYZ 62.69291 765.0236 0 ] /Rect [ 527.0227 615.7736 532.5827 627.7736 ] /Subtype /Link /Type /Annot >>
+endobj
+14 0 obj
+<< /BaseFont /Courier /Encoding /WinAnsiEncoding /Name /F3 /Subtype /Type1 /Type /Font >>
+endobj
+15 0 obj
+<< /Annots [ 4 0 R 5 0 R 6 0 R 7 0 R 8 0 R 9 0 R 10 0 R 11 0 R 12 0 R 13 0 R ] /Contents 28 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 27 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 
+  /Trans <<  >> /Type /Page >>
+endobj
+16 0 obj
+<< /Contents 29 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 27 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 /Trans <<  >> 
+  /Type /Page >>
+endobj
+17 0 obj
+<< /Border [ 0 0 0 ] /Contents () /Dest [ 16 0 R /XYZ 62.69291 332.5236 0 ] /Rect [ 62.69291 680.7736 91.59291 692.7736 ] /Subtype /Link /Type /Annot >>
+endobj
+18 0 obj
+<< /Annots [ 17 0 R ] /Contents 30 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 27 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 
+  /Trans <<  >> /Type /Page >>
+endobj
+19 0 obj
+<< /Outlines 21 0 R /PageLabels 31 0 R /PageMode /UseNone /Pages 27 0 R /Type /Catalog >>
+endobj
+20 0 obj
+<< /Author () /CreationDate (D:20181006214702+00'00') /Creator (\(unspecified\)) /Keywords () /ModDate (D:20181006214702+00'00') /Producer (ReportLab PDF Library - www.reportlab.com) 
+  /Subject (\(unspecified\)) /Title (BCI Assembly Language) /Trapped /False >>
+endobj
+21 0 obj
+<< /Count 5 /First 22 0 R /Last 26 0 R /Type /Outlines >>
+endobj
+22 0 obj
+<< /Dest [ 15 0 R /XYZ 62.69291 600.0236 0 ] /Next 23 0 R /Parent 21 0 R /Title (Commands, Small Arguments and Big Arguments) >>
+endobj
+23 0 obj
+<< /Dest [ 15 0 R /XYZ 62.69291 224.2236 0 ] /Next 24 0 R /Parent 21 0 R /Prev 22 0 R /Title (Built-In Commands) >>
+endobj
+24 0 obj
+<< /Dest [ 16 0 R /XYZ 62.69291 380.0236 0 ] /Next 25 0 R /Parent 21 0 R /Prev 23 0 R /Title (Comments) >>
+endobj
+25 0 obj
+<< /Dest [ 16 0 R /XYZ 62.69291 329.0236 0 ] /Next 26 0 R /Parent 21 0 R /Prev 24 0 R /Title (Marks) >>
+endobj
+26 0 obj
+<< /Dest [ 18 0 R /XYZ 62.69291 765.0236 0 ] /Parent 21 0 R /Prev 25 0 R /Title (Direct Input) >>
+endobj
+27 0 obj
+<< /Count 3 /Kids [ 15 0 R 16 0 R 18 0 R ] /Type /Pages >>
+endobj
+28 0 obj
+<< /Length 6815 >>
+stream
+1 0 0 1 0 0 cm  BT /F1 12 Tf 14.4 TL ET
+q
+1 0 0 1 62.69291 741.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 4 Tm /F2 20 Tf 24 TL 117.6949 0 Td (BCI Assembly Language) Tj T* -117.6949 0 Td ET
+Q
+Q
+q
+1 0 0 1 62.69291 708.0236 cm
+q
+BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (Contents) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 612.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+q
+1 0 0 1 0 75 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F2 10 Tf 0 0 .501961 rg (Commands, Small Arguments and Big Arguments) Tj T* ET
+Q
+Q
+q
+1 0 0 1 397.8898 75 cm
+q
+0 0 .501961 rg
+0 0 .501961 RG
+BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL 66.44 0 Td (1) Tj T* -66.44 0 Td ET
+Q
+Q
+q
+1 0 0 1 0 57 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F2 10 Tf 0 0 .501961 rg (Built-In Commands) Tj T* ET
+Q
+Q
+q
+1 0 0 1 397.8898 57 cm
+q
+0 0 .501961 rg
+0 0 .501961 RG
+BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL 66.44 0 Td (1) Tj T* -66.44 0 Td ET
+Q
+Q
+q
+1 0 0 1 0 39 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F2 10 Tf 0 0 .501961 rg (Comments) Tj T* ET
+Q
+Q
+q
+1 0 0 1 397.8898 39 cm
+q
+0 0 .501961 rg
+0 0 .501961 RG
+BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL 66.44 0 Td (2) Tj T* -66.44 0 Td ET
+Q
+Q
+q
+1 0 0 1 0 21 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F2 10 Tf 0 0 .501961 rg (Marks) Tj T* ET
+Q
+Q
+q
+1 0 0 1 397.8898 21 cm
+q
+0 0 .501961 rg
+0 0 .501961 RG
+BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL 66.44 0 Td (2) Tj T* -66.44 0 Td ET
+Q
+Q
+q
+1 0 0 1 0 3 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F2 10 Tf 0 0 .501961 rg (Direct Input) Tj T* ET
+Q
+Q
+q
+1 0 0 1 397.8898 3 cm
+q
+0 0 .501961 rg
+0 0 .501961 RG
+BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL 66.44 0 Td (3) Tj T* -66.44 0 Td ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 579.0236 cm
+q
+BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (Commands, Small Arguments and Big Arguments) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 549.0236 cm
+q
+BT 1 0 0 1 0 14 Tm .87311 Tw 12 TL /F1 10 Tf 0 0 0 rg (A command in BCI Assembly is a word starting with an alphabetic character \() Tj /F3 10 Tf 0 0 0 rg (a..zA..Z) Tj /F1 10 Tf 0 0 0 rg (\) following by a) Tj T* 0 Tw (sequence of alphanumeric characters \() Tj /F3 10 Tf 0 0 0 rg (a..zA..Z0..9) Tj /F1 10 Tf 0 0 0 rg (\). This word will be converted to a 10bit opcode.) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 519.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL 2.147882 Tw (Embedded in the 16bits of a word there is also a 6bit small argument. If a command has no small) Tj T* 0 Tw (argument these bits will be zeroed. In the assembly the command will be only one word, for example:) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 485.8236 cm
+q
+q
+1 0 0 1 0 0 cm
+q
+1 0 0 1 6.6 6.6 cm
+q
+.662745 .662745 .662745 RG
+.5 w
+.960784 .960784 .862745 rg
+n -6 -6 468.6898 24 re B*
+Q
+q
+0 0 0 rg
+BT 1 0 0 1 0 2 Tm /F3 10 Tf 12 TL (cli) Tj T* ET
+Q
+Q
+Q
+Q
+Q
+q
+1 0 0 1 62.69291 453.8236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL .345988 Tw (If the command has a small argument, the 6 bit will be filled with the small argument. In the assembly the) Tj T* 0 Tw (small argument is separated by one whitespace, for example:) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 420.6236 cm
+q
+q
+1 0 0 1 0 0 cm
+q
+1 0 0 1 6.6 6.6 cm
+q
+.662745 .662745 .662745 RG
+.5 w
+.960784 .960784 .862745 rg
+n -6 -6 468.6898 24 re B*
+Q
+q
+0 0 0 rg
+BT 1 0 0 1 0 2 Tm /F3 10 Tf 12 TL (inc r0) Tj T* ET
+Q
+Q
+Q
+Q
+Q
+q
+1 0 0 1 62.69291 376.6236 cm
+q
+BT 1 0 0 1 0 26 Tm .629431 Tw 12 TL /F1 10 Tf 0 0 0 rg (Any other arguments are stored in further words and have thus a width of 16bits. They are separated by) Tj T* 0 Tw 2.211751 Tw (commas \() Tj /F3 10 Tf 0 0 0 rg (,) Tj /F1 10 Tf 0 0 0 rg (\) from both the first and any other arguments. It is recommended to only add one more) Tj T* 0 Tw (argument.) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 358.6236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Example for one big argument:) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 325.4236 cm
+q
+q
+1 0 0 1 0 0 cm
+q
+1 0 0 1 6.6 6.6 cm
+q
+.662745 .662745 .662745 RG
+.5 w
+.960784 .960784 .862745 rg
+n -6 -6 468.6898 24 re B*
+Q
+q
+0 0 0 rg
+BT 1 0 0 1 0 2 Tm /F3 10 Tf 12 TL (ldi r0, 0xdead) Tj T* ET
+Q
+Q
+Q
+Q
+Q
+q
+1 0 0 1 62.69291 293.4236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL 1.571318 Tw (It might be useful to have more arguments for other applications, like double precision floating points.) Tj T* 0 Tw (Example \(not implemented\):) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 236.2236 cm
+q
+q
+1 0 0 1 0 0 cm
+q
+1 0 0 1 6.6 6.6 cm
+q
+.662745 .662745 .662745 RG
+.5 w
+.960784 .960784 .862745 rg
+n -6 -6 468.6898 48 re B*
+Q
+q
+0 0 0 rg
+BT 1 0 0 1 0 26 Tm /F3 10 Tf 12 TL (lddfi r0, 0xdead, 0xbeef) Tj T* (; load double precision floating point) Tj T* (; to r0 and r1) Tj T* ET
+Q
+Q
+Q
+Q
+Q
+q
+1 0 0 1 62.69291 203.2236 cm
+q
+BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (Built-In Commands) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 185.2236 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (ldi) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj (,) Tj ( ) Tj (<) Tj (ba) Tj (>) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 170.2236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 2 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Load the value ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (ba) Tj (> ) Tj /F1 10 Tf 0 0 0 rg (into register ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (>) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 154.2236 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (ld) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj (,) Tj ( ) Tj (<) Tj (ba) Tj (>) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 139.2236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 2 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Load the value of the memory cell at ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (ba) Tj (> ) Tj /F1 10 Tf 0 0 0 rg (into register ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (>) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 123.2236 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (st) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj (,) Tj ( ) Tj (<) Tj (ba) Tj (>) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 108.2236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 2 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Store the value of register ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (> ) Tj /F1 10 Tf 0 0 0 rg (into the memory cell at ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (ba) Tj (>) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 92.22362 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (inc) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 77.22362 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 2 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Increment the value of register ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (>) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+ 
+endstream
+endobj
+29 0 obj
+<< /Length 7261 >>
+stream
+1 0 0 1 0 0 cm  BT /F1 12 Tf 14.4 TL ET
+q
+1 0 0 1 62.69291 753.0236 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (dec) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 738.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 2 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Decrement the value of register ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (>) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 722.0236 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (add|sub|mul|div) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj (,) Tj ( ) Tj (<) Tj (ba) Tj (>) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 695.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 14 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 14 Tm 1.127318 Tw 12 TL /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (>) Tj ( ) Tj (=) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj ( ) Tj (+|-|*|/) Tj ( ) Tj (<) Tj (ba) Tj (> ) Tj /F1 10 Tf 0 0 0 rg (where ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (> ) Tj /F1 10 Tf 0 0 0 rg (and ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (ba) Tj (> ) Tj /F1 10 Tf 0 0 0 rg (are registers. Write the overflow into the) Tj T* 0 Tw (status register.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 679.0236 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (gt|ge|lt|le|eq) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 652.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 14 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 14 Tm 2.431098 Tw 12 TL /F1 10 Tf 0 0 0 rg (Check if the value of register ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (> ) Tj /F1 10 Tf 0 0 0 rg (is ) Tj /F3 10 Tf 0 0 0 rg (>) Tj (|) Tj (>) Tj (=|) Tj (<) Tj (|) Tj (<) Tj (=|== ) Tj /F1 10 Tf 0 0 0 rg (to ) Tj /F3 10 Tf 0 0 0 rg (0) Tj /F1 10 Tf 0 0 0 rg (. Set the status register to ) Tj /F3 10 Tf 0 0 0 rg (1 ) Tj /F1 10 Tf 0 0 0 rg (if it) Tj T* 0 Tw (evaluates true, else to ) Tj /F3 10 Tf 0 0 0 rg (0) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 636.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 2 Tm /F3 10 Tf 12 TL (not) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 621.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 2 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (If the status register is ) Tj /F3 10 Tf 0 0 0 rg (0 ) Tj /F1 10 Tf 0 0 0 rg (set it to ) Tj /F3 10 Tf 0 0 0 rg (1) Tj /F1 10 Tf 0 0 0 rg (, else set it to ) Tj /F3 10 Tf 0 0 0 rg (0) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 605.0236 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (jmp) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 590.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 2 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Set the program counter to the value of register ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (>) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 574.0236 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (call) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 547.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 14 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 14 Tm .797633 Tw 12 TL /F1 10 Tf 0 0 0 rg (Push the current program counter on the stack and set the program counter to the value of register) Tj T* 0 Tw /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (>) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 531.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 2 Tm /F3 10 Tf 12 TL (ret) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 516.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 2 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Pop the previously pushed program counter from the stack.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 500.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 2 Tm /F3 10 Tf 12 TL (stop) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 485.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 2 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Write ) Tj /F3 10 Tf 0 0 0 rg (1 ) Tj /F1 10 Tf 0 0 0 rg (into the shutdown register. This will cause the interpreter to halt.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 469.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 2 Tm /F3 10 Tf 12 TL (cl) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 454.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 2 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Write ) Tj /F3 10 Tf 0 0 0 rg (0 ) Tj /F1 10 Tf 0 0 0 rg (into the status register.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 438.0236 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (cjmp) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 423.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 2 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (If there not a ) Tj /F3 10 Tf 0 0 0 rg (0 ) Tj /F1 10 Tf 0 0 0 rg (in the status register, ) Tj /F3 10 Tf 0 0 0 rg (jmp <) Tj (sa) Tj (>) Tj /F1 10 Tf 0 0 0 rg (, else continue execution.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 407.0236 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (ccall) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 392.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 2 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Like ) Tj /F3 10 Tf 0 0 0 rg (cjmp) Tj ( ) Tj (<) Tj (sa) Tj (> ) Tj /F1 10 Tf 0 0 0 rg (but with ) Tj /F3 10 Tf 0 0 0 rg (call ) Tj /F1 10 Tf 0 0 0 rg (instead.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 359.0236 cm
+q
+BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (Comments) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 341.0236 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Comments start with a ) Tj /F3 10 Tf 0 0 0 rg (; ) Tj /F1 10 Tf 0 0 0 rg (at the beginning of the line and end at the end of the line.) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 308.0236 cm
+q
+BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (Marks) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 278.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL .467485 Tw (Marks represent a special location of the assembly code. The assembler keeps track of those marks and) Tj T* 0 Tw (they can be used as immediate input.) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 236.0236 cm
+q
+BT 1 0 0 1 0 26 Tm 2.869983 Tw 12 TL /F1 10 Tf 0 0 0 rg (A mark is defined by a single word, starting with an alphabetic character \() Tj /F3 10 Tf 0 0 0 rg (a..zA...Z) Tj /F1 10 Tf 0 0 0 rg (\) containing) Tj T* 0 Tw 2.330814 Tw (alphanumeric characters and underscores \() Tj /F3 10 Tf 0 0 0 rg (a..zA..Z0..9_) Tj /F1 10 Tf 0 0 0 rg (\) followed by a colon \() Tj /F3 10 Tf 0 0 0 rg (:) Tj /F1 10 Tf 0 0 0 rg (\) and a newline) Tj T* 0 Tw (character.) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 218.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Example:) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 100.8236 cm
+q
+q
+1 0 0 1 0 0 cm
+q
+1 0 0 1 6.6 6.6 cm
+q
+.662745 .662745 .662745 RG
+.5 w
+.960784 .960784 .862745 rg
+n -6 -6 468.6898 108 re B*
+Q
+q
+0 0 0 rg
+BT 1 0 0 1 0 86 Tm /F3 10 Tf 12 TL (ldi r0, this_is_a_mark) Tj T* (ldi r1, 0xfefe) Tj T* (ldi r2, 0xefef) Tj T*  T* (this_is_a_mark:) Tj T* (add r2, r1) Tj T* (; this will result in an infinite loop.) Tj T* (jmp r0) Tj T* ET
+Q
+Q
+Q
+Q
+Q
+ 
+endstream
+endobj
+30 0 obj
+<< /Length 2972 >>
+stream
+1 0 0 1 0 0 cm  BT /F1 12 Tf 14.4 TL ET
+q
+1 0 0 1 62.69291 744.0236 cm
+q
+BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (Direct Input) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 726.0236 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (The core instruction set contains the ) Tj /F3 10 Tf 0 0 0 rg (ldi ) Tj /F1 10 Tf 0 0 0 rg (command that can be used to load data into a register directly.) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 696.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL 2.501318 Tw (The first \(big\) argument of this command is always a 16bit word. The assembler can automatically) Tj T* 0 Tw (generate the correct value if the argument is provided in the following ways:) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 680.0236 cm
+q
+0 0 .501961 rg
+0 0 .501961 RG
+BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL (Marks) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 665.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 2 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (The assembler inserts the absolute offset of the Mark.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 649.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL (A decimal value) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 634.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 2 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (The assembler inserts the value \(i.e. ) Tj /F3 10 Tf 0 0 0 rg (ldi) Tj ( ) Tj (r0, 12) Tj /F1 10 Tf 0 0 0 rg (\).) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 618.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL (A hexadecimal value) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 603.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 2 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (If the argument starts with ) Tj /F3 10 Tf 0 0 0 rg (0x ) Tj /F1 10 Tf 0 0 0 rg (the assembler will interpret the argument as hexadecimal.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 587.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL (A binary value) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 572.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 2 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (If the argument starts with ) Tj /F3 10 Tf 0 0 0 rg (0b ) Tj /F1 10 Tf 0 0 0 rg (the assembler will interpret the value as binary.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+q
+1 0 0 1 62.69291 556.0236 cm
+q
+0 0 0 rg
+BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL (A character) Tj T* ET
+Q
+Q
+q
+1 0 0 1 62.69291 529.0236 cm
+0 0 0 rg
+BT /F1 10 Tf 12 TL ET
+BT 1 0 0 1 0 14 Tm  T* ET
+q
+1 0 0 1 20 0 cm
+q
+BT 1 0 0 1 0 14 Tm .82811 Tw 12 TL /F1 10 Tf 0 0 0 rg (If the argument is either a single character surrounded by two ) Tj /F3 10 Tf 0 0 0 rg (' ) Tj /F1 10 Tf 0 0 0 rg (characters or any unicode escape) Tj T* 0 Tw (sequence surrounded by ) Tj /F3 10 Tf 0 0 0 rg (' ) Tj /F1 10 Tf 0 0 0 rg (characters the assembler will insert the integer representation.) Tj T* ET
+Q
+Q
+q
+Q
+Q
+ 
+endstream
+endobj
+31 0 obj
+<< /Nums [ 0 32 0 R 1 33 0 R 2 34 0 R ] >>
+endobj
+32 0 obj
+<< /S /D /St 1 >>
+endobj
+33 0 obj
+<< /S /D /St 2 >>
+endobj
+34 0 obj
+<< /S /D /St 3 >>
+endobj
+xref
+0 35
+0000000000 65535 f
+0000000075 00000 n
+0000000130 00000 n
+0000000240 00000 n
+0000000355 00000 n
+0000000526 00000 n
+0000000697 00000 n
+0000000868 00000 n
+0000001039 00000 n
+0000001210 00000 n
+0000001381 00000 n
+0000001553 00000 n
+0000001725 00000 n
+0000001897 00000 n
+0000002069 00000 n
+0000002178 00000 n
+0000002464 00000 n
+0000002674 00000 n
+0000002846 00000 n
+0000003075 00000 n
+0000003184 00000 n
+0000003467 00000 n
+0000003544 00000 n
+0000003692 00000 n
+0000003827 00000 n
+0000003953 00000 n
+0000004076 00000 n
+0000004193 00000 n
+0000004271 00000 n
+0000011143 00000 n
+0000018461 00000 n
+0000021490 00000 n
+0000021552 00000 n
+0000021589 00000 n
+0000021626 00000 n
+trailer
+<< /ID 
+ % ReportLab generated PDF document -- digest (http://www.reportlab.com)
+ [(\372\(\217\316\222\3169q\222\376\355\325c1\302>) (\372\(\217\316\222\3169q\222\376\355\325c1\302>)]
+ /Info 20 0 R /Root 19 0 R /Size 35 >>
+startxref
+21663
+%%EOF
diff --git a/language.rst b/language.rst
new file mode 100644
index 0000000..c7f53b5
--- /dev/null
+++ b/language.rst
@@ -0,0 +1,180 @@
+BCI Assembly Language
+*********************
+
+.. contents::
+
+Commands, Small Arguments and Big Arguments
+===========================================
+
+A command in BCI Assembly is a word starting with an
+alphabetic character (``a..zA..Z``) followed by a sequence
+of alphanumeric characters (``a..zA..Z0..9``).
+This word will be converted to a 10bit opcode.
+
+Embedded in the 16bits of a word there is also a 6bit small
+argument. If a command has no small argument these bits will
+be zeroed. In the assembly the command will be only one
+word, for example::
+
+	cli
+
+If the command has a small argument, the 6 bits will be
+filled with the small argument. In the assembly the small
+argument is separated by one whitespace, for example::
+
+	inc r0
+
+Any other arguments are stored in further words and thus
+have a width of 16bits. They are separated by commas (``,``)
+from both the first and any other arguments.
+It is recommended to only add one more argument.
+
+Example for one big argument::
+
+	ldi r0, 0xdead
+
+It might be useful to have more arguments for other
+applications, like double precision floating points.
+Example (not implemented)::
+
+	lddfi r0, r1, 0xdead, 0xbeef
+	; load double precision floating point
+	; to r0 and r1
+
+Register Names
+==============
+
+Only data registers can be accessed directly. They are
+prefixed with an ``r`` and are indexed starting with ``0``.
+
+Examples: ``r0, r1, r2, ..., r11, r12``
+
+
+Built-In Commands
+=================
+
+``ldi <sa>, <ba>``
+	Load the value ``<ba>``  into register ``<sa>``.
+
+``ld <sa>, <ba>``
+	Load the value of the memory cell at ``<ba>`` into
+	register ``<sa>``.
+
+``st <sa>, <ba>``
+	Store the value of register ``<sa>`` into the memory
+	cell at ``<ba>``.
+
+``inc <sa>``
+	Increment the value of register ``<sa>``.
+
+``dec <sa>``
+	Decrement the value of register ``<sa>``.
+
+``add|sub|mul|div <sa>, <ba>``
+	``<sa> = <sa> +|-|*|/ <ba>`` where ``<sa>`` and
+	``<ba>`` are registers. Write the overflow into the
+	status register.
+
+``gt|ge|lt|le|eq <sa>``
+	Check if the value of register ``<sa>`` is
+	``>|>=|<|<=|==`` to ``0``. Set the status register
+	to ``1`` if it evaluates true, else to ``0``.
+
+``not``
+	If the status register is ``0`` set it to ``1``,
+	else set it to ``0``.
+
+``jmp <sa>``
+	Set the program counter to the value of register
+	``<sa>``.
+
+``call <sa>``
+	Push the current program counter on the stack and
+	set the program counter to the value of register ``<sa>``.
+
+``ret``
+	Pop the previously pushed program counter from the stack.
+
+``stop``
+	Write ``1`` into the shutdown register. This will
+	cause the interpreter to halt.
+``cl``
+	Write ``0`` into the status register.
+
+``cjmp <sa>``
+	If there is not a ``0`` in the status register, ``jmp
+	<sa>``, else continue execution.
+
+``ccall <sa>``
+	Like ``cjmp <sa>`` but with ``call`` instead.
+
+Comments
+========
+
+Comments start with a ``;`` at the beginning of the line and
+end at the end of the line.
+
+Marks
+=====
+
+Marks represent a special location of the assembly code. The
+assembler keeps track of those marks and they can be used as
+immediate input.
+
+A mark is defined by a single word, starting with an
+alphabetic character (``a..zA..Z``) containing alphanumeric
+characters and underscores (``a..zA..Z0..9_``) followed by
+a colon (``:``) and a newline character.
+
+Example::
+
+	ldi r0, this_is_a_mark
+	ldi r1, 0xfefe
+	ldi r2, 0xefef
+
+	this_is_a_mark:
+	add r2, r1
+	; this will result in an infinite loop.
+	jmp r0
+
+
+Direct Input
+============
+
+The core instruction set contains the ``ldi`` command that
+can be used to load data into a register directly.
+
+The first (big) argument of this command is always a 16bit
+word. The assembler can automatically generate the correct
+value if the argument is provided in the following ways:
+
+`Marks`_
+	The assembler inserts the absolute offset of the
+	Mark.
+A decimal value
+	The assembler inserts the value (e.g. ``ldi r0,
+	12``).
+A hexadecimal value
+	If the argument starts with ``0x`` the assembler
+	will interpret the argument as hexadecimal.
+A binary value
+	If the argument starts with ``0b`` the assembler
+	will interpret the value as binary.
+A character
+	If the argument is either a single character
+	surrounded by two ``'`` characters or any unicode
+	escape sequence surrounded by ``'`` characters the
+	assembler will insert the integer representation.
+
+Explicit Data Programming
+=========================
+
+One can explicitly set data in the program memory by using
+the ``.set`` directive. It uses the following syntax::
+
+	".set" "[" <value> {"," <value>} "]"
+
+Where ``<value>`` is a `Direct Input`_ value. The assembler
+will insert the data at exactly the location where the
+``.set`` appears. The assembler ignores any whitespace or
+newline characters between the brackets ``[]``.

From 0fd65c721c711fb2e723c61808644ee136e5d4df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= <daniel.knuettel@daknuett.eu>
Date: Tue, 9 Oct 2018 11:43:42 +0200
Subject: [PATCH 2/9] initial assembler package

---
 assembler/__init__.py   |   0
 assembler/assembler.py  | 280 ++++++++++++++++++++++++++++++++++++++++
 assembler/context.py    |  28 ++++
 assembler/directives.py |  42 ++++++
 assembler/opcodes.py    |  33 +++++
 assembler/tokenize.py   |  55 ++++++++
 assembler/util.py       |  85 ++++++++++++
 7 files changed, 523 insertions(+)
 create mode 100644 assembler/__init__.py
 create mode 100644 assembler/assembler.py
 create mode 100644 assembler/context.py
 create mode 100644 assembler/directives.py
 create mode 100644 assembler/opcodes.py
 create mode 100644 assembler/tokenize.py
 create mode 100644 assembler/util.py

diff --git a/assembler/__init__.py b/assembler/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/assembler/assembler.py b/assembler/assembler.py
new file mode 100644
index 0000000..53f556c
--- /dev/null
+++ b/assembler/assembler.py
@@ -0,0 +1,280 @@
+from collections import deque
+
+from .context import FileContext
+from .tokenize import Tokenizer, WHITESPACE
+from .opcodes import make_opcodes
+from .util import can_be_mark, can_convert_to_int, autoint
+from .directives import SetDirective
+
+class ParsingError(Exception): 
+	pass
+
+class Assembler(object):
+	"""
+	This class is used for generating bytecode from a file containing assembly.
+
+	Also required is the memory definition of the interpreter given as a dict::
+
+		{
+			"registers": <register_count>,
+			"memory": <number of memory words>,
+			"program_memory": <number of words available for the program>
+		}
+
+	And the definition of the commands. This is a list of dicts::
+		
+		[
+			{
+				"mnemonic": <mnemonic>,
+				"args": [
+						("register"|"memory"|"program_memory"|"direct_input"), ...
+					]
+			}
+		]
+	
+	The method ``parse`` will parse the input file and ``bindump`` will dump the binary
+	bytecode into a file.
+	"""
+	def __init__(self, file_, memory_definition, command_definition, custom_directives):
+		self._file_context = FileContext(file_)
+		self._code_objects = deque()
+		self._memory_definition = memory_definition
+		self._command_definition = command_definition
+		self._word_count = 0
+		self._marks = {}
+
+		self._opcodes = make_opcodes([cmd["mnemonic"] for cmd in command_definition])
+		self._commands_by_mnemonic = {cmd["mnemonic"]: cmd for cmd in command_definition}
+		self._tokenizer = Tokenizer(self._file_context)
+
+		self._directives = {"set": SetDirective()}
+		self._directives.update(custom_directives)
+
+	def parse(self):
+		try:
+			for token in self._tokenizer:
+
+				# Comments
+				if(token == ";"):
+					while(token != "\n"):
+						token = next(self._tokenizer)
+					continue
+
+				# Commands
+				if(token in self._commands_by_mnemonic):
+					self.parse_command(token)
+					continue
+
+				# Directives
+				if(token == "."):
+					self.parse_directive()
+					continue
+
+				# The default is trying to parse a mark
+				if(not can_be_mark(token)):
+					self.raise_unexpected_token(token
+						, "comment, command, directive or mark"
+						, token)
+				self.parse_mark(token)
+		except StopIteration:
+			raise ParsingError("Unexpected EOF while parsing.")
+			
+		for mark, mark_data in self._marks.items():
+			if(mark_data["target"] < 0):
+				raise ParsingError("Mark {} undefined. Referenced in lines: {}".format(
+						mark
+						, mark_data["references"]))
+
+		self._code_objects = [self._marks[c]["target"] if c in self._marks else c 
+							for c in self._code_objects ]
+
+	def bindump(self, file_):
+		# FIXME:
+		# Make this work for tons of data.
+		# Or is that necessary?
+		return file_.write(bytes(self._code_objects))
+
+
+	def parse_mark(self, token):
+		should_be_colon = next(self._tokenizer)
+
+		if(should_be_colon != ":"):
+			self.raise_unexpected_token(token, "':'", should_be_colon)
+
+		should_be_newline = next(self._tokenizer)
+		if(should_be_newline != "\n"):
+			self.raise_unexpected_token(token + ":", "'\\n'", should_be_newline)
+
+		if(token in self._marks):
+			if(self._marks[token]["target"] != -1):
+				raise ParsingError("Error in line {} column {} mark already defined: '{}'. Previous was in line {}.".format(
+						self._line
+						, self._column
+						, token
+						, self._marks[token]["target_line"]))
+			self._marks[token]["target"] = self._word_count
+			self._marks[token]["target_line"] = self._line
+
+		else:
+			self._marks[token] = {
+				"target": self._word_count
+				, "target_line": self._line
+				, "references": []
+			}
+
+		should_be_newline = next(self._tokenizer)
+		if(should_be_newline != "\n"):
+			self.raise_unexpected_token(token, "newline", should_be_newline)
+
+
+	def parse_directive(self):
+		should_be_name = next(self._tokenizer)
+		
+		if(not should_be_name in self._directives):
+			self.raise_unexpected_token(".", "directive name", should_be_name)
+
+		words = self._directives[should_be_name].parse(self, self._tokenizer)
+		
+		self._word_count += len(words)
+		self._code_objects.extend(words)
+
+		should_be_newline = next(self._tokenizer)
+		if(should_be_newline != "\n"):
+			self.raise_unexpected_token(".", "newline", should_be_newline)
+
+
+
+
+	def parse_command(self, cmd):
+
+		self._code_objects.append(self._opcodes[cmd])
+		self._word_count += 1
+
+		if(not self._commands_by_mnemonic[cmd]["args"]):
+			token = next(self._tokenizer)
+			if(token != "\n"):
+				self.raise_unexpected_token(cmd, "newline", token)
+			return 
+
+		should_be_whitespace = next(self._tokenizer)
+		if(should_be_whitespace not in WHITESPACE 
+				or should_be_whitespace == "\n"):
+			self.raise_unexpected_token(cmd, "' '", should_be_whitespace)
+
+		should_be_an_argument = next(self._tokenizer)
+		argument = self.check_and_convert_argument(cmd
+				, should_be_an_argument
+				, self._commands_by_mnemonic[cmd]["args"][0])
+		self._word_count += 1
+		self._code_objects.append(argument)
+		
+
+		for argument in self._commands_by_mnemonic[cmd]["args"][1:]:
+			should_be_comma = next(self._tokenizer)
+			if(should_be_comma != ","):
+				# Name the expected ',' and report the token actually read.
+				self.raise_unexpected_token(cmd, "','", should_be_comma)
+			self._word_count += 1
+			self._code_objects.append(
+					self.check_and_convert_argument(
+						cmd
+						, next(self._tokenizer)
+						, argument))
+
+		should_be_newline = next(self._tokenizer)
+		if(should_be_newline != "\n"):
+			self.raise_unexpected_token(cmd, "newline", should_be_newline)
+			
+			
+	def raise_unexpected_token(self, after, expected, got):
+		raise ParsingError("Error in line {} column {} after '{}': expected {}, got '{}'".format(
+					self._file_context._line
+					, self._file_context._column
+					, after
+					, expected
+					, got))
+
+	def raise_invalid_address(self, after, memtype, maxval, got):
+		raise ParsingError("Error in line {} column {} after '{}': value {} is invalid for {} (max is {})".format(
+					self._file_context._line
+					, self._file_context._column
+					, after
+					, got
+					, memtype
+					, maxval))
+
+	def check_and_convert_argument(self, cmd, argument, argument_definition):
+		if(argument_definition == "register"):
+			if(not argument.startswith("r")):
+				self.raise_unexpected_token(cmd, "register name", argument)
+			register_offset = argument[1:]
+			raise_could_not_convert_register_offset = False
+			try:
+				register_offset = int(register_offset)
+			except:
+				raise_could_not_convert_register_offset = True
+
+			if(raise_could_not_convert_register_offset):
+				self.raise_unexpected_token(cmd, "register name", argument)
+
+			if(register_offset > self._memory_definition["registers"] 
+					or register_offset < 0):
+				self.raise_invalid_address(cmd
+						, "register"
+						, self._memory_definition["registers"]
+						, register_offset)
+
+			return register_offset
+
+		if(argument_definition == "memory"):
+			if(not can_convert_to_int(argument)):
+				self.raise_unexpected_token(cmd, "integer address", argument)
+			argument = autoint(argument)
+
+			if(argument < 0 or argument > self._memory_definition["memory"]):
+				self.raise_invalid_address(cmd
+						, "memory"
+						, self._memory_definition["memory"]
+						, argument)
+			return argument
+
+		if(argument_definition == "program_memory"):
+			if(not can_convert_to_int(argument)):
+				if(not can_be_mark(argument)):
+					self.raise_unexpected_token(cmd, "integer address or mark", argument)
+				if(argument in self._marks):
+					self._marks[argument]["references"].append(self._line)
+					if(self._marks[argument]["target"] != -1):
+						return self._marks[argument]["target"]
+					return argument
+				self._marks[argument] = {
+					"target": -1
+					, "target_line": 0
+					, "references": [self._line]
+				}
+				return argument
+
+
+			argument = autoint(argument)
+
+			if(argument < 0 or argument > self._memory_definition["program_memory"]):
+				self.raise_invalid_address(cmd
+						, "program_memory"
+						, self._memory_definition["program_memory"]
+						, argument)
+			return argument
+
+
+		if(can_convert_to_int(argument)):
+			return autoint(argument)
+
+		if(not can_be_mark(argument)):
+			self.raise_unexpected_token(cmd, "integer, char or mark", argument)
+		if(argument in self._marks and self._marks[argument] >= 0):
+			return self._marks[argument]
+		self._marks[argument] = -1
+		return argument
+
+
+			
+
diff --git a/assembler/context.py b/assembler/context.py
new file mode 100644
index 0000000..3863a00
--- /dev/null
+++ b/assembler/context.py
@@ -0,0 +1,28 @@
+from collections import deque
+
+class FileContext(object):
+	def __init__(self, file_):
+		self._file = file_
+		self._line = 0
+		self._column = 0
+		self._column_stack = deque()
+
+	def getc(self):
+		c = self._file.read(1)
+		if(c == "\n"):
+			self._line += 1
+			self._column_stack.append(self._column)
+			self._column = 0
+		else:
+			self._column += 1
+
+		return c
+
+	def ungetc(self, c):
+		self._file.seek(self._file.tell() - 1, 0)
+		if(c == "\n"):
+			self._line -= 1
+			self._column = self._column_stack.pop()
+		else:
+			self._column -= 1
+
diff --git a/assembler/directives.py b/assembler/directives.py
new file mode 100644
index 0000000..24b6c99
--- /dev/null
+++ b/assembler/directives.py
@@ -0,0 +1,42 @@
+"""
+Directives for explicitly modifying the program memory.
+"""
+
+from abc import ABC, abstractmethod
+from collections import deque
+
+class AbstractDirective(ABC):
+	@abstractmethod
+	def parse(self, assembler, tokenizer):
+		"""
+		Parse the directive by converting the text to a list of words.
+		Returns a list of 16bit words.
+		"""
+		pass
+
+
+
+class SetDirective(AbstractDirective):
+	def parse(self, assembler, tokenizer):
+		words = deque()
+		should_be_bracket = next(tokenizer)
+		if(not should_be_bracket == "["):
+			assembler.raise_unexpected_token(".set", "'['", should_be_bracket)
+
+		while(True):
+			should_be_value = next(tokenizer)
+			if(not can_convert_to_int(should_be_value)):
+				assembler.raise_unexpected_token(".set"
+						, "integer or character value"
+						, should_be_value)
+			words.append(autoint(should_be_value))
+
+			comma_or_bracket = next(tokenizer)
+			if(not comma_or_bracket in (",", "]")):
+				assembler.raise_unexpected_token(".set"
+						, "',' or ']'"
+						, comma_or_bracket)
+
+			if(comma_or_bracket == "]"):
+				break
+		return list(words)
diff --git a/assembler/opcodes.py b/assembler/opcodes.py
new file mode 100644
index 0000000..15c9ba3
--- /dev/null
+++ b/assembler/opcodes.py
@@ -0,0 +1,33 @@
+
+
+class Autoinserter(object):
+	def __init__(self):
+		self.mayor = 2
+		self.minor = 1
+	def __next__(self):
+		"""Generate the next opcode"""
+		# 10bit opcode
+		opcode = 0b1111111111
+
+		# Calculate this opcode.
+		opcode *= self.minor
+		opcode //= self.mayor
+
+		# The lower 6 bits are reserved.
+		opcode <<= 6
+		# 16 bits in total. Should not matter.
+		opcode &= 0xffff 
+
+		# Update the state.
+		self.minor = (self.minor + 2) % self.mayor
+		if(self.minor == 1):
+			self.mayor *= 2
+
+		return opcode
+	def __iter__(self):
+		return self
+
+def make_opcodes(mnemonics_in_order):
+
+	ai = Autoinserter()
+	return {mnemonic: opcode for (mnemonic, opcode) in zip(mnemonics_in_order, ai)}
diff --git a/assembler/tokenize.py b/assembler/tokenize.py
new file mode 100644
index 0000000..71be23f
--- /dev/null
+++ b/assembler/tokenize.py
@@ -0,0 +1,55 @@
+from collections import deque
+
+WHITESPACE = {" ", "\t", "\n", "\r"}
+TOKENS_END_OF_WORD = WHITESPACE | {";", ":", ",", ".", "[", "]"}
+
+TOKENS_EXPECT_NEWLINE = {":", "]"}
+
+
+class Tokenizer(object):
+	def __init__(self, context):
+		self._context = context
+		self._expect_newline = False
+		self._expect_whitespace = False
+
+	def __iter__(self):
+		return self
+	def __next__(self):
+		"""
+		Return the next token; raise ``StopIteration`` at end of input.
+		"""
+		current_token = deque()
+		while(True):
+			c = self._context.getc()
+			# Explicit newline requested; a pending word is flushed first (below).
+			if(self._expect_newline and c == "\n" and not current_token):
+				self._expect_newline = False
+				self._expect_whitespace = False
+				return c
+
+			# Skip multiple whitespaces.
+			if(c in WHITESPACE and not current_token):
+				if(self._expect_whitespace):
+					self._expect_whitespace = False
+					return c
+				continue
+
+			if(c in TOKENS_END_OF_WORD):
+				if(current_token):
+					self._context.ungetc(c)
+					if(c in WHITESPACE):
+						self._expect_whitespace = True
+					return "".join(current_token)
+				else:
+					if(c in TOKENS_EXPECT_NEWLINE):
+						self._expect_newline = True
+					return c
+
+			if(not c):
+				# EOF: flush a word that is not followed by any delimiter.
+				if(current_token):
+					return "".join(current_token)
+				raise StopIteration()
+			current_token.append(c)
+
+
diff --git a/assembler/util.py b/assembler/util.py
new file mode 100644
index 0000000..60d043b
--- /dev/null
+++ b/assembler/util.py
@@ -0,0 +1,85 @@
+"""
+Utility functions used for parsing.
+"""
+
+
+def can_be_mark(argument):
+	"""
+	Return ``True`` if ``argument`` can be interpreted as a Mark: a lowercase
+	letter followed by letters, digits or ``_``. Empty input is rejected.
+	"""
+	a = ord("a")
+	a_z = [chr(a + i) for i in range(26)]
+	A = ord("A")
+	A_Z = [chr(A + i) for i in range(26)]
+	null = ord("0")
+	null_9 = [chr(null + i) for i in range(10)]
+	# Slice, do not index: an empty string must yield False, not IndexError.
+	if(not argument[:1] in a_z):
+		return False
+	for char in argument[1:]:
+		if(not (char in a_z
+			or char in A_Z
+			or char in null_9
+			or char == "_")):
+			return False
+	return True
+
+	
+
+def can_convert_to_int(value):
+	"""
+	Return ``True`` if ``value`` can be converted to an integer.
+
+	**Note** that this returns ``True`` if the value is a
+	character definition like ``'a'``.
+	"""
+	if(value.startswith("0x")):
+		try:
+			int(value[2:], 16)
+			return True
+		except ValueError:
+			return False
+
+	if(value.startswith("0b")):
+		try:
+			int(value[2:], 2)
+			return True
+		except ValueError:
+			return False
+
+	if(value.startswith("'") and value.endswith("'")):
+		if(len(value) == 3):
+			return True
+		if(len(value) == 4):
+			if(value[1:-1] in {"\\n", "\\r", "\\t"}):
+				return True
+		return False
+
+	try:
+		int(value)
+		return True
+	except ValueError:
+		return False
+
+def autoint(value):
+	"""
+	Convert ``value`` to an integer automatically.
+	"""
+	escape_sequences = {"\\n": "\n", "\\r": "\r", "\\t":"\t"}
+	if(value.startswith("0x")):
+		return int(value[2:], 16)
+
+	if(value.startswith("0b")):
+		return int(value[2:], 2)
+
+	if(value.startswith("'") and value.endswith("'")):
+		if(len(value) == 3):
+			return ord(value[1:-1])
+		if(len(value) == 4):
+			if(value[1:-1] in escape_sequences):
+				return ord(escape_sequences[value[1:-1]])
+
+	return int(value)
+	
+

From c684a5f5121faaddec38fef32ace2a70b36346a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= <daniel.knuettel@daknuett.eu>
Date: Tue, 9 Oct 2018 11:44:00 +0200
Subject: [PATCH 3/9] added first tests

---
 test/conftest.py               | 101 +++++++++++++++++++++++++++++++++
 test/test_010_filecontext.py   |  24 ++++++++
 test/test_011_tokenize.py      |  54 ++++++++++++++++++
 test/test_012_opcodes.py       |  14 +++++
 test/test_013_util.py          |  35 ++++++++++++
 test/test_020_basic_parsing.py |  22 +++++++
 6 files changed, 250 insertions(+)
 create mode 100644 test/conftest.py
 create mode 100644 test/test_010_filecontext.py
 create mode 100644 test/test_011_tokenize.py
 create mode 100644 test/test_012_opcodes.py
 create mode 100644 test/test_013_util.py
 create mode 100644 test/test_020_basic_parsing.py

diff --git a/test/conftest.py b/test/conftest.py
new file mode 100644
index 0000000..9825dfd
--- /dev/null
+++ b/test/conftest.py
@@ -0,0 +1,101 @@
+import pytest
+
+@pytest.fixture
+def basic_machine_definition():
+	return (
+			{
+				"registers": 20
+				, "memory": 100
+				, "program_memory": 500
+			}
+			, [
+				{
+					"mnemonic": "ldi"
+					, "args": ["register", "direct_input"]
+				}
+				, {
+					"mnemonic": "ld"
+					, "args": ["register", "register"]
+				}
+				, {
+					"mnemonic": "st"
+					, "args": ["register", "register"]
+				}
+				, {
+					"mnemonic": "inc"
+					, "args": ["register"]
+				}
+				, {
+					"mnemonic": "dec"
+					, "args": ["register"]
+				}
+				, {
+					"mnemonic": "add"
+					, "args": ["register", "register"]
+				}
+				, {
+					"mnemonic": "sub"
+					, "args": ["register", "register"]
+				}
+				, {
+					"mnemonic": "mul"
+					, "args": ["register", "register"]
+				}
+				, {
+					"mnemonic": "div"
+					, "args": ["register", "register"]
+				}
+				, {
+					"mnemonic": "gt"
+					, "args": ["register"]
+				}
+				, {
+					"mnemonic": "ge"
+					, "args": ["register"]
+				}
+				, {
+					"mnemonic": "lt"
+					, "args": ["register"]
+				}
+				, {
+					"mnemonic": "le"
+					, "args": ["register"]
+				}
+				, {
+					"mnemonic": "eq"
+					, "args": ["register"]
+				}
+				, {
+					"mnemonic": "not"
+					, "args": []
+				}
+				, {
+					"mnemonic": "jmp"
+					, "args": ["register"]
+				}
+				, {
+					"mnemonic": "call"
+					, "args": ["register"]
+				}
+				, {
+					"mnemonic": "ret"
+					, "args": []
+				}
+				, {
+					"mnemonic": "stop"
+					, "args": []
+				}
+				, {
+					"mnemonic": "cl"
+					, "args": []
+				}
+				, {
+					"mnemonic": "cjmp"
+					, "args": ["register"]
+				}
+				, {
+					"mnemonic": "ccall"
+					, "args": ["register"]
+				}
+			]
+		)
diff --git a/test/test_010_filecontext.py b/test/test_010_filecontext.py
new file mode 100644
index 0000000..e8340a4
--- /dev/null
+++ b/test/test_010_filecontext.py
@@ -0,0 +1,24 @@
+from io import StringIO
+from assembler.context import FileContext
+
+
+def test_getc_ungetc():
+	data = StringIO("abc\ndefg")
+	context = FileContext(data)
+
+	assert context.getc() == "a"
+	assert context.getc() == "b"
+	assert context._line == 0
+	assert context._column == 2
+	assert context.getc() == "c"
+	assert context.getc() == "\n"
+	assert context.getc() == "d"
+	assert context._line == 1
+	assert context._column == 1
+
+	context.ungetc("d")
+	context.ungetc("\n")
+
+	assert context._column == 3
+	assert context._line == 0
+	assert context.getc() == "\n"
diff --git a/test/test_011_tokenize.py b/test/test_011_tokenize.py
new file mode 100644
index 0000000..2a2955c
--- /dev/null
+++ b/test/test_011_tokenize.py
@@ -0,0 +1,54 @@
+from io import StringIO
+from assembler.tokenize import Tokenizer
+from assembler.context import FileContext
+
+def test_tokenize_1():
+	data = '''
+	ldi r0, 0xfefe
+	ldi r1, 0xefef
+	add r0, r1
+	'''
+	data = StringIO(data)
+	tokenizer = Tokenizer(FileContext(data))
+
+	result = list(tokenizer)
+
+	assert result == [
+		"ldi", " ", "r0", ",", "0xfefe", "\n"
+		, "ldi", " ", "r1", ",", "0xefef", "\n"
+		, "add", " ", "r0", ",", "r1", "\n"
+		]
+
+def test_tokenize_2():
+	data = '''
+	; This is a comment
+
+	ldi r0, 0xfefe
+	'''
+	data = StringIO(data)
+	tokenizer = Tokenizer(FileContext(data))
+
+	result = list(tokenizer)
+
+	assert result == [
+		";", "This", " ", "is", " ", "a", " ", "comment", "\n"
+		, "ldi", " ", "r0", ",", "0xfefe", "\n"
+		]
+
+
+def test_tokenize_3():
+	data = '''
+	ldi r0, 0xfefe
+	string:
+	.set ['h', 'e', 'l', 'l', 'o']
+	'''
+	data = StringIO(data)
+	tokenizer = Tokenizer(FileContext(data))
+
+	result = list(tokenizer)
+
+	assert result == [
+		"ldi", " ", "r0", ",", "0xfefe", "\n"
+		, "string", ":", "\n"
+		, ".", "set", " ", "[", "'h'", ",", "'e'", ",", "'l'", ",", "'l'", ",", "'o'", "]", "\n"
+		]
diff --git a/test/test_012_opcodes.py b/test/test_012_opcodes.py
new file mode 100644
index 0000000..2b7ba68
--- /dev/null
+++ b/test/test_012_opcodes.py
@@ -0,0 +1,14 @@
+from assembler.opcodes import make_opcodes
+
+
+def test_make_opcodes():
+	mnemonics = ["ldi", "ld", "st", "add", "mul"]
+
+	opcodes = make_opcodes(mnemonics)
+
+	assert opcodes == {"ldi": 32704
+			, "ld" : 16320
+			, "st": 49088
+			, "add": 8128
+			, "mul": 24512
+		}
diff --git a/test/test_013_util.py b/test/test_013_util.py
new file mode 100644
index 0000000..32a5c5f
--- /dev/null
+++ b/test/test_013_util.py
@@ -0,0 +1,35 @@
+from assembler.util import can_be_mark, can_convert_to_int, autoint
+
+
+def test_can_be_mark():
+	assert can_be_mark("this_is_a_mark") == True
+	assert can_be_mark("this_is_a_mark0") == True
+	assert can_be_mark("tHIS0") == True
+
+	assert can_be_mark("This_is_not_a_mark") == False
+	assert can_be_mark("0this_is_no_mark") == False
+	assert can_be_mark("this#is_no_mark") == False
+
+def test_can_convert_to_int():
+	assert can_convert_to_int("0xfe") == True
+	assert can_convert_to_int("0xFE") == True
+	assert can_convert_to_int("10") == True
+	assert can_convert_to_int("0b100") == True
+	assert can_convert_to_int("'a'") == True
+	assert can_convert_to_int("'\\n'") == True
+
+
+	assert can_convert_to_int("0xfg") == False
+	assert can_convert_to_int("0xFG") == False
+	assert can_convert_to_int("10a") == False
+	assert can_convert_to_int("0b20") == False
+	assert can_convert_to_int("'aa'") == False
+	assert can_convert_to_int("'\\z'") == False
+
+def test_autoint():
+	assert autoint("0xfe") == 0xfe
+	assert autoint("0xFE") == 0xfe
+	assert autoint("10") == 10
+	assert autoint("0b1010101") == 0b1010101
+	assert autoint("'a'") == ord("a")
+	assert autoint("'\\n'") == ord("\n")
diff --git a/test/test_020_basic_parsing.py b/test/test_020_basic_parsing.py
new file mode 100644
index 0000000..9e89eb9
--- /dev/null
+++ b/test/test_020_basic_parsing.py
@@ -0,0 +1,22 @@
+from io import StringIO
+import pytest
+
+from assembler.assembler import Assembler
+
+
+def test_commands(basic_machine_definition):
+	memory_definition, command_defintion = basic_machine_definition
+
+	data = StringIO(
+		'''
+		ldi r0, 0xfe
+		ldi r1, 0xfe
+		add r0, r1
+		'''
+		)
+	assembler = Assembler(data, memory_definition, command_defintion, {})
+	assembler.parse()
+
+	assert assembler._code_objects == [32704, 0, 0xfe
+					, 32704, 1, 0xfe
+					, 40896, 0, 1]

From 629a70d2e84c4c4f4067438395bc3963c0b81f38 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= <daniel.knuettel@daknuett.eu>
Date: Tue, 9 Oct 2018 15:55:42 +0200
Subject: [PATCH 4/9] fixed some bugs in the assembler

---
 assembler/assembler.py         | 64 +++++++++++++++++++++++-----------
 assembler/directives.py        |  2 ++
 test/test_011_tokenize.py      | 17 +++++++++
 test/test_020_basic_parsing.py | 52 +++++++++++++++++++++++++--
 4 files changed, 112 insertions(+), 23 deletions(-)

diff --git a/assembler/assembler.py b/assembler/assembler.py
index 53f556c..f4dbbe6 100644
--- a/assembler/assembler.py
+++ b/assembler/assembler.py
@@ -108,24 +108,20 @@ class Assembler(object):
 		if(token in self._marks):
 			if(self._marks[token]["target"] != -1):
 				raise ParsingError("Error in line {} column {} mark already defined: '{}'. Previous was in line {}.".format(
-						self._line
+						self._file_context._line
 						, self._column
 						, token
 						, self._marks[token]["target_line"]))
 			self._marks[token]["target"] = self._word_count
-			self._marks[token]["target_line"] = self._line
+			self._marks[token]["target_line"] = self._file_context._line
 
 		else:
 			self._marks[token] = {
 				"target": self._word_count
-				, "target_line": self._line
+				, "target_line": self._file_context._line
 				, "references": []
 			}
 
-		should_be_newline = next(self._tokenizer)
-		if(should_be_newline != "\n"):
-			self.raise_unexpected_token(token, "newline", should_be_newline)
-
 
 	def parse_directive(self):
 		should_be_name = next(self._tokenizer)
@@ -133,6 +129,10 @@ class Assembler(object):
 		if(not should_be_name in self._directives):
 			self.raise_unexpected_token(".", "directive name", should_be_name)
 
+		should_be_whitespace = next(self._tokenizer)
+		if(not should_be_whitespace in WHITESPACE):
+			self.raise_unexpected_token(should_be_name, "' '", should_be_whitespace)
+
 		words = self._directives[should_be_name].parse(self, self._tokenizer)
 		
 		self._word_count += len(words)
@@ -146,16 +146,16 @@ class Assembler(object):
 
 
 	def parse_command(self, cmd):
-
-		self._code_objects.append(self._opcodes[cmd])
-		self._word_count += 1
-
+		# We have no arguments
 		if(not self._commands_by_mnemonic[cmd]["args"]):
+			self._code_objects.append(self._opcodes[cmd])
+			self._word_count += 1
 			token = next(self._tokenizer)
 			if(token != "\n"):
 				self.raise_unexpected_token(cmd, "newline", token)
 			return 
 
+		# Small argument must be treated separately
 		should_be_whitespace = next(self._tokenizer)
 		if(should_be_whitespace not in WHITESPACE 
 				or should_be_whitespace == "\n"):
@@ -166,9 +166,12 @@ class Assembler(object):
 				, should_be_an_argument
 				, self._commands_by_mnemonic[cmd]["args"][0])
 		self._word_count += 1
-		self._code_objects.append(argument)
+		# NOTE:
+		# The Small Argument is stored within the first word (!)
+		self._code_objects.append(self._opcodes[cmd] | (argument & 0xffff))
 		
 
+		# All the 16bit arguments
 		for argument in self._commands_by_mnemonic[cmd]["args"][1:]:
 			should_be_comma = next(self._tokenizer)
 			if(should_be_comma != ","):
@@ -239,22 +242,31 @@ class Assembler(object):
 			return argument
 
 		if(argument_definition == "program_memory"):
+			# Non-integer Argument.
 			if(not can_convert_to_int(argument)):
+				# Just nonsense.
 				if(not can_be_mark(argument)):
 					self.raise_unexpected_token(cmd, "integer address or mark", argument)
+				# The Mark has appeared before.
 				if(argument in self._marks):
-					self._marks[argument]["references"].append(self._line)
+					# Add this line to the references.
+					self._marks[argument]["references"].append(self._file_context._line)
+					# The target is already known. Insert it now.
 					if(self._marks[argument]["target"] != -1):
 						return self._marks[argument]["target"]
+					# The target is unknown.
 					return argument
+				# The Mark has not appeared before.
 				self._marks[argument] = {
 					"target": -1
 					, "target_line": 0
-					, "references": [self._line]
+					, "references": [self._file_context._line]
 				}
+				# Target is obviously unknown.
 				return argument
 
 
+			# Integer argument.
 			argument = autoint(argument)
 
 			if(argument < 0 or argument > self._memory_definition["program_memory"]):
@@ -264,17 +276,29 @@ class Assembler(object):
 						, argument)
 			return argument
 
+		# This is direct input (default).
 
+		# Integer
 		if(can_convert_to_int(argument)):
 			return autoint(argument)
 
+		# This is nonsense.
 		if(not can_be_mark(argument)):
 			self.raise_unexpected_token(cmd, "integer, char or mark", argument)
-		if(argument in self._marks and self._marks[argument] >= 0):
-			return self._marks[argument]
-		self._marks[argument] = -1
+
+		# It is a Mark.
+		if(argument in self._marks):
+			if(self._marks[argument]["target"] >= 0):
+				self._marks[argument]["references"].append(self._file_context._line)
+				return self._marks[argument]["target"]
+			self._marks[argument]["references"].append(self._file_context._line)
+			return argument
+			
+		self._marks[argument] = {
+			"target": -1
+			, "target_line": 0
+			, "references": [self._file_context._line]
+		}
+
 		return argument
 
-
-			
-
diff --git a/assembler/directives.py b/assembler/directives.py
index 24b6c99..a03383d 100644
--- a/assembler/directives.py
+++ b/assembler/directives.py
@@ -5,6 +5,8 @@ Directives for explicitly modifying the program memory.
 from abc import ABC, abstractmethod
 from collections import deque
 
+from .util import can_convert_to_int, autoint
+
 class AbstractDirective(ABC):
 	@abstractmethod
 	def parse(self, assembler, tokenizer):
diff --git a/test/test_011_tokenize.py b/test/test_011_tokenize.py
index 2a2955c..590d13f 100644
--- a/test/test_011_tokenize.py
+++ b/test/test_011_tokenize.py
@@ -52,3 +52,20 @@ def test_tokenize_3():
 		, "string", ":", "\n"
 		, ".", "set", " ", "[", "'h'", ",", "'e'", ",", "'l'", ",", "'l'", ",", "'o'", "]", "\n"
 		]
+
+def test_tokenize_4():
+	data = '''
+	ldi r0, 0xfefe
+	test_mark:
+	ldi r1, 0xefef
+	'''
+	data = StringIO(data)
+	tokenizer = Tokenizer(FileContext(data))
+
+	result = list(tokenizer)
+
+	assert result == [
+		"ldi", " ", "r0", ",", "0xfefe", "\n"
+		, "test_mark", ":", "\n"
+		, "ldi", " ", "r1", ",", "0xefef", "\n"
+		]
diff --git a/test/test_020_basic_parsing.py b/test/test_020_basic_parsing.py
index 9e89eb9..0b1a569 100644
--- a/test/test_020_basic_parsing.py
+++ b/test/test_020_basic_parsing.py
@@ -17,6 +17,52 @@ def test_commands(basic_machine_definition):
 	assembler = Assembler(data, memory_definition, command_defintion, {})
 	assembler.parse()
 
-	assert assembler._code_objects == [32704, 0, 0xfe
-					, 32704, 1, 0xfe
-					, 40896, 0, 1]
+	assert assembler._code_objects == [32704 | 0, 0xfe
+					, 32704 | 1, 0xfe
+					, 40896 | 0, 1]
+
+def test_mark(basic_machine_definition):
+	memory_definition, command_defintion = basic_machine_definition
+
+	data = StringIO(
+		'''
+		ldi r0, test_mark
+		ldi r1, 0xfe
+		test_mark:
+		add r0, r1
+		'''
+		)
+	assembler = Assembler(data, memory_definition, command_defintion, {})
+	assembler.parse()
+
+
+	assert assembler._code_objects == [32704 | 0, 4
+					, 32704 | 1, 0xfe
+					, 40896 | 0, 1]
+
+
+def test_set_directive(basic_machine_definition):
+	memory_definition, command_defintion = basic_machine_definition
+
+	data = StringIO(
+		'''
+		ldi r0, test_mark
+		ldi r1, 0xfe
+		test_mark:
+		.set [0xfe, 0xef,
+			10, 20,
+			'a', 'b',
+			'\\n', 0b10]
+		'''
+		)
+	assembler = Assembler(data, memory_definition, command_defintion, {})
+	assembler.parse()
+
+
+	assert assembler._code_objects == [32704 | 0, 4
+					, 32704 | 1, 0xfe
+					, 0xfe, 0xef
+					, 10, 20
+					, ord("a"), ord("b")
+					, ord("\n"), 0b10]
+

From a0f78bbe9809527822e5999eff436fc775ac1d60 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= <daniel.knuettel@daknuett.eu>
Date: Wed, 10 Oct 2018 10:27:39 +0200
Subject: [PATCH 5/9] fixed one typo

---
 assembler/assembler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/assembler/assembler.py b/assembler/assembler.py
index f4dbbe6..4d36209 100644
--- a/assembler/assembler.py
+++ b/assembler/assembler.py
@@ -175,7 +175,7 @@ class Assembler(object):
 		for argument in self._commands_by_mnemonic[cmd]["args"][1:]:
 			should_be_comma = next(self._tokenizer)
 			if(should_be_comma != ","):
-				self.raise_unexpected_token(cmd, should_be_whitespace)
+				self.raise_unexpected_token(cmd, "','", should_be_comma)
 
 			self._word_count += 1
 			self._code_objects.append(

From 353a68aacee310dae8e7956700045bb1a09d1bb6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= <daniel.knuettel@daknuett.eu>
Date: Wed, 10 Oct 2018 10:30:54 +0200
Subject: [PATCH 6/9] added tests for failures

---
 test/test_021_parsing_failures.py | 210 ++++++++++++++++++++++++++++++
 1 file changed, 210 insertions(+)
 create mode 100644 test/test_021_parsing_failures.py

diff --git a/test/test_021_parsing_failures.py b/test/test_021_parsing_failures.py
new file mode 100644
index 0000000..3a64168
--- /dev/null
+++ b/test/test_021_parsing_failures.py
@@ -0,0 +1,210 @@
+from io import StringIO
+import pytest
+
+from assembler.assembler import Assembler, ParsingError
+
+def test_missing_comma(basic_machine_definition):
+	memory_definition, command_defintion = basic_machine_definition
+
+	data = StringIO(
+		'''
+		ldi r0 0xfe
+		ldi r1, 0xfe
+		add r0, r1
+		'''
+		)
+	assembler = Assembler(data, memory_definition, command_defintion, {})
+
+	with pytest.raises(ParsingError):
+		assembler.parse()
+
+def test_missing_newline(basic_machine_definition):
+	memory_definition, command_defintion = basic_machine_definition
+
+	data = StringIO(
+		'''
+		ldi r0, 0xfe ldi r1, 0xfe
+		add r0, r1
+		'''
+		)
+	assembler = Assembler(data, memory_definition, command_defintion, {})
+
+	with pytest.raises(ParsingError):
+		assembler.parse()
+
+
+
+def test_additional_comma1(basic_machine_definition):
+	memory_definition, command_defintion = basic_machine_definition
+
+	data = StringIO(
+		'''
+		ldi, r0, 0xfe 
+		ldi r1, 0xfe
+		add r0, r1
+		'''
+		)
+	assembler = Assembler(data, memory_definition, command_defintion, {})
+
+	with pytest.raises(ParsingError):
+		assembler.parse()
+
+def test_additional_comma2(basic_machine_definition):
+	memory_definition, command_defintion = basic_machine_definition
+
+	data = StringIO(
+		'''
+		ldi r0, 0xfe,
+		ldi r1, 0xfe
+		add r0, r1
+		'''
+		)
+	assembler = Assembler(data, memory_definition, command_defintion, {})
+
+	with pytest.raises(ParsingError):
+		assembler.parse()
+
+
+
+def test_bad_mark1(basic_machine_definition):
+	memory_definition, command_defintion = basic_machine_definition
+
+	data = StringIO(
+		'''
+		ldi r0, 0xfe
+		this_is_a_bad_mark
+		ldi r1, 0xfe
+		add r0, r1
+		'''
+		)
+	assembler = Assembler(data, memory_definition, command_defintion, {})
+
+	with pytest.raises(ParsingError):
+		assembler.parse()
+
+def test_bad_mark2(basic_machine_definition):
+	memory_definition, command_defintion = basic_machine_definition
+
+	data = StringIO(
+		'''
+		ldi r0, 0xfe
+		This_is_a_bad_mark:
+		ldi r1, 0xfe
+		add r0, r1
+		'''
+		)
+	assembler = Assembler(data, memory_definition, command_defintion, {})
+
+	with pytest.raises(ParsingError):
+		assembler.parse()
+
+def test_bad_mark3(basic_machine_definition):
+	memory_definition, command_defintion = basic_machine_definition
+
+	data = StringIO(
+		'''
+		ldi r0, 0xfe
+		0this_is_a_bad_mark:
+		ldi r1, 0xfe
+		add r0, r1
+		'''
+		)
+	assembler = Assembler(data, memory_definition, command_defintion, {})
+
+	with pytest.raises(ParsingError):
+		assembler.parse()
+
+def test_bad_mark4(basic_machine_definition):
+	memory_definition, command_defintion = basic_machine_definition
+
+	data = StringIO(
+		'''
+		ldi r0, this_is_a_missing_mark
+		ldi r1, 0xfe
+		add r0, r1
+		'''
+		)
+	assembler = Assembler(data, memory_definition, command_defintion, {})
+
+	with pytest.raises(ParsingError):
+		assembler.parse()
+
+def test_bad_mark5(basic_machine_definition):
+	memory_definition, command_defintion = basic_machine_definition
+
+	data = StringIO(
+		'''
+		ldi r0, this_is_a_missing_mark:
+		ldi r1, 0xfe
+		add r0, r1
+		'''
+		)
+	assembler = Assembler(data, memory_definition, command_defintion, {})
+
+	with pytest.raises(ParsingError):
+		assembler.parse()
+
+def test_bad_directive1(basic_machine_definition):
+	memory_definition, command_defintion = basic_machine_definition
+
+	data = StringIO(
+		'''
+		ldi r0, 0
+		ldi r1, 0xfe
+		add r0, r1
+		.set data [0x00, 0x10]
+		'''
+		)
+	assembler = Assembler(data, memory_definition, command_defintion, {})
+
+	with pytest.raises(ParsingError):
+		assembler.parse()
+
+def test_bad_directive2(basic_machine_definition):
+	memory_definition, command_defintion = basic_machine_definition
+
+	data = StringIO(
+		'''
+		ldi r0, 0
+		ldi r1, 0xfe
+		add r0, r1
+		.set[0x00, 0x10]
+		'''
+		)
+	assembler = Assembler(data, memory_definition, command_defintion, {})
+
+	with pytest.raises(ParsingError):
+		assembler.parse()
+
+def test_bad_directive3(basic_machine_definition):
+	memory_definition, command_defintion = basic_machine_definition
+
+	data = StringIO(
+		'''
+		ldi r0, 0
+		ldi r1, 0xfe
+		add r0, r1
+		.set [0x00, 0x10,]
+		'''
+		)
+	assembler = Assembler(data, memory_definition, command_defintion, {})
+
+	with pytest.raises(ParsingError):
+		assembler.parse()
+
+def test_bad_directive4(basic_machine_definition):
+	memory_definition, command_defintion = basic_machine_definition
+
+	data = StringIO(
+		'''
+		ldi r0, 0
+		ldi r1, 0xfe
+		add r0, r1
+		.set [0x00, 0x10
+		'''
+		)
+	assembler = Assembler(data, memory_definition, command_defintion, {})
+
+	with pytest.raises(ParsingError):
+		assembler.parse()
+

From 6b8236dc3be90955e03f1d098a54803b801af646 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= <daniel.knuettel@daknuett.eu>
Date: Sat, 27 Oct 2018 16:51:30 +0200
Subject: [PATCH 7/9] fixed the assembler's bindump function

---
 assembler/assembler.py | 11 +++++++++--
 assembler/util.py      |  9 +++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/assembler/assembler.py b/assembler/assembler.py
index 4d36209..8302f94 100644
--- a/assembler/assembler.py
+++ b/assembler/assembler.py
@@ -3,7 +3,7 @@ from collections import deque
 from .context import FileContext
 from .tokenize import Tokenizer, WHITESPACE
 from .opcodes import make_opcodes
-from .util import can_be_mark, can_convert_to_int, autoint
+from .util import can_be_mark, can_convert_to_int, autoint, int16_2_bytes
 from .directives import SetDirective
 
 class ParsingError(Exception): 
@@ -92,7 +92,14 @@ class Assembler(object):
 		# FIXME:
 		# Make this work for tons of data.
 		# Or is that necessary?
-		return file_.write(bytes(self._code_objects))
+		# TODO:
+		# Figure out whether/what improvements are necessary here
+		length = int16_2_bytes(len(self._code_objects))
+		if(len(self._code_objects).bit_length() > 16):
+			raise ValueError("Program size excceeds 2^16.")
+		file_.write(length)
+		for word in self._code_objects:
+			file_.write(int16_2_bytes(word))
 
 
 	def parse_mark(self, token):
diff --git a/assembler/util.py b/assembler/util.py
index 60d043b..e5d4630 100644
--- a/assembler/util.py
+++ b/assembler/util.py
@@ -1,6 +1,7 @@
 """
 Utility functions used for parsing.
 """
+import struct
 
 
 def can_be_mark(argument):
@@ -83,3 +84,11 @@ def autoint(value):
 	return int(value)
 	
 
+def int16_2_bytes(value):
+	"""
+	Return the little-endian two-byte representation of a 16bit
+	integer (packed as signed if negative, as unsigned otherwise).
+	"""
+	if(value < 0):
+		return struct.pack("<h", value)
+	return struct.pack("<H", value)

From 15506e2c08c87356436fc5206097814af5f29f8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= <daniel.knuettel@daknuett.eu>
Date: Sat, 27 Oct 2018 16:51:52 +0200
Subject: [PATCH 8/9] updated tests

---
 test/test_013_util.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/test/test_013_util.py b/test/test_013_util.py
index 32a5c5f..6c316bc 100644
--- a/test/test_013_util.py
+++ b/test/test_013_util.py
@@ -1,4 +1,4 @@
-from assembler.util import can_be_mark, can_convert_to_int, autoint
+from assembler.util import can_be_mark, can_convert_to_int, autoint, int16_2_bytes
 
 
 def test_can_be_mark():
@@ -33,3 +33,8 @@ def test_autoint():
 	assert autoint("0b1010101") == 0b1010101
 	assert autoint("'a'") == ord("a")
 	assert autoint("'\\n'") == ord("\n")
+
+def test_int16_2_bytes():
+	assert int16_2_bytes(2) == b"\x02\00"
+	assert int16_2_bytes(0x0200) == b"\x00\x02"
+	assert int16_2_bytes(-1) == b"\xff\xff"

From 67127d4fe426f82f7eb78186308f8ab94a943ccd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= <daniel.knuettel@daknuett.eu>
Date: Sat, 27 Oct 2018 16:54:04 +0200
Subject: [PATCH 9/9] added Makefile for testing the assembler

---
 Makefile | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 Makefile

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f4d0917
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,3 @@
+.PHONY: test
+test:
+	python3 -m pytest test/