From 481c4ff5ba6ca301229160e7d4b8de40dec20288 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= Date: Tue, 9 Oct 2018 11:43:27 +0200 Subject: [PATCH 1/9] added language and bytecode definition --- bytecode.pdf | 244 ++++++++++++++ bytecode.rst | 46 +++ language.pdf | 881 +++++++++++++++++++++++++++++++++++++++++++++++++++ language.rst | 180 +++++++++++ 4 files changed, 1351 insertions(+) create mode 100644 bytecode.pdf create mode 100644 bytecode.rst create mode 100644 language.pdf create mode 100644 language.rst diff --git a/bytecode.pdf b/bytecode.pdf new file mode 100644 index 0000000..bdd4dda --- /dev/null +++ b/bytecode.pdf @@ -0,0 +1,244 @@ +%PDF-1.4 +%“Œ‹ž ReportLab Generated PDF document http://www.reportlab.com +1 0 obj +<< /F1 2 0 R /F2 3 0 R /F3 10 0 R >> +endobj +2 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +endobj +3 0 obj +<< /BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font >> +endobj +4 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 13 0 R /XYZ 62.69291 636.0236 0 ] /Rect [ 62.69291 687.0236 178.2829 699.0236 ] /Subtype /Link /Type /Annot >> +endobj +5 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 13 0 R /XYZ 62.69291 636.0236 0 ] /Rect [ 527.0227 687.7736 532.5827 699.7736 ] /Subtype /Link /Type /Annot >> +endobj +6 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 13 0 R /XYZ 62.69291 525.0236 0 ] /Rect [ 62.69291 669.0236 197.7229 681.0236 ] /Subtype /Link /Type /Annot >> +endobj +7 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 13 0 R /XYZ 62.69291 525.0236 0 ] /Rect [ 527.0227 669.7736 532.5827 681.7736 ] /Subtype /Link /Type /Annot >> +endobj +8 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 13 0 R /XYZ 62.69291 414.0236 0 ] /Rect [ 62.69291 651.0236 213.8229 663.0236 ] /Subtype /Link /Type /Annot >> +endobj +9 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 13 0 R /XYZ 62.69291 414.0236 0 ] /Rect [ 527.0227 651.7736 
532.5827 663.7736 ] /Subtype /Link /Type /Annot >> +endobj +10 0 obj +<< /BaseFont /Courier /Encoding /WinAnsiEncoding /Name /F3 /Subtype /Type1 /Type /Font >> +endobj +11 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 13 0 R /XYZ 62.69291 528.5236 0 ] /Rect [ 515.3527 333.0236 532.1177 345.0236 ] /Subtype /Link /Type /Annot >> +endobj +12 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 13 0 R /XYZ 62.69291 528.5236 0 ] /Rect [ 62.69291 321.0236 168.2829 333.0236 ] /Subtype /Link /Type /Annot >> +endobj +13 0 obj +<< /Annots [ 4 0 R 5 0 R 6 0 R 7 0 R 8 0 R 9 0 R 11 0 R 12 0 R ] /Contents 21 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 20 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 + /Trans << >> /Type /Page >> +endobj +14 0 obj +<< /Outlines 16 0 R /PageLabels 22 0 R /PageMode /UseNone /Pages 20 0 R /Type /Catalog >> +endobj +15 0 obj +<< /Author () /CreationDate (D:20181006214730+00'00') /Creator (\(unspecified\)) /Keywords () /ModDate (D:20181006214730+00'00') /Producer (ReportLab PDF Library - www.reportlab.com) + /Subject (\(unspecified\)) /Title (BCI Bytecode) /Trapped /False >> +endobj +16 0 obj +<< /Count 3 /First 17 0 R /Last 19 0 R /Type /Outlines >> +endobj +17 0 obj +<< /Dest [ 13 0 R /XYZ 62.69291 636.0236 0 ] /Next 18 0 R /Parent 16 0 R /Title (Assembly and Bytecode) >> +endobj +18 0 obj +<< /Dest [ 13 0 R /XYZ 62.69291 525.0236 0 ] /Next 19 0 R /Parent 16 0 R /Prev 17 0 R /Title (The Dynamic Instruction Set) >> +endobj +19 0 obj +<< /Dest [ 13 0 R /XYZ 62.69291 414.0236 0 ] /Parent 16 0 R /Prev 18 0 R /Title (Byte Code Interpreter Definition) >> +endobj +20 0 obj +<< /Count 1 /Kids [ 13 0 R ] /Type /Pages >> +endobj +21 0 obj +<< /Length 3790 >> +stream +1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET +q +1 0 0 1 62.69291 741.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 4 Tm /F2 20 Tf 24 TL 169.9349 0 Td (BCI Bytecode) Tj T* -169.9349 0 Td ET +Q +Q +q +1 0 0 1 62.69291 708.0236 cm +q +BT 1 0 0 1 0 3.5 Tm 21 TL /F2 
17.5 Tf 0 0 0 rg (Contents) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 648.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +q +1 0 0 1 0 39 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F2 10 Tf 0 0 .501961 rg (Assembly and Bytecode) Tj T* ET +Q +Q +q +1 0 0 1 397.8898 39 cm +q +0 0 .501961 rg +0 0 .501961 RG +BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL 66.44 0 Td (1) Tj T* -66.44 0 Td ET +Q +Q +q +1 0 0 1 0 21 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F2 10 Tf 0 0 .501961 rg (The Dynamic Instruction Set) Tj T* ET +Q +Q +q +1 0 0 1 397.8898 21 cm +q +0 0 .501961 rg +0 0 .501961 RG +BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL 66.44 0 Td (1) Tj T* -66.44 0 Td ET +Q +Q +q +1 0 0 1 0 3 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F2 10 Tf 0 0 .501961 rg (Byte Code Interpreter Definition) Tj T* ET +Q +Q +q +1 0 0 1 397.8898 3 cm +q +0 0 .501961 rg +0 0 .501961 RG +BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL 66.44 0 Td (1) Tj T* -66.44 0 Td ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 615.0236 cm +q +BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (Assembly and Bytecode) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 585.0236 cm +q +BT 1 0 0 1 0 14 Tm 3.309982 Tw 12 TL /F1 10 Tf 0 0 0 rg (Unlike machine code \(and other bytecode\) BCI bytecode has dynamic opcodes. This means that) Tj T* 0 Tw (bytecode is ) Tj /F2 10 Tf (not ) Tj /F1 10 Tf (necessarily portable.) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 567.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (This makes sense since the BCI instruction set can be extended for applications.) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 537.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL .742619 Tw (If one wants to share code that should run on any BCI it should be shared as assembly. The assembler) Tj T* 0 Tw (will then use the local interpreter definition and generate suiting bytecode.) 
Tj T* ET +Q +Q +q +1 0 0 1 62.69291 504.0236 cm +q +BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (The Dynamic Instruction Set) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 474.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL 1.093876 Tw (The BCI comes with a set of prepared instructions. These are complete and provide a way to do basic) Tj T* 0 Tw (operations like routines, loops and branching.) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 444.0236 cm +q +BT 1 0 0 1 0 14 Tm .743672 Tw 12 TL /F1 10 Tf 0 0 0 rg (The methods are organized in a binary tree internally. To build the tree in a comfortable way there is an) Tj T* 0 Tw (autoinserter that can insert up to ) Tj /F3 10 Tf 0 0 0 rg (1023 ) Tj /F1 10 Tf 0 0 0 rg (methods into the tree.) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 426.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (The autoinserter creates the opcode basing on the order of the method that he inserts.) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 393.0236 cm +q +BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (Byte Code Interpreter Definition) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 351.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 26 Tm /F1 10 Tf 12 TL 1.843555 Tw (A Bytecode Interpreter Definition consists of two mayor parts: The memory definition that defines the) Tj T* 0 Tw 1.896098 Tw (number of data registers \(up to 63\), the number of memory words \(up to 65535\) and the number of) Tj T* 0 Tw (program memory words \(up to 65535\).) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 321.0236 cm +q +BT 1 0 0 1 0 14 Tm .464985 Tw 12 TL /F1 10 Tf 0 0 0 rg (The second part defines the commands. The definition contains bot the order of the commands \(see ) Tj 0 0 .501961 rg (The) Tj T* 0 Tw (Dynamic Instruction Set) Tj 0 0 0 rg (\) and the required arguments.) 
Tj T* ET +Q +Q + +endstream +endobj +22 0 obj +<< /Nums [ 0 23 0 R ] >> +endobj +23 0 obj +<< /S /D /St 1 >> +endobj +xref +0 24 +0000000000 65535 f +0000000075 00000 n +0000000130 00000 n +0000000240 00000 n +0000000355 00000 n +0000000526 00000 n +0000000697 00000 n +0000000868 00000 n +0000001039 00000 n +0000001210 00000 n +0000001381 00000 n +0000001490 00000 n +0000001662 00000 n +0000001834 00000 n +0000002106 00000 n +0000002215 00000 n +0000002489 00000 n +0000002566 00000 n +0000002692 00000 n +0000002837 00000 n +0000002974 00000 n +0000003038 00000 n +0000006885 00000 n +0000006929 00000 n +trailer +<< /ID + % ReportLab generated PDF document -- digest (http://www.reportlab.com) + [(\003\236V\37247z\240'\2312!\276\204\362\214) (\003\236V\37247z\240'\2312!\276\204\362\214)] + /Info 15 0 R /Root 14 0 R /Size 24 >> +startxref +6966 +%%EOF diff --git a/bytecode.rst b/bytecode.rst new file mode 100644 index 0000000..ea2f634 --- /dev/null +++ b/bytecode.rst @@ -0,0 +1,46 @@ +BCI Bytecode +************* + +.. contents:: + +Assembly and Bytecode +===================== + +Unlike machine code (and other bytecode) BCI bytecode has +dynamic opcodes. This means that bytecode is **not** +necessarily portable. + +This makes sense since the BCI instruction set can be +extended for applications. + +If one wants to share code that should run on any BCI it +should be shared as assembly. The assembler will then use +the local interpreter definition and generate suitable +bytecode. + +The Dynamic Instruction Set +=========================== + +The BCI comes with a set of prepared instructions. These are +complete and provide a way to do basic operations like +routines, loops and branching. + +The methods are organized in a binary tree internally. To +build the tree in a comfortable way there is an autoinserter +that can insert up to ``1023`` methods into the tree. + +The autoinserter creates the opcode based on the order in +which the methods are inserted. 
+ +Byte Code Interpreter Definition +================================ + +A Bytecode Interpreter Definition consists of two major +parts: The memory definition that defines the number of data +registers (up to 63), the number of memory words (up to +65535) and the number of program memory words (up to 65535). + +The second part defines the commands. The definition +contains both the order of the commands (see `The Dynamic +Instruction Set`_) and the required arguments. + diff --git a/language.pdf b/language.pdf new file mode 100644 index 0000000..0c9f798 --- /dev/null +++ b/language.pdf @@ -0,0 +1,881 @@ +%PDF-1.4 +%“Œ‹ž ReportLab Generated PDF document http://www.reportlab.com +1 0 obj +<< /F1 2 0 R /F2 3 0 R /F3 14 0 R >> +endobj +2 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +endobj +3 0 obj +<< /BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font >> +endobj +4 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 15 0 R /XYZ 62.69291 600.0236 0 ] /Rect [ 62.69291 687.0236 299.9529 699.0236 ] /Subtype /Link /Type /Annot >> +endobj +5 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 15 0 R /XYZ 62.69291 600.0236 0 ] /Rect [ 527.0227 687.7736 532.5827 699.7736 ] /Subtype /Link /Type /Annot >> +endobj +6 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 15 0 R /XYZ 62.69291 224.2236 0 ] /Rect [ 62.69291 669.0236 154.3629 681.0236 ] /Subtype /Link /Type /Annot >> +endobj +7 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 15 0 R /XYZ 62.69291 224.2236 0 ] /Rect [ 527.0227 669.7736 532.5827 681.7736 ] /Subtype /Link /Type /Annot >> +endobj +8 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 16 0 R /XYZ 62.69291 380.0236 0 ] /Rect [ 62.69291 651.0236 114.3629 663.0236 ] /Subtype /Link /Type /Annot >> +endobj +9 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 16 0 R /XYZ 62.69291 380.0236 0 ] /Rect [ 527.0227 651.7736 532.5827 663.7736 ] /Subtype /Link /Type /Annot >> +endobj +10 0 
obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 16 0 R /XYZ 62.69291 329.0236 0 ] /Rect [ 62.69291 633.0236 91.59291 645.0236 ] /Subtype /Link /Type /Annot >> +endobj +11 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 16 0 R /XYZ 62.69291 329.0236 0 ] /Rect [ 527.0227 633.7736 532.5827 645.7736 ] /Subtype /Link /Type /Annot >> +endobj +12 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 18 0 R /XYZ 62.69291 765.0236 0 ] /Rect [ 62.69291 615.0236 118.2529 627.0236 ] /Subtype /Link /Type /Annot >> +endobj +13 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 18 0 R /XYZ 62.69291 765.0236 0 ] /Rect [ 527.0227 615.7736 532.5827 627.7736 ] /Subtype /Link /Type /Annot >> +endobj +14 0 obj +<< /BaseFont /Courier /Encoding /WinAnsiEncoding /Name /F3 /Subtype /Type1 /Type /Font >> +endobj +15 0 obj +<< /Annots [ 4 0 R 5 0 R 6 0 R 7 0 R 8 0 R 9 0 R 10 0 R 11 0 R 12 0 R 13 0 R ] /Contents 28 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 27 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 + /Trans << >> /Type /Page >> +endobj +16 0 obj +<< /Contents 29 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 27 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 /Trans << >> + /Type /Page >> +endobj +17 0 obj +<< /Border [ 0 0 0 ] /Contents () /Dest [ 16 0 R /XYZ 62.69291 332.5236 0 ] /Rect [ 62.69291 680.7736 91.59291 692.7736 ] /Subtype /Link /Type /Annot >> +endobj +18 0 obj +<< /Annots [ 17 0 R ] /Contents 30 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 27 0 R /Resources << /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 + /Trans << >> /Type /Page >> +endobj +19 0 obj +<< /Outlines 21 0 R /PageLabels 31 0 R /PageMode /UseNone /Pages 27 0 R /Type /Catalog >> +endobj +20 0 obj +<< /Author () /CreationDate (D:20181006214702+00'00') /Creator (\(unspecified\)) /Keywords () /ModDate (D:20181006214702+00'00') /Producer (ReportLab PDF Library - www.reportlab.com) + /Subject 
(\(unspecified\)) /Title (BCI Assembly Language) /Trapped /False >> +endobj +21 0 obj +<< /Count 5 /First 22 0 R /Last 26 0 R /Type /Outlines >> +endobj +22 0 obj +<< /Dest [ 15 0 R /XYZ 62.69291 600.0236 0 ] /Next 23 0 R /Parent 21 0 R /Title (Commands, Small Arguments and Big Arguments) >> +endobj +23 0 obj +<< /Dest [ 15 0 R /XYZ 62.69291 224.2236 0 ] /Next 24 0 R /Parent 21 0 R /Prev 22 0 R /Title (Built-In Commands) >> +endobj +24 0 obj +<< /Dest [ 16 0 R /XYZ 62.69291 380.0236 0 ] /Next 25 0 R /Parent 21 0 R /Prev 23 0 R /Title (Comments) >> +endobj +25 0 obj +<< /Dest [ 16 0 R /XYZ 62.69291 329.0236 0 ] /Next 26 0 R /Parent 21 0 R /Prev 24 0 R /Title (Marks) >> +endobj +26 0 obj +<< /Dest [ 18 0 R /XYZ 62.69291 765.0236 0 ] /Parent 21 0 R /Prev 25 0 R /Title (Direct Input) >> +endobj +27 0 obj +<< /Count 3 /Kids [ 15 0 R 16 0 R 18 0 R ] /Type /Pages >> +endobj +28 0 obj +<< /Length 6815 >> +stream +1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET +q +1 0 0 1 62.69291 741.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 4 Tm /F2 20 Tf 24 TL 117.6949 0 Td (BCI Assembly Language) Tj T* -117.6949 0 Td ET +Q +Q +q +1 0 0 1 62.69291 708.0236 cm +q +BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (Contents) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 612.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +q +1 0 0 1 0 75 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F2 10 Tf 0 0 .501961 rg (Commands, Small Arguments and Big Arguments) Tj T* ET +Q +Q +q +1 0 0 1 397.8898 75 cm +q +0 0 .501961 rg +0 0 .501961 RG +BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL 66.44 0 Td (1) Tj T* -66.44 0 Td ET +Q +Q +q +1 0 0 1 0 57 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F2 10 Tf 0 0 .501961 rg (Built-In Commands) Tj T* ET +Q +Q +q +1 0 0 1 397.8898 57 cm +q +0 0 .501961 rg +0 0 .501961 RG +BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL 66.44 0 Td (1) Tj T* -66.44 0 Td ET +Q +Q +q +1 0 0 1 0 39 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F2 10 Tf 0 0 .501961 rg (Comments) Tj T* ET +Q +Q +q +1 0 0 1 397.8898 39 cm +q +0 0 .501961 rg +0 0 .501961 RG +BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL 
66.44 0 Td (2) Tj T* -66.44 0 Td ET +Q +Q +q +1 0 0 1 0 21 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F2 10 Tf 0 0 .501961 rg (Marks) Tj T* ET +Q +Q +q +1 0 0 1 397.8898 21 cm +q +0 0 .501961 rg +0 0 .501961 RG +BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL 66.44 0 Td (2) Tj T* -66.44 0 Td ET +Q +Q +q +1 0 0 1 0 3 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F2 10 Tf 0 0 .501961 rg (Direct Input) Tj T* ET +Q +Q +q +1 0 0 1 397.8898 3 cm +q +0 0 .501961 rg +0 0 .501961 RG +BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL 66.44 0 Td (3) Tj T* -66.44 0 Td ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 579.0236 cm +q +BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (Commands, Small Arguments and Big Arguments) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 549.0236 cm +q +BT 1 0 0 1 0 14 Tm .87311 Tw 12 TL /F1 10 Tf 0 0 0 rg (A command in BCI Assembly is a word starting with an alphabetic character \() Tj /F3 10 Tf 0 0 0 rg (a..zA..Z) Tj /F1 10 Tf 0 0 0 rg (\) following by a) Tj T* 0 Tw (sequence of alphanumeric characters \() Tj /F3 10 Tf 0 0 0 rg (a..zA..Z0..9) Tj /F1 10 Tf 0 0 0 rg (\). This word will be converted to a 10bit opcode.) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 519.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL 2.147882 Tw (Embedded in the 16bits of a word there is also a 6bit small argument. If a command has no small) Tj T* 0 Tw (argument these bits will be zeroed. In the assembly the command will be only one word, for example:) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 485.8236 cm +q +q +1 0 0 1 0 0 cm +q +1 0 0 1 6.6 6.6 cm +q +.662745 .662745 .662745 RG +.5 w +.960784 .960784 .862745 rg +n -6 -6 468.6898 24 re B* +Q +q +0 0 0 rg +BT 1 0 0 1 0 2 Tm /F3 10 Tf 12 TL (cli) Tj T* ET +Q +Q +Q +Q +Q +q +1 0 0 1 62.69291 453.8236 cm +q +0 0 0 rg +BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL .345988 Tw (If the command has a small argument, the 6 bit will be filled with the small argument. 
In the assembly the) Tj T* 0 Tw (small argument is separated by one whitespace, for example:) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 420.6236 cm +q +q +1 0 0 1 0 0 cm +q +1 0 0 1 6.6 6.6 cm +q +.662745 .662745 .662745 RG +.5 w +.960784 .960784 .862745 rg +n -6 -6 468.6898 24 re B* +Q +q +0 0 0 rg +BT 1 0 0 1 0 2 Tm /F3 10 Tf 12 TL (inc r0) Tj T* ET +Q +Q +Q +Q +Q +q +1 0 0 1 62.69291 376.6236 cm +q +BT 1 0 0 1 0 26 Tm .629431 Tw 12 TL /F1 10 Tf 0 0 0 rg (Any other arguments are stored in further words and have thus a width of 16bits. They are separated by) Tj T* 0 Tw 2.211751 Tw (commas \() Tj /F3 10 Tf 0 0 0 rg (,) Tj /F1 10 Tf 0 0 0 rg (\) from both the first and any other arguments. It is recommended to only add one more) Tj T* 0 Tw (argument.) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 358.6236 cm +q +0 0 0 rg +BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Example for one big argument:) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 325.4236 cm +q +q +1 0 0 1 0 0 cm +q +1 0 0 1 6.6 6.6 cm +q +.662745 .662745 .662745 RG +.5 w +.960784 .960784 .862745 rg +n -6 -6 468.6898 24 re B* +Q +q +0 0 0 rg +BT 1 0 0 1 0 2 Tm /F3 10 Tf 12 TL (ldi r0, 0xdead) Tj T* ET +Q +Q +Q +Q +Q +q +1 0 0 1 62.69291 293.4236 cm +q +0 0 0 rg +BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL 1.571318 Tw (It might be useful to have more arguments for other applications, like double precision floating points.) 
Tj T* 0 Tw (Example \(not implemented\):) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 236.2236 cm +q +q +1 0 0 1 0 0 cm +q +1 0 0 1 6.6 6.6 cm +q +.662745 .662745 .662745 RG +.5 w +.960784 .960784 .862745 rg +n -6 -6 468.6898 48 re B* +Q +q +0 0 0 rg +BT 1 0 0 1 0 26 Tm /F3 10 Tf 12 TL (lddfi r0, 0xdead, 0xbeef) Tj T* (; load double precision floating point) Tj T* (; to r0 and r1) Tj T* ET +Q +Q +Q +Q +Q +q +1 0 0 1 62.69291 203.2236 cm +q +BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (Built-In Commands) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 185.2236 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (ldi) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj (,) Tj ( ) Tj (<) Tj (ba) Tj (>) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 170.2236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 2 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Load the value ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (ba) Tj (> ) Tj /F1 10 Tf 0 0 0 rg (into register ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (>) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 154.2236 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (ld) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj (,) Tj ( ) Tj (<) Tj (ba) Tj (>) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 139.2236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 2 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Load the value of the memory cell at ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (ba) Tj (> ) Tj /F1 10 Tf 0 0 0 rg (into register ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (>) Tj /F1 10 Tf 0 0 0 rg (.) 
Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 123.2236 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (st) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj (,) Tj ( ) Tj (<) Tj (ba) Tj (>) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 108.2236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 2 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Store the value of register ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (> ) Tj /F1 10 Tf 0 0 0 rg (into the memory cell at ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (ba) Tj (>) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 92.22362 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (inc) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 77.22362 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 2 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Increment the value of register ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (>) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET +Q +Q +q +Q +Q + +endstream +endobj +29 0 obj +<< /Length 7261 >> +stream +1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET +q +1 0 0 1 62.69291 753.0236 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (dec) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 738.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 2 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Decrement the value of register ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (>) Tj /F1 10 Tf 0 0 0 rg (.) 
Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 722.0236 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (add|sub|mul|div) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj (,) Tj ( ) Tj (<) Tj (ba) Tj (>) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 695.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 14 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 14 Tm 1.127318 Tw 12 TL /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (>) Tj ( ) Tj (=) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj ( ) Tj (+|-|*|/) Tj ( ) Tj (<) Tj (ba) Tj (> ) Tj /F1 10 Tf 0 0 0 rg (where ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (> ) Tj /F1 10 Tf 0 0 0 rg (and ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (ba) Tj (> ) Tj /F1 10 Tf 0 0 0 rg (are registers. Write the overflow into the) Tj T* 0 Tw (status register.) Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 679.0236 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (gt|ge|lt|le|eq) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 652.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 14 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 14 Tm 2.431098 Tw 12 TL /F1 10 Tf 0 0 0 rg (Check if the value of register ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (> ) Tj /F1 10 Tf 0 0 0 rg (is ) Tj /F3 10 Tf 0 0 0 rg (>) Tj (|) Tj (>) Tj (=|) Tj (<) Tj (|) Tj (<) Tj (=|== ) Tj /F1 10 Tf 0 0 0 rg (to ) Tj /F3 10 Tf 0 0 0 rg (0) Tj /F1 10 Tf 0 0 0 rg (. Set the status register to ) Tj /F3 10 Tf 0 0 0 rg (1 ) Tj /F1 10 Tf 0 0 0 rg (if it) Tj T* 0 Tw (evaluates true, else to ) Tj /F3 10 Tf 0 0 0 rg (0) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 636.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 2 Tm /F3 10 Tf 12 TL (not) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 621.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 2 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (If the status register is ) Tj /F3 10 Tf 0 0 0 rg (0 ) Tj /F1 10 Tf 0 0 0 rg (set it to ) Tj /F3 10 Tf 0 0 0 rg (1) Tj /F1 10 Tf 0 0 0 rg (, else set it to ) Tj /F3 10 Tf 0 0 0 rg (0) Tj /F1 10 Tf 0 0 0 rg (.) 
Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 605.0236 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (jmp) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 590.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 2 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Set the program counter to the value of register ) Tj /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (>) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 574.0236 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (call) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 547.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 14 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 14 Tm .797633 Tw 12 TL /F1 10 Tf 0 0 0 rg (Push the current program counter on the stack and set the program counter to the value of register) Tj T* 0 Tw /F3 10 Tf 0 0 0 rg (<) Tj (sa) Tj (>) Tj /F1 10 Tf 0 0 0 rg (.) Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 531.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 2 Tm /F3 10 Tf 12 TL (ret) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 516.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 2 Tm T* ET +q +1 0 0 1 20 0 cm +q +0 0 0 rg +BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Pop the previously pushed program counter from the stack.) Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 500.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 2 Tm /F3 10 Tf 12 TL (stop) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 485.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 2 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Write ) Tj /F3 10 Tf 0 0 0 rg (1 ) Tj /F1 10 Tf 0 0 0 rg (into the shutdown register. This will cause the interpreter to halt.) 
Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 469.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 2 Tm /F3 10 Tf 12 TL (cl) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 454.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 2 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Write ) Tj /F3 10 Tf 0 0 0 rg (0 ) Tj /F1 10 Tf 0 0 0 rg (into the status register.) Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 438.0236 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (cjmp) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 423.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 2 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (If there not a ) Tj /F3 10 Tf 0 0 0 rg (0 ) Tj /F1 10 Tf 0 0 0 rg (in the status register, ) Tj /F3 10 Tf 0 0 0 rg (jmp <) Tj (sa) Tj (>) Tj /F1 10 Tf 0 0 0 rg (, else continue execution.) Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 407.0236 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F3 10 Tf 0 0 0 rg (ccall) Tj ( ) Tj (<) Tj (sa) Tj (>) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 392.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 2 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Like ) Tj /F3 10 Tf 0 0 0 rg (cjmp) Tj ( ) Tj (<) Tj (sa) Tj (> ) Tj /F1 10 Tf 0 0 0 rg (but with ) Tj /F3 10 Tf 0 0 0 rg (call ) Tj /F1 10 Tf 0 0 0 rg (instead.) Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 359.0236 cm +q +BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (Comments) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 341.0236 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (Comments start with a ) Tj /F3 10 Tf 0 0 0 rg (; ) Tj /F1 10 Tf 0 0 0 rg (at the beginning of the line and end at the end of the line.) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 308.0236 cm +q +BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (Marks) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 278.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL .467485 Tw (Marks represent a special location of the assembly code. 
The assembler keeps track of those marks and) Tj T* 0 Tw (they can be used as immediate input.) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 236.0236 cm +q +BT 1 0 0 1 0 26 Tm 2.869983 Tw 12 TL /F1 10 Tf 0 0 0 rg (A mark is defined by a single word, starting with an alphabetic character \() Tj /F3 10 Tf 0 0 0 rg (a..zA...Z) Tj /F1 10 Tf 0 0 0 rg (\) containing) Tj T* 0 Tw 2.330814 Tw (alphanumeric characters and underscores \() Tj /F3 10 Tf 0 0 0 rg (a..zA..Z0..9_) Tj /F1 10 Tf 0 0 0 rg (\) followed by a colon \() Tj /F3 10 Tf 0 0 0 rg (:) Tj /F1 10 Tf 0 0 0 rg (\) and a newline) Tj T* 0 Tw (character.) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 218.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (Example:) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 100.8236 cm +q +q +1 0 0 1 0 0 cm +q +1 0 0 1 6.6 6.6 cm +q +.662745 .662745 .662745 RG +.5 w +.960784 .960784 .862745 rg +n -6 -6 468.6898 108 re B* +Q +q +0 0 0 rg +BT 1 0 0 1 0 86 Tm /F3 10 Tf 12 TL (ldi r0, this_is_a_mark) Tj T* (ldi r1, 0xfefe) Tj T* (ldi r2, 0xefef) Tj T* T* (this_is_a_mark:) Tj T* (add r2, r1) Tj T* (; this will result in an infinite loop.) Tj T* (jmp r0) Tj T* ET +Q +Q +Q +Q +Q + +endstream +endobj +30 0 obj +<< /Length 2972 >> +stream +1 0 0 1 0 0 cm BT /F1 12 Tf 14.4 TL ET +q +1 0 0 1 62.69291 744.0236 cm +q +BT 1 0 0 1 0 3.5 Tm 21 TL /F2 17.5 Tf 0 0 0 rg (Direct Input) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 726.0236 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (The core instruction set contains the ) Tj /F3 10 Tf 0 0 0 rg (ldi ) Tj /F1 10 Tf 0 0 0 rg (command that can be used to load data into a register directly.) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 696.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 14 Tm /F1 10 Tf 12 TL 2.501318 Tw (The first \(big\) argument of this command is always a 16bit word. 
The assembler can automatically) Tj T* 0 Tw (generate the correct value if the argument is provided in the following ways:) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 680.0236 cm +q +0 0 .501961 rg +0 0 .501961 RG +BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL (Marks) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 665.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 2 Tm T* ET +q +1 0 0 1 20 0 cm +q +0 0 0 rg +BT 1 0 0 1 0 2 Tm /F1 10 Tf 12 TL (The assembler inserts the absolute offset of the Mark.) Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 649.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL (A decimal value) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 634.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 2 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (The assembler inserts the value \(i.e. ) Tj /F3 10 Tf 0 0 0 rg (ldi) Tj ( ) Tj (r0, 12) Tj /F1 10 Tf 0 0 0 rg (\).) Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 618.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL (A hexadecimal value) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 603.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 2 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (If the argument starts with ) Tj /F3 10 Tf 0 0 0 rg (0x ) Tj /F1 10 Tf 0 0 0 rg (the assembler will interpret the argument as hexadecimal.) Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 587.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL (A binary value) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 572.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 2 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 2 Tm 12 TL /F1 10 Tf 0 0 0 rg (If the argument starts with ) Tj /F3 10 Tf 0 0 0 rg (0b ) Tj /F1 10 Tf 0 0 0 rg (the assembler will interpret the value as binary.) 
Tj T* ET +Q +Q +q +Q +Q +q +1 0 0 1 62.69291 556.0236 cm +q +0 0 0 rg +BT 1 0 0 1 0 2 Tm /F2 10 Tf 12 TL (A character) Tj T* ET +Q +Q +q +1 0 0 1 62.69291 529.0236 cm +0 0 0 rg +BT /F1 10 Tf 12 TL ET +BT 1 0 0 1 0 14 Tm T* ET +q +1 0 0 1 20 0 cm +q +BT 1 0 0 1 0 14 Tm .82811 Tw 12 TL /F1 10 Tf 0 0 0 rg (If the argument is either a single character surrounded by two ) Tj /F3 10 Tf 0 0 0 rg (' ) Tj /F1 10 Tf 0 0 0 rg (characters or any unicode escape) Tj T* 0 Tw (sequence surrounded by ) Tj /F3 10 Tf 0 0 0 rg (' ) Tj /F1 10 Tf 0 0 0 rg (characters the assembler will insert the integer representation.) Tj T* ET +Q +Q +q +Q +Q + +endstream +endobj +31 0 obj +<< /Nums [ 0 32 0 R 1 33 0 R 2 34 0 R ] >> +endobj +32 0 obj +<< /S /D /St 1 >> +endobj +33 0 obj +<< /S /D /St 2 >> +endobj +34 0 obj +<< /S /D /St 3 >> +endobj +xref +0 35 +0000000000 65535 f +0000000075 00000 n +0000000130 00000 n +0000000240 00000 n +0000000355 00000 n +0000000526 00000 n +0000000697 00000 n +0000000868 00000 n +0000001039 00000 n +0000001210 00000 n +0000001381 00000 n +0000001553 00000 n +0000001725 00000 n +0000001897 00000 n +0000002069 00000 n +0000002178 00000 n +0000002464 00000 n +0000002674 00000 n +0000002846 00000 n +0000003075 00000 n +0000003184 00000 n +0000003467 00000 n +0000003544 00000 n +0000003692 00000 n +0000003827 00000 n +0000003953 00000 n +0000004076 00000 n +0000004193 00000 n +0000004271 00000 n +0000011143 00000 n +0000018461 00000 n +0000021490 00000 n +0000021552 00000 n +0000021589 00000 n +0000021626 00000 n +trailer +<< /ID + % ReportLab generated PDF document -- digest (http://www.reportlab.com) + [(\372\(\217\316\222\3169q\222\376\355\325c1\302>) (\372\(\217\316\222\3169q\222\376\355\325c1\302>)] + /Info 20 0 R /Root 19 0 R /Size 35 >> +startxref +21663 +%%EOF diff --git a/language.rst b/language.rst new file mode 100644 index 0000000..c7f53b5 --- /dev/null +++ b/language.rst @@ -0,0 +1,180 @@ +BCI Assembly Language +********************* + +.. 
contents:: + +Commands, Small Arguments and Big Arguments +=========================================== + +A command in BCI Assembly is a word starting with an +alphabetic character (``a..zA..Z``) following by a sequence +of alphanumeric characters (``a..zA..Z0..9``). +This word will be converted to a 10bit opcode. + +Embedded in the 16bits of a word there is also a 6bit small +argument. If a command has no small argument these bits will +be zeroed. In the assembly the command will be only one +word, for example:: + + cli + +If the command has a small argument, the 6 bit will be +filled with the small argument. In the assembly the small +argument is separated by one whitespace, for example:: + + inc r0 + +Any other arguments are stored in further words and have +thus a width of 16bits. They are separated by commas (``,``) +from both the first and any other arguments. +It is recommended to only add one more argument. + +Example for one big argument:: + + ldi r0, 0xdead + +It might be useful to have more arguments for other +applications, like double precision floating points. +Example (not implemented):: + + lddfi r0, r1, 0xdead, 0xbeef + ; load double precision floating point + ; to r0 and r1 + +Register Names +============== + +Only data registers can be accessed directly. They are +prefixed with a ``r`` and are indexed starting with ``0``. + +Examples: ``r0, r1, r2, ..., r11, r12`` + + +Built-In Commands +================= + +``ldi , `` + Load the value ```` into register ````. + +``ld , `` + Load the value of the memory cell at ```` into + register ````. + +``st , `` + Store the value of register ```` into the memory + cell at ````. + +``inc `` + Increment the value of register ````. + +``dec `` + Decrement the value of register ````. + +``add|sub|mul|div , `` + `` = +|-|*|/ `` where ```` and + ```` are registers. Write the overflow into the + status register. + +``gt|ge|lt|le|eq `` + Check if the value of register ```` is + ``>|>=|<|<=|==`` to ``0``. 
Set the status register + to ``1`` if it evaluates true, else to ``0``. + +``not`` + If the status register is ``0`` set it to ``1``, + else set it to ``0``. + +``jmp `` + Set the program counter to the value of register + ````. + +``call `` + Push the current program counter on the stack and + set the program counter to the value of register ````. + +``ret`` + Pop the previously pushed program counter from the stack. + +``stop`` + Write ``1`` into the shutdown register. This will + cause the interpreter to halt. +``cl`` + Write ``0`` into the status register. + +``cjmp `` + If there not a ``0`` in the status register, ``jmp + ``, else continue execution. + +``ccall `` + Like ``cjmp `` but with ``call`` instead. + +Comments +======== + +Comments start with a ``;`` at the beginning of the line and +end at the end of the line. + +Marks +===== + +Marks represent a special location of the assembly code. The +assembler keeps track of those marks and they can be used as +immediate input. + +A mark is defined by a single word, starting with an +alphabetic character (``a..zA...Z``) containing alphanumeric +characters and underscores (``a..zA..Z0..9_``) followed by +a colon (``:``) and a newline character. + +Example:: + + ldi r0, this_is_a_mark + ldi r1, 0xfefe + ldi r2, 0xefef + + this_is_a_mark: + add r2, r1 + ; this will result in an infinite loop. + jmp r0 + + +Direct Input +============ + +The core instruction set contains the ``ldi`` command that +can be used to load data into a register directly. + +The first (big) argument of this command is always a 16bit +word. The assembler can automatically generate the correct +value if the argument is provided in the following ways: + +`Marks`_ + The assembler inserts the absolute offset of the + Mark. +A decimal value + The assembler inserts the value (i.e. ``ldi r0, + 12``). +A hexadecimal value + If the argument starts with ``0x`` the assembler + will interpret the argument as hexadecimal. 
+A binary value + If the argument starts with ``0b`` the assembler + will interpret the value as binary. +A character + If the argument is either a single character + surrounded by two ``'`` characters or any unicode + escape sequence surrounded by ``'`` characters the + assembler will insert the integer representation. + +Explicit Data Programming +========================= + +One can explicitly set data in the program memory by using +the ``.set`` directive. It uses the following semantics:: + + ".set" "[" {,} "]" + +Where ```` is a `Direct Input`_ value. The assembler +will insert the data at exactly the location where the +``.set`` appears. The assembler ignores any whitespace or +newline characters between the brackets ``[]``. From 0fd65c721c711fb2e723c61808644ee136e5d4df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= Date: Tue, 9 Oct 2018 11:43:42 +0200 Subject: [PATCH 2/9] initial assembler package --- assembler/__init__.py | 0 assembler/assembler.py | 280 ++++++++++++++++++++++++++++++++++++++++ assembler/context.py | 28 ++++ assembler/directives.py | 42 ++++++ assembler/opcodes.py | 33 +++++ assembler/tokenize.py | 55 ++++++++ assembler/util.py | 85 ++++++++++++ 7 files changed, 523 insertions(+) create mode 100644 assembler/__init__.py create mode 100644 assembler/assembler.py create mode 100644 assembler/context.py create mode 100644 assembler/directives.py create mode 100644 assembler/opcodes.py create mode 100644 assembler/tokenize.py create mode 100644 assembler/util.py diff --git a/assembler/__init__.py b/assembler/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/assembler/assembler.py b/assembler/assembler.py new file mode 100644 index 0000000..53f556c --- /dev/null +++ b/assembler/assembler.py @@ -0,0 +1,280 @@ +from collections import deque + +from .context import FileContext +from .tokenize import Tokenizer, WHITESPACE +from .opcodes import make_opcodes +from .util import can_be_mark, can_convert_to_int, 
autoint +from .directives import SetDirective + +class ParsingError(Exception): + pass + +class Assembler(object): + """ + This class is used for generating bytecode from a file containing assembly. + + Also required is the memory definition of the interpreter given as a dict:: + + { + "registers": , + "memory": , + "program_memory": + } + + And the definition of the commands. This is a list of dicts:: + + [ + { + "mnemonic": , + "args": [ + ("register"|"memory"|"program_memory"|"direct_input"), ... + ] + } + ] + + The method ``parse`` will parse the input file and ``bindump`` will dump the binary + bytecode into a file. + """ + def __init__(self, file_, memory_definition, command_definition, custom_directives): + self._file_context = FileContext(file_) + self._code_objects = deque() + self._memory_definition = memory_definition + self._command_definition = command_definition + self._word_count = 0 + self._marks = {} + + self._opcodes = make_opcodes([cmd["mnemonic"] for cmd in command_definition]) + self._commands_by_mnemonic = {cmd["mnemonic"]: cmd for cmd in command_definition} + self._tokenizer = Tokenizer(self._file_context) + + self._directives = {"set": SetDirective()} + self._directives.update(custom_directives) + + def parse(self): + try: + for token in self._tokenizer: + + # Comments + if(token == ";"): + while(token != "\n"): + token = next(self._tokenizer) + continue + + # Commands + if(token in self._commands_by_mnemonic): + self.parse_command(token) + continue + + # Directives + if(token == "."): + self.parse_directive() + continue + + # The default is trying to parse a mark + if(not can_be_mark(token)): + self.raise_unexpected_token(token + , "comment, command, directive or mark" + , token) + self.parse_mark(token) + except StopIteration: + raise ParsingError("Unexpected EOF while parsing.") + + for mark, mark_data in self._marks.items(): + if(mark_data["target"] < 0): + raise ParsingError("Mark {} undefined. 
Referenced in lines: {}".format( + mark + , mark_data["references"])) + + self._code_objects = [self._marks[c]["target"] if c in self._marks else c + for c in self._code_objects ] + + def bindump(self, file_): + # FIXME: + # Make this work for tons of data. + # Or is that necessary? + return file_.write(bytes(self._code_objects)) + + + def parse_mark(self, token): + should_be_colon = next(self._tokenizer) + + if(should_be_colon != ":"): + self.raise_unexpected_token(token, "':'", should_be_colon) + + should_be_newline = next(self._tokenizer) + if(should_be_newline != "\n"): + self.raise_unexpected_token(token + ":", "'\\n'", should_be_newline) + + if(token in self._marks): + if(self._marks[token]["target"] != -1): + raise ParsingError("Error in line {} column {} mark already defined: '{}'. Previous was in line {}.".format( + self._line + , self._column + , token + , self._marks[token]["target_line"])) + self._marks[token]["target"] = self._word_count + self._marks[token]["target_line"] = self._line + + else: + self._marks[token] = { + "target": self._word_count + , "target_line": self._line + , "references": [] + } + + should_be_newline = next(self._tokenizer) + if(should_be_newline != "\n"): + self.raise_unexpected_token(token, "newline", should_be_newline) + + + def parse_directive(self): + should_be_name = next(self._tokenizer) + + if(not should_be_name in self._directives): + self.raise_unexpected_token(".", "directive name", should_be_name) + + words = self._directives[should_be_name].parse(self, self._tokenizer) + + self._word_count += len(words) + self._code_objects.extend(words) + + should_be_newline = next(self._tokenizer) + if(should_be_newline != "\n"): + self.raise_unexpected_token(".", "newline", should_be_newline) + + + + + def parse_command(self, cmd): + + self._code_objects.append(self._opcodes[cmd]) + self._word_count += 1 + + if(not self._commands_by_mnemonic[cmd]["args"]): + token = next(self._tokenizer) + if(token != "\n"): + 
self.raise_unexpected_token(cmd, "newline", token) + return + + should_be_whitespace = next(self._tokenizer) + if(should_be_whitespace not in WHITESPACE + or should_be_whitespace == "\n"): + self.raise_unexpected_token(cmd, "' '", should_be_whitespace) + + should_be_an_argument = next(self._tokenizer) + argument = self.check_and_convert_argument(cmd + , should_be_an_argument + , self._commands_by_mnemonic[cmd]["args"][0]) + self._word_count += 1 + self._code_objects.append(argument) + + + for argument in self._commands_by_mnemonic[cmd]["args"][1:]: + should_be_comma = next(self._tokenizer) + if(should_be_comma != ","): + self.raise_unexpected_token(cmd, should_be_whitespace) + + self._word_count += 1 + self._code_objects.append( + self.check_and_convert_argument( + cmd + , next(self._tokenizer) + , argument)) + + should_be_newline = next(self._tokenizer) + if(should_be_newline != "\n"): + self.raise_unexpected_token(cmd, "newline", should_be_newline) + + + def raise_unexpected_token(self, after, expected, got): + raise ParsingError("Error in line {} column {} after '{}': expected {}, got '{}'".format( + self._file_context._line + , self._file_context._column + , after + , expected + , got)) + + def raise_invalid_address(self, after, memtype, maxval, got): + raise ParsingError("Error in line {} column {} after '{}': value {} is invalid for {} (max is {})".format( + self._file_context._line + , self._file_context._column + , after + , got + , memtype + , maxval)) + + def check_and_convert_argument(self, cmd, argument, argument_definition): + if(argument_definition == "register"): + if(not argument.startswith("r")): + self.raise_unexpected_token(cmd, "register name", argument) + register_offset = argument[1:] + raise_could_not_convert_register_offset = False + try: + register_offset = int(register_offset) + except: + raise_could_not_convert_register_offset = True + + if(raise_could_not_convert_register_offset): + self.raise_unexpected_token(cmd, "register name", 
argument) + + if(register_offset > self._memory_definition["registers"] + or register_offset < 0): + self.raise_invalid_address(cmd + , "register" + , self._memory_definition["registers"] + , register_offset) + + return register_offset + + if(argument_definition == "memory"): + if(not can_convert_to_int(argument)): + self.raise_unexpected_token(cmd, "integer address", argument) + argument = autoint(argument) + + if(argument < 0 or argument > self._memory_definition["memory"]): + self.raise_invalid_address(cmd + , "memory" + , self._memory_definition["memory"] + , argument) + return argument + + if(argument_definition == "program_memory"): + if(not can_convert_to_int(argument)): + if(not can_be_mark(argument)): + self.raise_unexpected_token(cmd, "integer address or mark", argument) + if(argument in self._marks): + self._marks[argument]["references"].append(self._line) + if(self._marks[argument]["target"] != -1): + return self._marks[argument]["target"] + return argument + self._marks[argument] = { + "target": -1 + , "target_line": 0 + , "references": [self._line] + } + return argument + + + argument = autoint(argument) + + if(argument < 0 or argument > self._memory_definition["program_memory"]): + self.raise_invalid_address(cmd + , "program_memory" + , self._memory_definition["program_memory"] + , argument) + return argument + + + if(can_convert_to_int(argument)): + return autoint(argument) + + if(not can_be_mark(argument)): + self.raise_unexpected_token(cmd, "integer, char or mark", argument) + if(argument in self._marks and self._marks[argument] >= 0): + return self._marks[argument] + self._marks[argument] = -1 + return argument + + + + diff --git a/assembler/context.py b/assembler/context.py new file mode 100644 index 0000000..3863a00 --- /dev/null +++ b/assembler/context.py @@ -0,0 +1,28 @@ +from collections import deque + +class FileContext(object): + def __init__(self, file_): + self._file = file_ + self._line = 0 + self._column = 0 + self._column_stack = 
deque() + + def getc(self): + c = self._file.read(1) + if(c == "\n"): + self._line += 1 + self._column_stack.append(self._column) + self._column = 0 + else: + self._column += 1 + + return c + + def ungetc(self, c): + self._file.seek(self._file.tell() - 1, 0) + if(c == "\n"): + self._line -= 1 + self._column = self._column_stack.pop() + else: + self._column -= 1 + diff --git a/assembler/directives.py b/assembler/directives.py new file mode 100644 index 0000000..24b6c99 --- /dev/null +++ b/assembler/directives.py @@ -0,0 +1,42 @@ +""" +Directives for explicitly modifying the program memory. +""" + +from abc import ABC, abstractmethod +from collections import deque + +class AbstractDirective(ABC): + @abstractmethod + def parse(self, assembler, tokenizer): + """ + Parse the directive by converting the text to a list of words. + Returns a list of 16bit words. + """ + pass + + + +class SetDirective(AbstractDirective): + def parse(self, assembler, tokenizer): + words = deque() + should_be_bracket = next(tokenizer) + if(not should_be_bracket == "["): + assembler.raise_unexpected_token(".set", "'['", should_be_bracket) + + while(True): + should_be_value = next(tokenizer) + if(not can_convert_to_int(should_be_value)): + assembler.raise_unexpected_token(".set" + , "integer or character value" + , should_be_value) + words.append(autoint(should_be_value)) + + comma_or_bracket = next(tokenizer) + if(not comma_or_bracket in (",", "]")): + assembler.raise_unexpected_token(".set" + , "',' or ']'" + , comma_or_bracket) + + if(comma_or_bracket == "]"): + break + return list(words) diff --git a/assembler/opcodes.py b/assembler/opcodes.py new file mode 100644 index 0000000..15c9ba3 --- /dev/null +++ b/assembler/opcodes.py @@ -0,0 +1,33 @@ + + +class Autoinserter(object): + def __init__(self): + self.mayor = 2 + self.minor = 1 + def __next__(self): + """Generate the next opcode""" + # 10bit opcode + opcode = 0b1111111111 + + # Calculate this opcode. 
+ opcode *= self.minor + opcode //= self.mayor + + # The lower 6 bits are reserved. + opcode <<= 6 + # 16 bits in total. Should not matter. + opcode &= 0xffff + + # Update the state. + self.minor = (self.minor + 2) % self.mayor + if(self.minor == 1): + self.mayor *= 2 + + return opcode + def __iter__(self): + return self + +def make_opcodes(mnemonics_in_order): + + ai = Autoinserter() + return {mnemonic: opcode for (mnemonic, opcode) in zip(mnemonics_in_order, ai)} diff --git a/assembler/tokenize.py b/assembler/tokenize.py new file mode 100644 index 0000000..71be23f --- /dev/null +++ b/assembler/tokenize.py @@ -0,0 +1,55 @@ +from collections import deque + +WHITESPACE = {" ", "\t", "\n", "\r"} +TOKENS_END_OF_WORD = WHITESPACE | {";", ":", ",", ".", "[", "]"} + +TOKENS_EXPECT_NEWLINE = {":", "]"} + + +class Tokenizer(object): + def __init__(self, context): + self._context = context + self._expect_newline = False + self._expect_whitespace = False + + def __iter__(self): + return self + def __next__(self): + """ + Return the next token. + """ + + current_token = deque() + + while(True): + c = self._context.getc() + + # Sometimes we need the explicit newline. + if(self._expect_newline and c == "\n"): + self._expect_newline = False + return c + + # Skip multiple whitespaces. + if(c in WHITESPACE and not current_token): + if(self._expect_whitespace): + self._expect_whitespace = False + return c + continue + + if(c in TOKENS_END_OF_WORD): + if(current_token): + self._context.ungetc(c) + if(c in WHITESPACE): + self._expect_whitespace = True + return "".join(current_token) + else: + if(c in TOKENS_EXPECT_NEWLINE): + self._expect_newline = True + return c + + if(not c): + raise StopIteration() + + current_token.append(c) + + diff --git a/assembler/util.py b/assembler/util.py new file mode 100644 index 0000000..60d043b --- /dev/null +++ b/assembler/util.py @@ -0,0 +1,85 @@ +""" +Utility functions used for parsing. 
+""" + + +def can_be_mark(argument): + """ + The ``argument`` can be interpreted as a Mark. + """ + a = ord("a") + a_z = [chr(a + i) for i in range(26)] + A = ord("A") + A_Z = [chr(A + i) for i in range(26)] + null = ord("0") + null_9 = [chr(null + i) for i in range(10)] + + if(not argument[0] in a_z): + return False + + for char in argument[1:]: + if(not (char in a_z + or char in A_Z + or char in null_9 + or char == "_")): + return False + return True + + + +def can_convert_to_int(value): + """ + ``value`` can be converted to an integer. + + **Note** that this returns ``True`` if the value is a + character definition like ``'a'``. + """ + if(value.startswith("0x")): + try: + int(value[2:], 16) + return True + except: + return False + + if(value.startswith("0b")): + try: + int(value[2:], 2) + return True + except: + return False + + if(value.startswith("'") and value.endswith("'")): + if(len(value) == 3): + return True + if(len(value) == 4): + if(value[1:-1] in {"\\n", "\\r", "\\t"}): + return True + return False + + try: + int(value) + return True + except: + return False + +def autoint(value): + """ + Convert ``value`` to an integer automatically. 
+ """ + escape_sequences = {"\\n": "\n", "\\r": "\r", "\\t":"\t"} + if(value.startswith("0x")): + return int(value[2:], 16) + + if(value.startswith("0b")): + return int(value[2:], 2) + + if(value.startswith("'") and value.endswith("'")): + if(len(value) == 3): + return ord(value[1:-1]) + if(len(value) == 4): + if(value[1:-1] in escape_sequences): + return ord(escape_sequences[value[1:-1]]) + + return int(value) + + From c684a5f5121faaddec38fef32ace2a70b36346a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= Date: Tue, 9 Oct 2018 11:44:00 +0200 Subject: [PATCH 3/9] added first tests --- test/conftest.py | 101 +++++++++++++++++++++++++++++++++ test/test_010_filecontext.py | 24 ++++++++ test/test_011_tokenize.py | 54 ++++++++++++++++++ test/test_012_opcodes.py | 14 +++++ test/test_013_util.py | 35 ++++++++++++ test/test_020_basic_parsing.py | 22 +++++++ 6 files changed, 250 insertions(+) create mode 100644 test/conftest.py create mode 100644 test/test_010_filecontext.py create mode 100644 test/test_011_tokenize.py create mode 100644 test/test_012_opcodes.py create mode 100644 test/test_013_util.py create mode 100644 test/test_020_basic_parsing.py diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 0000000..9825dfd --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,101 @@ +import pytest + +@pytest.fixture +def basic_machine_definition(): + return ( + { + "registers": 20 + , "memory": 100 + , "program_memory": 500 + } + , [ + { + "mnemonic": "ldi" + , "args": ["register", "direct_input"] + } + , { + "mnemonic": "ld" + , "args": ["register", "register"] + } + , { + "mnemonic": "st" + , "args": ["register", "register"] + } + , { + "mnemonic": "inc" + , "args": ["register"] + } + , { + "mnemonic": "dec" + , "args": ["register"] + } + , { + "mnemonic": "add" + , "args": ["register", "register"] + } + , { + "mnemonic": "sub" + , "args": ["register", "register"] + } + , { + "mnemonic": "mul" + , "args": ["register", "register"] + } + , { 
+ "mnemonic": "div" + , "args": ["register", "register"] + } + , { + "mnemonic": "gt" + , "args": ["register"] + } + , { + "mnemonic": "ge" + , "args": ["register"] + } + , { + "mnemonic": "lt" + , "args": ["register"] + } + , { + "mnemonic": "le" + , "args": ["register"] + } + , { + "mnemonic": "eq" + , "args": ["register"] + } + , { + "mnemonic": "not" + , "args": [] + } + , { + "mnemonic": "jmp" + , "args": ["register"] + } + , { + "mnemonic": "call" + , "args": ["register"] + } + , { + "mnemonic": "ret" + , "args": [] + } + , { + "mnemonic": "stop" + , "args": [] + } + , { + "mnemonic": "cl" + , "args": [] + } + , { + "mnemonic": "cjmp" + , "args": ["register"] + } + , { + "mnemonic": "ccall" + , "args": ["register"] + } + ] + ) diff --git a/test/test_010_filecontext.py b/test/test_010_filecontext.py new file mode 100644 index 0000000..e8340a4 --- /dev/null +++ b/test/test_010_filecontext.py @@ -0,0 +1,24 @@ +from io import StringIO +from assembler.context import FileContext + + +def test_getc_ungetc(): + data = StringIO("abc\ndefg") + context = FileContext(data) + + assert context.getc() == "a" + assert context.getc() == "b" + assert context._line == 0 + assert context._column == 2 + assert context.getc() == "c" + assert context.getc() == "\n" + assert context.getc() == "d" + assert context._line == 1 + assert context._column == 1 + + context.ungetc("d") + context.ungetc("\n") + + assert context._column == 3 + assert context._line == 0 + assert context.getc() == "\n" diff --git a/test/test_011_tokenize.py b/test/test_011_tokenize.py new file mode 100644 index 0000000..2a2955c --- /dev/null +++ b/test/test_011_tokenize.py @@ -0,0 +1,54 @@ +from io import StringIO +from assembler.tokenize import Tokenizer +from assembler.context import FileContext + +def test_tokenize_1(): + data = ''' + ldi r0, 0xfefe + ldi r1, 0xefef + add r0, r1 + ''' + data = StringIO(data) + tokenizer = Tokenizer(FileContext(data)) + + result = list(tokenizer) + + assert result == [ + 
"ldi", " ", "r0", ",", "0xfefe", "\n" + , "ldi", " ", "r1", ",", "0xefef", "\n" + , "add", " ", "r0", ",", "r1", "\n" + ] + +def test_tokenize_2(): + data = ''' + ; This is a comment + + ldi r0, 0xfefe + ''' + data = StringIO(data) + tokenizer = Tokenizer(FileContext(data)) + + result = list(tokenizer) + + assert result == [ + ";", "This", " ", "is", " ", "a", " ", "comment", "\n" + , "ldi", " ", "r0", ",", "0xfefe", "\n" + ] + + +def test_tokenize_3(): + data = ''' + ldi r0, 0xfefe + string: + .set ['h', 'e', 'l', 'l', 'o'] + ''' + data = StringIO(data) + tokenizer = Tokenizer(FileContext(data)) + + result = list(tokenizer) + + assert result == [ + "ldi", " ", "r0", ",", "0xfefe", "\n" + , "string", ":", "\n" + , ".", "set", " ", "[", "'h'", ",", "'e'", ",", "'l'", ",", "'l'", ",", "'o'", "]", "\n" + ] diff --git a/test/test_012_opcodes.py b/test/test_012_opcodes.py new file mode 100644 index 0000000..2b7ba68 --- /dev/null +++ b/test/test_012_opcodes.py @@ -0,0 +1,14 @@ +from assembler.opcodes import make_opcodes + + +def test_make_opcodes(): + mnemonics = ["ldi", "ld", "st", "add", "mul"] + + opcodes = make_opcodes(mnemonics) + + assert opcodes == {"ldi": 32704 + , "ld" : 16320 + , "st": 49088 + , "add": 8128 + , "mul": 24512 + } diff --git a/test/test_013_util.py b/test/test_013_util.py new file mode 100644 index 0000000..32a5c5f --- /dev/null +++ b/test/test_013_util.py @@ -0,0 +1,35 @@ +from assembler.util import can_be_mark, can_convert_to_int, autoint + + +def test_can_be_mark(): + assert can_be_mark("this_is_a_mark") == True + assert can_be_mark("this_is_a_mark0") == True + assert can_be_mark("tHIS0") == True + + assert can_be_mark("This_is_not_a_mark") == False + assert can_be_mark("0this_is_no_mark") == False + assert can_be_mark("this#is_no_mark") == False + +def test_can_convert_to_int(): + assert can_convert_to_int("0xfe") == True + assert can_convert_to_int("0xFE") == True + assert can_convert_to_int("10") == True + assert can_convert_to_int("0b100") 
== True + assert can_convert_to_int("'a'") == True + assert can_convert_to_int("'\\n'") == True + + + assert can_convert_to_int("0xfg") == False + assert can_convert_to_int("0xFG") == False + assert can_convert_to_int("10a") == False + assert can_convert_to_int("0b20") == False + assert can_convert_to_int("'aa'") == False + assert can_convert_to_int("'\\z'") == False + +def test_autoint(): + assert autoint("0xfe") == 0xfe + assert autoint("0xFE") == 0xfe + assert autoint("10") == 10 + assert autoint("0b1010101") == 0b1010101 + assert autoint("'a'") == ord("a") + assert autoint("'\\n'") == ord("\n") diff --git a/test/test_020_basic_parsing.py b/test/test_020_basic_parsing.py new file mode 100644 index 0000000..9e89eb9 --- /dev/null +++ b/test/test_020_basic_parsing.py @@ -0,0 +1,22 @@ +from io import StringIO +import pytest + +from assembler.assembler import Assembler + + +def test_commands(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi r0, 0xfe + ldi r1, 0xfe + add r0, r1 + ''' + ) + assembler = Assembler(data, memory_definition, command_defintion, {}) + assembler.parse() + + assert assembler._code_objects == [32704, 0, 0xfe + , 32704, 1, 0xfe + , 40896, 0, 1] From 629a70d2e84c4c4f4067438395bc3963c0b81f38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= Date: Tue, 9 Oct 2018 15:55:42 +0200 Subject: [PATCH 4/9] fixed some bugs in the assembler --- assembler/assembler.py | 64 +++++++++++++++++++++++----------- assembler/directives.py | 2 ++ test/test_011_tokenize.py | 17 +++++++++ test/test_020_basic_parsing.py | 52 +++++++++++++++++++++++++-- 4 files changed, 112 insertions(+), 23 deletions(-) diff --git a/assembler/assembler.py b/assembler/assembler.py index 53f556c..f4dbbe6 100644 --- a/assembler/assembler.py +++ b/assembler/assembler.py @@ -108,24 +108,20 @@ class Assembler(object): if(token in self._marks): if(self._marks[token]["target"] != -1): raise 
ParsingError("Error in line {} column {} mark already defined: '{}'. Previous was in line {}.".format( - self._line + self._file_context._line , self._column , token , self._marks[token]["target_line"])) self._marks[token]["target"] = self._word_count - self._marks[token]["target_line"] = self._line + self._marks[token]["target_line"] = self._file_context._line else: self._marks[token] = { "target": self._word_count - , "target_line": self._line + , "target_line": self._file_context._line , "references": [] } - should_be_newline = next(self._tokenizer) - if(should_be_newline != "\n"): - self.raise_unexpected_token(token, "newline", should_be_newline) - def parse_directive(self): should_be_name = next(self._tokenizer) @@ -133,6 +129,10 @@ class Assembler(object): if(not should_be_name in self._directives): self.raise_unexpected_token(".", "directive name", should_be_name) + should_be_whitespace = next(self._tokenizer) + if(not should_be_whitespace in WHITESPACE): + self.raise_unexpected_token(should_be_name, "' '", should_be_whitespace) + words = self._directives[should_be_name].parse(self, self._tokenizer) self._word_count += len(words) @@ -146,16 +146,16 @@ class Assembler(object): def parse_command(self, cmd): - - self._code_objects.append(self._opcodes[cmd]) - self._word_count += 1 - + # We have no arguments if(not self._commands_by_mnemonic[cmd]["args"]): + self._code_objects.append(self._opcodes[cmd]) + self._word_count += 1 token = next(self._tokenizer) if(token != "\n"): self.raise_unexpected_token(cmd, "newline", token) return + # Small argument must be treated separately should_be_whitespace = next(self._tokenizer) if(should_be_whitespace not in WHITESPACE or should_be_whitespace == "\n"): @@ -166,9 +166,12 @@ class Assembler(object): , should_be_an_argument , self._commands_by_mnemonic[cmd]["args"][0]) self._word_count += 1 - self._code_objects.append(argument) + # NOTE: + # The Small Argument is stored within the first word (!) 
+ self._code_objects.append(self._opcodes[cmd] | (argument & 0xffff)) + # All the 16bit arguments for argument in self._commands_by_mnemonic[cmd]["args"][1:]: should_be_comma = next(self._tokenizer) if(should_be_comma != ","): @@ -239,22 +242,31 @@ class Assembler(object): return argument if(argument_definition == "program_memory"): + # Non-integer Argument. if(not can_convert_to_int(argument)): + # Just nonsense. if(not can_be_mark(argument)): self.raise_unexpected_token(cmd, "integer address or mark", argument) + # The Mark has appeared before. if(argument in self._marks): - self._marks[argument]["references"].append(self._line) + # Add this line to the references. + self._marks[argument]["references"].append(self._file_context._line) + # The target is already known. Insert it now. if(self._marks[argument]["target"] != -1): return self._marks[argument]["target"] + # The target is unknown. return argument + # The Mark has not appeared before. self._marks[argument] = { "target": -1 , "target_line": 0 - , "references": [self._line] + , "references": [self._file_context._line] } + # Target is obviously unknown. return argument + # Integer argument. argument = autoint(argument) if(argument < 0 or argument > self._memory_definition["program_memory"]): @@ -264,17 +276,29 @@ class Assembler(object): , argument) return argument + # This is direct input (default). + # Integer if(can_convert_to_int(argument)): return autoint(argument) + # This is nonsense. if(not can_be_mark(argument)): self.raise_unexpected_token(cmd, "integer, char or mark", argument) - if(argument in self._marks and self._marks[argument] >= 0): - return self._marks[argument] - self._marks[argument] = -1 + + # It is a Mark. 
+ if(argument in self._marks): + if(self._marks[argument]["target"] >= 0): + self._marks[argument]["references"].append(self._file_context._line) + return self._marks[argument]["target"] + self._marks[argument]["references"].append(self._file_context._line) + return argument + + self._marks[argument] = { + "target": -1 + , "target_line": 0 + , "references": [self._file_context._line] + } + return argument - - - diff --git a/assembler/directives.py b/assembler/directives.py index 24b6c99..a03383d 100644 --- a/assembler/directives.py +++ b/assembler/directives.py @@ -5,6 +5,8 @@ Directives for explicitly modifying the program memory. from abc import ABC, abstractmethod from collections import deque +from .util import can_convert_to_int, autoint + class AbstractDirective(ABC): @abstractmethod def parse(self, assembler, tokenizer): diff --git a/test/test_011_tokenize.py b/test/test_011_tokenize.py index 2a2955c..590d13f 100644 --- a/test/test_011_tokenize.py +++ b/test/test_011_tokenize.py @@ -52,3 +52,20 @@ def test_tokenize_3(): , "string", ":", "\n" , ".", "set", " ", "[", "'h'", ",", "'e'", ",", "'l'", ",", "'l'", ",", "'o'", "]", "\n" ] + +def test_tokenize_4(): + data = ''' + ldi r0, 0xfefe + test_mark: + ldi r1, 0xefef + ''' + data = StringIO(data) + tokenizer = Tokenizer(FileContext(data)) + + result = list(tokenizer) + + assert result == [ + "ldi", " ", "r0", ",", "0xfefe", "\n" + , "test_mark", ":", "\n" + , "ldi", " ", "r1", ",", "0xefef", "\n" + ] diff --git a/test/test_020_basic_parsing.py b/test/test_020_basic_parsing.py index 9e89eb9..0b1a569 100644 --- a/test/test_020_basic_parsing.py +++ b/test/test_020_basic_parsing.py @@ -17,6 +17,52 @@ def test_commands(basic_machine_definition): assembler = Assembler(data, memory_definition, command_defintion, {}) assembler.parse() - assert assembler._code_objects == [32704, 0, 0xfe - , 32704, 1, 0xfe - , 40896, 0, 1] + assert assembler._code_objects == [32704 | 0, 0xfe + , 32704 | 1, 0xfe + , 40896 | 0, 1] + +def 
test_mark(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi r0, test_mark + ldi r1, 0xfe + test_mark: + add r0, r1 + ''' + ) + assembler = Assembler(data, memory_definition, command_defintion, {}) + assembler.parse() + + + assert assembler._code_objects == [32704 | 0, 4 + , 32704 | 1, 0xfe + , 40896 | 0, 1] + + +def test_set_directive(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi r0, test_mark + ldi r1, 0xfe + test_mark: + .set [0xfe, 0xef, + 10, 20, + 'a', 'b', + '\\n', 0b10] + ''' + ) + assembler = Assembler(data, memory_definition, command_defintion, {}) + assembler.parse() + + + assert assembler._code_objects == [32704 | 0, 4 + , 32704 | 1, 0xfe + , 0xfe, 0xef + , 10, 20 + , ord("a"), ord("b") + , ord("\n"), 0b10] + From a0f78bbe9809527822e5999eff436fc775ac1d60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= Date: Wed, 10 Oct 2018 10:27:39 +0200 Subject: [PATCH 5/9] fixed one typo --- assembler/assembler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assembler/assembler.py b/assembler/assembler.py index f4dbbe6..4d36209 100644 --- a/assembler/assembler.py +++ b/assembler/assembler.py @@ -175,7 +175,7 @@ class Assembler(object): for argument in self._commands_by_mnemonic[cmd]["args"][1:]: should_be_comma = next(self._tokenizer) if(should_be_comma != ","): - self.raise_unexpected_token(cmd, should_be_whitespace) + self.raise_unexpected_token(cmd, "','", should_be_comma) self._word_count += 1 self._code_objects.append( From 353a68aacee310dae8e7956700045bb1a09d1bb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= Date: Wed, 10 Oct 2018 10:30:54 +0200 Subject: [PATCH 6/9] added tests for failures --- test/test_021_parsing_failures.py | 210 ++++++++++++++++++++++++++++++ 1 file changed, 210 insertions(+) create mode 100644 test/test_021_parsing_failures.py 
diff --git a/test/test_021_parsing_failures.py b/test/test_021_parsing_failures.py new file mode 100644 index 0000000..3a64168 --- /dev/null +++ b/test/test_021_parsing_failures.py @@ -0,0 +1,210 @@ +from io import StringIO +import pytest + +from assembler.assembler import Assembler, ParsingError + +def test_missing_comma(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi r0 0xfe + ldi r1, 0xfe + add r0, r1 + ''' + ) + assembler = Assembler(data, memory_definition, command_defintion, {}) + + with pytest.raises(ParsingError): + assembler.parse() + +def test_missing_newline(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi r0, 0xfe ldi r1, 0xfe + add r0, r1 + ''' + ) + assembler = Assembler(data, memory_definition, command_defintion, {}) + + with pytest.raises(ParsingError): + assembler.parse() + + + +def test_additional_comma1(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi, r0, 0xfe + ldi r1, 0xfe + add r0, r1 + ''' + ) + assembler = Assembler(data, memory_definition, command_defintion, {}) + + with pytest.raises(ParsingError): + assembler.parse() + +def test_additional_comma2(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi r0, 0xfe, + ldi r1, 0xfe + add r0, r1 + ''' + ) + assembler = Assembler(data, memory_definition, command_defintion, {}) + + with pytest.raises(ParsingError): + assembler.parse() + + + +def test_bad_mark1(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi r0, 0xfe + this_is_a_bad_mark + ldi r1, 0xfe + add r0, r1 + ''' + ) + assembler = Assembler(data, memory_definition, command_defintion, {}) + + with pytest.raises(ParsingError): + assembler.parse() + +def 
test_bad_mark2(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi r0, 0xfe + This_is_a_bad_mark: + ldi r1, 0xfe + add r0, r1 + ''' + ) + assembler = Assembler(data, memory_definition, command_defintion, {}) + + with pytest.raises(ParsingError): + assembler.parse() + +def test_bad_mark3(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi r0, 0xfe + 0this_is_a_bad_mark: + ldi r1, 0xfe + add r0, r1 + ''' + ) + assembler = Assembler(data, memory_definition, command_defintion, {}) + + with pytest.raises(ParsingError): + assembler.parse() + +def test_bad_mark4(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi r0, this_is_a_missing_mark + ldi r1, 0xfe + add r0, r1 + ''' + ) + assembler = Assembler(data, memory_definition, command_defintion, {}) + + with pytest.raises(ParsingError): + assembler.parse() + +def test_bad_mark5(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi r0, this_is_a_missing_mark: + ldi r1, 0xfe + add r0, r1 + ''' + ) + assembler = Assembler(data, memory_definition, command_defintion, {}) + + with pytest.raises(ParsingError): + assembler.parse() + +def test_bad_directive1(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi r0, 0 + ldi r1, 0xfe + add r0, r1 + .set data [0x00, 0x10] + ''' + ) + assembler = Assembler(data, memory_definition, command_defintion, {}) + + with pytest.raises(ParsingError): + assembler.parse() + +def test_bad_directive2(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi r0, 0 + ldi r1, 0xfe + add r0, r1 + .set[0x00, 0x10] + ''' + ) + assembler = Assembler(data, memory_definition, 
command_defintion, {}) + + with pytest.raises(ParsingError): + assembler.parse() + +def test_bad_directive3(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi r0, 0 + ldi r1, 0xfe + add r0, r1 + .set [0x00, 0x10,] + ''' + ) + assembler = Assembler(data, memory_definition, command_defintion, {}) + + with pytest.raises(ParsingError): + assembler.parse() + +def test_bad_directive4(basic_machine_definition): + memory_definition, command_defintion = basic_machine_definition + + data = StringIO( + ''' + ldi r0, 0 + ldi r1, 0xfe + add r0, r1 + .set [0x00, 0x10 + ''' + ) + assembler = Assembler(data, memory_definition, command_defintion, {}) + + with pytest.raises(ParsingError): + assembler.parse() + From 6b8236dc3be90955e03f1d098a54803b801af646 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= Date: Sat, 27 Oct 2018 16:51:30 +0200 Subject: [PATCH 7/9] fixed the assembler's bindump function --- assembler/assembler.py | 11 +++++++++-- assembler/util.py | 9 +++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/assembler/assembler.py b/assembler/assembler.py index 4d36209..8302f94 100644 --- a/assembler/assembler.py +++ b/assembler/assembler.py @@ -3,7 +3,7 @@ from collections import deque from .context import FileContext from .tokenize import Tokenizer, WHITESPACE from .opcodes import make_opcodes -from .util import can_be_mark, can_convert_to_int, autoint +from .util import can_be_mark, can_convert_to_int, autoint, int16_2_bytes from .directives import SetDirective class ParsingError(Exception): @@ -92,7 +92,14 @@ class Assembler(object): # FIXME: # Make this work for tons of data. # Or is that necessary? 
- return file_.write(bytes(self._code_objects)) + # TODO: + # Figure out whether/what improvements are necessary here + length = int16_2_bytes(len(self._code_objects)) + if(len(self._code_objects).bit_length() > 16): + raise ValueError("Program size excceeds 2^16.") + file_.write(length) + for word in self._code_objects: + file_.write(int16_2_bytes(word)) def parse_mark(self, token): diff --git a/assembler/util.py b/assembler/util.py index 60d043b..e5d4630 100644 --- a/assembler/util.py +++ b/assembler/util.py @@ -1,6 +1,7 @@ """ Utility functions used for parsing. """ +import struct def can_be_mark(argument): @@ -83,3 +84,11 @@ def autoint(value): return int(value) +def int16_2_bytes(value): + """ + Return the bytes representation of a 16bit unsigned + integer in 8bit words. + """ + if(value < 0): + return struct.pack(" Date: Sat, 27 Oct 2018 16:51:52 +0200 Subject: [PATCH 8/9] updated tests --- test/test_013_util.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/test_013_util.py b/test/test_013_util.py index 32a5c5f..6c316bc 100644 --- a/test/test_013_util.py +++ b/test/test_013_util.py @@ -1,4 +1,4 @@ -from assembler.util import can_be_mark, can_convert_to_int, autoint +from assembler.util import can_be_mark, can_convert_to_int, autoint, int16_2_bytes def test_can_be_mark(): @@ -33,3 +33,8 @@ def test_autoint(): assert autoint("0b1010101") == 0b1010101 assert autoint("'a'") == ord("a") assert autoint("'\\n'") == ord("\n") + +def test_int16_2_bytes(): + assert int16_2_bytes(2) == b"\x02\00" + assert int16_2_bytes(0x0200) == b"\x00\x02" + assert int16_2_bytes(-1) == b"\xff\xff" From 67127d4fe426f82f7eb78186308f8ab94a943ccd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Kn=C3=BCttel?= Date: Sat, 27 Oct 2018 16:54:04 +0200 Subject: [PATCH 9/9] added Makefile for testing the assembler --- Makefile | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 
0000000..f4d0917 --- /dev/null +++ b/Makefile @@ -0,0 +1,3 @@ +.PHONY: test +test: + python3 -m pytest test/