From dd9ec54c0b93d904b522686e3b447cf079ec835b Mon Sep 17 00:00:00 2001 From: Lachlan Harris Date: Tue, 23 Sep 2025 18:44:50 +1000 Subject: [PATCH] Refactor embedding into python tool, refresh documentation --- Makefile | 37 ++++------------- docs/isa.md | 91 +--------------------------------------- docs/vm.md | 33 +++------------ tools/embed_image.py | 98 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 113 insertions(+), 146 deletions(-) create mode 100644 tools/embed_image.py diff --git a/Makefile b/Makefile index a6281e5..a39d8b0 100644 --- a/Makefile +++ b/Makefile @@ -3,20 +3,17 @@ CC = gcc INC = -Iinclude DIR = bin BIN = $(DIR)/vbo +PYTHON ?= python3 SRC_COMMON = src/instructions.c src/utils.c src/hardware.c SRC_MAIN = src/main.c $(SRC_COMMON) FLAGS = -Wall -Wextra -Werror -g -std=c11 -pedantic -## Optional embedded image -# e.g. make build EMBED_IMAGE=images/2048.obj EMBED_IMAGE ?= EMBED_OBJ := -EMBED_GLUE := ifneq ($(strip $(EMBED_IMAGE)),) -EMBED_OBJ := $(EMBED_IMAGE:.obj=.o) -EMBED_GLUE := images/embed_glue.o +EMBED_OBJ := images/vbo_image.o endif TEST_DIR = test @@ -25,36 +22,20 @@ TEST_DIR = test all: build -build: $(EMBED_OBJ) $(EMBED_GLUE) +build: $(EMBED_OBJ) mkdir -p $(DIR) - $(CC) $(INC) $(SRC_MAIN) $(EMBED_OBJ) $(EMBED_GLUE) -o $(BIN) $(FLAGS) - -# Convert raw image to linkable object (ld -b binary) -%.o: %.obj - ld -r -b binary -o $@ $< - -# Glue exposes vbo_image_start/end for the chosen image -images/embed_glue.o: $(EMBED_OBJ) - @stem=_binary_$$(echo $(EMBED_IMAGE) | sed 's/[^A-Za-z0-9_]/_/g'); \ - mkdir -p images; \ - printf '%s\n' \ - '/* generated */' \ - "extern const unsigned char $${stem}_start[];" \ - "extern const unsigned char $${stem}_end[];" \ - "const unsigned char* vbo_image_start = $${stem}_start;" \ - "const unsigned char* vbo_image_end = $${stem}_end;" \ - > images/embed_glue.c; \ - $(CC) -c images/embed_glue.c -o images/embed_glue.o $(FLAGS) + $(CC) $(INC) $(SRC_MAIN) $(EMBED_OBJ) -o $(BIN) $(FLAGS) + 
+images/vbo_image.o: $(EMBED_IMAGE) tools/embed_image.py + mkdir -p images + $(PYTHON) tools/embed_image.py --input "$(EMBED_IMAGE)" --output $@ clean: rm -f $(BIN) rm -f $(TEST_DIR)/test_utils - rm -f images/embed_glue.c images/embed_glue.o + rm -f images/vbo_image.o distclean: clean - ifneq ($(strip $(EMBED_OBJ)),) - rm -f $(EMBED_OBJ) - endif test: $(CC) $(INC) $(TEST_DIR)/test_utils.c src/utils.c src/hardware.c -o $(TEST_DIR)/test_utils $(FLAGS) diff --git a/docs/isa.md b/docs/isa.md index f407951..4f17142 100644 --- a/docs/isa.md +++ b/docs/isa.md @@ -13,93 +13,4 @@ refer to `include/hardware.h` ## Scope -This document captures the current draft ISA for the VM and the stable parts of the design that are already present in the codebase. It intentionally leaves unspecified items blank until the implementation lands. The design targets an 18-bit, randomised ISA in later iterations while the first implementation runs a non-randomised, 16-bit equivalent. - -## Word size - -* Early implementation: 16-bit words -* Planned implementation: 18-bit words (with a 5-bit opcode in the high-order bits) - -## Register file - -* General-purpose: `R0`–`R7` (8 total) -* Special: `PC` (program counter), `COND` (condition flags) -* Condition flags (one-hot): `FL_POS`, `FL_ZRO`, `FL_NEG` - -## Memory model - -* Flat 64K-word address space in early 16-bit design -* `PC_START` set to `0x3000` for initial entry -* Unified code/data; device/port mapping is TBD - -## Instruction formats (planned 18-bit layouts) - -The 18-bit ISA defines three canonical encodings. The early 16-bit ISA follows the same taxonomy but may use narrower immediates and offsets. 
- -| Format | Bits | Fields | -| --- | --- | --- | -| R-type | 18 | `opcode[17:13] rd[12:10] rs[9:7] rt[6:4] flags[3:0]` | -| I-type | 18 | `opcode[17:13] rd[12:10] rs[9:7] imm7[6:0]` (signed) | -| M-type | 18 | `opcode[17:13] rd[12:10] offset10[9:0]` (signed) | - -The 16-bit variant keeps the same conceptual fields but packs them within 16 bits. Final bit positions for the 16-bit encoding are TBD in this document and will be aligned with the implementation. - -## Opcodes - -The following opcodes are defined in the headers and constitute the core instruction set. Semantics follow conventional interpretations unless specified otherwise. Execution details for edge cases will be specified alongside the implementation. - -| Mnemonic | Category | Brief semantics | -| --- | --- | --- | -| `NOP` | Control | No operation | -| `HALT` | Control | Stop execution | -| `ADD` | R-type | `rd = rs + rt`; updates `COND` | -| `SUB` | R-type | `rd = rs - rt`; updates `COND` | -| `AND` | R-type | `rd = rs & rt`; updates `COND` | -| `OR` | R-type | `rd = rs | rt`; updates `COND` | -| `XOR` | R-type | `rd = rs ^ rt`; updates `COND` | -| `NOT` | R-type | `rd = ~rs`; updates `COND` | -| `MOV` | R-type | `rd = rs`; may update `COND` (TBD) | -| `LSH` | R-type | Logical left shift (operands/amount TBD) | -| `RSH` | R-type | Logical right shift (operands/amount TBD) | -| `CMP` | R-type | Compare `rs` and `rt`, set `COND` | -| `ADDI` | I-type | `rd = rs + sext(imm)`; updates `COND` | -| `ANDI` | I-type | `rd = rs & imm`; updates `COND` | -| `ORI` | I-type | `rd = rs | imm`; updates `COND` | -| `LDI` | M-type | Load immediate/addressing (exact mode TBD) | -| `LD` | M-type | `rd = MEM[PC + off]` or base+off (TBD) | -| `ST` | M-type | `MEM[PC + off] = rd` or base+off (TBD) | -| `LDIND` | M-type | `rd = MEM[ MEM[base] + off ]` (TBD) | -| `STIND` | M-type | `MEM[ MEM[base] + off ] = rs` (TBD) | -| `JMP` | M-type | Unconditional jump (target encoding TBD) | -| `JZ` | M-type | Jump if zero flag set | 
-| `JNZ` | M-type | Jump if zero flag clear | -| `CALL` | M-type | Call subroutine (linkage/stack TBD) | -| `RET` | R-type/Implicit | Return from subroutine (exact source of target TBD) | -| `PUSH` | R-type | Push register to stack (stack reg TBD) | -| `POP` | R-type | Pop into register (stack reg TBD) | -| `IN` | I-type | Read from device/port (map TBD) | -| `OUT` | I-type | Write to device/port (map TBD) | -| `TRAP` | I-type | Supervisor/service call (vector table TBD) | -| `RAND` | R-type | Random number generator (source TBD) | -| `SLEEP` | I-type | Delay/sleep for a duration (units TBD) | - -Notes: - -* Branch conditions use `COND` flags. Precise signedness rules for comparisons are TBD. -* Shifts are logical in the current design; arithmetic right shift may be added later if required. - -## Condition codes - -Operations that write a result typically update `COND`: - -* Result > 0 → `FL_POS` -* Result == 0 → `FL_ZRO` -* Result < 0 (interpreting the 16-bit result as signed) → `FL_NEG` - -Exact update policy for non-arithmetic instructions (e.g., `MOV`, shifts, loads) will be specified with their implementations. - -## Encoding constraints and compatibility - -* The opcode field is planned to be 5 bits in the 18-bit design -* The early 16-bit encoding mirrors the same opcode taxonomy to ease transition -* Randomised opcode mapping is out of scope for the first implementation \ No newline at end of file +This document outlines the design specification for the custom ISA. This document is a stub. \ No newline at end of file diff --git a/docs/vm.md b/docs/vm.md index 7ce6b0a..8426455 100644 --- a/docs/vm.md +++ b/docs/vm.md @@ -12,7 +12,7 @@ refer to `include/hardware.h` ## Scope -This document describes the VM that executes the virtualised program. It covers the current 16-bit early implementation and highlights planned shifts to an 18-bit, randomised ISA. It intentionally omits unfinished or unspecified details. 
+This document describes the VM that executes the virtualised program. It covers the current 16-bit early implementation of the VM interpreter. It intentionally omits unfinished or unspecified details. ## Architecture overview @@ -22,6 +22,8 @@ This document describes the VM that executes the virtualised program. It covers * Fetch–decode–execute loop drives execution * Image files are loaded into memory before execution begins +This architecture aligns with the design of LC-3. + ## Registers | Name | Width | Purpose | @@ -49,39 +51,14 @@ This document describes the VM that executes the virtualised program. It covers | --- | --- | | Address space | 0x0000 to 0xFFFF | | Capacity | 65,536 words | -| Word width | 16 bits (early implementation) | +| Word width | 16 bits | | Layout | Unified code and data | `PC` is initialised to `PC_START` (currently `0x3000`). This leaves lower memory available for images, vectors, and future system structures. Exact segmenting is to be defined. ## Program loading -* Command-line accepts one or more image files -* Each image is validated and loaded into memory -* On failure to load, execution aborts - -Image format and relocation rules are to be defined. The loader interface exists; the on-disk format is not yet documented here. - -## Execution model - -* Initialise arguments and memory -* Set `COND` to `FL_ZRO` -* Set `PC` to `PC_START` -* Main loop repeats while running - * Fetch: read instruction word at `PC`, then increment `PC` - * Decode: extract opcode from the high-order bits - * Execute: dispatch to the operation handler - * Update flags and registers as specified by the operation - -At present, `HALT` terminates the loop. Additional operations are defined by the ISA but may be implemented incrementally. 
- -## Instruction width and encoding - -* Early implementation uses 16-bit instruction words -* Planned implementation uses 18-bit instruction words with a 5-bit opcode field and structured operand fields -* To maintain forward compatibility, the VM structure and instruction taxonomy align with the 18-bit design even when running 16-bit words - -Exact 16-bit field layouts are intentionally not fixed in this document. See the ISA document for the intended 18-bit layouts. +Images are embedded into memory by an image embedding tool that creates a relocatable object containing the image as a byte array and exposes symbols for the image start and end. The C code accesses the image through these two pointers. ## I/O diff --git a/tools/embed_image.py b/tools/embed_image.py new file mode 100644 index 0000000..abe3d77 --- /dev/null +++ b/tools/embed_image.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +""" +Embed a binary payload as a relocatable object that exports: + - vbo_image_start (const unsigned char*) + - vbo_image_end (const unsigned char*) + +Usage: + embed_image.py --input path/to/image.obj --output images/vbo_image.o + +The resulting object can be linked into the final binary. main.c declares the +symbols as weak pointers, so linking this object makes the embedded image usable +without glue files. 
+""" + +import argparse +import os +import shlex +import subprocess +import sys +import tempfile + +ASM_TEMPLATE = r""" + .section .rodata.vbo_payload, "a", @progbits + .globl vbo_payload_start + .type vbo_payload_start, @object +vbo_payload_start: + .incbin "{input_path}" + + .globl vbo_payload_end + .type vbo_payload_end, @object +vbo_payload_end: + .byte 0 + + .size vbo_payload_start, vbo_payload_end - vbo_payload_start + + .section .rodata.vbo_ptrs, "a", @progbits + .globl vbo_image_start + .type vbo_image_start, @object +#if __SIZEOF_POINTER__ == 8 +vbo_image_start: + .quad vbo_payload_start + .size vbo_image_start, 8 + + .globl vbo_image_end + .type vbo_image_end, @object +vbo_image_end: + .quad vbo_payload_end + .size vbo_image_end, 8 +#else +vbo_image_start: + .long vbo_payload_start + .size vbo_image_start, 4 + + .globl vbo_image_end + .type vbo_image_end, @object +vbo_image_end: + .long vbo_payload_end + .size vbo_image_end, 4 +#endif +""" + +def run(cmd, cwd=None): + try: + subprocess.check_call(cmd, cwd=cwd) + except subprocess.CalledProcessError as e: + print(f"Command failed: {' '.join(map(shlex.quote, cmd))}", file=sys.stderr) + raise + + +def main(): + p = argparse.ArgumentParser() + p.add_argument('--input', required=True, help='Path to payload binary to embed') + p.add_argument('--output', required=True, help='Path to output object file (.o)') + args = p.parse_args() + + inp = os.path.abspath(args.input) + out = os.path.abspath(args.output) + + if not os.path.isfile(inp): + print(f"Input file not found: {inp}", file=sys.stderr) + return 2 + + os.makedirs(os.path.dirname(out), exist_ok=True) + + with tempfile.TemporaryDirectory() as td: + asm_path = os.path.join(td, 'vbo_embed.S') + with open(asm_path, 'w') as f: + f.write(ASM_TEMPLATE.format(input_path=inp.replace('\\', '\\\\').replace('"', '\\"'))) + + # Assemble via gcc driver for portability + cmd = ['gcc', '-x', 'assembler-with-cpp', '-c', asm_path, '-o', out, '-nostdlib'] + run(cmd) + + 
return 0 + + +if __name__ == '__main__': + sys.exit(main())