Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 9 additions & 28 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,17 @@ CC = gcc
INC = -Iinclude
DIR = bin
BIN = $(DIR)/vbo
PYTHON ?= python3

SRC_COMMON = src/instructions.c src/utils.c src/hardware.c
SRC_MAIN = src/main.c $(SRC_COMMON)

FLAGS = -Wall -Wextra -Werror -g -std=c11 -pedantic

## Optional embedded image
# e.g. make build EMBED_IMAGE=images/2048.obj
EMBED_IMAGE ?=
EMBED_OBJ :=
EMBED_GLUE :=
ifneq ($(strip $(EMBED_IMAGE)),)
EMBED_OBJ := $(EMBED_IMAGE:.obj=.o)
EMBED_GLUE := images/embed_glue.o
EMBED_OBJ := images/vbo_image.o
endif

TEST_DIR = test
Expand All @@ -25,36 +22,20 @@ TEST_DIR = test

all: build

build: $(EMBED_OBJ) $(EMBED_GLUE)
build: $(EMBED_OBJ)
mkdir -p $(DIR)
$(CC) $(INC) $(SRC_MAIN) $(EMBED_OBJ) $(EMBED_GLUE) -o $(BIN) $(FLAGS)

# Convert raw image to linkable object (ld -b binary)
%.o: %.obj
ld -r -b binary -o $@ $<

# Glue exposes vbo_image_start/end for the chosen image
images/embed_glue.o: $(EMBED_OBJ)
@stem=_binary_$$(echo $(EMBED_IMAGE) | sed 's/[^A-Za-z0-9_]/_/g'); \
mkdir -p images; \
printf '%s\n' \
'/* generated */' \
"extern const unsigned char $${stem}_start[];" \
"extern const unsigned char $${stem}_end[];" \
"const unsigned char* vbo_image_start = $${stem}_start;" \
"const unsigned char* vbo_image_end = $${stem}_end;" \
> images/embed_glue.c; \
$(CC) -c images/embed_glue.c -o images/embed_glue.o $(FLAGS)
$(CC) $(INC) $(SRC_MAIN) $(EMBED_OBJ) -o $(BIN) $(FLAGS)

# Build the linkable image object from $(EMBED_IMAGE) by running
# tools/embed_image.py with $(PYTHON). The script itself is a prerequisite,
# so editing it also causes the object to be regenerated.
images/vbo_image.o: $(EMBED_IMAGE) tools/embed_image.py
mkdir -p images
$(PYTHON) tools/embed_image.py --input "$(EMBED_IMAGE)" --output $@

clean:
rm -f $(BIN)
rm -f $(TEST_DIR)/test_utils
rm -f images/embed_glue.c images/embed_glue.o
rm -f images/vbo_image.o

distclean: clean
ifneq ($(strip $(EMBED_OBJ)),)
rm -f $(EMBED_OBJ)
endif

test:
$(CC) $(INC) $(TEST_DIR)/test_utils.c src/utils.c src/hardware.c -o $(TEST_DIR)/test_utils $(FLAGS)
Expand Down
91 changes: 1 addition & 90 deletions docs/isa.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,93 +13,4 @@ refer to `include/hardware.h`

## Scope

This document captures the current draft ISA for the VM and the stable parts of the design that are already present in the codebase. It intentionally leaves unspecified items blank until the implementation lands. The design targets an 18-bit, randomised ISA in later iterations while the first implementation runs a non-randomised, 16-bit equivalent.

## Word size

* Early implementation: 16-bit words
* Planned implementation: 18-bit words (with a 5-bit opcode in the high-order bits)

## Register file

* General-purpose: `R0`–`R7` (8 total)
* Special: `PC` (program counter), `COND` (condition flags)
* Condition flags (one-hot): `FL_POS`, `FL_ZRO`, `FL_NEG`

## Memory model

* Flat 64K-word address space in early 16-bit design
* `PC_START` set to `0x3000` for initial entry
* Unified code/data; device/port mapping is TBD

## Instruction formats (planned 18-bit layouts)

The 18-bit ISA defines three canonical encodings. The early 16-bit ISA follows the same taxonomy but may use narrower immediates and offsets.

| Format | Bits | Fields |
| --- | --- | --- |
| R-type | 18 | `opcode[17:13] rd[12:10] rs[9:7] rt[6:4] flags[3:0]` |
| I-type | 18 | `opcode[17:13] rd[12:10] rs[9:7] imm7[6:0]` (signed) |
| M-type | 18 | `opcode[17:13] rd[12:10] offset10[9:0]` (signed) |

The 16-bit variant keeps the same conceptual fields but packs them within 16 bits. Final bit positions for the 16-bit encoding are TBD in this document and will be aligned with the implementation.

## Opcodes

The following opcodes are defined in the headers and constitute the core instruction set. Semantics follow conventional interpretations unless specified otherwise. Execution details for edge cases will be specified alongside the implementation.

| Mnemonic | Category | Brief semantics |
| --- | --- | --- |
| `NOP` | Control | No operation |
| `HALT` | Control | Stop execution |
| `ADD` | R-type | `rd = rs + rt`; updates `COND` |
| `SUB` | R-type | `rd = rs - rt`; updates `COND` |
| `AND` | R-type | `rd = rs & rt`; updates `COND` |
| `OR` | R-type | `rd = rs \| rt`; updates `COND` |
| `XOR` | R-type | `rd = rs ^ rt`; updates `COND` |
| `NOT` | R-type | `rd = ~rs`; updates `COND` |
| `MOV` | R-type | `rd = rs`; may update `COND` (TBD) |
| `LSH` | R-type | Logical left shift (operands/amount TBD) |
| `RSH` | R-type | Logical right shift (operands/amount TBD) |
| `CMP` | R-type | Compare `rs` and `rt`, set `COND` |
| `ADDI` | I-type | `rd = rs + sext(imm)`; updates `COND` |
| `ANDI` | I-type | `rd = rs & imm`; updates `COND` |
| `ORI` | I-type | `rd = rs \| imm`; updates `COND` |
| `LDI` | M-type | Load immediate/addressing (exact mode TBD) |
| `LD` | M-type | `rd = MEM[PC + off]` or base+off (TBD) |
| `ST` | M-type | `MEM[PC + off] = rd` or base+off (TBD) |
| `LDIND` | M-type | `rd = MEM[ MEM[base] + off ]` (TBD) |
| `STIND` | M-type | `MEM[ MEM[base] + off ] = rs` (TBD) |
| `JMP` | M-type | Unconditional jump (target encoding TBD) |
| `JZ` | M-type | Jump if zero flag set |
| `JNZ` | M-type | Jump if zero flag clear |
| `CALL` | M-type | Call subroutine (linkage/stack TBD) |
| `RET` | R-type/Implicit | Return from subroutine (exact source of target TBD) |
| `PUSH` | R-type | Push register to stack (stack reg TBD) |
| `POP` | R-type | Pop into register (stack reg TBD) |
| `IN` | I-type | Read from device/port (map TBD) |
| `OUT` | I-type | Write to device/port (map TBD) |
| `TRAP` | I-type | Supervisor/service call (vector table TBD) |
| `RAND` | R-type | Random number generator (source TBD) |
| `SLEEP` | I-type | Delay/sleep for a duration (units TBD) |

Notes:

* Branch conditions use `COND` flags. Precise signedness rules for comparisons are TBD.
* Shifts are logical in the current design; arithmetic right shift may be added later if required.

## Condition codes

Operations that write a result typically update `COND`:

* Result > 0 → `FL_POS`
* Result == 0 → `FL_ZRO`
* Result < 0 (interpreting the 16-bit result as signed) → `FL_NEG`

Exact update policy for non-arithmetic instructions (e.g., `MOV`, shifts, loads) will be specified with their implementations.

## Encoding constraints and compatibility

* The opcode field is planned to be 5 bits in the 18-bit design
* The early 16-bit encoding mirrors the same opcode taxonomy to ease transition
* Randomised opcode mapping is out of scope for the first implementation
This document outlines the design specification for the custom ISA. This document is a stub.
33 changes: 5 additions & 28 deletions docs/vm.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ refer to `include/hardware.h`

## Scope

This document describes the VM that executes the virtualised program. It covers the current 16-bit early implementation and highlights planned shifts to an 18-bit, randomised ISA. It intentionally omits unfinished or unspecified details.
This document describes the VM that executes the virtualised program. It covers the current 16-bit early implementation of the VM interpreter. It intentionally omits unfinished or unspecified details.

## Architecture overview

Expand All @@ -22,6 +22,8 @@ This document describes the VM that executes the virtualised program. It covers
* Fetch–decode–execute loop drives execution
* Image files are loaded into memory before execution begins

These choices align the VM with the design of LC-3.

## Registers

| Name | Width | Purpose |
Expand Down Expand Up @@ -49,39 +51,14 @@ This document describes the VM that executes the virtualised program. It covers
| --- | --- |
| Address space | 0x0000 to 0xFFFF |
| Capacity | 65,536 words |
| Word width | 16 bits (early implementation) |
| Word width | 16 bits |
| Layout | Unified code and data |

`PC` is initialised to `PC_START` (currently `0x3000`). This leaves lower memory available for images, vectors, and future system structures. Exact segmenting is to be defined.

## Program loading

* Command-line accepts one or more image files
* Each image is validated and loaded into memory
* On failure to load, execution aborts

Image format and relocation rules are to be defined. The loader interface exists; the on-disk format is not yet documented here.

## Execution model

* Initialise arguments and memory
* Set `COND` to `FL_ZRO`
* Set `PC` to `PC_START`
* Main loop repeats while running
* Fetch: read instruction word at `PC`, then increment `PC`
* Decode: extract opcode from the high-order bits
* Execute: dispatch to the operation handler
* Update flags and registers as specified by the operation

At present, `HALT` terminates the loop. Additional operations are defined by the ISA but may be implemented incrementally.

## Instruction width and encoding

* Early implementation uses 16-bit instruction words
* Planned implementation uses 18-bit instruction words with a 5-bit opcode field and structured operand fields
* To maintain forward compatibility, the VM structure and instruction taxonomy align with the 18-bit design even when running 16-bit words

Exact 16-bit field layouts are intentionally not fixed in this document. See the ISA document for the intended 18-bit layouts.
Images are embedded into memory by an image embedding tool. The tool creates a relocatable object that contains the image as a byte array and exposes symbols for the image start and end; the C code accesses the image through these two pointers.

## I/O

Expand Down
98 changes: 98 additions & 0 deletions tools/embed_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/env python3
"""
Embed a binary payload as a relocatable object that exports:
- vbo_image_start (const unsigned char*)
- vbo_image_end (const unsigned char*)

Usage:
embed_image.py --input path/to/image.obj --output images/vbo_image.o

The resulting object can be linked into the final binary. main.c declares the
symbols as weak pointers, so linking this object makes the embedded image usable
without glue files.
"""

import argparse
import os
import shlex
import subprocess
import sys
import tempfile

# Assembly source for the embedded-image object. main() fills in {input_path}
# via str.format(), so any literal brace added to this template must be doubled.
#
# Layout:
#   .rodata.vbo_payload - the input file's bytes (.incbin) placed between the
#                         labels vbo_payload_start and vbo_payload_end; a single
#                         zero byte is emitted after the end label.
#   .rodata.vbo_ptrs    - vbo_image_start / vbo_image_end, pointer-sized words
#                         holding the addresses of the two payload labels
#                         (.quad when __SIZEOF_POINTER__ == 8, otherwise .long).
#
# The #if/#else/#endif lines are C preprocessor directives, which is why main()
# assembles this text with `-x assembler-with-cpp`.
ASM_TEMPLATE = r"""
.section .rodata.vbo_payload, "a", @progbits
.globl vbo_payload_start
.type vbo_payload_start, @object
vbo_payload_start:
.incbin "{input_path}"

.globl vbo_payload_end
.type vbo_payload_end, @object
vbo_payload_end:
.byte 0

.size vbo_payload_start, vbo_payload_end - vbo_payload_start

.section .rodata.vbo_ptrs, "a", @progbits
.globl vbo_image_start
.type vbo_image_start, @object
#if __SIZEOF_POINTER__ == 8
vbo_image_start:
.quad vbo_payload_start
.size vbo_image_start, 8

.globl vbo_image_end
.type vbo_image_end, @object
vbo_image_end:
.quad vbo_payload_end
.size vbo_image_end, 8
#else
vbo_image_start:
.long vbo_payload_start
.size vbo_image_start, 4

.globl vbo_image_end
.type vbo_image_end, @object
vbo_image_end:
.long vbo_payload_end
.size vbo_image_end, 4
#endif
"""

def run(cmd, cwd=None):
    """Run ``cmd`` and raise if it cannot be started or exits non-zero.

    Args:
        cmd: Command as a list of argument strings (executed without a shell).
        cwd: Optional working directory for the child process.

    Raises:
        subprocess.CalledProcessError: The command exited with a non-zero status.
        OSError: The command could not be started (e.g. executable not found).

    In both cases the shell-quoted command line is printed to stderr first and
    the original exception is re-raised unchanged.
    """
    try:
        subprocess.check_call(cmd, cwd=cwd)
    except subprocess.CalledProcessError:
        print(f"Command failed: {' '.join(map(shlex.quote, cmd))}", file=sys.stderr)
        raise
    except OSError as exc:
        # A missing or non-executable program never reaches CalledProcessError;
        # without this branch the user would not be told which command was tried.
        print(
            f"Command could not be started ({exc.strerror or exc}): "
            f"{' '.join(map(shlex.quote, cmd))}",
            file=sys.stderr,
        )
        raise


def main():
    """Command-line entry point: assemble the input file into a linkable object.

    Parses ``--input`` (payload to embed) and ``--output`` (object file to
    write), renders ASM_TEMPLATE with the absolute input path into a temporary
    ``.S`` file, and assembles it through the C compiler driver.

    The driver is taken from the ``CC`` environment variable when it is set
    (so the object matches the toolchain used for the rest of the build, e.g.
    ``make CC=...`` or a cross compiler) and defaults to ``gcc``.

    Returns:
        0 on success, 2 if the input file does not exist, 1 if the assembler
        command exits with a non-zero status.
    """
    p = argparse.ArgumentParser()
    p.add_argument('--input', required=True, help='Path to payload binary to embed')
    p.add_argument('--output', required=True, help='Path to output object file (.o)')
    args = p.parse_args()

    inp = os.path.abspath(args.input)
    out = os.path.abspath(args.output)

    if not os.path.isfile(inp):
        print(f"Input file not found: {inp}", file=sys.stderr)
        return 2

    # `out` is absolute, so its dirname is never empty.
    os.makedirs(os.path.dirname(out), exist_ok=True)

    # CC may hold several words (e.g. "ccache gcc"); fall back to gcc when the
    # variable is unset or blank.
    cc = shlex.split(os.environ.get('CC', '')) or ['gcc']

    with tempfile.TemporaryDirectory() as td:
        asm_path = os.path.join(td, 'vbo_embed.S')
        with open(asm_path, 'w') as f:
            # Escape backslashes and double quotes so the path survives inside
            # the quoted .incbin operand.
            f.write(ASM_TEMPLATE.format(input_path=inp.replace('\\', '\\\\').replace('"', '\\"')))

        # Assemble via the compiler driver so the template's #if is preprocessed.
        cmd = cc + ['-x', 'assembler-with-cpp', '-c', asm_path, '-o', out, '-nostdlib']
        try:
            run(cmd)
        except subprocess.CalledProcessError:
            # run() has already reported the failing command on stderr; exit
            # with a plain non-zero status instead of a Python traceback.
            return 1

    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())