From 227d0ae9d4f00c031ec4678a55b98bc21e1f8958 Mon Sep 17 00:00:00 2001 From: Lachlan Harris Date: Wed, 24 Sep 2025 11:20:13 +1000 Subject: [PATCH 1/5] Add unimplemented RISC-V 32I ISA --- include/riscv32i.h | 202 ++++++++++++++++++ src/riscv32i.c | 519 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 721 insertions(+) create mode 100644 include/riscv32i.h create mode 100644 src/riscv32i.c diff --git a/include/riscv32i.h b/include/riscv32i.h new file mode 100644 index 0000000..1e78dc1 --- /dev/null +++ b/include/riscv32i.h @@ -0,0 +1,202 @@ +// riscv32i.h +/* +implementation of RISC-V 32I ISA, as according to +https://riscv.atlassian.net/wiki/spaces/HOME/pages/16154769/RISC-V+Technical+Specifications +document version 20250508, accessed: 23/09/2025 + +refer to src/riscv32i.c +*/ + +#ifndef RISCV32I_H +#define RISCV32I_H + +#include + +// define memory (byte-addressable, local to RV32I) +// keep this realistic; 1 MiB default, must be power of two for cheap masking +#define RV32I_MEM_SIZE (1u << 20) +extern uint8_t rv32i_mem[RV32I_MEM_SIZE]; + + + +// define registers in an enum +enum +{ + x0 = 0, // hard-wired zero + x1, x2, x3, x4, x5, x6, x7, x8, + x9, x10, x11, x12, x13, x14, x15, x16, + x17, x18, x19, x20, x21, x22, x23, x24, + x25, x26, x27, x28, x29, x30, x31, pc +}; extern uint32_t rv32i_reg[32 + 1]; + +// register/write helpers +static inline void rv32i_write_reg(uint32_t rd, uint32_t val) { + if (rd != 0) rv32i_reg[rd] = val; // keep x0 hard-wired zero +} +static inline uint32_t rv32i_read_reg(uint32_t r) { + // allow pc (index 32) while keeping GPRs masked + return (r == pc) ? rv32i_reg[pc] : rv32i_reg[r & 0x1F]; +} + +// field extraction helpers +#define RV32I_OP(i) ((uint32_t)((i) & 0x7F)) +#define RV32I_RD(i) ((uint32_t)(((i) >> 7) & 0x1F)) +#define RV32I_FUNCT3(i) ((uint32_t)(((i) >> 12) & 0x7)) +#define RV32I_RS1(i) ((uint32_t)(((i) >> 15) & 0x1F)) +#define RV32I_RS2(i) ((uint32_t)(((i) >> 20) & 0x1F)) +#define RV32I_FUNCT7(i) ((uint32_t)(((i) >> 25) & 0x7F)) + +static inline int32_t rv32i_sext(uint32_t val, int bits) { + return (int32_t)(val << (32 - bits)) >> (32 - bits); +} + +// immediates (already positioned/shifted per spec) +#define RV32I_IMM_I(i) (rv32i_sext(((uint32_t)(i) >> 20) & 0xFFF, 12)) +#define RV32I_IMM_U(i) ((int32_t)((i) & 0xFFFFF000)) +#define RV32I_IMM_S(i) (rv32i_sext((((uint32_t)(i) >> 25) << 5) | (((uint32_t)(i) >> 7) & 0x1F), 12)) +#define RV32I_IMM_B(i) (rv32i_sext((((((uint32_t)(i) >> 31) & 0x1) << 12) | \ + ((((uint32_t)(i) >> 7) & 0x1) << 11) | \ + ((((uint32_t)(i) >> 25) & 0x3F) << 5) | \ + ((((uint32_t)(i) >> 8) & 0xF) << 1)), 13)) +#define RV32I_IMM_J(i) (rv32i_sext((((((uint32_t)(i) >> 31) & 0x1) << 20) | \ + ((((uint32_t)(i) >> 12) & 0xFF) << 12) | \ + ((((uint32_t)(i) >> 20) & 0x1) << 11) | \ + ((((uint32_t)(i) >> 21) & 0x3FF) << 1)), 21)) + +// shift amount mask for RV32 +#define RV32I_SHAMT_MASK 0x1F + + +// define instructions +void exec_riscv32i(uint32_t instruction); + +// functions for all instructions, in the format: +// void exec_riscv32i_(uint32_t instruction); + +/* canonical order: +addi, slti[u], andi, ori, xori, slli, srli, srai, lui, auipc, // integer register-immediate +add, slt[u], and, or, xor, sll, srl, sub, sra, // integer register-register +nop, // no operation +jal, jalr, // unconditional jumps +beq, bne, blt[u], bge[u], // conditional branches +load, store, // load and store +fence, // memory ordering +ecall, ebreak // environment call and breakpoints + +*/ + + +// opcodes +enum +{ + OP_IMM = 0b0010011, // integer register-immediate + OP_LUI = 0b0110111, // load upper immediate + OP_AUIPC = 0b0010111, // add upper immediate to + OP_REG = 0b0110011, // integer register-register + OP_JAL = 0b1101111, // unconditional jump and link + OP_JALR = 0b1100111, // unconditional jump and link register + OP_BRANCH = 0b1100011, // conditional branches + OP_LOAD = 0b0000011, // load + OP_STORE = 0b0100011, // store + OP_FENCE = 0b0001111, // memory ordering + OP_SYSTEM = 0b1110011 // environment call and breakpoints +}; + + +// funct3 reg-reg (store/branch/reg-reg/imm [14:12]) +enum +{ + F3_REG_ADD_SUB = 0b000, + F3_REG_SLL = 0b001, + F3_REG_SLT = 0b010, + F3_REG_SLTU = 0b011, + F3_REG_XOR = 0b100, + F3_REG_SRL_SRA = 0b101, + F3_REG_OR = 0b110, + F3_REG_AND = 0b111, +}; +// funct3 imm [14:12] +enum +{ + F3_IMM_ADDI = 0b000, + F3_IMM_SLTI = 0b010, + F3_IMM_SLTIU = 0b011, + F3_IMM_XORI = 0b100, + F3_IMM_SRLI_SRAI = 0b101, + F3_IMM_ORI = 0b110, + F3_IMM_ANDI = 0b111, + F3_IMM_SLLI = 0b001, // out of order canonically +}; +// funct3 branch [14:12] +enum +{ + F3_BRANCH_BEQ = 0b000, + F3_BRANCH_BNE = 0b001, + F3_BRANCH_BLT = 0b100, + F3_BRANCH_BGE = 0b101, + F3_BRANCH_BLTU = 0b110, + F3_BRANCH_BGEU = 0b111, +}; +// funct3 load [14:12] +enum +{ + F3_LOAD_LB = 0b000, + F3_LOAD_LH = 0b001, + F3_LOAD_LW = 0b010, + F3_LOAD_LBU = 0b100, + F3_LOAD_LHU = 0b101, +}; +// funct3 store [14:12] +enum +{ + F3_STORE_SB = 0b000, + F3_STORE_SH = 0b001, + F3_STORE_SW = 0b010, +}; + +// integer register-immediate +void exec_riscv32i_addi(uint32_t instruction); +void exec_riscv32i_slti(uint32_t instruction); +void exec_riscv32i_andi(uint32_t instruction); +void exec_riscv32i_ori(uint32_t instruction); +void exec_riscv32i_xori(uint32_t instruction); +void exec_riscv32i_slli(uint32_t instruction); +void exec_riscv32i_srli_srai(uint32_t instruction); +void exec_riscv32i_lui(uint32_t instruction); +void exec_riscv32i_auipc(uint32_t instruction); +void exec_riscv32i_sltiu(uint32_t instruction); +// integer register-register +void exec_riscv32i_add_sub(uint32_t instruction); +void exec_riscv32i_slt(uint32_t instruction); +void exec_riscv32i_sltu(uint32_t instruction); +void exec_riscv32i_and(uint32_t instruction); +void exec_riscv32i_or(uint32_t instruction); +void exec_riscv32i_xor(uint32_t instruction); +void exec_riscv32i_sll(uint32_t instruction); +void exec_riscv32i_srl_sra(uint32_t instruction); +// unconditional jumps +void exec_riscv32i_jal(uint32_t instruction); +void exec_riscv32i_jalr(uint32_t instruction); +// conditional branches +void exec_riscv32i_beq(uint32_t instruction); +void exec_riscv32i_bne(uint32_t instruction); +void exec_riscv32i_blt(uint32_t instruction); +void exec_riscv32i_bge(uint32_t instruction); +void exec_riscv32i_bltu(uint32_t instruction); +void exec_riscv32i_bgeu(uint32_t instruction); +// load and store +void exec_riscv32i_load(uint32_t instruction); +void exec_riscv32i_store(uint32_t instruction); +// memory ordering +void exec_riscv32i_fence(uint32_t instruction); +// environment call and breakpoints +void exec_riscv32i_ecall(uint32_t instruction); +void exec_riscv32i_ebreak(uint32_t instruction); + + +// error handling +void exec_riscv32i_bad_opcode(uint32_t instruction); + +// + +#endif // RISCV32I_H \ No newline at end of file diff --git a/src/riscv32i.c b/src/riscv32i.c new file mode 100644 index 0000000..cea4b82 --- /dev/null +++ b/src/riscv32i.c @@ -0,0 +1,519 @@ +// riscv32i.c +/* +implementation of RISC-V 32I ISA, as according to +https://riscv.atlassian.net/wiki/spaces/HOME/pages/16154769/RISC-V+Technical+Specifications +document version 20250508, accessed: 23/09/2025 +*/ + +#include "riscv32i.h" + +uint8_t rv32i_mem[RV32I_MEM_SIZE]; +uint32_t rv32i_reg[32 + 1]; + +// little-endian byte-addressable memory helpers +static inline uint32_t rv32i_mem_read8(uint32_t addr) { + return rv32i_mem[addr & (RV32I_MEM_SIZE - 1)]; +} +static inline void rv32i_mem_write8(uint32_t addr, uint8_t val) { + rv32i_mem[addr & (RV32I_MEM_SIZE - 1)] = val; +} +static inline uint32_t rv32i_mem_read16(uint32_t addr) { + uint32_t a = addr & (RV32I_MEM_SIZE - 1); + return (uint32_t)rv32i_mem[a] | ((uint32_t)rv32i_mem[(a + 1) & (RV32I_MEM_SIZE - 1)] << 8); +} +static inline void rv32i_mem_write16(uint32_t addr, uint16_t val) { + uint32_t a = addr & (RV32I_MEM_SIZE - 1); + rv32i_mem[a] = (uint8_t)(val & 0xFF); + rv32i_mem[(a + 1) & (RV32I_MEM_SIZE - 1)] = (uint8_t)((val >> 8) & 0xFF); +} +static inline uint32_t rv32i_mem_read32(uint32_t addr) { + uint32_t a = addr & (RV32I_MEM_SIZE - 1); + return (uint32_t)rv32i_mem[a] + | ((uint32_t)rv32i_mem[(a + 1) & (RV32I_MEM_SIZE - 1)] << 8) + | ((uint32_t)rv32i_mem[(a + 2) & (RV32I_MEM_SIZE - 1)] << 16) + | ((uint32_t)rv32i_mem[(a + 3) & (RV32I_MEM_SIZE - 1)] << 24); +} +static inline void rv32i_mem_write32(uint32_t addr, uint32_t val) { + uint32_t a = addr & (RV32I_MEM_SIZE - 1); + rv32i_mem[a] = (uint8_t)(val & 0xFF); + rv32i_mem[(a + 1) & (RV32I_MEM_SIZE - 1)] = (uint8_t)((val >> 8) & 0xFF); + rv32i_mem[(a + 2) & (RV32I_MEM_SIZE - 1)] = (uint8_t)((val >> 16) & 0xFF); + rv32i_mem[(a + 3) & (RV32I_MEM_SIZE - 1)] = (uint8_t)((val >> 24) & 0xFF); +} + + + + + + + +// integer register-immediate +/* ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| imm[11:0] | rs1 |func3| rd | opcode | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +31 20 19 15 14 12 11 7 6 0 +*/ +void exec_riscv32i_addi(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + int32_t imm = RV32I_IMM_I(instruction); + rv32i_write_reg(rd, rv32i_read_reg(rs1) + (uint32_t)imm); +} +void exec_riscv32i_slti(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + int32_t imm = RV32I_IMM_I(instruction); + int32_t lhs = (int32_t)rv32i_read_reg(rs1); + rv32i_write_reg(rd, (uint32_t)(lhs < imm)); +} +void exec_riscv32i_andi(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + int32_t imm = RV32I_IMM_I(instruction); + rv32i_write_reg(rd, rv32i_read_reg(rs1) & (uint32_t)imm); +} +void exec_riscv32i_ori(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + int32_t imm = RV32I_IMM_I(instruction); + rv32i_write_reg(rd, rv32i_read_reg(rs1) | (uint32_t)imm); +} +void exec_riscv32i_xori(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + int32_t imm = RV32I_IMM_I(instruction); + rv32i_write_reg(rd, rv32i_read_reg(rs1) ^ (uint32_t)imm); +} +void exec_riscv32i_sltiu(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t imm = (uint32_t)RV32I_IMM_I(instruction); + rv32i_write_reg(rd, (uint32_t)(rv32i_read_reg(rs1) < imm)); +} +void exec_riscv32i_slli(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t shamt = (instruction >> 20) & RV32I_SHAMT_MASK; // shamt encoded in imm[4:0] + rv32i_write_reg(rd, rv32i_read_reg(rs1) << shamt); +} +void exec_riscv32i_srli_srai(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t shamt = (instruction >> 20) & RV32I_SHAMT_MASK; // shamt encoded in imm[4:0] + uint32_t funct7 = RV32I_FUNCT7(instruction); + + if (funct7 == 0b0100000) { // SRA + int32_t val = (int32_t)rv32i_read_reg(rs1); + rv32i_write_reg(rd, (uint32_t)(val >> shamt)); + return; + } // SRLI, default + rv32i_write_reg(rd, rv32i_read_reg(rs1) >> shamt); +} +void exec_riscv32i_lui(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + int32_t imm = RV32I_IMM_U(instruction); + rv32i_write_reg(rd, (uint32_t)imm); +} +void exec_riscv32i_auipc(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + int32_t imm = RV32I_IMM_U(instruction); + rv32i_write_reg(rd, rv32i_read_reg(pc) + (uint32_t)imm); +} + + +// integer register-register +/* ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| funct7 | rs2 | rs1 |func3| rd | opcode | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +31 25 24 20 19 15 14 12 11 7 6 0 +*/ +void exec_riscv32i_add_sub(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t rs2 = RV32I_RS2(instruction); + uint32_t funct3 = RV32I_FUNCT3(instruction); + if (funct3 == F3_REG_ADD_SUB && ((instruction >> 25) & 0x7F) == 0b0100000) { + // SUB + rv32i_write_reg(rd, rv32i_read_reg(rs1) - rv32i_read_reg(rs2)); + return; + } + rv32i_write_reg(rd, rv32i_read_reg(rs1) + rv32i_read_reg(rs2)); +} +void exec_riscv32i_slt(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t rs2 = RV32I_RS2(instruction); + rv32i_write_reg(rd, (uint32_t)((int32_t)rv32i_read_reg(rs1) < (int32_t)rv32i_read_reg(rs2))); +} +void exec_riscv32i_sltu(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t rs2 = RV32I_RS2(instruction); + rv32i_write_reg(rd, (uint32_t)(rv32i_read_reg(rs1) < rv32i_read_reg(rs2))); +} +void exec_riscv32i_and(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t rs2 = RV32I_RS2(instruction); + rv32i_write_reg(rd, rv32i_read_reg(rs1) & rv32i_read_reg(rs2)); +} +void exec_riscv32i_or(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t rs2 = RV32I_RS2(instruction); + rv32i_write_reg(rd, rv32i_read_reg(rs1) | rv32i_read_reg(rs2)); +} +void exec_riscv32i_xor(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t rs2 = RV32I_RS2(instruction); + rv32i_write_reg(rd, rv32i_read_reg(rs1) ^ rv32i_read_reg(rs2)); +} +void exec_riscv32i_sll(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t rs2 = RV32I_RS2(instruction); + rv32i_write_reg(rd, rv32i_read_reg(rs1) << (rv32i_read_reg(rs2) & RV32I_SHAMT_MASK)); +} +void exec_riscv32i_srl_sra(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t rs2 = RV32I_RS2(instruction); + uint32_t funct7 = RV32I_FUNCT7(instruction); + + if (funct7 == 0b0100000) { // SRA + rv32i_write_reg(rd, (uint32_t)((int32_t)rv32i_read_reg(rs1) >> (rv32i_read_reg(rs2) & RV32I_SHAMT_MASK))); + return; + } // SRL, default + rv32i_write_reg(rd, rv32i_read_reg(rs1) >> (rv32i_read_reg(rs2) & RV32I_SHAMT_MASK)); +} + + +// unconditional jumps +/* ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| | imm[10:1] | | imm[19:12] | rd | opcode | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + 30 21 19 12 11 7 6 0 + ^imm[20] (31) ^imm[11] (20) +*/ + +void exec_riscv32i_jal(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + int32_t imm = RV32I_IMM_J(instruction); + rv32i_write_reg(rd, rv32i_read_reg(pc) + 4); + rv32i_write_reg(pc, rv32i_read_reg(pc) + (uint32_t)imm); +} +void exec_riscv32i_jalr(uint32_t instruction) +{ + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + int32_t imm = RV32I_IMM_I(instruction); + uint32_t target = (rv32i_read_reg(rs1) + (uint32_t)imm) & ~1; + rv32i_write_reg(rd, rv32i_read_reg(pc) + 4); + rv32i_write_reg(pc, target); +} + + +// conditional branches +/* ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| | imm[10:5] | rs2 | rs1 |func3| |imm4:1 | opcode | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + 30 25 24 20 19 15 14 12 10 8 7 0 + ^imm[12] (31) ^imm[11] (7) +*/ + +void exec_riscv32i_beq(uint32_t instruction) +{ + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t rs2 = RV32I_RS2(instruction); + int32_t imm = RV32I_IMM_B(instruction); + if (rv32i_read_reg(rs1) == rv32i_read_reg(rs2)) { + rv32i_write_reg(pc, rv32i_read_reg(pc) + (uint32_t)imm); + } else { + rv32i_write_reg(pc, rv32i_read_reg(pc) + 4); + } +} +void exec_riscv32i_bne(uint32_t instruction) +{ + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t rs2 = RV32I_RS2(instruction); + int32_t imm = RV32I_IMM_B(instruction); + if (rv32i_read_reg(rs1) != rv32i_read_reg(rs2)) { + rv32i_write_reg(pc, rv32i_read_reg(pc) + (uint32_t)imm); + } else { + rv32i_write_reg(pc, rv32i_read_reg(pc) + 4); + } +} +void exec_riscv32i_blt(uint32_t instruction) +{ + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t rs2 = RV32I_RS2(instruction); + int32_t imm = RV32I_IMM_B(instruction); + if ((int32_t)rv32i_read_reg(rs1) < (int32_t)rv32i_read_reg(rs2)) { + rv32i_write_reg(pc, rv32i_read_reg(pc) + (uint32_t)imm); + } else { + rv32i_write_reg(pc, rv32i_read_reg(pc) + 4); + } +} +void exec_riscv32i_bge(uint32_t instruction) +{ + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t rs2 = RV32I_RS2(instruction); + int32_t imm = RV32I_IMM_B(instruction); + if ((int32_t)rv32i_read_reg(rs1) >= (int32_t)rv32i_read_reg(rs2)) { + rv32i_write_reg(pc, rv32i_read_reg(pc) + (uint32_t)imm); + } else { + rv32i_write_reg(pc, rv32i_read_reg(pc) + 4); + } +} +void exec_riscv32i_bltu(uint32_t instruction) +{ + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t rs2 = RV32I_RS2(instruction); + int32_t imm = RV32I_IMM_B(instruction); + if (rv32i_read_reg(rs1) < rv32i_read_reg(rs2)) { + rv32i_write_reg(pc, rv32i_read_reg(pc) + (uint32_t)imm); + } else { + rv32i_write_reg(pc, rv32i_read_reg(pc) + 4); + } +} +void exec_riscv32i_bgeu(uint32_t instruction) +{ + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t rs2 = RV32I_RS2(instruction); + int32_t imm = RV32I_IMM_B(instruction); + if (rv32i_read_reg(rs1) >= rv32i_read_reg(rs2)) { + rv32i_write_reg(pc, rv32i_read_reg(pc) + (uint32_t)imm); + } else { + rv32i_write_reg(pc, rv32i_read_reg(pc) + 4); + } +} + + +// remaining instructions +void exec_riscv32i_load(uint32_t instruction) +{ + uint32_t funct3 = RV32I_FUNCT3(instruction); + uint32_t rd = RV32I_RD(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + int32_t imm = RV32I_IMM_I(instruction); + uint32_t addr = rv32i_read_reg(rs1) + (uint32_t)imm; + switch (funct3) + { + case F3_LOAD_LB: // load byte, sign-extended + rv32i_write_reg(rd, (uint32_t)(int32_t)(int8_t)rv32i_mem_read8(addr)); + break; + case F3_LOAD_LH: // load halfword, sign-extended + rv32i_write_reg(rd, (uint32_t)(int32_t)(int16_t)rv32i_mem_read16(addr)); + break; + case F3_LOAD_LW: // load word + rv32i_write_reg(rd, rv32i_mem_read32(addr)); + break; + case F3_LOAD_LBU: // load byte, zero-extended + rv32i_write_reg(rd, (uint32_t)rv32i_mem_read8(addr)); + break; + case F3_LOAD_LHU: // load halfword, zero-extended + rv32i_write_reg(rd, (uint32_t)rv32i_mem_read16(addr)); + break; + default: + exec_riscv32i_bad_opcode(instruction); + break; + } +} + +void exec_riscv32i_store(uint32_t instruction) +{ + uint32_t funct3 = RV32I_FUNCT3(instruction); + uint32_t rs1 = RV32I_RS1(instruction); + uint32_t rs2 = RV32I_RS2(instruction); + int32_t imm = RV32I_IMM_S(instruction); + uint32_t addr = rv32i_read_reg(rs1) + (uint32_t)imm; + uint32_t val = rv32i_read_reg(rs2); + switch (funct3) + { + case F3_STORE_SB: + rv32i_mem_write8(addr, (uint8_t)(val & 0xFF)); + break; + case F3_STORE_SH: + rv32i_mem_write16(addr, (uint16_t)(val & 0xFFFF)); + break; + case F3_STORE_SW: + rv32i_mem_write32(addr, val); + break; + default: + exec_riscv32i_bad_opcode(instruction); + break; + } +} + +void exec_riscv32i_fence(uint32_t instruction) { (void)instruction; } +void exec_riscv32i_ecall(uint32_t instruction) { (void)instruction; } +void exec_riscv32i_ebreak(uint32_t instruction) { (void)instruction; } + +void exec_riscv32i_bad_opcode(uint32_t instruction) { (void)instruction; } + + + + + + +void exec_riscv32i(uint32_t instruction) +{ + uint32_t opcode = RV32I_OP(instruction); + switch (opcode) + { + case OP_IMM: + { + uint32_t funct3 = RV32I_FUNCT3(instruction); + switch (funct3) + { + case F3_IMM_ADDI: + exec_riscv32i_addi(instruction); + break; + case F3_IMM_SLTI: + exec_riscv32i_slti(instruction); + break; + case F3_IMM_SLTIU: + exec_riscv32i_sltiu(instruction); + break; + case F3_IMM_ANDI: + exec_riscv32i_andi(instruction); + break; + case F3_IMM_ORI: + exec_riscv32i_ori(instruction); + break; + case F3_IMM_XORI: + exec_riscv32i_xori(instruction); + break; + case F3_IMM_SLLI: + exec_riscv32i_slli(instruction); + break; + case F3_IMM_SRLI_SRAI: + exec_riscv32i_srli_srai(instruction); + break; + default: + exec_riscv32i_bad_opcode(instruction); + break; + } + } + break; + case OP_LUI: + exec_riscv32i_lui(instruction); + break; + case OP_AUIPC: + exec_riscv32i_auipc(instruction); + break; + case OP_REG: + { + uint32_t funct3 = RV32I_FUNCT3(instruction); + switch (funct3) + { + case F3_REG_ADD_SUB: + exec_riscv32i_add_sub(instruction); + break; + case F3_REG_SLT: + exec_riscv32i_slt(instruction); + break; + case F3_REG_SLTU: + exec_riscv32i_sltu(instruction); + break; + case F3_REG_AND: + exec_riscv32i_and(instruction); + break; + case F3_REG_OR: + exec_riscv32i_or(instruction); + break; + case F3_REG_XOR: + exec_riscv32i_xor(instruction); + break; + case F3_REG_SLL: + exec_riscv32i_sll(instruction); + break; + case F3_REG_SRL_SRA: + exec_riscv32i_srl_sra(instruction); + break; + default: + exec_riscv32i_bad_opcode(instruction); + break; + } + } + break; + case OP_JAL: + exec_riscv32i_jal(instruction); + break; + case OP_JALR: + exec_riscv32i_jalr(instruction); + break; + case OP_BRANCH: + { + uint32_t funct3 = RV32I_FUNCT3(instruction); + switch (funct3) + { + case F3_BRANCH_BEQ: + exec_riscv32i_beq(instruction); + break; + case F3_BRANCH_BNE: + exec_riscv32i_bne(instruction); + break; + case F3_BRANCH_BLT: + exec_riscv32i_blt(instruction); + break; + case F3_BRANCH_BGE: + exec_riscv32i_bge(instruction); + break; + case F3_BRANCH_BLTU: + exec_riscv32i_bltu(instruction); + break; + case F3_BRANCH_BGEU: + exec_riscv32i_bgeu(instruction); + break; + default: + exec_riscv32i_bad_opcode(instruction); + break; + } + } + break; + case OP_LOAD: + exec_riscv32i_load(instruction); + break; + case OP_STORE: + exec_riscv32i_store(instruction); + break; + case OP_FENCE: + exec_riscv32i_fence(instruction); + break; + case OP_SYSTEM: + // Could further decode for ECALL/EBREAK/CSR, keep minimal + exec_riscv32i_ecall(instruction); + break; + default: + exec_riscv32i_bad_opcode(instruction); + break; + } + + // keep x0 clamped to zero regardless of any helper misuse + rv32i_reg[0] = 0; +} \ No newline at end of file From a0b58a0925e5c52efdcabf7bc88596da969c4b1f Mon Sep 17 00:00:00 2001 From: Lachlan Harris Date: Wed, 24 Sep 2025 13:26:28 +1000 Subject: [PATCH 2/5] Implement RV32I architecture - Updated utils.h to support 32-bit memory operations and removed obsolete functions. - Deprecate hardware.c and instructions.c as part of the transition to a new architecture. - Updated utililities in utils.c to use 32-bit rv32i compatible instructions - Remove the bulk of test_utils.c temporarily - Update compiler documentation to explain installation of toolchain. Note: still incomplete --- .gitignore | 2 + Makefile | 5 +- docs/compiler.md | 70 +++++++-- include/hardware.h | 142 ------------------- include/instructions.h | 40 ------ include/riscv32i.h | 101 +++++++------ include/utils.h | 11 +- src/hardware.c | 8 -- src/instructions.c | 315 ----------------------------------------- src/main.c | 12 +- src/riscv32i.c | 223 ++++++++++++++++++++++++++--- src/utils.c | 154 ++++++++++---------- test/test_utils.c | 53 ++----- 13 files changed, 422 insertions(+), 714 deletions(-) delete mode 100644 include/hardware.h delete mode 100644 include/instructions.h delete mode 100644 src/hardware.c delete mode 100644 src/instructions.c diff --git a/.gitignore b/.gitignore index 72fa754..aea1c2a 100644 --- a/.gitignore +++ b/.gitignore @@ -64,5 +64,7 @@ dkms.conf bin/ *.key* +riscv-gnu-toolchain-rv32i/ + test/**/* !test/**/*.c \ No newline at end of file diff --git a/Makefile b/Makefile index a39d8b0..e5f4415 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,8 @@ DIR = bin BIN = $(DIR)/vbo PYTHON ?= python3 -SRC_COMMON = src/instructions.c src/utils.c src/hardware.c +# minimal RISCV32I VM build +SRC_COMMON = src/riscv32i.c src/utils.c SRC_MAIN = src/main.c $(SRC_COMMON) FLAGS = -Wall -Wextra -Werror -g -std=c11 -pedantic @@ -38,6 +39,6 @@ clean: distclean: clean test: - $(CC) $(INC) $(TEST_DIR)/test_utils.c src/utils.c src/hardware.c -o $(TEST_DIR)/test_utils $(FLAGS) + $(CC) $(INC) $(TEST_DIR)/test_utils.c src/utils.c src/riscv32i.c -o $(TEST_DIR)/test_utils $(FLAGS) $(TEST_DIR)/test_utils rm $(TEST_DIR)/test_utils \ No newline at end of file diff --git a/docs/compiler.md b/docs/compiler.md index c0322f5..253c0cf 100644 --- a/docs/compiler.md +++ b/docs/compiler.md @@ -1,14 +1,66 @@ - +# Toolchain -- How the assembler works -- Insturction encoding examples -- Pseudocode of compilation pipeline +## Scope -NOTE: only to be filled in once first iteration of assembler / compiler is complete +This document will cover the compiler and related tooling for producing VM images. This document provides minimal instructions and is incomplete. ---> -# Toolchain +## RISC-V 32I Toolchain -## Scope +An external toolchain has been used to compile C code into RV32I compatible instructions for the virtual machine. The toolchain can be found [here](https://xpack-dev-tools.github.io/riscv-none-elf-gcc-xpack). + +### Installation + +Installation can be done manually or through XPM. XPM is preferred. + +#### XPM + +#### Prerequisites + +To quote the documentation: +*"The only requirement for an automated install is a recent xpm, which is a portable Node.js command line application that complements npm with several extra features specific to C/C++ projects."* + +``` +npm install --location=global xpm@latest +``` + +#### Installation + +``` +xpm install @xpack-dev-tools/riscv-none-elf-gcc@14.2.0-3.1 --verbose +``` + +For more information, or to perform different types of installs (e.g. Global install), please see [the documentation.](https://xpack-dev-tools.github.io/riscv-none-elf-gcc-xpack/docs/install/#automated-install) + +### Manual + +``` +mkdir -p ~/.local/xPacks/riscv-none-elf-gcc +cd ~/.local/xPacks/riscv-none-elf-gcc + +tar xvf ~/Downloads/xpack-riscv-none-elf-gcc-14.2.0-3-linux-x64.tar.gz +chmod -R -w xpack-riscv-none-elf-gcc-14.2.0-3 +``` + +For more information, please see [the documentation](https://xpack-dev-tools.github.io/riscv-none-elf-gcc-xpack/docs/install/#manual-install-download-and-unpack) + +## Usage + +For detailed usage guide, please see [the documentation](https://xpack-dev-tools.github.io/riscv-none-elf-gcc-xpack/docs/user/). + +For this project, we will be compiling with the following options: +``` +-march=rv32i -mabi=ilp32 -nostdlib -Os -s -fno-pic \ + -Wl,--build-id=none -Wl,-Ttext=0x3000 -Wl,-e,_start \ + -o output.elf crt0_rv32i.S path/to/file.c +``` + +If you have chosen to install manually, the full command to compile a C file for embedding is as follows: + +``` +~/.local/xPacks/riscv-none-elf-gcc/xpack-riscv-none-elf-gcc-14.2.0-3/bin/riscv-none-elf-gcc -march=rv32i -mabi=ilp32 file/path.c -o output.out +``` + +# Embedding -This document will cover the assembler and related tooling for producing VM images. This document is a stub. \ No newline at end of file +This section is incomplete. \ No newline at end of file diff --git a/include/hardware.h b/include/hardware.h deleted file mode 100644 index b1b379e..0000000 --- a/include/hardware.h +++ /dev/null @@ -1,142 +0,0 @@ -// hardware.h -// simulated hardware components of the VM - -#ifndef HARDWARE_H -#define HARDWARE_H - -#include - - -// define memory -#define MEMORY_MAX ( 1 << 16) // 65,536 memory locations, 2^16 -extern uint16_t memory[MEMORY_MAX]; - -// define registers -enum -{ - R_R0 = 0, // 8 general-purpose registers; - R_R1, - R_R2, - R_R3, - R_R4, - R_R5, - R_R6, - R_R7, - R_PC, // program counter; - R_COND, // condition flags; - R_COUNT // number of registers -}; -extern uint16_t reg[R_COUNT]; - -// memory-mapped registers -enum -{ - MR_KBSR = 0xFE00, - MR_KBDR = 0xFE02 -}; - -// define instructions -/* -R-type: three register or two-register + small flags - opcode: bits [17:13] (5) - rd: bits [12:10] (3) - rs: bits [9:7] (3) - rt: bits [6:4] (3) - flags / unused: bits [3:0] (4) - -I-type: register + immediate - opcode: bits [17:13] (5) - rd: bits [12:10] (3) - rs: bits [9:7] (3) - imm7: bits [6:0] (7) // signed immediate with sext when needed - -M-type: memory/branch addressing - opcode: bits [17:13] (5) - rd: bits [12:10] (3) - offset10: bits [9:0] (10) // signed immediate with sext when needed - -*/ - - - -// Phantom-18 (later iteration): -/* -enum -{ - NOP = 0, // - OP_HALT, // - OP_ADD, // r-type - OP_SUB, // r-type - OP_AND, // r-type - OP_OR, // r-type - OP_XOR, // r-type - OP_NOT, // r-type, rd = ~rs - OP_MOV, // r-type, rd = rs - OP_LSH, // r-type, logical - OP_RSH, // r-type, logical - OP_CMP, // r-type, set condition codes - OP_ADDI, // i-type - OP_ANDI, // i-type - OP_ORI, // i-type - OP_LDI, // m-type, load immediate/addressing - OP_LD, // m-type, load from memory - OP_ST, // m-type, store to memory - OP_LDIND, // m-type, load via pointer - OP_STIND, // m-type, store via pointer - OP_JMP, // m-type - OP_JZ, // m-type - OP_JNZ, // m-type - OP_CALL, // m-type - OP_RET, // r-type OR implicit - OP_PUSH, // r-type - OP_POP, // r-type - OP_IN, // i-type, I/O - OP_OUT, // i-type, I/O - OP_TRAP, // i-type - OP_RAND, // r-type - OP_SLEEP // i-type -}; -*/ - -// LC-3: -enum -{ - OP_BR = 0, // branch - OP_ADD, // add - OP_LD, // load - OP_ST, // store - OP_JSR, // jump register - OP_AND, // bitwise and - OP_LDR, // load register - OP_STR, // store register - OP_RTI, // unused - OP_NOT, // bitwise not - OP_LDI, // load indirect - OP_STI, // store indirect - OP_JMP, // jump - OP_RES, // reserved (unused) - OP_LEA, // load effective address - OP_TRAP // execute trap -}; - -// define cond flags -enum -{ - FL_POS = 1 << 0, // P - FL_ZRO = 1 << 1, // Z - FL_NEG = 1 << 2 // N -}; - - -// define trap codes -enum -{ - TRAP_GETC = 0x20, // get character from keyboard, not echoed onto the terminal - TRAP_OUT = 0x21, // output a character - TRAP_PUTS = 0x22, // output a word string - TRAP_IN = 0x23, // get character from keyboard, echoed onto the terminal - TRAP_PUTSP = 0x24, // output a byte string - TRAP_HALT = 0x25 // halt the program -}; - -#endif // HARDWARE_H \ No newline at end of file diff --git a/include/instructions.h b/include/instructions.h deleted file mode 100644 index 4334408..0000000 --- a/include/instructions.h +++ /dev/null @@ -1,40 +0,0 @@ -// instructions.h -// refer to src/instructions.c for implementation - -#include - -#ifndef INSTRUCTIONS_H -#define INSTRUCTIONS_H - -// function prototypes for instruction handling -void execute_instruction(uint16_t instruction); - -void execute_add(uint16_t instr); -void execute_and(uint16_t instr); -void execute_not(uint16_t instr); -void execute_br(uint16_t instr); -void execute_jmp(uint16_t instr); -void execute_jsr(uint16_t instr); -void execute_ld(uint16_t instr); -void execute_ldi(uint16_t instr); -void execute_ldr(uint16_t instr); -void execute_lea(uint16_t instr); -void execute_st(uint16_t instr); -void execute_sti(uint16_t instr); -void execute_str(uint16_t instr); -void execute_trap(uint16_t instr); -void execute_bad_opcode(uint16_t instr); - -// LC-3 decode helpers -#define GET_OP(x) (((x) >> 12) & 0xF) -#define GET_DR(x) (((x) >> 9) & 0x7) -#define GET_SR1(x) (((x) >> 6) & 0x7) -#define GET_SR2(x) ((x) & 0x7) -#define GET_IMM_FLAG5(x) (((x) >> 5) & 0x1) -#define GET_IMM5(x) ((x) & 0x1F) -#define GET_PC_OFFSET9(x) ((x) & 0x1FF) -#define GET_PC_OFFSET11(x) ((x) & 0x7FF) -#define GET_BASE_R(x) (((x) >> 6) & 0x7) -#define GET_OFFSET6(x) ((x) & 0x3F) - -#endif // INSTRUCTIONS_H \ No newline at end of file diff --git a/include/riscv32i.h b/include/riscv32i.h index 1e78dc1..147ec19 100644 --- a/include/riscv32i.h +++ b/include/riscv32i.h @@ -11,6 +11,7 @@ refer to src/riscv32i.c #define RISCV32I_H #include +#include // define memory (byte-addressable, local to RV32I) // keep this realistic; 1 MiB default, must be power of two for cheap masking @@ -89,70 +90,85 @@ ecall, ebreak // environment cal // opcodes enum { - OP_IMM = 0b0010011, // integer register-immediate - OP_LUI = 0b0110111, // load upper immediate - OP_AUIPC = 0b0010111, // add upper immediate to - OP_REG = 0b0110011, // integer register-register - OP_JAL = 0b1101111, // unconditional jump and link - OP_JALR = 0b1100111, // unconditional jump and link register - OP_BRANCH = 0b1100011, // conditional branches - OP_LOAD = 0b0000011, // load - OP_STORE = 0b0100011, // store - OP_FENCE = 0b0001111, // memory ordering - OP_SYSTEM = 0b1110011 // environment call and breakpoints + OP_IMM = 0x13, // integer register-immediate + OP_LUI = 0x37, // load upper immediate + OP_AUIPC = 0x17, // add upper immediate to + OP_REG = 0x33, // integer register-register + OP_JAL = 0x6F, // unconditional jump and link + OP_JALR = 0x67, // unconditional jump and link register + OP_BRANCH = 0x63, // conditional branches + OP_LOAD = 0x03, // load + OP_STORE = 0x23, // store + OP_FENCE = 0x0F, // memory ordering + OP_SYSTEM = 0x73 // environment call and breakpoints }; // funct3 reg-reg (store/branch/reg-reg/imm [14:12]) enum { - F3_REG_ADD_SUB = 0b000, - F3_REG_SLL = 0b001, - F3_REG_SLT = 0b010, - F3_REG_SLTU = 0b011, - F3_REG_XOR = 0b100, - F3_REG_SRL_SRA = 0b101, - F3_REG_OR = 0b110, - F3_REG_AND = 0b111, + F3_REG_ADD_SUB = 0x0, + F3_REG_SLL = 0x1, + F3_REG_SLT = 0x2, + F3_REG_SLTU = 0x3, + F3_REG_XOR = 0x4, + F3_REG_SRL_SRA = 0x5, + F3_REG_OR = 0x6, + F3_REG_AND = 0x7, }; // funct3 imm [14:12] enum { - F3_IMM_ADDI = 0b000, - F3_IMM_SLTI = 0b010, - F3_IMM_SLTIU = 0b011, - F3_IMM_XORI = 0b100, - F3_IMM_SRLI_SRAI = 0b101, - F3_IMM_ORI = 0b110, - F3_IMM_ANDI = 0b111, - F3_IMM_SLLI = 0b001, // out of order canonically + F3_IMM_ADDI = 0x0, + F3_IMM_SLTI = 0x2, + F3_IMM_SLTIU = 0x3, + F3_IMM_XORI = 0x4, + F3_IMM_SRLI_SRAI = 0x5, + F3_IMM_ORI = 0x6, + F3_IMM_ANDI = 0x7, + F3_IMM_SLLI = 0x1, // out of order canonically }; // funct3 branch [14:12] enum { - F3_BRANCH_BEQ = 0b000, - F3_BRANCH_BNE = 0b001, - F3_BRANCH_BLT = 0b100, - F3_BRANCH_BGE = 0b101, - F3_BRANCH_BLTU = 0b110, - F3_BRANCH_BGEU = 0b111, + F3_BRANCH_BEQ = 0x0, + F3_BRANCH_BNE = 0x1, + F3_BRANCH_BLT = 0x4, + F3_BRANCH_BGE = 0x5, + F3_BRANCH_BLTU = 0x6, + F3_BRANCH_BGEU = 0x7, }; // funct3 load [14:12] enum { - F3_LOAD_LB = 0b000, - F3_LOAD_LH = 0b001, - F3_LOAD_LW = 0b010, - F3_LOAD_LBU = 0b100, - F3_LOAD_LHU = 0b101, + F3_LOAD_LB = 0x0, + F3_LOAD_LH = 0x1, + F3_LOAD_LW = 0x2, + F3_LOAD_LBU = 0x4, + F3_LOAD_LHU = 0x5, }; // funct3 store [14:12] enum { - F3_STORE_SB = 0b000, - F3_STORE_SH = 0b001, - F3_STORE_SW = 0b010, + F3_STORE_SB = 0x0, + F3_STORE_SH = 0x1, + F3_STORE_SW = 0x2, }; +// funct3 system [14:12] +enum +{ + F3_SYS_PRIV = 0x0, + F3_SYS_CSRRW = 0x1, + F3_SYS_CSRRS = 0x2, + F3_SYS_CSRRC = 0x3, + F3_SYS_CSRRWI = 0x5, + F3_SYS_CSRRSI = 0x6, + F3_SYS_CSRRCI = 0x7, +}; + + + + // integer register-immediate void exec_riscv32i_addi(uint32_t instruction); @@ -190,8 +206,7 @@ void exec_riscv32i_store(uint32_t instruction); // memory ordering void exec_riscv32i_fence(uint32_t instruction); // environment call and breakpoints -void exec_riscv32i_ecall(uint32_t instruction); -void exec_riscv32i_ebreak(uint32_t instruction); +void exec_riscv32i_system(uint32_t instruction); // error handling diff --git a/include/utils.h b/include/utils.h index d1faf4b..ee68edb 100644 --- a/include/utils.h +++ b/include/utils.h @@ -12,16 +12,15 @@ void handle_interrupt(int signal); void disable_input_buffering(); void restore_input_buffering(); -void update_flags(uint16_t r); -void mem_write(uint16_t address, uint16_t val); +uint32_t check_key(); +// byte-addressable memory helpers for RV32I (little-endian) +void mem_write(uint32_t address, uint32_t val); // writes a 32-bit word (4 bytes) +uint32_t mem_read(uint32_t address); // reads a 32-bit word (4 bytes) + void read_image_file(FILE* file); void read_image_buffer(const uint8_t* data, size_t size); int read_image(const char* image_path); -uint16_t sext(uint16_t x, int bit_count); -uint16_t swap16(uint16_t x); -uint16_t mem_read(uint16_t address); - #endif // UTILS_H \ No newline at end of file diff --git a/src/hardware.c b/src/hardware.c deleted file mode 100644 index 9823887..0000000 --- a/src/hardware.c +++ /dev/null @@ -1,8 +0,0 @@ -// hardware.c -// definitions for VM hardware globals - -#include "hardware.h" - -// define memory and registers -uint16_t memory[MEMORY_MAX]; -uint16_t reg[R_COUNT]; diff --git a/src/instructions.c b/src/instructions.c deleted file mode 100644 index 805884f..0000000 --- a/src/instructions.c +++ /dev/null @@ -1,315 +0,0 @@ -// instructions.c -// implementation of the instructions specified in the ISA - -#include -#include -#include - -#include "instructions.h" -#include "hardware.h" -#include "utils.h" - - - -// trap routines -void trap_puts() { - uint16_t* c = memory + reg[R_R0]; - while (*c) { - putc((char)*c, stdout); - ++c; - } - fflush(stdout); -} - -void trap_getc() { - reg[R_R0] = (uint16_t)getchar(); - update_flags(R_R0); -} - -void trap_out() { - putc((char)reg[R_R0], stdout); - fflush(stdout); -} - -void trap_in() { - char c = getchar(); - putc(c, stdout); - fflush(stdout); - reg[R_R0] = (uint16_t)c; - update_flags(R_R0); -} - -void trap_putsp() { - // one char per byte, two bytes per word - uint16_t* c = memory + reg[R_R0]; - while (*c) { - char char1 = (*c) & 0xFF; - putc(char1, stdout); - char char2 = (*c >> 8) & 0xFF; - if (char2) putc(char2, stdout); - ++c; - } - fflush(stdout); -} - -void trap_halt() { - puts("HALT"); - fflush(stdout); - exit(0); -} - - - -// define instructions -/* -R-type: three register or two-register + small flags - opcode: bits [17:13] (5) - rd: bits [12:10] (3) - rs: bits [9:7] (3) - rt: bits [6:4] (3) - flags / unused: bits [3:0] (4) - -I-type: register + immediate - opcode: bits [17:13] (5) - rd: bits [12:10] (3) - rs: bits [9:7] (3) - imm7: bits [6:0] (7) // signed immediate with sext when needed - -M-type: memory/branch addressing - opcode: bits [17:13] (5) - rd: bits [12:10] (3) - offset10: bits [9:0] (10) // signed immediate with sext when needed - - -Phantom-18: -{ - NOP = 0, // - OP_HALT, // - OP_ADD, // r-type - OP_SUB, // r-type - OP_AND, // r-type - OP_OR, // r-type - OP_XOR, // r-type - OP_NOT, // r-type, rd = ~rs - OP_MOV, // r-type, rd = rs - OP_LSH, // r-type, logical - OP_RSH, // r-type, logical - OP_CMP, // r-type, set condition codes - OP_ADDI, // i-type - OP_ANDI, // i-type - OP_ORI, // i-type - OP_LDI, // m-type, load immediate/addressing - OP_LD, // m-type, load from memory - OP_ST, // m-type, store to memory - OP_LDIND, // m-type, load via pointer - OP_STIND, // m-type, store via pointer - OP_JMP, // m-type - OP_JZ, // m-type - OP_JNZ, // m-type - OP_CALL, // m-type - OP_RET, // r-type OR implicit - OP_PUSH, // r-type - OP_POP, // r-type - OP_IN, // i-type, I/O - OP_OUT, // i-type, I/O - OP_TRAP, // i-type - OP_RAND, // r-type - OP_SLEEP // i-type -}; - - -LC-3: -enum -{ - OP_BR = 0, // branch - OP_ADD, // add - OP_LD, // load - OP_ST, // store - OP_JSR, // jump register - OP_AND, // bitwise and - OP_LDR, // load register - OP_STR, // store register - OP_RTI, // unused - OP_NOT, // bitwise not - OP_LDI, // load indirect - OP_STI, // store indirect - OP_JMP, // jump - OP_RES, // reserved (unused) - OP_LEA, // load effective address - OP_TRAP // execute trap -}; -*/ - -void execute_add(uint16_t instr) -{ - uint16_t r0 = GET_DR(instr); - uint16_t r1 = GET_SR1(instr); - uint16_t imm_flag = GET_IMM_FLAG5(instr); - - if (imm_flag) { - uint16_t imm5 = sext(GET_IMM5(instr), 5); - reg[r0] = reg[r1] + imm5; - } else { - uint16_t r2 = GET_SR2(instr); - reg[r0] = reg[r1] + reg[r2]; - } - update_flags(r0); -} - -void execute_and(uint16_t instr) -{ - uint16_t r0 = GET_DR(instr); - uint16_t r1 = GET_SR1(instr); - uint16_t imm_flag = GET_IMM_FLAG5(instr); - - if (imm_flag) { - uint16_t imm5 = sext(GET_IMM5(instr), 5); - reg[r0] = reg[r1] & imm5; - } else { - uint16_t r2 = GET_SR2(instr); - reg[r0] = reg[r1] & reg[r2]; - } - update_flags(r0); -} - -void execute_not(uint16_t instr) -{ - uint16_t r0 = GET_DR(instr); - uint16_t r1 = GET_SR1(instr); - reg[r0] = ~reg[r1]; - update_flags(r0); -} - -void execute_br(uint16_t instr) -{ - uint16_t pc_offset = sext(GET_PC_OFFSET9(instr), 9); - uint16_t cond_flag = (instr >> 9) & 0x7; - if (cond_flag & reg[R_COND]) { - reg[R_PC] += pc_offset; - } -} - -void execute_jmp(uint16_t instr) -{ - uint16_t r1 = GET_BASE_R(instr); - reg[R_PC] = reg[r1]; -} - -void execute_jsr(uint16_t instr) -{ - uint16_t long_flag = (instr >> 11) & 1; - reg[R_R7] = reg[R_PC]; - if (long_flag) { - uint16_t pc_offset = sext(GET_PC_OFFSET11(instr), 11); - reg[R_PC] += pc_offset; - } else { - uint16_t r1 = GET_BASE_R(instr); - reg[R_PC] = reg[r1]; - } -} - -void execute_ld(uint16_t instr) -{ - uint16_t r0 = GET_DR(instr); - uint16_t pc_offset = sext(GET_PC_OFFSET9(instr), 9); - reg[r0] = mem_read(reg[R_PC] + pc_offset); - update_flags(r0); -} - -void execute_ldr(uint16_t instr) -{ - uint16_t r0 = GET_DR(instr); - uint16_t r1 = GET_BASE_R(instr); - uint16_t offset6 = sext(GET_OFFSET6(instr), 6); - reg[r0] = mem_read(reg[r1] + offset6); - update_flags(r0); -} - -void execute_ldi(uint16_t instr) -{ - uint16_t r0 = GET_DR(instr); - uint16_t pc_offset = sext(GET_PC_OFFSET9(instr), 9); - reg[r0] = mem_read(mem_read(reg[R_PC] + pc_offset)); - update_flags(r0); -} - -void execute_lea(uint16_t instr) -{ - uint16_t r0 = GET_DR(instr); - uint16_t pc_offset = sext(GET_PC_OFFSET9(instr), 9); - reg[r0] = reg[R_PC] + pc_offset; - update_flags(r0); -} - -void execute_st(uint16_t instr) -{ - uint16_t r0 = GET_DR(instr); - uint16_t pc_offset = sext(GET_PC_OFFSET9(instr), 9); - mem_write(reg[R_PC] + pc_offset, reg[r0]); -} - -void execute_sti(uint16_t instr) -{ - uint16_t r0 = GET_DR(instr); - uint16_t pc_offset = sext(GET_PC_OFFSET9(instr), 9); - mem_write(mem_read(reg[R_PC] + pc_offset), reg[r0]); -} - -void execute_str(uint16_t instr) -{ - uint16_t r0 = GET_DR(instr); - uint16_t r1 = GET_BASE_R(instr); - uint16_t offset6 = sext(GET_OFFSET6(instr), 6); - mem_write(reg[r1] + offset6, reg[r0]); -} - -static void trap_dispatch(uint8_t trapvect8) { - switch (trapvect8) { - case TRAP_GETC: trap_getc(); break; - case TRAP_OUT: trap_out(); break; - case TRAP_PUTS: trap_puts(); break; - case TRAP_IN: trap_in(); break; - case TRAP_PUTSP:trap_putsp();break; - case TRAP_HALT: trap_halt(); break; - default: - fprintf(stderr, "Bad TRAP vector: 0x%02X\n", trapvect8); - exit(1); - } -} - -void execute_trap(uint16_t instr) { - uint8_t trapvect8 = instr & 0xFF; - reg[R_R7] = reg[R_PC]; - trap_dispatch(trapvect8); -} - -void execute_bad_opcode(uint16_t instr) { - uint16_t op = (instr >> 12) & 0xF; - fprintf(stderr, "Bad opcode: 0x%X (instr=0x%04X)\n", op, instr); - exit(1); -} - - - - -void execute_instruction(uint16_t instruction) { - uint16_t op = GET_OP(instruction); - switch (op) { - case OP_ADD: execute_add(instruction); break; - case OP_AND: execute_and(instruction); break; - case OP_NOT: execute_not(instruction); break; - case OP_TRAP: execute_trap(instruction); break; - case OP_BR: execute_br(instruction); break; - case OP_JMP: execute_jmp(instruction); break; - case OP_JSR: execute_jsr(instruction); break; - case OP_LD: execute_ld(instruction); break; - case OP_LDI: execute_ldi(instruction); break; - case OP_LDR: execute_ldr(instruction); break; - case OP_LEA: execute_lea(instruction); break; - case OP_ST: execute_st(instruction); break; - case OP_STI: execute_sti(instruction); break; - case OP_STR: execute_str(instruction); break; - default: execute_bad_opcode(instruction); break; - } -} - diff --git a/src/main.c b/src/main.c index a32fab8..a8e8535 100644 --- a/src/main.c +++ b/src/main.c @@ -8,8 +8,7 @@ #include "config.h" -#include "hardware.h" -#include "instructions.h" +#include "riscv32i.h" #include "utils.h" @@ -59,10 +58,8 @@ int main(int argc, const char* argv[]) setup(); - reg[R_COND] = FL_ZRO; // one cond flag should be set at any given time - // use config from conf.h - reg[R_PC] = PC_START; + rv32i_reg[pc] = PC_START; // load header metadata from binary // load_header(); @@ -70,7 +67,8 @@ int main(int argc, const char* argv[]) int running = 1; while (running) { - uint16_t instr = mem_read(reg[R_PC]++); - execute_instruction(instr); + uint32_t instr = mem_read(rv32i_reg[pc]); + rv32i_reg[pc] += 4; // advance to next 32-bit instruction + exec_riscv32i(instr); } } diff --git a/src/riscv32i.c b/src/riscv32i.c index cea4b82..9b4474a 100644 --- a/src/riscv32i.c +++ b/src/riscv32i.c @@ -5,6 +5,17 @@ implementation of RISC-V 32I ISA, as according to document version 20250508, accessed: 23/09/2025 */ +#define _GNU_SOURCE 1 +#define _POSIX_C_SOURCE 200809L +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "riscv32i.h" uint8_t rv32i_mem[RV32I_MEM_SIZE]; @@ -111,7 +122,7 @@ void exec_riscv32i_srli_srai(uint32_t instruction) uint32_t shamt = (instruction >> 20) & RV32I_SHAMT_MASK; // shamt encoded in imm[4:0] uint32_t funct7 = RV32I_FUNCT7(instruction); - if (funct7 == 0b0100000) { // SRA + if (funct7 == 0x20) { // SRA int32_t val = (int32_t)rv32i_read_reg(rs1); rv32i_write_reg(rd, (uint32_t)(val >> shamt)); return; @@ -145,7 +156,7 @@ void exec_riscv32i_add_sub(uint32_t instruction) uint32_t rs1 = RV32I_RS1(instruction); uint32_t rs2 = RV32I_RS2(instruction); uint32_t funct3 = RV32I_FUNCT3(instruction); - if (funct3 == F3_REG_ADD_SUB && ((instruction >> 25) & 0x7F) == 0b0100000) { + if (funct3 == F3_REG_ADD_SUB && ((instruction >> 25) & 0x7F) == 0x20) { // SUB rv32i_write_reg(rd, rv32i_read_reg(rs1) - rv32i_read_reg(rs2)); return; @@ -201,7 +212,7 @@ void exec_riscv32i_srl_sra(uint32_t instruction) uint32_t rs2 = RV32I_RS2(instruction); uint32_t funct7 = RV32I_FUNCT7(instruction); - if (funct7 == 0b0100000) { // SRA + if (funct7 == 0x20) { // SRA rv32i_write_reg(rd, (uint32_t)((int32_t)rv32i_read_reg(rs1) >> (rv32i_read_reg(rs2) & RV32I_SHAMT_MASK))); return; } // SRL, default @@ -222,8 +233,11 @@ void exec_riscv32i_jal(uint32_t instruction) { uint32_t rd = RV32I_RD(instruction); int32_t imm = RV32I_IMM_J(instruction); - rv32i_write_reg(rd, rv32i_read_reg(pc) + 4); - rv32i_write_reg(pc, rv32i_read_reg(pc) + (uint32_t)imm); + // PC was pre-incremented in main() (pc = old_pc + 4) + // Link should be old_pc + 4 => current pc + rv32i_write_reg(rd, rv32i_read_reg(pc)); + // Target = old_pc + imm => (pc - 4) + imm + rv32i_write_reg(pc, (rv32i_read_reg(pc) - 4) + (uint32_t)imm); } void exec_riscv32i_jalr(uint32_t instruction) { @@ -231,7 +245,8 @@ void exec_riscv32i_jalr(uint32_t instruction) uint32_t rs1 = RV32I_RS1(instruction); int32_t imm = RV32I_IMM_I(instruction); uint32_t target = (rv32i_read_reg(rs1) + (uint32_t)imm) & ~1; - rv32i_write_reg(rd, rv32i_read_reg(pc) + 4); + // Link = current pc (old_pc + 4) + rv32i_write_reg(rd, rv32i_read_reg(pc)); rv32i_write_reg(pc, target); } @@ -251,9 +266,10 @@ void exec_riscv32i_beq(uint32_t instruction) uint32_t rs2 = RV32I_RS2(instruction); int32_t imm = RV32I_IMM_B(instruction); if (rv32i_read_reg(rs1) == rv32i_read_reg(rs2)) { - rv32i_write_reg(pc, rv32i_read_reg(pc) + (uint32_t)imm); + // target = (pc - 4) + imm + rv32i_write_reg(pc, (rv32i_read_reg(pc) - 4) + (uint32_t)imm); } else { - rv32i_write_reg(pc, rv32i_read_reg(pc) + 4); + // not taken: pc already points to next instruction } } void exec_riscv32i_bne(uint32_t instruction) @@ -262,9 +278,9 @@ void exec_riscv32i_bne(uint32_t instruction) uint32_t rs2 = RV32I_RS2(instruction); int32_t imm = RV32I_IMM_B(instruction); if (rv32i_read_reg(rs1) != rv32i_read_reg(rs2)) { - rv32i_write_reg(pc, rv32i_read_reg(pc) + (uint32_t)imm); + rv32i_write_reg(pc, (rv32i_read_reg(pc) - 4) + (uint32_t)imm); } else { - rv32i_write_reg(pc, rv32i_read_reg(pc) + 4); + // not taken } } void exec_riscv32i_blt(uint32_t instruction) @@ -273,9 +289,9 @@ void exec_riscv32i_blt(uint32_t instruction) uint32_t rs2 = RV32I_RS2(instruction); int32_t imm = RV32I_IMM_B(instruction); if ((int32_t)rv32i_read_reg(rs1) < (int32_t)rv32i_read_reg(rs2)) { - rv32i_write_reg(pc, rv32i_read_reg(pc) + (uint32_t)imm); + rv32i_write_reg(pc, (rv32i_read_reg(pc) - 4) + (uint32_t)imm); } else { - rv32i_write_reg(pc, rv32i_read_reg(pc) + 4); + // not taken } } void exec_riscv32i_bge(uint32_t instruction) @@ -284,9 +300,9 @@ void exec_riscv32i_bge(uint32_t instruction) uint32_t rs2 = RV32I_RS2(instruction); int32_t imm = RV32I_IMM_B(instruction); if ((int32_t)rv32i_read_reg(rs1) >= (int32_t)rv32i_read_reg(rs2)) { - rv32i_write_reg(pc, rv32i_read_reg(pc) + (uint32_t)imm); + rv32i_write_reg(pc, (rv32i_read_reg(pc) - 4) + (uint32_t)imm); } else { - rv32i_write_reg(pc, rv32i_read_reg(pc) + 4); + // not taken } } void exec_riscv32i_bltu(uint32_t instruction) @@ -295,9 +311,9 @@ void exec_riscv32i_bltu(uint32_t instruction) uint32_t rs2 = RV32I_RS2(instruction); int32_t imm = RV32I_IMM_B(instruction); if (rv32i_read_reg(rs1) < rv32i_read_reg(rs2)) { - rv32i_write_reg(pc, rv32i_read_reg(pc) + (uint32_t)imm); + rv32i_write_reg(pc, (rv32i_read_reg(pc) - 4) + (uint32_t)imm); } else { - rv32i_write_reg(pc, rv32i_read_reg(pc) + 4); + // not taken } } void exec_riscv32i_bgeu(uint32_t instruction) @@ -306,9 +322,9 @@ void exec_riscv32i_bgeu(uint32_t instruction) uint32_t rs2 = RV32I_RS2(instruction); int32_t imm = RV32I_IMM_B(instruction); if (rv32i_read_reg(rs1) >= rv32i_read_reg(rs2)) { - rv32i_write_reg(pc, rv32i_read_reg(pc) + (uint32_t)imm); + rv32i_write_reg(pc, (rv32i_read_reg(pc) - 4) + (uint32_t)imm); } else { - rv32i_write_reg(pc, rv32i_read_reg(pc) + 4); + // not taken } } @@ -369,11 +385,174 @@ void exec_riscv32i_store(uint32_t instruction) } } + + +// atomic read/writes +// ! NOT IMPLEMENTED IN THIS PoC +/* ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| csr | rs1 |func3| rd | opcode | ++-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +31 20 19 15 14 12 11 7 6 0 +*/ + + + void exec_riscv32i_fence(uint32_t instruction) { (void)instruction; } -void exec_riscv32i_ecall(uint32_t instruction) { (void)instruction; } -void exec_riscv32i_ebreak(uint32_t instruction) { (void)instruction; } -void exec_riscv32i_bad_opcode(uint32_t instruction) { (void)instruction; } +static inline uint8_t* rv32i_mem_ptr(uint32_t addr) { + return &rv32i_mem[addr & (RV32I_MEM_SIZE - 1)]; +} +static size_t rv32i_copy_cstr(uint32_t addr, char* dst, size_t maxlen) { + size_t i = 0; + while (i + 1 < maxlen) { + uint8_t c = *rv32i_mem_ptr(addr + (uint32_t)i); + dst[i++] = (char)c; + if (c == '\0') break; + } + if (i == 0 || dst[i-1] != '\0') dst[i] = '\0'; + return i; +} + +// minimal Linux-like syscall emulation for user programs +static void rv32i_do_ecall(void) { + uint32_t num = rv32i_reg[17]; // a7 + switch (num) { + case 63: { // read + int fd = (int)rv32i_reg[10]; // a0 + uint32_t buf = rv32i_reg[11]; // a1 + size_t cnt = (size_t)rv32i_reg[12]; // a2 + if (buf >= RV32I_MEM_SIZE) { rv32i_reg[10] = (uint32_t)(-EFAULT); return; } + size_t max = RV32I_MEM_SIZE - buf; + if (cnt > max) cnt = max; + ssize_t r = read(fd, rv32i_mem_ptr(buf), cnt); + rv32i_reg[10] = (r >= 0) ? (uint32_t)r : (uint32_t)(-errno); + break; + } + case 64: { // write + int fd = (int)rv32i_reg[10]; + uint32_t buf = rv32i_reg[11]; + size_t cnt = (size_t)rv32i_reg[12]; + if (buf >= RV32I_MEM_SIZE) { rv32i_reg[10] = (uint32_t)(-EFAULT); return; } + size_t max = RV32I_MEM_SIZE - buf; + if (cnt > max) cnt = max; + ssize_t r = write(fd, rv32i_mem_ptr(buf), cnt); + rv32i_reg[10] = (r >= 0) ? (uint32_t)r : (uint32_t)(-errno); + break; + } + case 56: { // openat(dirfd, path, flags, mode) + int dirfd = (int)rv32i_reg[10]; + uint32_t path = rv32i_reg[11]; + int flags = (int)rv32i_reg[12]; + mode_t mode = (mode_t)rv32i_reg[13]; + char tmp[4096]; + rv32i_copy_cstr(path, tmp, sizeof(tmp)); + int r = openat(dirfd, tmp, flags, mode); + rv32i_reg[10] = (r >= 0) ? (uint32_t)r : (uint32_t)(-errno); + break; + } + case 57: { // close(fd) + int fd = (int)rv32i_reg[10]; + int r = close(fd); + rv32i_reg[10] = (r == 0) ? 0u : (uint32_t)(-errno); + break; + } + case 62: { // lseek (simplified, 32-bit offset) + // a0 fd, a1 offset (low), a2 whence + int fd = (int)rv32i_reg[10]; + off_t off = (off_t)(int32_t)rv32i_reg[11]; + int wh = (int)rv32i_reg[12]; + off_t r = lseek(fd, off, wh); + rv32i_reg[10] = (r >= 0) ? (uint32_t)r : (uint32_t)(-errno); + break; + } + case 93: { // exit(code) + int code = (int)rv32i_reg[10]; + exit(code); + break; + } + case 94: { // exit_group(code) + int code = (int)rv32i_reg[10]; + exit(code); + break; + } + case 169: { // gettimeofday(tv, tz) + uint32_t tv = rv32i_reg[10]; + // tz is ignored + struct timeval host_tv; + int r = gettimeofday(&host_tv, NULL); + if (r == 0) { + if (tv + 8 <= RV32I_MEM_SIZE) { + // write 32-bit tv_sec and tv_usec + uint32_t sec = (uint32_t)host_tv.tv_sec; + uint32_t usec = (uint32_t)host_tv.tv_usec; + rv32i_mem_write32(tv, sec); + rv32i_mem_write32(tv + 4, usec); + rv32i_reg[10] = 0; + } else { + rv32i_reg[10] = (uint32_t)(-EFAULT); + } + } else { + rv32i_reg[10] = (uint32_t)(-errno); + } + break; + } + case 214: { // brk + static uint32_t program_break = 0; + uint32_t addr = rv32i_reg[10]; + if (addr == 0) { + rv32i_reg[10] = program_break; + } else { + if (addr >= RV32I_MEM_SIZE) addr = RV32I_MEM_SIZE - 1; + program_break = addr; + rv32i_reg[10] = program_break; + } + break; + } + default: + fprintf(stderr, "[RISCV32I] Unimplemented ecall %u at PC=0x%08X\n", num, rv32i_reg[pc]); + rv32i_reg[10] = (uint32_t)(-ENOSYS); + break; + } +} + +void exec_riscv32i_system(uint32_t instruction) +{ + uint32_t funct3 = RV32I_FUNCT3(instruction); + switch (funct3) + { + case F3_SYS_PRIV: { + uint32_t imm = (instruction >> 20) & 0xFFF; + if (imm == 0) { + // ECALL + rv32i_do_ecall(); + } else if (imm == 1) { + // EBREAK + fprintf(stderr, "[RISCV32I] EBREAK at PC=0x%08X\n", rv32i_reg[pc]); + exit(1); + } else { + exec_riscv32i_bad_opcode(instruction); + } + break; + } + /* + case F3_SYS_CSRRW: + case F3_SYS_CSRRS: + case F3_SYS_CSRRC: + case F3_SYS_CSRRWI: + case F3_SYS_CSRRSI: + case F3_SYS_CSRRCI: + */ + default: + exec_riscv32i_bad_opcode(instruction); + break; + } +} + +void exec_riscv32i_bad_opcode(uint32_t instruction) { + fprintf(stderr, "[RISCV32I] Bad/illegal opcode 0x%08X at PC=0x%08X.\n", instruction, rv32i_reg[pc]); + exit(2); +} @@ -507,7 +686,7 @@ void exec_riscv32i(uint32_t instruction) break; case OP_SYSTEM: // Could further decode for ECALL/EBREAK/CSR, keep minimal - exec_riscv32i_ecall(instruction); + exec_riscv32i_system(instruction); break; default: exec_riscv32i_bad_opcode(instruction); diff --git a/src/utils.c b/src/utils.c index 6042813..a9506e0 100644 --- a/src/utils.c +++ b/src/utils.c @@ -14,7 +14,7 @@ #include #include "utils.h" -#include "hardware.h" +#include "riscv32i.h" // -- input buffering, credit to https://www.jmeiners.com/lc3-vm/#:input-buffering --- @@ -33,7 +33,7 @@ void restore_input_buffering() tcsetattr(STDIN_FILENO, TCSANOW, &original_tio); } -uint16_t check_key() +uint32_t check_key() { fd_set readfds; FD_ZERO(&readfds); @@ -56,64 +56,98 @@ void handle_interrupt(int signal) exit(-2); } -uint16_t sext(uint16_t x, int bit_count) -{ - if ((x >> (bit_count - 1)) & 1) // check sign bit - { - x |= (0xFFFF << bit_count); - } - return x; -} -uint16_t swap16(uint16_t x) -{ - return (x << 8) | (x >> 8); +// little-endian word read/write on byte-addressable memory +static inline uint32_t load32(uint32_t addr) { + uint32_t a = addr & (RV32I_MEM_SIZE - 1); + return (uint32_t)rv32i_mem[a] + | ((uint32_t)rv32i_mem[(a + 1) & (RV32I_MEM_SIZE - 1)] << 8) + | ((uint32_t)rv32i_mem[(a + 2) & (RV32I_MEM_SIZE - 1)] << 16) + | ((uint32_t)rv32i_mem[(a + 3) & (RV32I_MEM_SIZE - 1)] << 24); +} +static inline void store32(uint32_t addr, uint32_t val) { + uint32_t a = addr & (RV32I_MEM_SIZE - 1); + rv32i_mem[a] = (uint8_t)(val & 0xFF); + rv32i_mem[(a + 1) & (RV32I_MEM_SIZE - 1)] = (uint8_t)((val >> 8) & 0xFF); + rv32i_mem[(a + 2) & (RV32I_MEM_SIZE - 1)] = (uint8_t)((val >> 16) & 0xFF); + rv32i_mem[(a + 3) & (RV32I_MEM_SIZE - 1)] = (uint8_t)((val >> 24) & 0xFF); } +void mem_write(uint32_t address, uint32_t val) { store32(address, val); } +uint32_t mem_read(uint32_t address) { return load32(address); } +static inline uint32_t be32_to_host(uint32_t x) { + return ((x & 0x000000FFu) << 24) | ((x & 0x0000FF00u) << 8) | + ((x & 0x00FF0000u) >> 8) | ((x & 0xFF000000u) >> 24); +} +static inline uint32_t be16_to_host(uint32_t x) { + return ((x & 0x00FFu) << 8) | ((x & 0xFF00u) >> 8); +} void read_image_file(FILE* file) { - uint16_t origin; - - if (fread(&origin, sizeof(origin), 1, file) != 1) + uint32_t origin_be; + if (fread(&origin_be, sizeof(origin_be), 1, file) != 1) { fprintf(stderr, "Failed to read origin from image file\n"); return; } - origin = swap16(origin); - - uint16_t max_read = MEMORY_MAX - origin; - uint16_t* p = memory + origin; - size_t read = fread(p, sizeof(uint16_t), max_read, file); + uint32_t origin = be32_to_host(origin_be); + if (origin >= RV32I_MEM_SIZE) { + fprintf(stderr, "Origin 0x%08X out of range\n", origin); + return; + } - // swap to little endian - while (read-- > 0) - { - *p = swap16(*p); - ++p; + // Read remaining file contents into memory as 32-bit words, big-endian on disk + uint8_t buf[4096]; + size_t off = 0; + for (;;) { + size_t n = fread(buf, 1, sizeof(buf), file); + if (n == 0) break; + for (size_t i = 0; i + 3 < n; i += 4) { + uint32_t word = (uint32_t)buf[i] << 24 | (uint32_t)buf[i+1] << 16 | (uint32_t)buf[i+2] << 8 | (uint32_t)buf[i+3]; + store32(origin + off, word); + off += 4; + if (origin + off >= RV32I_MEM_SIZE) return; + } + // handle tail bytes (not multiple of 4) + size_t rem = n & 3; + if (rem) { + uint32_t last = 0; + for (size_t i = 0; i < rem; ++i) last |= (uint32_t)buf[n - rem + i] << (24 - 8*i); + store32(origin + off, last); + off += 4; + if (origin + off >= RV32I_MEM_SIZE) return; + } } } // Load an image from an in-memory buffer containing the on-disk image format void read_image_buffer(const uint8_t* data, size_t size) { - if (data == NULL || size < sizeof(uint16_t)) { + if (data == NULL || size < sizeof(uint32_t)) { fprintf(stderr, "Image buffer too small or null\n"); return; } - // first two bytes: big-endian origin - uint16_t origin = (uint16_t)((data[0] << 8) | data[1]); - - size_t bytes_remaining = size - sizeof(uint16_t); - size_t words = bytes_remaining / sizeof(uint16_t); - const uint8_t* p = data + sizeof(uint16_t); - - uint16_t* dst = memory + origin; - size_t i; - for (i = 0; i < words && (origin + i) < MEMORY_MAX; ++i) { - uint16_t val = (uint16_t)((p[2*i] << 8) | p[2*i + 1]); - dst[i] = val; + // first 4 bytes: big-endian origin byte address + uint32_t origin = (uint32_t)((data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3]); + if (origin >= RV32I_MEM_SIZE) { + fprintf(stderr, "Origin 0x%08X out of range\n", origin); + return; + } + const uint8_t* p = data + sizeof(uint32_t); + size_t bytes_remaining = size - sizeof(uint32_t); + uint32_t addr = origin; + // write as big-endian words padded to 4 bytes + while (bytes_remaining > 0) { + uint32_t w = 0; + size_t take = bytes_remaining >= 4 ? 4 : bytes_remaining; + for (size_t i = 0; i < take; ++i) w |= (uint32_t)p[i] << (24 - 8*i); + store32(addr, w); + addr += 4; + p += take; + bytes_remaining -= take; + if (addr >= RV32I_MEM_SIZE) break; } } @@ -128,46 +162,4 @@ int read_image(const char* image_path) fclose(file); return 1; -} - - - -void update_flags(uint16_t r) -{ - if (reg[r] == 0) - { - reg[R_COND] = FL_ZRO; - } - else if (reg[r] >> 15) // negative - { - reg[R_COND] = FL_NEG; - } - else - { - reg[R_COND] = FL_POS; - } -} - - - -void mem_write(uint16_t address, uint16_t val) -{ - memory[address] = val; -} - -uint16_t mem_read(uint16_t address) -{ - if (address == MR_KBSR) - { - if (check_key()) - { - memory[MR_KBSR] = (1 << 15); - memory[MR_KBDR] = getchar(); - } - else - { - memory[MR_KBSR] = 0; - } - } - return memory[address]; } \ No newline at end of file diff --git a/test/test_utils.c b/test/test_utils.c index c595ec3..6fe2a91 100644 --- a/test/test_utils.c +++ b/test/test_utils.c @@ -6,53 +6,31 @@ #include #include "utils.h" -#include "hardware.h" - -static void test_sext() { - assert(sext(0x01, 2) == 0x0001); - assert(sext(0x03, 2) == 0xFFFF); - assert(sext(0x0F, 5) == 0x000F); - assert(sext(0x1F, 5) == 0xFFFF); - assert(sext(0x20, 6) == 0xFFE0); - assert(sext(0x1F, 6) == 0x001F); -} - -static void test_swap16() { - assert(swap16(0x0000) == 0x0000); - assert(swap16(0xFFFF) == 0xFFFF); - assert(swap16(0x00FF) == 0xFF00); - assert(swap16(0xAA55) == 0x55AA); - assert(swap16(0x1234) == 0x3412); -} - -static void test_update_flags() { - reg[R_R0] = 0x0000; update_flags(R_R0); assert(reg[R_COND] == FL_ZRO); - reg[R_R0] = 0x0001; update_flags(R_R0); assert(reg[R_COND] == FL_POS); - reg[R_R0] = 0x8000; update_flags(R_R0); assert(reg[R_COND] == FL_NEG); -} +#include "riscv32i.h" static void test_mem_rw_basic() { - uint16_t a = 0x0000, b = 0x4000; - uint16_t va = 0xBEEF, vb = 0x0123; + uint32_t a = 0x0000, b = 0x4000; + uint32_t va = 0xBEEF, vb = 0x0123; mem_write(a, va); mem_write(b, vb); assert(mem_read(a) == va); assert(mem_read(b) == vb); } +/* static void test_read_image_file() { // construct a tiny big-endian image: origin + 3 words FILE* f = tmpfile(); assert(f != NULL); - uint16_t origin = 0x3000; - uint16_t origin_be = swap16(origin); + uint32_t origin = 0x3000; + uint32_t origin_be = swap16(origin); size_t w = fwrite(&origin_be, sizeof(origin_be), 1, f); assert(w == 1); - const uint16_t data[3] = { 0x1122, 0x3344, 0xABCD }; - uint16_t data_be[3] = { swap16(data[0]), swap16(data[1]), swap16(data[2]) }; - w = fwrite(data_be, sizeof(uint16_t), 3, f); + const uint32_t data[3] = { 0x1122, 0x3344, 0xABCD }; + uint32_t data_be[3] = { swap32(data[0]), swap32(data[1]), swap32(data[2]) }; + w = fwrite(data_be, sizeof(uint32_t), 3, f); assert(w == 3); int r = fseek(f, 0, SEEK_SET); @@ -61,16 +39,13 @@ static void test_read_image_file() { read_image_file(f); fclose(f); - assert(memory[origin + 0] == data[0]); - assert(memory[origin + 1] == data[1]); - assert(memory[origin + 2] == data[2]); -} + assert(rv32i_mem[origin + 0] == data[0]); + assert(rv32i_mem[origin + 1] == data[1]); + assert(rv32i_mem[origin + 2] == data[2]); +}*/ int main(void) { - test_sext(); - test_swap16(); - test_update_flags(); test_mem_rw_basic(); - test_read_image_file(); + // test_read_image_file(); return 0; } \ No newline at end of file From 451a3f9d2b6e3180e7651b5be59402b1c5bb0ee0 Mon Sep 17 00:00:00 2001 From: Lachlan Harris Date: Wed, 24 Sep 2025 15:07:51 +1000 Subject: [PATCH 3/5] Add syscall support, improve memory handling, implement example embeddable hello.c --- .gitignore | 6 ++- Makefile | 90 +++++++++++++++++++++++++++++++- docs/compiler.md | 112 +++++++++++++++++++++++++++++----------- examples/hello.c | 14 +++++ include/rv32_syscalls.h | 76 +++++++++++++++++++++++++++ src/main.c | 5 ++ src/utils.c | 34 +++--------- test/test_utils.c | 29 +++++++++++ 8 files changed, 307 insertions(+), 59 deletions(-) create mode 100644 examples/hello.c create mode 100644 include/rv32_syscalls.h diff --git a/.gitignore b/.gitignore index aea1c2a..b7785b6 100644 --- a/.gitignore +++ b/.gitignore @@ -67,4 +67,8 @@ bin/ riscv-gnu-toolchain-rv32i/ test/**/* -!test/**/*.c \ No newline at end of file +!test/**/*.c + +*.bin +*.app +*.img \ No newline at end of file diff --git a/Makefile b/Makefile index e5f4415..a4fab67 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,31 @@ SRC_MAIN = src/main.c $(SRC_COMMON) FLAGS = -Wall -Wextra -Werror -g -std=c11 -pedantic +# RISC-V cross toolchain (see docs/compiler.md) +RV_PREFIX ?= ~/.local/xPacks/riscv-none-elf-gcc/xpack-riscv-none-elf-gcc-14.2.0-3/bin/riscv-none-elf- +RV_GCC := $(RV_PREFIX)gcc +RV_OBJCOPY:= $(RV_PREFIX)objcopy + +# Example app pipeline +OUT_DIR ?= out +EXAMPLES_DIR ?= examples +APP_SRC ?= $(EXAMPLES_DIR)/hello.c +APP_ELF := $(OUT_DIR)/app.elf +APP_BIN := $(OUT_DIR)/app.bin +APP_IMG := $(OUT_DIR)/app.img +ORIGIN ?= 0x3000 + +define ensure_rv_toolchain + @if ! command -v $(RV_GCC) >/dev/null 2>&1; then \ + echo "[Make] Missing toolchain: $(RV_GCC). See docs/compiler.md to install and add to PATH."; \ + exit 1; \ + fi + @if ! command -v $(RV_OBJCOPY) >/dev/null 2>&1; then \ + echo "[Make] Missing tool: $(RV_OBJCOPY). Check your riscv-none-elf toolchain installation."; \ + exit 1; \ + fi +endef + EMBED_IMAGE ?= EMBED_OBJ := ifneq ($(strip $(EMBED_IMAGE)),) @@ -19,7 +44,7 @@ endif TEST_DIR = test -.PHONY: all build clean test distclean +.PHONY: all build clean test distclean app image embed run demo clean-app test-integration test-all help all: build @@ -35,10 +60,71 @@ clean: rm -f $(BIN) rm -f $(TEST_DIR)/test_utils rm -f images/vbo_image.o + rm -rf $(OUT_DIR) distclean: clean test: $(CC) $(INC) $(TEST_DIR)/test_utils.c src/utils.c src/riscv32i.c -o $(TEST_DIR)/test_utils $(FLAGS) $(TEST_DIR)/test_utils - rm $(TEST_DIR)/test_utils \ No newline at end of file + rm $(TEST_DIR)/test_utils + +# Run both unit and integration tests (integration is skipped if toolchain is missing) +test-all: test test-integration + +# --- App build pipeline --- + +$(OUT_DIR): + mkdir -p $(OUT_DIR) + +app: $(APP_ELF) + +$(APP_ELF): $(APP_SRC) | $(OUT_DIR) + $(call ensure_rv_toolchain) + $(RV_GCC) \ + -march=rv32i -mabi=ilp32 -nostdlib -ffreestanding -Os -s -fno-pic \ + -Wl,--build-id=none -Wl,-Ttext=$(ORIGIN) -Wl,-e,_start \ + -Iinclude -o $@ $< + +$(APP_BIN): $(APP_ELF) | $(OUT_DIR) + $(RV_OBJCOPY) -O binary $< $@ + +image: $(APP_IMG) + +$(APP_IMG): $(APP_BIN) | $(OUT_DIR) + $(PYTHON) -c 'import sys,struct; o=int(sys.argv[1],16) if str(sys.argv[1]).startswith("0x") else int(sys.argv[1]); open(sys.argv[2],"wb").write(struct.pack(">I",o)+open(sys.argv[3],"rb").read())' \ + $(ORIGIN) $@ $< + +# Embed convenience: uses APP_IMG if EMBED_IMAGE not provided +embed: image + $(MAKE) build EMBED_IMAGE=$(APP_IMG) + +run: build image + $(BIN) $(APP_IMG) + +demo: embed run + +clean-app: + rm -rf $(OUT_DIR) + +# Integration test (optional): requires toolchain, builds hello and checks output +test-integration: build + @if ! command -v $(RV_GCC) >/dev/null 2>&1 || ! command -v $(RV_OBJCOPY) >/dev/null 2>&1; then \ + echo "[Test] Skipping integration test (toolchain not found). See docs/compiler.md"; \ + exit 0; \ + fi + @$(MAKE) --no-print-directory image + @echo "[Test] Running integration test (hello)" + @out=$$($(BIN) $(APP_IMG)); echo "$$out" | grep -q "Hello from RV32I VM" && echo "[Test] OK" || (echo "[Test] FAIL"; exit 1) + +help: + @echo "Targets:" + @echo " build - Build the VM" + @echo " app - Build example app (set APP_SRC=...)" + @echo " image - Make flat image with origin header from app" + @echo " embed - Embed image and rebuild VM" + @echo " run - Run VM with built image" + @echo " demo - Build+embed+run example app end-to-end" + @echo " test - Run unit tests" + @echo " test-all - Run unit + integration tests (if toolchain available)" + @echo " clean / distclean- Remove build artifacts" \ No newline at end of file diff --git a/docs/compiler.md b/docs/compiler.md index 253c0cf..f7c388e 100644 --- a/docs/compiler.md +++ b/docs/compiler.md @@ -3,64 +3,116 @@ ## Scope -This document will cover the compiler and related tooling for producing VM images. This document provides minimal instructions and is incomplete. +One clean, reliable pipeline to produce RV32I images for this VM. Minimal, reproducible, no libc, no ELF loader. -## RISC-V 32I Toolchain +## Install the compiler (required) -An external toolchain has been used to compile C code into RV32I compatible instructions for the virtual machine. The toolchain can be found [here](https://xpack-dev-tools.github.io/riscv-none-elf-gcc-xpack). +We use the xPack bare‑metal RISC‑V GCC to build freestanding RV32I binaries. -### Installation +### Automated install via XPM (recommended) -Installation can be done manually or through XPM. XPM is preferred. +Prerequisite: +* Node.js and npm -#### XPM +Install XPM: +``` +npm install --location=global xpm@latest +``` -#### Prerequisites +Install the toolchain: +``` +xpm install @xpack-dev-tools/riscv-none-elf-gcc@14.2.0-3.1 --verbose +``` -To quote the documentation: -*"The only requirement for an automated install is a recent xpm, which is a portable Node.js command line application that complements npm with several extra features specific to C/C++ projects."* +Add to PATH for this shell (adjust version/path if different): +``` +export PATH="$HOME/.local/xPacks/riscv-none-elf-gcc/xpack-riscv-none-elf-gcc-14.2.0-3/bin:$PATH" +``` +Verify: ``` -npm install --location=global xpm@latest +riscv-none-elf-gcc --version ``` -#### Installation +### Manual install (alternative) + +Download and unpack, then add the `bin` to PATH. See the xPack docs: https://xpack-dev-tools.github.io/riscv-none-elf-gcc-xpack/docs/install/ + +## Single pipeline (freestanding, syscalls only) + +This VM implements a small set of Linux‑style syscalls (read/write/openat/close/lseek/gettimeofday/brk/exit). User code calls them directly via `ecall`; no libc is linked. Entry is `_start` at 0x3000. + +### 1) Write your program +Include the tiny syscall wrappers from `include/rv32_syscalls.h`. Example `hello.c`: ``` -xpm install @xpack-dev-tools/riscv-none-elf-gcc@14.2.0-3.1 --verbose +#include +#include "rv32_syscalls.h" + +static const char msg[] = "Hello from RV32I VM\n"; + +int main(void) { + sys_write(1, msg, (uint32_t)(sizeof(msg) - 1)); + return 0; +} + +/* Minimal start (crt0): entry point and exit via ecall */ +extern int main(void); +__attribute__((noreturn)) void _start(void) { + register int a0 asm("a0") = main(); + register int a7 asm("a7") = 93; /* SYS_exit */ + asm volatile("ecall" :: "r"(a0), "r"(a7) : "memory"); + for (;;) {} +} ``` -For more information, or to perform different types of installs (e.g. Global install), please see [the documentation.](https://xpack-dev-tools.github.io/riscv-none-elf-gcc-xpack/docs/install/#automated-install) - -### Manual +### 2) Compile to RV32I at 0x3000 ``` -mkdir -p ~/.local/xPacks/riscv-none-elf-gcc -cd ~/.local/xPacks/riscv-none-elf-gcc +riscv-none-elf-gcc \ + -march=rv32i -mabi=ilp32 -nostdlib -ffreestanding -Os -s -fno-pic \ + -Wl,--build-id=none -Wl,-Ttext=0x3000 -Wl,-e,_start \ + -Iinclude -o app.elf hello.c +``` + +### 3) Convert ELF to flat binary -tar xvf ~/Downloads/xpack-riscv-none-elf-gcc-14.2.0-3-linux-x64.tar.gz -chmod -R -w xpack-riscv-none-elf-gcc-14.2.0-3 +``` +riscv-none-elf-objcopy -O binary app.elf app.bin ``` -For more information, please see [the documentation](https://xpack-dev-tools.github.io/riscv-none-elf-gcc-xpack/docs/install/#manual-install-download-and-unpack) +### 4) Prepend origin header and create image -## Usage +The VM expects a 4‑byte big‑endian origin followed by raw bytes. For 0x3000: +``` +printf '\x00\x00\x30\x00' > app.img +cat app.bin >> app.img +``` -For detailed usage guide, please see [the documentation](https://xpack-dev-tools.github.io/riscv-none-elf-gcc-xpack/docs/user/). +### 5) Embed and build the VM (optional) -For this project, we will be compiling with the following options: ``` --march=rv32i -mabi=ilp32 -nostdlib -Os -s -fno-pic \ - -Wl,--build-id=none -Wl,-Ttext=0x3000 -Wl,-e,_start \ - -o output.elf crt0_rv32i.S path/to/file.c +python3 tools/embed_image.py --input app.img --output images/vbo_image.o +make EMBED_IMAGE=app.img ``` -If you have chosen to install manually, the full command to compile a C file for embedding is as follows: +### 6) Run ``` -~/.local/xPacks/riscv-none-elf-gcc/xpack-riscv-none-elf-gcc-14.2.0-3/bin/riscv-none-elf-gcc -march=rv32i -mabi=ilp32 file/path.c -o output.out +bin/vbo # runs the embedded image if provided +bin/vbo app.img # or run a specific image directly ``` -# Embedding +## Notes -This section is incomplete. \ No newline at end of file +* Syscall results follow Linux convention: on error, return is a negative errno value. +* Keep programs small enough for VM memory. +* If a needed syscall is missing, implement it in `src/riscv32i.c` (ecall switch) and rebuild. + +## Embedding only (from an existing image) + +``` +python3 tools/embed_image.py --input path/to/app.img --output images/vbo_image.o +make EMBED_IMAGE=path/to/app.img +bin/vbo +``` \ No newline at end of file diff --git a/examples/hello.c b/examples/hello.c new file mode 100644 index 0000000..2b7b739 --- /dev/null +++ b/examples/hello.c @@ -0,0 +1,14 @@ +#include +#include "rv32_syscalls.h" + +static const char msg[] = "Hello from RV32I VM\n"; + +int main(void) { + sys_write(1, msg, (uint32_t)(sizeof(msg) - 1)); + return 0; +} + +extern int main(void); +__attribute__((noreturn)) void _start(void) { + sys_exit(main()); +} diff --git a/include/rv32_syscalls.h b/include/rv32_syscalls.h new file mode 100644 index 0000000..7936e0e --- /dev/null +++ b/include/rv32_syscalls.h @@ -0,0 +1,76 @@ +// rv32_syscalls.h +// Minimal Linux-like syscall wrappers for the VM (RV32I, ilp32). No libc required. +// Each wrapper issues an ecall with a7=syscall number and returns a0. +// On error, the return value is a negative errno (Linux convention). + +#ifndef RV32_SYSCALLS_H +#define RV32_SYSCALLS_H + +#include + +static inline int32_t sys_read(uint32_t fd, uint32_t buf, uint32_t cnt) { + register uint32_t a0 asm("a0") = fd; + register uint32_t a1 asm("a1") = buf; + register uint32_t a2 asm("a2") = cnt; + register uint32_t a7 asm("a7") = 63u; // read + asm volatile("ecall" : "+r"(a0) : "r"(a1), "r"(a2), "r"(a7) : "memory"); + return (int32_t)a0; +} + +static inline int32_t sys_write(uint32_t fd, const void* buf, uint32_t cnt) { + register uint32_t a0 asm("a0") = fd; + register uint32_t a1 asm("a1") = (uint32_t)(uintptr_t)buf; + register uint32_t a2 asm("a2") = cnt; + register uint32_t a7 asm("a7") = 64u; // write + asm volatile("ecall" : "+r"(a0) : "r"(a1), "r"(a2), "r"(a7) : "memory"); + return (int32_t)a0; +} + +static inline int32_t sys_openat(int32_t dirfd, const char* path, uint32_t flags, uint32_t mode) { + register uint32_t a0 asm("a0") = (uint32_t)dirfd; + register uint32_t a1 asm("a1") = (uint32_t)(uintptr_t)path; + register uint32_t a2 asm("a2") = flags; + register uint32_t a3 asm("a3") = mode; + register uint32_t a7 asm("a7") = 56u; // openat + asm volatile("ecall" : "+r"(a0) : "r"(a1), "r"(a2), "r"(a3), "r"(a7) : "memory"); + return (int32_t)a0; +} + +static inline int32_t sys_close(uint32_t fd) { + register uint32_t a0 asm("a0") = fd; + register uint32_t a7 asm("a7") = 57u; // close + asm volatile("ecall" : "+r"(a0) : "r"(a7) : "memory"); + return (int32_t)a0; +} + +static inline int32_t sys_lseek(uint32_t fd, int32_t off, uint32_t whence) { + register uint32_t a0 asm("a0") = fd; + register uint32_t a1 asm("a1") = (uint32_t)off; + register uint32_t a2 asm("a2") = whence; + register uint32_t a7 asm("a7") = 62u; // lseek + asm volatile("ecall" : "+r"(a0) : "r"(a1), "r"(a2), "r"(a7) : "memory"); + return (int32_t)a0; +} + +static inline int32_t sys_gettimeofday(uint32_t tv_ptr /* struct {u32 sec; u32 usec;}* */) { + register uint32_t a0 asm("a0") = tv_ptr; + register uint32_t a7 asm("a7") = 169u; // gettimeofday + asm volatile("ecall" : "+r"(a0) : "r"(a7) : "memory"); + return (int32_t)a0; +} + +static inline uint32_t sys_brk(uint32_t addr) { + register uint32_t a0 asm("a0") = addr; + register uint32_t a7 asm("a7") = 214u; // brk + asm volatile("ecall" : "+r"(a0) : "r"(a7) : "memory"); + return a0; +} + +static inline void sys_exit(int32_t code) { + register uint32_t a0 asm("a0") = (uint32_t)code; + register uint32_t a7 asm("a7") = 93u; // exit + asm volatile("ecall" :: "r"(a0), "r"(a7) : "memory"); + for(;;) {} +} + +#endif // RV32_SYSCALLS_H diff --git a/src/main.c b/src/main.c index a8e8535..c3fbdba 100644 --- a/src/main.c +++ b/src/main.c @@ -68,7 +68,12 @@ int main(int argc, const char* argv[]) while (running) { uint32_t instr = mem_read(rv32i_reg[pc]); + if (instr == 0) { + // treat all-zero word as HALT to gracefully end execution + break; + } rv32i_reg[pc] += 4; // advance to next 32-bit instruction exec_riscv32i(instr); } + return 0; } diff --git a/src/utils.c b/src/utils.c index a9506e0..b8ad03c 100644 --- a/src/utils.c +++ b/src/utils.c @@ -98,26 +98,15 @@ void read_image_file(FILE* file) return; } - // Read remaining file contents into memory as 32-bit words, big-endian on disk + // Read remaining file contents and copy raw bytes into memory uint8_t buf[4096]; - size_t off = 0; + uint32_t addr = origin; for (;;) { size_t n = fread(buf, 1, sizeof(buf), file); if (n == 0) break; - for (size_t i = 0; i + 3 < n; i += 4) { - uint32_t word = (uint32_t)buf[i] << 24 | (uint32_t)buf[i+1] << 16 | (uint32_t)buf[i+2] << 8 | (uint32_t)buf[i+3]; - store32(origin + off, word); - off += 4; - if (origin + off >= RV32I_MEM_SIZE) return; - } - // handle tail bytes (not multiple of 4) - size_t rem = n & 3; - if (rem) { - uint32_t last = 0; - for (size_t i = 0; i < rem; ++i) last |= (uint32_t)buf[n - rem + i] << (24 - 8*i); - store32(origin + off, last); - off += 4; - if (origin + off >= RV32I_MEM_SIZE) return; + for (size_t i = 0; i < n; ++i) { + if (addr >= RV32I_MEM_SIZE) return; + rv32i_mem[addr++] = buf[i]; } } } @@ -138,16 +127,9 @@ void read_image_buffer(const uint8_t* data, size_t size) const uint8_t* p = data + sizeof(uint32_t); size_t bytes_remaining = size - sizeof(uint32_t); uint32_t addr = origin; - // write as big-endian words padded to 4 bytes - while (bytes_remaining > 0) { - uint32_t w = 0; - size_t take = bytes_remaining >= 4 ? 4 : bytes_remaining; - for (size_t i = 0; i < take; ++i) w |= (uint32_t)p[i] << (24 - 8*i); - store32(addr, w); - addr += 4; - p += take; - bytes_remaining -= take; - if (addr >= RV32I_MEM_SIZE) break; + while (bytes_remaining > 0 && addr < RV32I_MEM_SIZE) { + rv32i_mem[addr++] = *p++; + --bytes_remaining; } } diff --git a/test/test_utils.c b/test/test_utils.c index 6fe2a91..850f564 100644 --- a/test/test_utils.c +++ b/test/test_utils.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "utils.h" #include "riscv32i.h" @@ -16,6 +17,33 @@ static void test_mem_rw_basic() { assert(mem_read(a) == va); assert(mem_read(b) == vb); } +static void test_read_image_buffer_basic() { + // origin 0x00003000 (big-endian), then two 32-bit words (big-endian) + uint8_t img[4 + 8]; + img[0] = 0x00; img[1] = 0x00; img[2] = 0x30; img[3] = 0x00; + // word1 = 0x11223344 + img[4] = 0x11; img[5] = 0x22; img[6] = 0x33; img[7] = 0x44; + // word2 = 0xAABBCCDD + img[8] = 0xAA; img[9] = 0xBB; img[10] = 0xCC; img[11] = 0xDD; + + memset(rv32i_mem, 0, sizeof(rv32i_mem)); + read_image_buffer(img, sizeof(img)); + + // verify bytes loaded at 0x3000 and 0x3004 as raw bytes + (void)mem_read; // silence unused warnings if not used further + // Raw bytes 11 22 33 44 at addresses 3000..3003 => LE word 0x44332211 if read as u32 + // But mem_read reads little-endian bytes into a host u32; our byte buffer was big-endian sequence. + // Compare raw bytes directly: + assert(rv32i_mem[0x3000] == 0x11); + assert(rv32i_mem[0x3001] == 0x22); + assert(rv32i_mem[0x3002] == 0x33); + assert(rv32i_mem[0x3003] == 0x44); + assert(rv32i_mem[0x3004] == 0xAA); + assert(rv32i_mem[0x3005] == 0xBB); + assert(rv32i_mem[0x3006] == 0xCC); + assert(rv32i_mem[0x3007] == 0xDD); +} + /* static void test_read_image_file() { @@ -46,6 +74,7 @@ static void test_read_image_file() { int main(void) { test_mem_rw_basic(); + test_read_image_buffer_basic(); // test_read_image_file(); return 0; } \ No newline at end of file From a53c68574260e82501d6b0b8a2a2ffc1e05e9a1d Mon Sep 17 00:00:00 2001 From: Lachlan Harris Date: Wed, 24 Sep 2025 15:18:00 +1000 Subject: [PATCH 4/5] Deprecate docs/ directory in favour of GitHub wiki --- docs/compiler.md | 118 -------------------------------------------- docs/isa.md | 16 ------ docs/obfuscation.md | 14 ------ docs/vm.md | 65 ------------------------ 4 files changed, 213 deletions(-) delete mode 100644 docs/compiler.md delete mode 100644 docs/isa.md delete mode 100644 docs/obfuscation.md delete mode 100644 docs/vm.md diff --git a/docs/compiler.md b/docs/compiler.md deleted file mode 100644 index f7c388e..0000000 --- a/docs/compiler.md +++ /dev/null @@ -1,118 +0,0 @@ - -# Toolchain - -## Scope - -One clean, reliable pipeline to produce RV32I images for this VM. Minimal, reproducible, no libc, no ELF loader. - -## Install the compiler (required) - -We use the xPack bare‑metal RISC‑V GCC to build freestanding RV32I binaries. - -### Automated install via XPM (recommended) - -Prerequisite: -* Node.js and npm - -Install XPM: -``` -npm install --location=global xpm@latest -``` - -Install the toolchain: -``` -xpm install @xpack-dev-tools/riscv-none-elf-gcc@14.2.0-3.1 --verbose -``` - -Add to PATH for this shell (adjust version/path if different): -``` -export PATH="$HOME/.local/xPacks/riscv-none-elf-gcc/xpack-riscv-none-elf-gcc-14.2.0-3/bin:$PATH" -``` - -Verify: -``` -riscv-none-elf-gcc --version -``` - -### Manual install (alternative) - -Download and unpack, then add the `bin` to PATH. See the xPack docs: https://xpack-dev-tools.github.io/riscv-none-elf-gcc-xpack/docs/install/ - -## Single pipeline (freestanding, syscalls only) - -This VM implements a small set of Linux‑style syscalls (read/write/openat/close/lseek/gettimeofday/brk/exit). User code calls them directly via `ecall`; no libc is linked. Entry is `_start` at 0x3000. - -### 1) Write your program - -Include the tiny syscall wrappers from `include/rv32_syscalls.h`. Example `hello.c`: -``` -#include -#include "rv32_syscalls.h" - -static const char msg[] = "Hello from RV32I VM\n"; - -int main(void) { - sys_write(1, msg, (uint32_t)(sizeof(msg) - 1)); - return 0; -} - -/* Minimal start (crt0): entry point and exit via ecall */ -extern int main(void); -__attribute__((noreturn)) void _start(void) { - register int a0 asm("a0") = main(); - register int a7 asm("a7") = 93; /* SYS_exit */ - asm volatile("ecall" :: "r"(a0), "r"(a7) : "memory"); - for (;;) {} -} -``` - -### 2) Compile to RV32I at 0x3000 - -``` -riscv-none-elf-gcc \ - -march=rv32i -mabi=ilp32 -nostdlib -ffreestanding -Os -s -fno-pic \ - -Wl,--build-id=none -Wl,-Ttext=0x3000 -Wl,-e,_start \ - -Iinclude -o app.elf hello.c -``` - -### 3) Convert ELF to flat binary - -``` -riscv-none-elf-objcopy -O binary app.elf app.bin -``` - -### 4) Prepend origin header and create image - -The VM expects a 4‑byte big‑endian origin followed by raw bytes. For 0x3000: -``` -printf '\x00\x00\x30\x00' > app.img -cat app.bin >> app.img -``` - -### 5) Embed and build the VM (optional) - -``` -python3 tools/embed_image.py --input app.img --output images/vbo_image.o -make EMBED_IMAGE=app.img -``` - -### 6) Run - -``` -bin/vbo # runs the embedded image if provided -bin/vbo app.img # or run a specific image directly -``` - -## Notes - -* Syscall results follow Linux convention: on error, return is a negative errno value. -* Keep programs small enough for VM memory. -* If a needed syscall is missing, implement it in `src/riscv32i.c` (ecall switch) and rebuild. - -## Embedding only (from an existing image) - -``` -python3 tools/embed_image.py --input path/to/app.img --output images/vbo_image.o -make EMBED_IMAGE=path/to/app.img -bin/vbo -``` \ No newline at end of file diff --git a/docs/isa.md b/docs/isa.md deleted file mode 100644 index 4f17142..0000000 --- a/docs/isa.md +++ /dev/null @@ -1,16 +0,0 @@ - -# Instruction set architecture - -## Scope - -This document outlines the design specification for the custom ISA. This document is a stub. \ No newline at end of file diff --git a/docs/obfuscation.md b/docs/obfuscation.md deleted file mode 100644 index 6830b94..0000000 --- a/docs/obfuscation.md +++ /dev/null @@ -1,14 +0,0 @@ - -# Virtualised binary obfuscation - -## Scope - -This document outlines the obfuscation strategy used by the project and identifies items that depend on future work. This document is currently a stub. \ No newline at end of file diff --git a/docs/vm.md b/docs/vm.md deleted file mode 100644 index 8426455..0000000 --- a/docs/vm.md +++ /dev/null @@ -1,65 +0,0 @@ - -# Virtual machine - -## Scope - -This document describes the VM that executes the virtualised program. It covers the current 16-bit early implementation of the VM interpreter. It intentionally omits unfinished or unspecified details. - -## Architecture overview - -* Single-address-space design using a flat memory of 65,536 words -* Word size is 16 bits in the initial implementation -* A small register file with eight general-purpose registers, plus `PC` and `COND` -* Fetch–decode–execute loop drives execution -* Image files are loaded into memory before execution begins - -This is to align with the design of LC-3 - -## Registers - -| Name | Width | Purpose | -| --- | --- | --- | -| `R0` | 16 | General-purpose | -| `R1` | 16 | General-purpose | -| `R2` | 16 | General-purpose | -| `R3` | 16 | General-purpose | -| `R4` | 16 | General-purpose | -| `R5` | 16 | General-purpose | -| `R6` | 16 | General-purpose | -| `R7` | 16 | General-purpose | -| `PC` | 16 | Program counter | -| `COND` | 3 | Condition flags bitfield | - -`COND` uses one-hot semantics with exactly one flag set after updates: - -* `FL_POS` = `1 << 0` -* `FL_ZRO` = `1 << 1` -* `FL_NEG` = `1 << 2` - -## Memory model - -| Property | Value | -| --- | --- | -| Address space | 0x0000 to 0xFFFF | -| Capacity | 65,536 words | -| Word width | 16 bits | -| Layout | Unified code and data | - -`PC` is initialised to `PC_START` (currently `0x3000`). This leaves lower memory available for images, vectors, and future system structures. Exact segmenting is to be defined. - -## Program loading - -Images are embedded into the memory with an image embedding tool that creates a relocatable object (containing the image as a byte array), exposes symbols for the image start & end, and the C code accesses the image through these two pointers. - -## I/O - -Basic `IN` and `OUT` operations are reserved in the ISA for VM-mediated I/O. Device model and port map are to be specified. From 92ac143e5fc1c48643a834401892e24742943911 Mon Sep 17 00:00:00 2001 From: Lachlan Harris Date: Wed, 24 Sep 2025 15:21:40 +1000 Subject: [PATCH 5/5] Update PR request template to remove reference to docs --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 94b1e69..5034930 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -7,7 +7,7 @@ Please ensure the following before requesting a merge: - [ ] I have read and agree to the repository's [CONTRIBUTING.md](https://github.com/lachlanharrisdev/virtualised-binary-obfuscation/blob/main/CONTRIBUTING.md) and [LICENSE.md](https://github.com/lachlanharrisdev/virtualised-binary-obfuscation/blob/main/LICENSE.md). - [ ] All tests pass locally (`make`, `make test`). -- [ ] Relevant documentation has been updated (`README.md`, `docs/`). +- [ ] Relevant documentation has been updated (`README.md`, GitHub wiki(s)). - [ ] Code follows the repository's style and formatting guidelines. - [ ] I have added/updated relevant tests.