From 2839c0daff8f4436aa8bb8499440404d2697c894 Mon Sep 17 00:00:00 2001 From: zekexiao Date: Mon, 19 Jan 2026 23:10:09 +0800 Subject: [PATCH] Add vm --- CMakeLists.txt | 9 + src/camellya.h | 5 + src/chunk.cpp | 184 ++++++++++++ src/chunk.h | 66 +++++ src/compiler.cpp | 636 +++++++++++++++++++++++++++++++++++++++++ src/compiler.h | 101 +++++++ src/exceptions.h | 32 +++ src/interpreter.h | 7 +- src/opcode.h | 131 +++++++++ src/parser.h | 7 +- src/state.cpp | 55 +++- src/state.h | 20 +- src/value.h | 6 +- src/vm.cpp | 655 +++++++++++++++++++++++++++++++++++++++++++ src/vm.h | 75 +++++ tests/test_basic.cpp | 62 ++-- tests/test_vm.cpp | 269 ++++++++++++++++++ 17 files changed, 2274 insertions(+), 46 deletions(-) create mode 100644 src/chunk.cpp create mode 100644 src/chunk.h create mode 100644 src/compiler.cpp create mode 100644 src/compiler.h create mode 100644 src/exceptions.h create mode 100644 src/opcode.h create mode 100644 src/vm.cpp create mode 100644 src/vm.h create mode 100644 tests/test_vm.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 8efc8cc..978d708 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,9 @@ set(LIB_SOURCES src/value.cpp src/interpreter.cpp src/state.cpp + src/chunk.cpp + src/compiler.cpp + src/vm.cpp ) # Library headers @@ -26,6 +29,11 @@ set(LIB_HEADERS src/value.h src/interpreter.h src/state.h + src/chunk.h + src/compiler.h + src/vm.h + src/opcode.h + src/exceptions.h ) if(CAMELLYA_BUILD_STATIC) @@ -63,6 +71,7 @@ if(CAMELLYA_BUILD_TESTS) add_executable(camellya_tests tests/test_basic.cpp tests/test_utf8.cpp + tests/test_vm.cpp ) target_include_directories(camellya_tests diff --git a/src/camellya.h b/src/camellya.h index 58e3f2a..6d26121 100644 --- a/src/camellya.h +++ b/src/camellya.h @@ -11,6 +11,11 @@ #include "parser.h" #include "interpreter.h" #include "ast.h" +#include "vm.h" +#include "compiler.h" +#include "chunk.h" +#include "opcode.h" +#include "exceptions.h" namespace camellya { diff --git a/src/chunk.cpp 
b/src/chunk.cpp
new file mode 100644
index 0000000..cdc4976
--- /dev/null
+++ b/src/chunk.cpp
@@ -0,0 +1,201 @@
+#include "chunk.h"
+#include
+#include
+#include
+
+namespace camellya {
+
+void Chunk::write(uint8_t byte, int line) { // Append one byte; `lines` grows in lockstep with `code`.
+    code.push_back(byte);
+    lines.push_back(line);
+}
+
+void Chunk::write_opcode(OpCode op, int line) { // Convenience wrapper: append an opcode as a single byte.
+    write(static_cast(op), line);
+}
+
+size_t Chunk::add_constant(ValuePtr value) { // Append to the constant pool (no de-duplication) and return its index.
+    constants.push_back(value);
+    return constants.size() - 1;
+}
+
+ValuePtr Chunk::get_constant(size_t index) const { // Out-of-range indices yield a freshly allocated fallback value instead of failing.
+    if (index >= constants.size()) {
+        return std::make_shared();
+    }
+    return constants[index];
+}
+
+int Chunk::get_line(size_t offset) const { // Returns -1 when no line was recorded for `offset`.
+    if (offset >= lines.size()) {
+        return -1;
+    }
+    return lines[offset];
+}
+
+void Chunk::patch_jump(size_t offset) { // `offset` is the index of the first of the two placeholder operand bytes.
+    // Both operand bytes must already exist; otherwise the subtraction below wraps around.
+    if (code.size() < 2 || offset > code.size() - 2) {
+        throw std::runtime_error("Invalid jump patch offset.");
+    }
+
+    // Calculate the jump offset
+    // -2 to adjust for the bytecode for the jump offset itself
+    size_t jump = code.size() - offset - 2;
+
+    if (jump > UINT16_MAX) {
+        throw std::runtime_error("Too much code to jump over.");
+    }
+
+    code[offset] = (jump >> 8) & 0xff; // high byte first (big-endian operand)
+    code[offset + 1] = jump & 0xff;
+}
+
+void Chunk::disassemble(const std::string& name) const { // Print a header, then every instruction in order, to std::cout.
+    std::cout << "== " << name << " ==" << std::endl;
+
+    for (size_t offset = 0; offset < code.size();) {
+        offset = disassemble_instruction(offset); // each call returns the offset of the next instruction
+    }
+}
+
+size_t Chunk::disassemble_instruction(size_t offset) const { // Print the instruction at `offset`; returns the offset that follows it.
+    std::cout << std::format("{:04d} ", offset);
+
+    if (offset > 0 && get_line(offset) == get_line(offset - 1)) {
+        std::cout << " | "; // same source line as the previous byte
+    } else {
+        std::cout << std::format("{:4d} ", get_line(offset));
+    }
+
+    uint8_t instruction = code[offset];
+    OpCode op = static_cast(instruction);
+
+    switch (op) { // the helper chosen per opcode encodes its operand width (0, 1 or 2 bytes)
+        case OpCode::OP_CONSTANT:
+            return constant_instruction("OP_CONSTANT", offset);
+        case OpCode::OP_NIL:
+            return simple_instruction("OP_NIL", offset);
+        case OpCode::OP_TRUE:
+            return simple_instruction("OP_TRUE", offset);
+        case OpCode::OP_FALSE:
+            return simple_instruction("OP_FALSE", offset);
+        case OpCode::OP_ADD:
+            return simple_instruction("OP_ADD", offset);
+        case OpCode::OP_SUBTRACT:
+            return simple_instruction("OP_SUBTRACT", offset);
+        case OpCode::OP_MULTIPLY:
+            return simple_instruction("OP_MULTIPLY", offset);
+        case OpCode::OP_DIVIDE:
+            return simple_instruction("OP_DIVIDE", offset);
+        case OpCode::OP_MODULO:
+            return simple_instruction("OP_MODULO", offset);
+        case OpCode::OP_NEGATE:
+            return simple_instruction("OP_NEGATE", offset);
+        case OpCode::OP_EQUAL:
+            return simple_instruction("OP_EQUAL", offset);
+        case OpCode::OP_NOT_EQUAL:
+            return simple_instruction("OP_NOT_EQUAL", offset);
+        case OpCode::OP_GREATER:
+            return simple_instruction("OP_GREATER", offset);
+        case OpCode::OP_GREATER_EQUAL:
+            return simple_instruction("OP_GREATER_EQUAL", offset);
+        case OpCode::OP_LESS:
+            return simple_instruction("OP_LESS", offset);
+        case OpCode::OP_LESS_EQUAL:
+            return simple_instruction("OP_LESS_EQUAL", offset);
+        case OpCode::OP_NOT:
+            return simple_instruction("OP_NOT", offset);
+        case OpCode::OP_POP:
+            return simple_instruction("OP_POP", offset);
+        case OpCode::OP_POPN:
+            return byte_instruction("OP_POPN", offset);
+        case OpCode::OP_DUP:
+            return simple_instruction("OP_DUP", offset);
+        case OpCode::OP_GET_GLOBAL:
+            return constant_instruction("OP_GET_GLOBAL", offset);
+        case OpCode::OP_SET_GLOBAL:
+            return constant_instruction("OP_SET_GLOBAL", offset);
+        case OpCode::OP_DEFINE_GLOBAL:
+            return constant_instruction("OP_DEFINE_GLOBAL", offset);
+        case OpCode::OP_GET_LOCAL:
+            return byte_instruction("OP_GET_LOCAL", offset);
+        case OpCode::OP_SET_LOCAL:
+            return byte_instruction("OP_SET_LOCAL", offset);
+        case OpCode::OP_JUMP:
+            return jump_instruction("OP_JUMP", 1, offset);
+        case OpCode::OP_JUMP_IF_FALSE:
+            return jump_instruction("OP_JUMP_IF_FALSE", 1, offset);
+        case OpCode::OP_LOOP:
+            return jump_instruction("OP_LOOP", -1, offset);
+        case OpCode::OP_CALL:
+            return byte_instruction("OP_CALL", offset);
+        case OpCode::OP_RETURN:
+            return simple_instruction("OP_RETURN", offset);
+        case OpCode::OP_BUILD_LIST:
+            return byte_instruction("OP_BUILD_LIST", offset);
+        case OpCode::OP_BUILD_MAP:
+            return byte_instruction("OP_BUILD_MAP", offset);
+        case OpCode::OP_INDEX:
+            return simple_instruction("OP_INDEX", offset);
+        case OpCode::OP_INDEX_SET:
+            return simple_instruction("OP_INDEX_SET", offset);
+        case OpCode::OP_CLASS:
+            return constant_instruction("OP_CLASS", offset);
+        case OpCode::OP_GET_PROPERTY:
+            return constant_instruction("OP_GET_PROPERTY", offset);
+        case OpCode::OP_SET_PROPERTY:
+            return constant_instruction("OP_SET_PROPERTY", offset);
+        case OpCode::OP_METHOD:
+            return constant_instruction("OP_METHOD", offset);
+        case OpCode::OP_PRINT:
+            return simple_instruction("OP_PRINT", offset);
+        case OpCode::OP_HALT:
+            return simple_instruction("OP_HALT", offset);
+        default: // also reached for OP_AND, OP_OR, OP_CLOSURE and OP_INVOKE, which have no case above
+            std::cout << "Unknown opcode " << static_cast(instruction) << std::endl;
+            return offset + 1;
+    }
+}
+
+size_t Chunk::simple_instruction(const std::string& name, size_t offset) const { // Opcode with no operand.
+    std::cout << name << std::endl;
+    return offset + 1;
+}
+
+size_t Chunk::constant_instruction(const std::string& name, size_t offset) const { // Opcode + 1-byte constant-pool index.
+    if (offset + 1 >= code.size()) { // operand byte missing: stop instead of reading past the end
+        std::cout << name << " <truncated>" << std::endl;
+        return code.size();
+    }
+    uint8_t constant_idx = code[offset + 1];
+    std::cout << std::format("{:<16} {:4d} '", name, constant_idx);
+    if (constant_idx < constants.size()) { // an index outside the pool prints as empty quotes
+        std::cout << constants[constant_idx]->to_string();
+    }
+    std::cout << "'" << std::endl;
+    return offset + 2;
+}
+
+size_t Chunk::byte_instruction(const std::string& name, size_t offset) const { // Opcode + 1-byte raw operand (slot or count).
+    if (offset + 1 >= code.size()) { // operand byte missing: stop instead of reading past the end
+        std::cout << name << " <truncated>" << std::endl;
+        return code.size();
+    }
+    uint8_t slot = code[offset + 1];
+    std::cout << std::format("{:<16} {:4d}", name, slot) << std::endl;
+    return offset + 2;
+}
+
+size_t Chunk::jump_instruction(const std::string& name, int sign, size_t offset) const { // Opcode + 2-byte big-endian distance; sign is +1 (forward) or -1 (backward).
+    if (offset + 2 >= code.size()) { // one or both operand bytes missing
+        std::cout << name << " <truncated>" << std::endl;
+        return code.size();
+    }
+    uint16_t jump = (static_cast(code[offset + 1]) << 8) | code[offset + 2];
+    std::cout << std::format("{:<16} {:4d} -> {:4d}", name, offset,
+                             offset + 3 + sign * jump) << std::endl; // target is relative to the byte after the operand
+    return offset + 3;
+}
+
+} // namespace camellya
diff --git
a/src/chunk.h b/src/chunk.h
new file mode 100644
index 0000000..4265282
--- /dev/null
+++ b/src/chunk.h
@@ -0,0 +1,66 @@
+#ifndef CAMELLYA_CHUNK_H
+#define CAMELLYA_CHUNK_H
+
+#include "opcode.h"
+#include "value.h"
+#include
+#include
+#include
+
+namespace camellya {
+
+// A chunk is one unit of compiled bytecode: the instruction bytes, the
+// constant pool they index into, and one source-line entry per byte.
+class Chunk {
+public:
+    Chunk() = default;
+
+    // Append one byte to the bytecode and record `line` for it
+    void write(uint8_t byte, int line);
+
+    // Append an opcode as a single byte (forwards to write())
+    void write_opcode(OpCode op, int line);
+
+    // Append a value to the constant pool (duplicates are not merged)
+    // Returns the index of the newly added constant
+    size_t add_constant(ValuePtr value);
+
+    // Get a constant by pool index; an out-of-range index yields a newly allocated fallback value
+    ValuePtr get_constant(size_t index) const;
+
+    // Number of bytes of bytecode written so far
+    size_t size() const { return code.size(); }
+
+    // Get the bytecode byte at an index (not bounds-checked; caller must keep index < size())
+    uint8_t get_code(size_t index) const { return code[index]; }
+
+    // Get the line recorded for a bytecode offset, or -1 if the offset is out of range
+    int get_line(size_t offset) const;
+
+    // Print the whole chunk to std::cout under a "== name ==" header
+    void disassemble(const std::string& name) const;
+
+    // Print the instruction at `offset`; returns the offset of the next instruction
+    size_t disassemble_instruction(size_t offset) const;
+
+    // Overwrite the 2-byte operand at `offset` with the distance to the current end of code; throws std::runtime_error above UINT16_MAX
+    void patch_jump(size_t offset);
+
+    // Offset at which the next byte will be written (same value as size())
+    size_t current_offset() const { return code.size(); }
+
+private:
+    std::vector code; // Bytecode: opcodes followed by their operand bytes
+    std::vector constants; // Constant pool, indexed by 1-byte operands
+    std::vector lines; // One entry per byte in `code` (appended together in write())
+
+    // Disassembly helpers, one per operand layout; each returns the next offset
+    size_t simple_instruction(const std::string& name, size_t offset) const;
+    size_t constant_instruction(const std::string& name, size_t offset) const;
+    size_t byte_instruction(const std::string& name, size_t offset) const;
+    size_t jump_instruction(const std::string& name, int sign, size_t offset) const;
+};
+
+} // namespace camellya
+
+#endif // CAMELLYA_CHUNK_H
diff --git a/src/compiler.cpp b/src/compiler.cpp
new file mode 100644
index 0000000..f2a78c1
--- /dev/null
+++ b/src/compiler.cpp
@@ -0,0 +1,645 @@
+#include "compiler.h"
+#include
+#include
+
+namespace camellya {
+
+Compiler::Compiler()
+    : current_chunk(nullptr), scope_depth(0), had_error(false) {
+}
+
+std::shared_ptr Compiler::compile(const Program& program) { // Compile every top-level statement into a fresh chunk terminated by OP_HALT.
+    current_chunk = std::make_shared();
+    had_error = false;
+    error_message.clear();
+
+    try {
+        for (const auto& stmt : program.statements) {
+            compile_stmt(*stmt);
+        }
+
+        // Emit halt instruction at the end
+        emit_opcode(OpCode::OP_HALT);
+
+        if (had_error) { // set when a function/method body failed and its error was swallowed there
+            return nullptr;
+        }
+
+        return current_chunk;
+    } catch (const std::exception& e) {
+        report_error(e.what()); // NOTE(review): report_error() itself throws CompileError, so the exception escapes compile() from here
+        return nullptr; // NOTE(review): never reached because of the throw above - confirm which contract callers rely on
+    }
+}
+
+void Compiler::compile_expr(const Expr& expr) { // Dispatch on the dynamic type of the expression node.
+    if (auto* binary = dynamic_cast(&expr)) {
+        compile_binary(*binary);
+    } else if (auto* unary = dynamic_cast(&expr)) {
+        compile_unary(*unary);
+    } else if (auto* literal = dynamic_cast(&expr)) {
+        compile_literal(*literal);
+    } else if (auto* variable = dynamic_cast(&expr)) {
+        compile_variable(*variable);
+    } else if (auto* assign = dynamic_cast(&expr)) {
+        compile_assign(*assign);
+    } else if (auto* call = dynamic_cast(&expr)) {
+        compile_call(*call);
+    } else if (auto* get = dynamic_cast(&expr)) {
+        compile_get(*get);
+    } else if (auto* set = dynamic_cast(&expr)) {
+        compile_set(*set);
+    } else if (auto* index = dynamic_cast(&expr)) {
+        compile_index(*index);
+    } else if (auto* index_set = dynamic_cast(&expr)) {
+        compile_index_set(*index_set);
+    } else if (auto* list = dynamic_cast(&expr)) {
+        compile_list(*list);
+    } else if (auto* map = dynamic_cast(&expr)) {
+        compile_map(*map);
+    } else {
+        report_error("Unknown expression type");
+    }
+}
+
+void Compiler::compile_binary(const BinaryExpr& expr) { // 'and'/'or' become jumps; every other operator is left, right, opcode.
+    // Special handling for logical operators (short-circuit evaluation)
+    if (expr.op == "and") {
+        compile_expr(*expr.left);
+        size_t end_jump = emit_jump(OpCode::OP_JUMP_IF_FALSE); // falsey left: skip the right operand, left stays as the result
+        emit_opcode(OpCode::OP_POP);
+        compile_expr(*expr.right);
+        patch_jump(end_jump);
+        return;
+    }
+
+    if (expr.op == "or") {
+        compile_expr(*expr.left);
+        size_t else_jump = emit_jump(OpCode::OP_JUMP_IF_FALSE);
+        size_t end_jump = emit_jump(OpCode::OP_JUMP); // truthy left: skip the right operand, left stays as the result
+        patch_jump(else_jump);
+        emit_opcode(OpCode::OP_POP);
+        compile_expr(*expr.right);
+        patch_jump(end_jump);
+        return;
+    }
+
+    // Regular binary operators
+    compile_expr(*expr.left);
+    compile_expr(*expr.right);
+
+    if (expr.op == "+") {
+        emit_opcode(OpCode::OP_ADD);
+    } else if (expr.op == "-") {
+        emit_opcode(OpCode::OP_SUBTRACT);
+    } else if (expr.op == "*") {
+        emit_opcode(OpCode::OP_MULTIPLY);
+    } else if (expr.op == "/") {
+        emit_opcode(OpCode::OP_DIVIDE);
+    } else if (expr.op == "%") {
+        emit_opcode(OpCode::OP_MODULO);
+    } else if (expr.op == "==") {
+        emit_opcode(OpCode::OP_EQUAL);
+    } else if (expr.op == "!=") {
+        emit_opcode(OpCode::OP_NOT_EQUAL);
+    } else if (expr.op == ">") {
+        emit_opcode(OpCode::OP_GREATER);
+    } else if (expr.op == ">=") {
+        emit_opcode(OpCode::OP_GREATER_EQUAL);
+    } else if (expr.op == "<") {
+        emit_opcode(OpCode::OP_LESS);
+    } else if (expr.op == "<=") {
+        emit_opcode(OpCode::OP_LESS_EQUAL);
+    } else {
+        report_error("Unknown binary operator: " + expr.op);
+    }
+}
+
+void Compiler::compile_unary(const UnaryExpr& expr) { // Operand first, then OP_NEGATE or OP_NOT.
+    compile_expr(*expr.operand);
+
+    if (expr.op == "-") {
+        emit_opcode(OpCode::OP_NEGATE);
+    } else if (expr.op == "!") {
+        emit_opcode(OpCode::OP_NOT);
+    } else {
+        report_error("Unknown unary operator: " + expr.op);
+    }
+}
+
+void Compiler::compile_literal(const LiteralExpr& expr) { // Booleans and the fallback alternative get dedicated opcodes; the other two alternatives go through the constant pool.
+    std::visit([this](auto&& arg) {
+        using T = std::decay_t;
+        if constexpr (std::is_same_v) {
+            emit_constant(std::make_shared(arg));
+        } else if constexpr (std::is_same_v) {
+            emit_constant(std::make_shared(arg));
+        } else if constexpr (std::is_same_v) {
+            if (arg) {
+                emit_opcode(OpCode::OP_TRUE);
+            } else {
+                emit_opcode(OpCode::OP_FALSE);
+            }
+        } else {
+            emit_opcode(OpCode::OP_NIL);
+        }
+    }, expr.value);
+}
+
+void Compiler::compile_variable(const VariableExpr& expr) { // Locals are addressed by slot, everything else by global name.
+    int local = resolve_local(expr.name);
+    if (local != -1) {
+        emit_bytes(static_cast(OpCode::OP_GET_LOCAL), static_cast(local));
+    } else {
+        emit_bytes(static_cast(OpCode::OP_GET_GLOBAL), identifier_constant(expr.name));
+    }
+}
+
+void Compiler::compile_assign(const AssignExpr& expr) { // Value first, then a SET to the resolved local slot or global name.
+    compile_expr(*expr.value);
+
+    int local = resolve_local(expr.name);
+    if (local != -1) {
+        emit_bytes(static_cast(OpCode::OP_SET_LOCAL), static_cast(local));
+    } else {
+        emit_bytes(static_cast(OpCode::OP_SET_GLOBAL), identifier_constant(expr.name));
+    }
+}
+
+void Compiler::compile_call(const CallExpr& expr) { // Callee, then arguments in source order, then OP_CALL with a 1-byte argument count.
+    compile_expr(*expr.callee);
+
+    for (const auto& arg : expr.arguments) {
+        compile_expr(*arg);
+    }
+
+    if (expr.arguments.size() > UINT8_MAX) { // the count operand is one byte; larger values would be silently truncated
+        report_error("Too many arguments in call.");
+    }
+    emit_bytes(static_cast(OpCode::OP_CALL),
+               static_cast(expr.arguments.size()));
+}
+
+void Compiler::compile_get(const GetExpr& expr) { // Object, then OP_GET_PROPERTY with the property name as a constant.
+    compile_expr(*expr.object);
+    emit_bytes(static_cast(OpCode::OP_GET_PROPERTY),
+               identifier_constant(expr.name));
+}
+
+void Compiler::compile_set(const SetExpr& expr) { // Object, value, then OP_SET_PROPERTY with the property name as a constant.
+    compile_expr(*expr.object);
+    compile_expr(*expr.value);
+    emit_bytes(static_cast(OpCode::OP_SET_PROPERTY),
+               identifier_constant(expr.name));
+}
+
+void Compiler::compile_index(const IndexExpr& expr) { // Object, index, OP_INDEX.
+    compile_expr(*expr.object);
+    compile_expr(*expr.index);
+    emit_opcode(OpCode::OP_INDEX);
+}
+
+void Compiler::compile_index_set(const IndexSetExpr& expr) { // Object, index, value, OP_INDEX_SET.
+    compile_expr(*expr.object);
+    compile_expr(*expr.index);
+    compile_expr(*expr.value);
+    emit_opcode(OpCode::OP_INDEX_SET);
+}
+
+void Compiler::compile_list(const ListExpr& expr) { // Elements in order, then OP_BUILD_LIST with a 1-byte element count.
+    for (const auto& elem : expr.elements) {
+        compile_expr(*elem);
+    }
+    if (expr.elements.size() > UINT8_MAX) { // the count operand is one byte; larger values would be silently truncated
+        report_error("Too many elements in list literal.");
+    }
+    emit_bytes(static_cast(OpCode::OP_BUILD_LIST),
+               static_cast(expr.elements.size()));
+}
+
+void Compiler::compile_map(const MapExpr& expr) { // key, value per pair, then OP_BUILD_MAP with a 1-byte pair count.
+    for (const auto& [key, value] : expr.pairs) {
+        compile_expr(*key);
+        compile_expr(*value);
+    }
+    if (expr.pairs.size() > UINT8_MAX) { // the count operand is one byte; larger values would be silently truncated
+        report_error("Too many entries in map literal.");
+    }
+    emit_bytes(static_cast(OpCode::OP_BUILD_MAP),
+               static_cast(expr.pairs.size()));
+}
+
+void Compiler::compile_stmt(const Stmt& stmt) { // Dispatch on the dynamic type of the statement node.
+    if (auto* expr_stmt = dynamic_cast(&stmt)) {
+        compile_expr_stmt(*expr_stmt);
+    } else if (auto* var_decl = dynamic_cast(&stmt)) {
+        compile_var_decl(*var_decl);
+    } else if (auto* block = dynamic_cast(&stmt)) {
+        compile_block(*block);
+    } else if (auto* if_stmt = dynamic_cast(&stmt)) {
+        compile_if(*if_stmt);
+    } else if (auto* while_stmt = dynamic_cast(&stmt)) {
+        compile_while(*while_stmt);
+    } else if (auto* for_stmt = dynamic_cast(&stmt)) {
+        compile_for(*for_stmt);
+    } else if (auto* return_stmt = dynamic_cast(&stmt)) {
+        compile_return(*return_stmt);
+    } else if (auto* break_stmt = dynamic_cast(&stmt)) {
+        compile_break(*break_stmt);
+    } else if (auto* continue_stmt = dynamic_cast(&stmt)) {
+        compile_continue(*continue_stmt);
+    } else if (auto* func_decl = dynamic_cast(&stmt)) {
+        compile_function_decl(*func_decl);
+    } else if (auto* class_decl = dynamic_cast(&stmt)) {
+        compile_class_decl(*class_decl);
+    } else {
+        report_error("Unknown statement type");
+    }
+}
+
+void Compiler::compile_expr_stmt(const ExprStmt& stmt) { // Evaluate for side effects and discard the result.
+    compile_expr(*stmt.expression);
+    emit_opcode(OpCode::OP_POP);
+}
+
+void Compiler::compile_var_decl(const VarDecl& stmt) { // Push the initial value, then bind it as a global (depth 0) or leave it on the stack as a local.
+    if (stmt.initializer) {
+        compile_expr(*stmt.initializer);
+    } else if (!stmt.type_name.empty() && stmt.type_name != "number" &&
+               stmt.type_name != "string" && stmt.type_name != "bool" &&
+               stmt.type_name != "list" && stmt.type_name != "map") {
+        // It's a class type - emit code to get the class and call it
+        emit_bytes(static_cast(OpCode::OP_GET_GLOBAL),
+                   identifier_constant(stmt.type_name));
+        emit_bytes(static_cast(OpCode::OP_CALL), 0); // Call with 0 arguments
+    } else {
+        emit_opcode(OpCode::OP_NIL);
+    }
+
+    if (scope_depth == 0) {
+        // Global variable
+        emit_bytes(static_cast(OpCode::OP_DEFINE_GLOBAL),
+                   identifier_constant(stmt.name));
+    } else {
+        // Local variable
+        add_local(stmt.name);
+    }
+}
+
+void Compiler::compile_block(const BlockStmt& stmt) { // New scope around the statements; end_scope() pops its locals.
+    begin_scope();
+    for (const auto& statement : stmt.statements) {
+        compile_stmt(*statement);
+    }
+    end_scope();
+}
+
+void Compiler::compile_if(const IfStmt& stmt) { // Each path emits its own OP_POP for the condition value.
+    compile_expr(*stmt.condition);
+
+    size_t then_jump = emit_jump(OpCode::OP_JUMP_IF_FALSE);
+    emit_opcode(OpCode::OP_POP); // Pop condition if it's truthy
+    compile_stmt(*stmt.then_branch);
+
+    if (stmt.else_branch) {
+        size_t else_jump = emit_jump(OpCode::OP_JUMP);
+        patch_jump(then_jump);
+        emit_opcode(OpCode::OP_POP); // Pop condition if it's falsey
+        compile_stmt(*stmt.else_branch);
+        patch_jump(else_jump);
+    } else {
+        size_t end_jump = emit_jump(OpCode::OP_JUMP);
+        patch_jump(then_jump);
+        emit_opcode(OpCode::OP_POP); // Pop condition if it's falsey
+        patch_jump(end_jump);
+    }
+}
+
+void Compiler::compile_while(const WhileStmt& stmt) { // condition, exit jump, body, OP_LOOP back to the condition.
+    size_t loop_start = current_chunk->current_offset();
+
+    // Push loop info for break/continue
+    loops.push_back({loop_start, {}, scope_depth});
+
+    compile_expr(*stmt.condition);
+
+    size_t exit_jump = emit_jump(OpCode::OP_JUMP_IF_FALSE);
+    emit_opcode(OpCode::OP_POP);
+
+    compile_stmt(*stmt.body);
+    emit_loop(loop_start);
+
+    patch_jump(exit_jump);
+    emit_opcode(OpCode::OP_POP);
+
+    // Patch all break statements
+    for (size_t break_jump : loops.back().breaks) {
+        patch_jump(break_jump); // breaks land after the exit OP_POP above
+    }
+
+    loops.pop_back();
+}
+
+void Compiler::compile_for(const ForStmt& stmt) { // Layout: init, condition, jump-to-body, increment + loop-to-condition, body + loop-to-increment.
+    begin_scope();
+
+    if (stmt.initializer) {
+        compile_stmt(*stmt.initializer);
+    }
+
+    size_t loop_start = current_chunk->current_offset();
+
+    // Push loop info for break/continue
+    loops.push_back({loop_start, {}, scope_depth});
+
+    size_t exit_jump = 0; // only meaningful when a condition exists
+    if (stmt.condition) {
+        compile_expr(*stmt.condition);
+        exit_jump = emit_jump(OpCode::OP_JUMP_IF_FALSE);
+        emit_opcode(OpCode::OP_POP);
+    }
+
+    // Jump over increment for first iteration
+    size_t body_jump = emit_jump(OpCode::OP_JUMP);
+
+    size_t increment_start = current_chunk->current_offset();
+    if (stmt.increment) {
+        compile_expr(*stmt.increment);
+        emit_opcode(OpCode::OP_POP);
+    }
+    emit_loop(loop_start);
+
+    // Update loop start to increment (for continue)
+    loops.back().start = increment_start;
+
+    patch_jump(body_jump);
+    compile_stmt(*stmt.body);
+    emit_loop(increment_start);
+
+    if (stmt.condition) {
+        patch_jump(exit_jump);
+        emit_opcode(OpCode::OP_POP);
+    }
+
+    // Patch all break statements
+    for (size_t break_jump : loops.back().breaks) {
+        patch_jump(break_jump);
+    }
+
+    loops.pop_back();
+    end_scope();
+}
+
+void Compiler::compile_return(const ReturnStmt& stmt) { // A bare 'return' returns nil.
+    if (stmt.value) {
+        compile_expr(*stmt.value);
+    } else {
+        emit_opcode(OpCode::OP_NIL);
+    }
+    emit_opcode(OpCode::OP_RETURN);
+}
+
+void Compiler::compile_break(const BreakStmt& stmt) { // Pop the loop body's locals at runtime, then emit a forward jump patched when the loop ends.
+    if (loops.empty()) {
+        report_error("Cannot use 'break' outside of a loop.");
+        return; // not reached: report_error() throws
+    }
+
+    // Pop locals until we're at the loop's scope
+    for (int i = static_cast(locals.size()) - 1; i >= 0; i--) {
+        if (locals[i].depth <= loops.back().scope_depth) {
+            break;
+        }
+        emit_opcode(OpCode::OP_POP); // `locals` itself is left intact; code after the break still sees them
+    }
+
+    // Emit jump and record it for later patching
+    size_t jump = emit_jump(OpCode::OP_JUMP);
+    loops.back().breaks.push_back(jump);
+}
+
+void Compiler::compile_continue(const ContinueStmt& stmt) { // Pop the loop body's locals at runtime, then loop back to LoopInfo::start.
+    if (loops.empty()) {
+        report_error("Cannot use 'continue' outside of a loop.");
+        return; // not reached: report_error() throws
+    }
+
+    // Pop locals until we're at the loop's scope
+    for (int i = static_cast(locals.size()) - 1; i >= 0; i--) {
+        if (locals[i].depth <= loops.back().scope_depth) {
+            break;
+        }
+        emit_opcode(OpCode::OP_POP);
+    }
+
+    // Jump back to loop start
+    emit_loop(loops.back().start);
+}
+
+void Compiler::compile_function_decl(const FunctionDecl& stmt) { // Compile the body into its own chunk, then bind the function value by name.
+    // Save current state
+    auto prev_chunk = current_chunk;
+    auto prev_locals = std::move(locals);
+    auto prev_scope_depth = scope_depth;
+    auto prev_loops = std::move(loops);
+
+    // Setup new state for function
+    current_chunk = std::make_shared();
+    locals.clear();
+    loops.clear();
+    scope_depth = 0;
+
+    // Add an empty local for the function itself (or 'this') at slot 0
+    add_local("this");
+
+    // Add parameters as locals at depth 0
+    for (const auto& param : stmt.parameters) {
+        add_local(param.second);
+    }
+
+    // Compile body
+    try {
+        compile_stmt(*stmt.body);
+    } catch (const CompileError&) {
+        // Error already reported
+        had_error = true;
+    }
+
+    // Ensure function returns
+    emit_opcode(OpCode::OP_NIL);
+    emit_opcode(OpCode::OP_RETURN);
+
+    auto func_chunk = current_chunk;
+
+    // Restore state
+    current_chunk = prev_chunk;
+    locals = std::move(prev_locals);
+    scope_depth = prev_scope_depth;
+    loops = std::move(prev_loops);
+
+    auto func_decl = std::make_shared(stmt);
+    auto func = std::make_shared(stmt.name, func_decl, func_chunk);
+
+    emit_constant(func);
+
+    if (scope_depth == 0) {
+        emit_bytes(static_cast(OpCode::OP_DEFINE_GLOBAL),
+                   identifier_constant(stmt.name));
+    } else {
+        add_local(stmt.name);
+    }
+}
+
+void Compiler::compile_class_decl(const ClassDecl& stmt) { // Build the class value at compile time and define it as a global.
+    // Create class value with fields and methods
+    auto klass = std::make_shared(stmt.name);
+
+    // Add fields and methods to the class
+    for (const auto& member : stmt.members) {
+        if (auto* var_decl = dynamic_cast(member.get())) {
+            // Field declaration
+            klass->add_field(var_decl->name, var_decl->type_name);
+        } else if (auto* func_decl = dynamic_cast(member.get())) {
+            // Method declaration - compile to bytecode
+            auto prev_chunk = current_chunk;
+            auto prev_locals = std::move(locals);
+            auto prev_scope_depth = scope_depth;
+            auto prev_loops = std::move(loops);
+
+            current_chunk = std::make_shared();
+            locals.clear();
+            loops.clear();
+            scope_depth = 0;
+
+            add_local("this"); // slot 0, same layout as compile_function_decl
+            for (const auto& param : func_decl->parameters) {
+                add_local(param.second);
+            }
+
+            try {
+                compile_stmt(*func_decl->body);
+            } catch (const CompileError&) {
+                had_error = true;
+            }
+
+            if (func_decl->name == "init") {
+                emit_bytes(static_cast(OpCode::OP_GET_LOCAL), 0); // 'init' implicitly returns slot 0 ('this')
+            } else {
+                emit_opcode(OpCode::OP_NIL);
+            }
+            emit_opcode(OpCode::OP_RETURN);
+
+            auto method_chunk = current_chunk;
+
+            current_chunk = prev_chunk;
+            locals = std::move(prev_locals);
+            scope_depth = prev_scope_depth;
+            loops = std::move(prev_loops);
+
+            auto func_decl_ptr = std::make_shared(*func_decl);
+            auto func = std::make_shared(func_decl->name, func_decl_ptr, method_chunk);
+            klass->add_method(func_decl->name, func);
+        }
+    }
+
+    // Push the class as a constant and define it as a global
+    emit_constant(klass);
+    emit_bytes(static_cast(OpCode::OP_DEFINE_GLOBAL),
+               identifier_constant(stmt.name));
+}
+
+// Helper methods
+
+void Compiler::emit_byte(uint8_t byte) {
+    current_chunk->write(byte, 0); // Line number tracking could be improved
+}
+
+void Compiler::emit_opcode(OpCode op) {
+    emit_byte(static_cast(op));
+}
+
+void Compiler::emit_bytes(uint8_t byte1, uint8_t byte2) { // Typically an opcode followed by its 1-byte operand.
+    emit_byte(byte1);
+    emit_byte(byte2);
+}
+
+void Compiler::emit_constant(ValuePtr value) { // Add to the pool and emit OP_CONSTANT with its index.
+    emit_bytes(static_cast(OpCode::OP_CONSTANT), make_constant(value));
+}
+
+size_t Compiler::emit_jump(OpCode op) { // Emit `op` with a 0xffff placeholder; returns the offset of the placeholder for patch_jump().
+    emit_opcode(op);
+    emit_byte(0xff);
+    emit_byte(0xff);
+    return current_chunk->current_offset() - 2;
+}
+
+void Compiler::patch_jump(size_t offset) {
+    current_chunk->patch_jump(offset);
+}
+
+void Compiler::emit_loop(size_t loop_start) { // Emit OP_LOOP with the backward distance to `loop_start`.
+    emit_opcode(OpCode::OP_LOOP);
+
+    size_t offset = current_chunk->current_offset() - loop_start + 2; // +2 covers the operand bytes about to be written
+    if (offset > UINT16_MAX) {
+        report_error("Loop body too large.");
+        return; // not reached: report_error() throws
+    }
+
+    emit_byte((offset >> 8) & 0xff);
+    emit_byte(offset & 0xff);
+}
+
+void Compiler::begin_scope() {
+    scope_depth++;
+}
+
+void Compiler::end_scope() {
+    scope_depth--;
+
+    // Pop all local variables in this scope
+    while (!locals.empty() && locals.back().depth > scope_depth) {
+        emit_opcode(OpCode::OP_POP);
+        locals.pop_back();
+    }
+}
+
+void Compiler::add_local(const std::string& name) { // Register `name` at the current depth; its slot is its index in `locals`.
+    if (locals.size() >= UINT8_MAX) {
+        report_error("Too many local variables in scope.");
+        return; // not reached: report_error() throws
+    }
+
+    locals.push_back({name, scope_depth, false});
+}
+
+int Compiler::resolve_local(const std::string& name) { // Innermost declaration wins; -1 means "not a local".
+    for (int i = static_cast(locals.size()) - 1; i >= 0; i--) {
+        if (locals[i].name == name) {
+            return i;
+        }
+    }
+    return -1;
+}
+
+uint8_t Compiler::make_constant(ValuePtr value) { // Pool indices must fit the 1-byte operand.
+    size_t constant = current_chunk->add_constant(value);
+    if (constant > UINT8_MAX) {
+        report_error("Too many constants in one chunk.");
+        return 0; // not reached: report_error() throws
+    }
+    return static_cast(constant);
+}
+
+uint8_t Compiler::identifier_constant(const std::string& name) { // Every use of a name adds a new pool entry (no de-duplication).
+    return make_constant(std::make_shared(name));
+}
+
+void Compiler::report_error(const std::string& message) { // Records the message, then always throws CompileError.
+    had_error = true;
+    error_message = message;
+    throw CompileError(message);
+}
+
+} // namespace camellya
diff --git a/src/compiler.h b/src/compiler.h
new file mode 100644
index 0000000..e0b808c
--- /dev/null
+++ b/src/compiler.h
@@ -0,0 +1,101 @@
+#ifndef CAMELLYA_COMPILER_H
+#define CAMELLYA_COMPILER_H
+
+#include "ast.h"
+#include "chunk.h"
+#include "value.h"
+#include "exceptions.h"
+#include
+#include
+#include
+
+namespace camellya {
+
+// Local variable information (a local's stack slot is its index in Compiler::locals)
+struct Local {
+    std::string name;
+    int depth; // scope depth at declaration
+    bool is_captured; // always set to false by add_local() in compiler.cpp
+};
+
+// Loop information for break/continue
+struct LoopInfo {
+    size_t start; // Loop start position (for continue)
+    std::vector breaks; // Break jump positions to patch
+    int scope_depth; // Scope depth at loop start
+};
+
+// Compiler class for converting AST to bytecode
+class Compiler {
+public:
+    Compiler();
+
+    // Compile a program into a chunk; NOTE(review): errors currently surface as a thrown CompileError, see compile() in compiler.cpp
+    std::shared_ptr compile(const Program& program);
+
+    // Get the last error message
+    const std::string& get_error() const { return error_message; }
+
+private:
+    std::shared_ptr current_chunk; // chunk currently being written (swapped while compiling function bodies)
+    std::vector locals;
+    std::vector loops; // Stack of loop information
+    int scope_depth;
+    std::string error_message;
+    bool had_error;
+
+    // Compilation methods for expressions
+    void compile_expr(const Expr& expr);
+    void compile_binary(const BinaryExpr& expr);
+    void compile_unary(const UnaryExpr& expr);
+    void compile_literal(const LiteralExpr& expr);
+    void compile_variable(const VariableExpr& expr);
+    void compile_assign(const AssignExpr& expr);
+    void compile_call(const CallExpr& expr);
+    void compile_get(const GetExpr& expr);
+    void compile_set(const SetExpr& expr);
+    void compile_index(const IndexExpr& expr);
+    void compile_index_set(const IndexSetExpr& expr);
+    void compile_list(const ListExpr& expr);
+    void compile_map(const MapExpr& expr);
+
+    // Compilation methods for statements
+    void compile_stmt(const Stmt& stmt);
+    void compile_expr_stmt(const ExprStmt& stmt);
+    void compile_var_decl(const VarDecl& stmt);
+    void compile_block(const BlockStmt& stmt);
+    void compile_if(const IfStmt& stmt);
+    void compile_while(const WhileStmt& stmt);
+    void compile_for(const ForStmt& stmt);
+    void compile_return(const ReturnStmt& stmt);
+    void compile_break(const BreakStmt& stmt);
+    void compile_continue(const ContinueStmt& stmt);
+    void compile_function_decl(const FunctionDecl& stmt);
+    void compile_class_decl(const ClassDecl& stmt);
+
+    // Helper methods
+    void emit_byte(uint8_t byte);
+    void emit_opcode(OpCode op);
+    void emit_bytes(uint8_t byte1, uint8_t byte2);
+    void emit_constant(ValuePtr value);
+    size_t emit_jump(OpCode op);
+    void patch_jump(size_t offset);
+    void emit_loop(size_t loop_start);
+
+    // Variable management
+    void begin_scope();
+    void end_scope();
+    void add_local(const std::string& name);
+    int resolve_local(const std::string& name);
+
+    // Constant pool
+    uint8_t make_constant(ValuePtr value);
+    uint8_t identifier_constant(const std::string& name);
+
+    // Error handling (always throws CompileError)
+    void report_error(const std::string& message);
+};
+
+} // namespace camellya
+
+#endif // CAMELLYA_COMPILER_H
diff --git a/src/exceptions.h b/src/exceptions.h
new file
mode 100644
index 0000000..7711a06
--- /dev/null
+++ b/src/exceptions.h
@@ -0,0 +1,32 @@
+#ifndef CAMELLYA_EXCEPTIONS_H
+#define CAMELLYA_EXCEPTIONS_H
+
+#include
+#include
+
+namespace camellya {
+
+// Runtime error for both interpreter and VM (moved here from interpreter.h by this patch)
+class RuntimeError : public std::runtime_error {
+public:
+    explicit RuntimeError(const std::string& message)
+        : std::runtime_error(message) {}
+};
+
+// Compile-time error, thrown by Compiler::report_error()
+class CompileError : public std::runtime_error {
+public:
+    explicit CompileError(const std::string& message)
+        : std::runtime_error(message) {}
+};
+
+// Parse error (moved here from parser.h by this patch)
+class ParseError : public std::runtime_error {
+public:
+    explicit ParseError(const std::string& message)
+        : std::runtime_error(message) {}
+};
+
+} // namespace camellya
+
+#endif // CAMELLYA_EXCEPTIONS_H
diff --git a/src/interpreter.h b/src/interpreter.h
index c0fa6ce..8734744 100644
--- a/src/interpreter.h
+++ b/src/interpreter.h
@@ -3,17 +3,12 @@
 
 #include "ast.h"
 #include "value.h"
+#include "exceptions.h"
 #include
 #include
-#include
 
 namespace camellya {
 
-class RuntimeError : public std::runtime_error {
-public:
-    explicit RuntimeError(const std::string& message) : std::runtime_error(message) {}
-};
-
 class ReturnException : public std::exception {
 public:
     ValuePtr value;
diff --git a/src/opcode.h b/src/opcode.h
new file mode 100644
index 0000000..74cd6bf
--- /dev/null
+++ b/src/opcode.h
@@ -0,0 +1,131 @@
+#ifndef CAMELLYA_OPCODE_H
+#define CAMELLYA_OPCODE_H
+
+#include
+#include
+
+namespace camellya {
+
+// Bytecode operation codes; each value is written to a chunk as a single byte
+enum class OpCode : uint8_t {
+    // Constants and literals
+    OP_CONSTANT, // Load constant from constant pool (1-byte pool index operand)
+    OP_NIL, // Push nil
+    OP_TRUE, // Push true
+    OP_FALSE, // Push false
+
+    // Arithmetic operations
+    OP_ADD, // Binary +
+    OP_SUBTRACT, // Binary -
+    OP_MULTIPLY, // Binary *
+    OP_DIVIDE, // Binary /
+    OP_MODULO, // Binary %
+    OP_NEGATE, // Unary -
+
+    // Comparison operations
+    OP_EQUAL, // ==
+    OP_NOT_EQUAL, // !=
+    OP_GREATER, // >
+    OP_GREATER_EQUAL, // >=
+    OP_LESS, // <
+    OP_LESS_EQUAL, // <=
+
+    // Logical operations
+    OP_NOT, // Unary !
+    OP_AND, // and (Compiler::compile_binary lowers 'and' to jumps instead of emitting this)
+    OP_OR, // or (Compiler::compile_binary lowers 'or' to jumps instead of emitting this)
+
+    // Variables
+    OP_GET_GLOBAL, // Get global variable (1-byte constant index of the name)
+    OP_SET_GLOBAL, // Set global variable (1-byte constant index of the name)
+    OP_DEFINE_GLOBAL, // Define global variable (1-byte constant index of the name)
+    OP_GET_LOCAL, // Get local variable (1-byte slot operand)
+    OP_SET_LOCAL, // Set local variable (1-byte slot operand)
+
+    // Control flow
+    OP_JUMP, // Unconditional jump (2-byte big-endian forward distance)
+    OP_JUMP_IF_FALSE, // Jump if top of stack is false (2-byte forward distance; the compiler emits a separate OP_POP for the condition)
+    OP_LOOP, // Loop back (2-byte big-endian backward distance)
+
+    // Functions
+    OP_CALL, // Call function with N arguments (1-byte N)
+    OP_RETURN, // Return from function
+    OP_CLOSURE, // Create closure (no case in Chunk::disassemble_instruction)
+
+    // Collections
+    OP_BUILD_LIST, // Build list from N stack values (1-byte N)
+    OP_BUILD_MAP, // Build map from N key-value pairs (1-byte N)
+    OP_INDEX, // Index access obj[index]
+    OP_INDEX_SET, // Index assignment obj[index] = value
+
+    // Object-oriented
+    OP_CLASS, // Define class
+    OP_GET_PROPERTY, // Get object property (1-byte constant index of the name)
+    OP_SET_PROPERTY, // Set object property (1-byte constant index of the name)
+    OP_METHOD, // Define method
+    OP_INVOKE, // Optimized method call (no case in Chunk::disassemble_instruction)
+
+    // Stack operations
+    OP_POP, // Pop and discard top of stack
+    OP_POPN, // Pop N values from stack (disassembled with a 1-byte operand)
+    OP_DUP, // Duplicate top of stack
+
+    // Other
+    OP_PRINT, // Built-in print (for debugging)
+    OP_HALT, // Halt execution (emitted by Compiler::compile at the end of the top-level chunk)
+};
+
+// Get human-readable name for opcode; byte values outside the enum map to "UNKNOWN"
+inline std::string opcode_name(OpCode op) {
+    switch (op) {
+        case OpCode::OP_CONSTANT: return "OP_CONSTANT";
+        case OpCode::OP_NIL: return "OP_NIL";
+        case OpCode::OP_TRUE: return "OP_TRUE";
+        case OpCode::OP_FALSE: return "OP_FALSE";
+        case OpCode::OP_ADD: return "OP_ADD";
+        case OpCode::OP_SUBTRACT: return "OP_SUBTRACT";
+        case OpCode::OP_MULTIPLY: return "OP_MULTIPLY";
+        case OpCode::OP_DIVIDE: return "OP_DIVIDE";
+        case OpCode::OP_MODULO: return "OP_MODULO";
+        case OpCode::OP_NEGATE: return "OP_NEGATE";
+        case OpCode::OP_EQUAL: return "OP_EQUAL";
+        case OpCode::OP_NOT_EQUAL: return "OP_NOT_EQUAL";
+        case OpCode::OP_GREATER: return "OP_GREATER";
+        case OpCode::OP_GREATER_EQUAL: return "OP_GREATER_EQUAL";
+        case OpCode::OP_LESS: return "OP_LESS";
+        case OpCode::OP_LESS_EQUAL: return "OP_LESS_EQUAL";
+        case OpCode::OP_NOT: return "OP_NOT";
+        case OpCode::OP_AND: return "OP_AND";
+        case OpCode::OP_OR: return "OP_OR";
+        case OpCode::OP_GET_GLOBAL: return "OP_GET_GLOBAL";
+        case OpCode::OP_SET_GLOBAL: return "OP_SET_GLOBAL";
+        case OpCode::OP_DEFINE_GLOBAL: return "OP_DEFINE_GLOBAL";
+        case OpCode::OP_GET_LOCAL: return "OP_GET_LOCAL";
+        case OpCode::OP_SET_LOCAL: return "OP_SET_LOCAL";
+        case OpCode::OP_JUMP: return "OP_JUMP";
+        case OpCode::OP_JUMP_IF_FALSE: return "OP_JUMP_IF_FALSE";
+        case OpCode::OP_LOOP: return "OP_LOOP";
+        case OpCode::OP_CALL: return "OP_CALL";
+        case OpCode::OP_RETURN: return "OP_RETURN";
+        case OpCode::OP_CLOSURE: return "OP_CLOSURE";
+        case OpCode::OP_BUILD_LIST: return "OP_BUILD_LIST";
+        case OpCode::OP_BUILD_MAP: return "OP_BUILD_MAP";
+        case OpCode::OP_INDEX: return "OP_INDEX";
+        case OpCode::OP_INDEX_SET: return "OP_INDEX_SET";
+        case OpCode::OP_CLASS: return "OP_CLASS";
+        case OpCode::OP_GET_PROPERTY: return "OP_GET_PROPERTY";
+        case OpCode::OP_SET_PROPERTY: return "OP_SET_PROPERTY";
+        case OpCode::OP_METHOD: return "OP_METHOD";
+        case OpCode::OP_INVOKE: return "OP_INVOKE";
+        case OpCode::OP_POP: return "OP_POP";
+        case OpCode::OP_POPN: return "OP_POPN";
+        case OpCode::OP_DUP: return "OP_DUP";
+        case OpCode::OP_PRINT: return "OP_PRINT";
+        case OpCode::OP_HALT: return "OP_HALT";
+        default: return "UNKNOWN";
+    }
+}
+
+} // namespace camellya
+
+#endif // CAMELLYA_OPCODE_H
diff --git a/src/parser.h b/src/parser.h
index dffc1d2..6c2f747 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -3,15 +3,10 @@
 
 #include "lexer.h"
 #include "ast.h"
-#include
+#include "exceptions.h"
 
 namespace camellya {
 
-class ParseError : public std::runtime_error {
-public:
-    explicit ParseError(const std::string& message) : std::runtime_error(message) {}
-};
-
 class Parser {
 public:
     explicit Parser(std::vector tokens);
diff --git a/src/state.cpp b/src/state.cpp
index 0a6c0bb..d842b3b 100644
--- a/src/state.cpp
+++ b/src/state.cpp
@@ -4,7 +4,11 @@
 
 namespace camellya {
 
-State::State() : interpreter_(std::make_unique<Interpreter>()) {}
+State::State(ExecutionMode mode)
+    : execution_mode_(mode),
+      interpreter_(std::make_unique<Interpreter>()),
+      vm_(std::make_unique<VM>()),
+      compiler_(std::make_unique<Compiler>()) {}
 
 bool State::do_string(const std::string& script) {
     try {
@@ -14,10 +18,11 @@ bool State::do_string(const std::string& script) {
         Parser parser(std::move(tokens));
         Program program = parser.parse();
 
-        interpreter_->execute(program);
-
-        last_error_.clear();
-        return true;
+        bool success = execute_program(program);
+        if (success) {
+            last_error_.clear();
+        }
+        return success;
     } catch (const std::exception& e) {
         last_error_ = e.what();
         return false;
@@ -38,15 +43,23 @@ bool State::do_file(const std::string& filename) {
 
 void State::register_function(const std::string& name, NativeFunction func) {
     auto func_value = std::make_shared<FunctionValue>(name, func);
-    interpreter_->global_environment->define(name, func_value);
+    // Register with both backends so the function survives set_execution_mode()
+    interpreter_->global_environment->define(name, func_value);
+    vm_->register_native_function(name, func);
 }
 
 ValuePtr State::get_global(const std::string& name) {
-    return interpreter_->global_environment->get(name);
+    if (execution_mode_ == ExecutionMode::INTERPRETER) {
+        return interpreter_->global_environment->get(name);
+    } else {
+        return vm_->get_global(name);
+    }
 }
 
 void State::set_global(const std::string& name, ValuePtr value) {
-    interpreter_->global_environment->define(name, value);
+    // Mirror the value into both backends so a later mode switch still sees it
+    interpreter_->global_environment->define(name, value);
+    vm_->set_global(name, value);
 }
 
 void State::push_number(double value) {
@@ -132,4 +145,25 @@ ValuePtr State::get_stack_value(int index) {
     return stack_[index];
 }
 
+bool State::execute_program(const Program& program) {
+    if (execution_mode_ == ExecutionMode::INTERPRETER) {
+        // Use tree-walking interpreter; failures surface as exceptions caught by the caller
+        interpreter_->execute(program);
+        return true;
+    } else {
+        // Use VM: compile to bytecode first, then run it
+        auto chunk = compiler_->compile(program);
+        if (!chunk) {
+            last_error_ = compiler_->get_error();
+            return false;
+        }
+
+        bool success = vm_->execute(chunk);
+        if (!success) {
+            last_error_ = vm_->get_error();
+        }
+        return success;
+    }
+}
+
 } // namespace camellya
diff --git a/src/state.h b/src/state.h
index 75ff4e4..3f01278 100644
--- a/src/state.h
+++ b/src/state.h
@@ -4,18 +4,30 @@
 #include "lexer.h"
 #include "parser.h"
 #include "interpreter.h"
+#include "vm.h"
+#include "compiler.h"
 #include "value.h"
 #include
 #include
 
 namespace camellya {
 
+// Execution mode
+enum class ExecutionMode {
+    INTERPRETER,  // Tree-walking interpreter
+    VM            // Bytecode VM
+};
+
 // Main state class - similar to lua_State
 class State {
 public:
-    State();
+    State(ExecutionMode mode = ExecutionMode::VM);
     ~State() = default;
 
+    // Set execution mode
+    void set_execution_mode(ExecutionMode mode) { execution_mode_ = mode; }
+    ExecutionMode get_execution_mode() const { return execution_mode_; }
+
     // Execute script from string
     bool do_string(const std::string& script);
 
@@ -51,11 +63,17 @@ public:
     const std::string& get_error() const { return last_error_; }
 
 private:
+    ExecutionMode execution_mode_;
     std::unique_ptr<Interpreter> interpreter_;
+    std::unique_ptr<VM> vm_;
+    std::unique_ptr<Compiler> compiler_;
     std::vector<ValuePtr> stack_;
     std::string last_error_;
 
     ValuePtr get_stack_value(int index);
+
+    // Helper for execution
+    bool execute_program(const Program& program);
 };
 
 } // namespace camellya
diff --git a/src/value.h b/src/value.h
index 3c2510f..55ea2f6 100644
--- a/src/value.h
+++ b/src/value.h
@@ -106,6 +106,7 @@ public:
 };
 
 // Forward declarations
+class Chunk;
 struct FunctionDecl;
 class ClassValue;
 class InstanceValue;
@@ -116,15 +117,18 @@ public:
     std::vector> parameters;
     std::string return_type;
     std::shared_ptr<FunctionDecl> declaration;
+    std::shared_ptr<Chunk> chunk;
     NativeFunction native_func;
     bool is_native;
     std::shared_ptr<InstanceValue> bound_instance;
 
     // Script function
     FunctionValue(std::string name, std::shared_ptr<FunctionDecl> declaration,
+                  std::shared_ptr<Chunk> chunk = nullptr,
                   std::shared_ptr<InstanceValue> bound_instance = nullptr)
         : name(std::move(name)), declaration(std::move(declaration)),
-          is_native(false), bound_instance(std::move(bound_instance)) {}
+          chunk(std::move(chunk)), is_native(false),
+          bound_instance(std::move(bound_instance)) {}
 
     // Native function
     FunctionValue(std::string name, NativeFunction func)
diff --git a/src/vm.cpp b/src/vm.cpp
new file mode 100644
index 0000000..a9f1908
--- /dev/null
+++ b/src/vm.cpp
@@ -0,0 +1,663 @@
+#include "vm.h"
+#include "interpreter.h"
+#include <cmath>
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace camellya {
+
+VM::VM() : current_frame(nullptr) {
+    register_builtin_functions();
+}
+
+bool VM::execute(std::shared_ptr<Chunk> chunk) {
+    if (!chunk) {
+        runtime_error("Cannot execute null chunk");
+        return false;
+    }
+
+    frames.clear();
+    frames.emplace_back(chunk, 0);
+    current_frame = &frames.back();
+    stack.clear();
+
+    try {
+        return run();
+    } catch (const std::exception& e) {
+        runtime_error(e.what());
+        return false;
+    }
+}
+
+bool VM::run() {
+    while (true) {
+        // Debug: print instruction
+        #ifdef DEBUG_TRACE_EXECUTION
+        std::cout << "Stack: ";
+        for (const auto& value : stack) {
+            std::cout << "[ " << value->to_string() << " ]";
+        }
+        std::cout << std::endl;
+        current_frame->chunk->disassemble_instruction(current_frame->ip);
+        #endif
+
+        OpCode instruction = static_cast<OpCode>(read_byte());
+
+        switch (instruction) {
+            case OpCode::OP_CONSTANT: {
+                ValuePtr constant = read_constant();
+                push(constant);
+                break;
+            }
+
+            case OpCode::OP_NIL:
+                push(std::make_shared<NilValue>());
+                break;
+
+            case OpCode::OP_TRUE:
+                push(std::make_shared<BoolValue>(true));
+                break;
+
+            case OpCode::OP_FALSE:
+                push(std::make_shared<BoolValue>(false));
+                break;
+
+            case OpCode::OP_ADD:
+            case OpCode::OP_SUBTRACT:
+            case OpCode::OP_MULTIPLY:
+            case OpCode::OP_DIVIDE:
+            case OpCode::OP_MODULO:
+            case OpCode::OP_GREATER:
+            case OpCode::OP_GREATER_EQUAL:
+            case OpCode::OP_LESS:
+            case OpCode::OP_LESS_EQUAL:
+                if (!binary_op(instruction)) {
+                    return false;
+                }
+                break;
+
+            case OpCode::OP_EQUAL: {
+                ValuePtr b = pop();
+                ValuePtr a = pop();
+                push(std::make_shared<BoolValue>(values_equal(a, b)));
+                break;
+            }
+
+            case OpCode::OP_NOT_EQUAL: {
+                ValuePtr b = pop();
+                ValuePtr a = pop();
+                push(std::make_shared<BoolValue>(!values_equal(a, b)));
+                break;
+            }
+
+            case OpCode::OP_NEGATE: {
+                ValuePtr value = pop();
+                if (value->type() != Type::NUMBER) {
+                    runtime_error("Operand must be a number.");
+                    return false;
+                }
+                double num = std::dynamic_pointer_cast<NumberValue>(value)->value;
+                push(std::make_shared<NumberValue>(-num));
+                break;
+            }
+
+            case OpCode::OP_NOT: {
+                ValuePtr value = pop();
+                push(std::make_shared<BoolValue>(is_falsey(value)));
+                break;
+            }
+
+            case OpCode::OP_POP:
+                pop();
+                break;
+
+            case OpCode::OP_POPN: {
+                uint8_t count = read_byte();
+                for (int i = 0; i < count; i++) {
+                    pop();
+                }
+                break;
+            }
+
+            case OpCode::OP_DUP:
+                push(peek(0));
+                break;
+
+            case OpCode::OP_GET_GLOBAL: {
+                std::string name = read_string();
+                auto it = globals.find(name);
+                if (it == globals.end()) {
+                    // Fallback: 'this' may sit in slot 0 (when that slot exists) and own the field
+                    ValuePtr receiver = current_frame->stack_offset < stack.size()
+                        ? stack[current_frame->stack_offset] : nullptr;
+                    if (receiver && receiver->type() == Type::INSTANCE) {
+                        auto instance = std::dynamic_pointer_cast<InstanceValue>(receiver);
+                        if (instance->fields.find(name) != instance->fields.end()) {
+                            push(instance->get(name));
+                            break;
+                        }
+                    }
+                    runtime_error("Undefined variable '" + name + "'.");
+                    return false;
+                }
+                push(it->second);
+                break;
+            }
+
+            case OpCode::OP_SET_GLOBAL: {
+                std::string name = read_string();
+                auto it = globals.find(name);
+                if (it == globals.end()) {
+                    // Fallback: 'this' may sit in slot 0 (when that slot exists) and own the field
+                    ValuePtr receiver = current_frame->stack_offset < stack.size()
+                        ? stack[current_frame->stack_offset] : nullptr;
+                    if (receiver && receiver->type() == Type::INSTANCE) {
+                        auto instance = std::dynamic_pointer_cast<InstanceValue>(receiver);
+                        if (instance->fields.find(name) != instance->fields.end()) {
+                            instance->set(name, peek(0));
+                            break;
+                        }
+                    }
+                    runtime_error("Undefined variable '" + name + "'.");
+                    return false;
+                }
+                it->second = peek(0);
+                break;
+            }
+
+            case OpCode::OP_DEFINE_GLOBAL: {
+                std::string name = read_string();
+                globals[name] = pop();
+                break;
+            }
+
+            case OpCode::OP_GET_LOCAL: {
+                uint8_t slot = read_byte();
+                push(stack[current_frame->stack_offset + slot]);
+                break;
+            }
+
+            case OpCode::OP_SET_LOCAL: {
+                uint8_t slot = read_byte();
+                stack[current_frame->stack_offset + slot] = peek(0);
+                break;
+            }
+
+            case OpCode::OP_JUMP: {
+                uint16_t offset = read_short();
+                current_frame->ip += offset;
+                break;
+            }
+
+            case OpCode::OP_JUMP_IF_FALSE: {
+                uint16_t offset = read_short();
+                if (is_falsey(peek(0))) {
+                    current_frame->ip += offset;
+                }
+                break;
+            }
+
+            case OpCode::OP_LOOP: {
+                uint16_t offset = read_short();
+                current_frame->ip -= offset;
+                break;
+            }
+
+            case OpCode::OP_CALL: {
+                uint8_t arg_count = read_byte();
+                ValuePtr callee = peek(arg_count);
+
+                if (callee->type() == Type::FUNCTION) {
+                    auto func = std::dynamic_pointer_cast<FunctionValue>(callee);
+
+                    if (func->is_native) {
+                        // Native function call
+                        std::vector<ValuePtr> args;
+                        for (int i = arg_count - 1; i >= 0; i--) {
+                            args.push_back(peek(i));
+                        }
+
+                        ValuePtr result = func->native_func(args);
+
+                        // Pop arguments and function
+                        for (int i = 0; i <= arg_count; i++) {
+                            pop();
+                        }
+
+                        push(result);
+                    } else {
+                        // Script function - call bytecode
+                        if (frames.size() >= 64) {
+                            runtime_error("Stack overflow.");
+                            return false;
+                        }
+
+                        // Parameters are already on the stack
+                        // Function is at stack index: stack.size() - arg_count - 1
+                        size_t stack_offset = stack.size() - arg_count - 1;
+
+                        // If it's a bound method, replace function on stack with 'this'
+                        if (func->bound_instance) {
+                            stack[stack_offset] = func->bound_instance;
+                        }
+
+                        frames.emplace_back(func->chunk, stack_offset);
+                        current_frame = &frames.back();
+                    }
+                } else if (callee->type() == Type::CLASS) {
+                    // Class instantiation
+                    auto klass = std::dynamic_pointer_cast<ClassValue>(callee);
+                    auto instance = std::make_shared<InstanceValue>(klass);
+
+                    // The class is at stack[stack.size() - arg_count - 1]
+                    // Replace it with the instance
+                    stack[stack.size() - arg_count - 1] = instance;
+
+                    // Check if class has init method
+                    auto init_method = klass->methods.find("init");
+                    if (init_method != klass->methods.end()) {
+                        auto init_func = init_method->second;
+                        if (init_func->is_native) {
+                            // ... native init (rare)
+                            std::vector<ValuePtr> args;
+                            for (int i = arg_count - 1; i >= 0; i--) {
+                                args.push_back(peek(i));
+                            }
+                            init_func->native_func(args);
+                            // pop arguments, instance stays on stack
+                            for (int i = 0; i < arg_count; i++) pop();
+                        } else {
+                            // Script init - push a new frame
+                            if (frames.size() >= 64) {
+                                runtime_error("Stack overflow.");
+                                return false;
+                            }
+
+                            // stack_offset points to the instance we just put there
+                            size_t stack_offset = stack.size() - arg_count - 1;
+                            frames.emplace_back(init_func->chunk, stack_offset);
+                            current_frame = &frames.back();
+                        }
+                    } else {
+                        // No init, just pop arguments
+                        for (int i = 0; i < arg_count; i++) {
+                            pop();
+                        }
+                    }
+                } else {
+                    runtime_error("Can only call functions and classes.");
+                    return false;
+                }
+                break;
+            }
+
+            case OpCode::OP_RETURN: {
+                ValuePtr result = pop();
+
+                // Pop the frame
+                size_t stack_offset = current_frame->stack_offset;
+                frames.pop_back();
+
+                // Pop locals and function from the stack
+                while (stack.size() > stack_offset) {
+                    pop();
+                }
+
+                if (frames.empty()) {
+                    return true;
+                }
+
+                current_frame = &frames.back();
+                push(result);
+                break;
+            }
+
+            case OpCode::OP_BUILD_LIST: {
+                uint8_t count = read_byte();
+                auto list = std::make_shared<ListValue>();
+
+                std::vector<ValuePtr> elements;
+                for (int i = 0; i < count; i++) {
+                    elements.push_back(pop());
+                }
+
+                // Reverse because we popped in reverse order
+                for (int i = count - 1; i >= 0; i--) {
+                    list->push(elements[i]);
+                }
+
+                push(list);
+                break;
+            }
+
+            case OpCode::OP_BUILD_MAP: {
+                uint8_t count = read_byte();
+                auto map = std::make_shared<MapValue>();
+
+                for (int i = 0; i < count; i++) {
+                    ValuePtr value = pop();
+                    ValuePtr key = pop();
+
+                    if (key->type() != Type::STRING) {
+                        runtime_error("Map keys must be strings.");
+                        return false;
+                    }
+
+                    std::string key_str = std::dynamic_pointer_cast<StringValue>(key)->value;
+                    map->set(key_str, value);
+                }
+
+                push(map);
+                break;
+            }
+
+            case OpCode::OP_INDEX: {
+                ValuePtr index = pop();
+                ValuePtr object = pop();
+
+                if (object->type() == Type::LIST) {
+                    auto list = std::dynamic_pointer_cast<ListValue>(object);
+                    if (index->type() != Type::NUMBER) {
+                        runtime_error("List index must be a number.");
+                        return false;
+                    }
+                    size_t idx = static_cast<size_t>(
+                        std::dynamic_pointer_cast<NumberValue>(index)->value);
+                    push(list->get(idx));
+                } else if (object->type() == Type::MAP) {
+                    auto map = std::dynamic_pointer_cast<MapValue>(object);
+                    if (index->type() != Type::STRING) {
+                        runtime_error("Map key must be a string.");
+                        return false;
+                    }
+                    std::string key = std::dynamic_pointer_cast<StringValue>(index)->value;
+                    push(map->get(key));
+                } else {
+                    runtime_error("Only lists and maps support indexing.");
+                    return false;
+                }
+                break;
+            }
+
+            case OpCode::OP_INDEX_SET: {
+                ValuePtr value = pop();
+                ValuePtr index = pop();
+                ValuePtr object = pop();
+
+                if (object->type() == Type::LIST) {
+                    auto list = std::dynamic_pointer_cast<ListValue>(object);
+                    if (index->type() != Type::NUMBER) {
+                        runtime_error("List index must be a number.");
+                        return false;
+                    }
+                    size_t idx = static_cast<size_t>(
+                        std::dynamic_pointer_cast<NumberValue>(index)->value);
+                    list->set(idx, value);
+                    push(value);
+                } else if (object->type() == Type::MAP) {
+                    auto map = std::dynamic_pointer_cast<MapValue>(object);
+                    if (index->type() != Type::STRING) {
+                        runtime_error("Map key must be a string.");
+                        return false;
+                    }
+                    std::string key = std::dynamic_pointer_cast<StringValue>(index)->value;
+                    map->set(key, value);
+                    push(value);
+                } else {
+                    runtime_error("Only lists and maps support index assignment.");
+                    return false;
+                }
+                break;
+            }
+
+            case OpCode::OP_CLASS: {
+                std::string name = read_string();
+                auto klass = std::make_shared<ClassValue>(name);
+                push(klass);
+                break;
+            }
+
+            case OpCode::OP_GET_PROPERTY: {
+                std::string name = read_string();
+                ValuePtr object = pop();
+
+                if (object->type() == Type::INSTANCE) {
+                    auto instance = std::dynamic_pointer_cast<InstanceValue>(object);
+                    push(instance->get(name));
+                } else {
+                    runtime_error("Only instances have properties.");
+                    return false;
+                }
+                break;
+            }
+
+            case OpCode::OP_SET_PROPERTY: {
+                std::string name = read_string();
+                ValuePtr value = pop();
+                ValuePtr object = pop();
+
+                if (object->type() == Type::INSTANCE) {
+                    auto instance = std::dynamic_pointer_cast<InstanceValue>(object);
+                    instance->set(name, value);
+                    push(value);
+                } else {
+                    runtime_error("Only instances have fields.");
+                    return false;
+                }
+                break;
+            }
+
+            case OpCode::OP_PRINT: {
+                std::cout << pop()->to_string() << std::endl;
+                break;
+            }
+
+            case OpCode::OP_HALT:
+                return true;
+
+            default:
+                runtime_error("Unknown opcode: " +
+                              std::to_string(static_cast<int>(instruction)));
+                return false;
+        }
+    }
+}
+
+uint8_t VM::read_byte() {
+    return current_frame->chunk->get_code(current_frame->ip++);
+}
+
+uint16_t VM::read_short() {
+    uint16_t high = read_byte();
+    uint16_t low = read_byte();
+    return (high << 8) | low;
+}
+
+ValuePtr VM::read_constant() {
+    uint8_t constant_idx = read_byte();
+    return current_frame->chunk->get_constant(constant_idx);
+}
+
+std::string VM::read_string() {
+    ValuePtr constant = read_constant();
+    if (constant->type() != Type::STRING) {
+        throw RuntimeError("Expected string constant");
+    }
+    return std::dynamic_pointer_cast<StringValue>(constant)->value;
+}
+
+void VM::runtime_error(const std::string& message) {
+    error_message = message;
+    std::cerr << "Runtime Error: " << message << std::endl;
+
+    // Print stack trace
+    for (auto it = frames.rbegin(); it != frames.rend(); ++it) {
+        size_t instruction = it->ip - 1;
+        int line = it->chunk->get_line(instruction);
+        std::cerr << "[line " << line << "] in script" << std::endl;
+    }
+}
+
+bool VM::is_falsey(ValuePtr value) {
+    if (!value || value->type() == Type::NIL) return true;
+    if (value->type() == Type::BOOL) {
+        return !std::dynamic_pointer_cast<BoolValue>(value)->value;
+    }
+    return false;
+}
+
+bool VM::binary_op(OpCode op) {
+    ValuePtr b = pop();
+    ValuePtr a = pop();
+
+    if (op == OpCode::OP_ADD) {
+        if (a->type() == Type::NUMBER && b->type() == Type::NUMBER) {
+            double av = std::dynamic_pointer_cast<NumberValue>(a)->value;
+            double bv = std::dynamic_pointer_cast<NumberValue>(b)->value;
+            push(std::make_shared<NumberValue>(av + bv));
+            return true;
+        }
+        if (a->type() == Type::STRING && b->type() == Type::STRING) {
+            std::string av = std::dynamic_pointer_cast<StringValue>(a)->value;
+            std::string bv = std::dynamic_pointer_cast<StringValue>(b)->value;
+            push(std::make_shared<StringValue>(av + bv));
+            return true;
+        }
+        runtime_error("Operands must be two numbers or two strings.");
+        return false;
+    }
+
+    // Other arithmetic operations require numbers
+    if (a->type() != Type::NUMBER || b->type() != Type::NUMBER) {
+        runtime_error("Operands must be numbers.");
+        return false;
+    }
+
+    double av = std::dynamic_pointer_cast<NumberValue>(a)->value;
+    double bv = std::dynamic_pointer_cast<NumberValue>(b)->value;
+
+    switch (op) {
+        case OpCode::OP_SUBTRACT:
+            push(std::make_shared<NumberValue>(av - bv));
+            break;
+        case OpCode::OP_MULTIPLY:
+            push(std::make_shared<NumberValue>(av * bv));
+            break;
+        case OpCode::OP_DIVIDE:
+            if (bv == 0) {
+                runtime_error("Division by zero.");
+                return false;
+            }
+            push(std::make_shared<NumberValue>(av / bv));
+            break;
+        case OpCode::OP_MODULO:
+            // std::fmod(x, 0) yields NaN; reject it the same way as division by zero
+            if (bv == 0) {
+                runtime_error("Modulo by zero.");
+                return false;
+            }
+            push(std::make_shared<NumberValue>(std::fmod(av, bv)));
+            break;
+        case OpCode::OP_GREATER:
+            push(std::make_shared<BoolValue>(av > bv));
+            break;
+        case OpCode::OP_GREATER_EQUAL:
+            push(std::make_shared<BoolValue>(av >= bv));
+            break;
+        case OpCode::OP_LESS:
+            push(std::make_shared<BoolValue>(av < bv));
+            break;
+        case OpCode::OP_LESS_EQUAL:
+            push(std::make_shared<BoolValue>(av <= bv));
+            break;
+        default:
+            runtime_error("Unknown binary operator.");
+            return false;
+    }
+
+    return true;
+}
+
+void VM::register_builtin_functions() {
+    // print function
+    auto print_func = std::make_shared<FunctionValue>("print",
+        [](const std::vector<ValuePtr>& args) -> ValuePtr {
+            if (args.empty()) {
+                std::cout << std::endl;
+                return std::make_shared<NilValue>();
+            }
+
+            for (size_t i = 0; i < args.size(); ++i) {
+                if (i > 0) std::cout << " ";
+                std::cout << args[i]->to_string();
+            }
+            std::cout << std::endl;
+
+            return std::make_shared<NilValue>();
+        });
+    globals["print"] = print_func;
+
+    // len function
+    auto len_func = std::make_shared<FunctionValue>("len",
+        [](const std::vector<ValuePtr>& args) -> ValuePtr {
+            if (args.size() != 1) {
+                throw RuntimeError("len() expects 1 argument.");
+            }
+
+            auto& arg = args[0];
+            if (arg->type() == Type::LIST) {
+                auto list = std::dynamic_pointer_cast<ListValue>(arg);
+                return std::make_shared<NumberValue>(static_cast<double>(list->size()));
+            } else if (arg->type() == Type::STRING) {
+                auto str = std::dynamic_pointer_cast<StringValue>(arg);
+                return std::make_shared<NumberValue>(static_cast<double>(str->value.length()));
+            } else if (arg->type() == Type::MAP) {
+                auto map = std::dynamic_pointer_cast<MapValue>(arg);
+                return std::make_shared<NumberValue>(static_cast<double>(map->pairs.size()));
+            }
+
+            throw RuntimeError("len() expects list, string, or map.");
+        });
+    globals["len"] = len_func;
+}
+
+void VM::register_native_function(const std::string& name, NativeFunction func) {
+    auto func_value = std::make_shared<FunctionValue>(name, func);
+    globals[name] = func_value;
+}
+
+void VM::set_global(const std::string& name, ValuePtr value) {
+    globals[name] = value;
+}
+
+ValuePtr VM::get_global(const std::string& name) {
+    auto it = globals.find(name);
+    if (it == globals.end()) {
+        return nullptr;
+    }
+    return it->second;
+}
+
+void VM::push(ValuePtr value) {
+    stack.push_back(value);
+}
+
+ValuePtr VM::pop() {
+    if (stack.empty()) {
+        throw RuntimeError("Stack underflow");
+    }
+    ValuePtr value = stack.back();
+    stack.pop_back();
+    return value;
+}
+
+ValuePtr VM::peek(int distance) {
+    // A negative distance would index past the top of the stack, so reject it too
+    if (distance < 0 || distance >= static_cast<int>(stack.size())) {
+        throw RuntimeError("Stack underflow in peek");
+    }
+    return stack[stack.size() - 1 - distance];
+}
+
+} // namespace camellya
diff
--git a/src/vm.h b/src/vm.h
new file mode 100644
index 0000000..891f04f
--- /dev/null
+++ b/src/vm.h
@@ -0,0 +1,75 @@
+#ifndef CAMELLYA_VM_H
+#define CAMELLYA_VM_H
+
+#include "chunk.h"
+#include "value.h"
+#include "exceptions.h"
+#include <map>
+#include <string>
+#include <vector>
+
+namespace camellya {
+
+// Call frame for function calls
+struct CallFrame {
+    std::shared_ptr<Chunk> chunk;
+    size_t ip;            // Instruction pointer
+    size_t stack_offset;  // Where this frame's locals start on the stack
+
+    CallFrame(std::shared_ptr<Chunk> chunk, size_t stack_offset)
+        : chunk(std::move(chunk)), ip(0), stack_offset(stack_offset) {}
+};
+
+// Virtual Machine - stack-based bytecode interpreter
+class VM {
+public:
+    VM();
+
+    // Execute a chunk of bytecode; returns false and records an error message on failure
+    bool execute(std::shared_ptr<Chunk> chunk);
+
+    // Get the last error message
+    const std::string& get_error() const { return error_message; }
+
+    // Register native functions
+    void register_native_function(const std::string& name, NativeFunction func);
+
+    // Global variable access (get_global returns nullptr for an unknown name)
+    void set_global(const std::string& name, ValuePtr value);
+    ValuePtr get_global(const std::string& name);
+
+    // Stack operations (for C++ API); pop/peek throw RuntimeError on underflow
+    void push(ValuePtr value);
+    ValuePtr pop();
+    ValuePtr peek(int distance = 0);
+
+private:
+    std::vector<ValuePtr> stack;
+    std::vector<CallFrame> frames;
+    CallFrame* current_frame;  // Points into `frames`; reassigned after every push/pop of a frame
+    std::map<std::string, ValuePtr> globals;
+    std::string error_message;
+
+    // Main execution loop
+    bool run();
+
+    // Helper methods
+    uint8_t read_byte();
+    uint16_t read_short();
+    ValuePtr read_constant();
+    std::string read_string();
+
+    // Error reporting and truthiness helpers
+    void runtime_error(const std::string& message);
+    bool is_falsey(ValuePtr value);
+
+    // Binary operations
+    bool binary_op(OpCode op);
+
+    // Built-in functions
+    void register_builtin_functions();
+};
+
+} // namespace camellya
+
+#endif // CAMELLYA_VM_H
diff --git a/tests/test_basic.cpp b/tests/test_basic.cpp
index facf156..f4b3bdf 100644
--- a/tests/test_basic.cpp
+++ b/tests/test_basic.cpp
@@ -123,31 +123,45 @@ TEST_CASE("class init is called on declaration", "[class][init]") {
     REQUIRE(a_num->value == 18.0);
 }
 
-// TEST_CASE("interpreter performance: simple loop", "[perf][script]") {
-//     State state;
-//     const char* script = R"(
-//         func sum_to(number n) -> number {
-//             number s = 0;
-//             for (number i = 0; i < n; i = i + 1) {
-//                 s = s + i;
-//             }
-//             return s;
-//         }
-//         number r = sum_to(1000);
-//     )";
+TEST_CASE("interpreter performance: simple loop", "[perf][script]") {
+    State state(ExecutionMode::INTERPRETER);  // State defaults to VM mode, so pick the interpreter explicitly
+    State state_vm(ExecutionMode::VM);
+    const char* script = R"(
+        func sum_to(number n) -> number {
+            var s = 0;
+            for (var i = 0; i < n; i = i + 1) {
+                s = s + i;
+            }
+            return s;
+        }
+        var r = sum_to(1000);
+    )";
 
-//     BENCHMARK("sum_to(1000)") {
-//         if (!state.do_string(script)) {
-//             auto last_error = state.get_error();
-//             REQUIRE(last_error.empty());
-//         }
-//         auto r_val = state.get_global("r");
-//         REQUIRE(r_val);
-//         REQUIRE(r_val->type() == Type::NUMBER);
-//         auto r_num = std::dynamic_pointer_cast<NumberValue>(r_val);
-//         REQUIRE(r_num->value == 499500.0);
-//     };
-// }
+    BENCHMARK("sum_to(1000)") {
+        if (!state.do_string(script)) {
+            auto last_error = state.get_error();
+            REQUIRE(last_error.empty());
+        }
+
+        auto r_val = state.get_global("r");
+        REQUIRE(r_val);
+        REQUIRE(r_val->type() == Type::NUMBER);
+        auto r_num = std::dynamic_pointer_cast<NumberValue>(r_val);
+        REQUIRE(r_num->value == 499500.0);
+    };
+
+    BENCHMARK("state_vm sum_to(1000)") {
+        if (!state_vm.do_string(script)) {
+            auto last_error = state_vm.get_error();
+            REQUIRE(last_error.empty());
+        }
+        auto r_val = state_vm.get_global("r");
+        REQUIRE(r_val);
+        REQUIRE(r_val->type() == Type::NUMBER);
+        auto r_num = std::dynamic_pointer_cast<NumberValue>(r_val);
+        REQUIRE(r_num->value == 499500.0);
+    };
+}
 
 TEST_CASE("loop break", "[script][loop]") {
     State state;
diff --git a/tests/test_vm.cpp b/tests/test_vm.cpp
new file mode 100644
index 0000000..48642f8
--- /dev/null
+++ b/tests/test_vm.cpp
@@ -0,0 +1,269 @@
+#include
+#include "src/camellya.h"
+
+using namespace camellya;
+
+TEST_CASE("VM - Basic arithmetic", "[vm]") {
+    State state(ExecutionMode::VM);
+
+    SECTION("Addition") {
+        REQUIRE(state.do_string("var x = 10 + 20;"));
+        auto x = state.get_global("x");
+        REQUIRE(x != nullptr);
+        REQUIRE(x->type() == Type::NUMBER);
+        REQUIRE(std::dynamic_pointer_cast<NumberValue>(x)->value == 30.0);
+    }
+
+    SECTION("Subtraction") {
+        REQUIRE(state.do_string("var y = 50 - 15;"));
+        auto y = state.get_global("y");
+        REQUIRE(y != nullptr);
+        REQUIRE(std::dynamic_pointer_cast<NumberValue>(y)->value == 35.0);
+    }
+
+    SECTION("Multiplication") {
+        REQUIRE(state.do_string("var z = 7 * 8;"));
+        auto z = state.get_global("z");
+        REQUIRE(z != nullptr);
+        REQUIRE(std::dynamic_pointer_cast<NumberValue>(z)->value == 56.0);
+    }
+
+    SECTION("Division") {
+        REQUIRE(state.do_string("var w = 100 / 4;"));
+        auto w = state.get_global("w");
+        REQUIRE(w != nullptr);
+        REQUIRE(std::dynamic_pointer_cast<NumberValue>(w)->value == 25.0);
+    }
+}
+
+TEST_CASE("VM - Variables and assignment", "[vm]") {
+    State state(ExecutionMode::VM);
+
+    SECTION("Variable declaration and initialization") {
+        REQUIRE(state.do_string("var a = 42;"));
+        auto a = state.get_global("a");
+        REQUIRE(a != nullptr);
+        REQUIRE(std::dynamic_pointer_cast<NumberValue>(a)->value == 42.0);
+    }
+
+    SECTION("Variable assignment") {
+        REQUIRE(state.do_string(R"(
+            var b = 10;
+            b = 20;
+        )"));
+        auto b = state.get_global("b");
+        REQUIRE(std::dynamic_pointer_cast<NumberValue>(b)->value == 20.0);
+    }
+}
+
+TEST_CASE("VM - String operations", "[vm]") {
+    State state(ExecutionMode::VM);
+
+    SECTION("String concatenation") {
+        REQUIRE(state.do_string(R"(var greeting = "Hello" + " " + "World";)"));
+        auto greeting = state.get_global("greeting");
+        REQUIRE(greeting != nullptr);
+        REQUIRE(greeting->type() == Type::STRING);
+        REQUIRE(std::dynamic_pointer_cast<StringValue>(greeting)->value == "Hello World");
+    }
+}
+
+TEST_CASE("VM - Comparison operators", "[vm]") {
+    State state(ExecutionMode::VM);
+
+    SECTION("Equality") {
+        REQUIRE(state.do_string("var eq = 10 == 10;"));
+        auto eq = state.get_global("eq");
+        REQUIRE(std::dynamic_pointer_cast<BoolValue>(eq)->value == true);
+    }
+
+    SECTION("Greater than") {
+        REQUIRE(state.do_string("var gt = 20 > 10;"));
+        auto gt = state.get_global("gt");
+        REQUIRE(std::dynamic_pointer_cast<BoolValue>(gt)->value == true);
+    }
+
+    SECTION("Less than") {
+        REQUIRE(state.do_string("var lt = 5 < 10;"));
+        auto lt = state.get_global("lt");
+        REQUIRE(std::dynamic_pointer_cast<BoolValue>(lt)->value == true);
+    }
+}
+
+TEST_CASE("VM - Lists", "[vm]") {
+    State state(ExecutionMode::VM);
+
+    SECTION("Create list") {
+        REQUIRE(state.do_string("var numbers = [1, 2, 3, 4, 5];"));
+        auto numbers = state.get_global("numbers");
+        REQUIRE(numbers != nullptr);
+        REQUIRE(numbers->type() == Type::LIST);
+        auto list = std::dynamic_pointer_cast<ListValue>(numbers);
+        REQUIRE(list->size() == 5);
+    }
+
+    SECTION("List indexing") {
+        REQUIRE(state.do_string(R"(
+            var arr = [10, 20, 30];
+            var item = arr[1];
+        )"));
+        auto item = state.get_global("item");
+        REQUIRE(std::dynamic_pointer_cast<NumberValue>(item)->value == 20.0);
+    }
+}
+
+TEST_CASE("VM - Maps", "[vm]") {
+    State state(ExecutionMode::VM);
+
+    SECTION("Create map") {
+        REQUIRE(state.do_string(R"(var person = {"name": "Alice", "age": "30"};)"));
+        auto person = state.get_global("person");
+        REQUIRE(person != nullptr);
+        REQUIRE(person->type() == Type::MAP);
+    }
+
+    SECTION("Map access") {
+        REQUIRE(state.do_string(R"(
+            var data = {"key": "value"};
+            var val = data["key"];
+        )"));
+        auto val = state.get_global("val");
+        REQUIRE(val->type() == Type::STRING);
+        REQUIRE(std::dynamic_pointer_cast<StringValue>(val)->value == "value");
+    }
+}
+
+TEST_CASE("VM - If statements", "[vm]") {
+    State state(ExecutionMode::VM);
+
+    SECTION("If branch taken") {
+        REQUIRE(state.do_string(R"(
+            var x = 10;
+            if (x > 5) {
+                x = 100;
+            }
+        )"));
+        auto x = state.get_global("x");
+        REQUIRE(std::dynamic_pointer_cast<NumberValue>(x)->value == 100.0);
+    }
+
+    SECTION("Else branch taken") {
+        REQUIRE(state.do_string(R"(
+            var y = 3;
+            if (y > 5) {
+                y = 100;
+            } else {
+                y = 200;
+            }
+        )"));
+        auto y = state.get_global("y");
+        REQUIRE(std::dynamic_pointer_cast<NumberValue>(y)->value == 200.0);
+    }
+}
+
+TEST_CASE("VM - While loops", "[vm]") {
+    State state(ExecutionMode::VM);
+
+    SECTION("While loop") {
+        REQUIRE(state.do_string(R"(
+            var counter = 0;
+            var sum = 0;
+            while (counter < 5) {
+                sum = sum + counter;
+                counter = counter + 1;
+            }
+        )"));
+        auto sum = state.get_global("sum");
+        REQUIRE(std::dynamic_pointer_cast<NumberValue>(sum)->value == 10.0);
+    }
+}
+
+TEST_CASE("VM - Native functions", "[vm]") {
+    State state(ExecutionMode::VM);
+
+    SECTION("len function") {
+        REQUIRE(state.do_string(R"(
+            var arr = [1, 2, 3, 4];
+            var size = len(arr);
+        )"));
+        auto size = state.get_global("size");
+        REQUIRE(std::dynamic_pointer_cast<NumberValue>(size)->value == 4.0);
+    }
+}
+
+TEST_CASE("VM vs Interpreter - Same results", "[vm][interpreter]") {
+    const char* script = R"(
+        var x = 10;
+        var y = 20;
+        var sum = x + y;
+        var product = x * y;
+    )";
+
+    State vm_state(ExecutionMode::VM);
+    State interp_state(ExecutionMode::INTERPRETER);
+
+    REQUIRE(vm_state.do_string(script));
+    REQUIRE(interp_state.do_string(script));
+
+    auto vm_sum = vm_state.get_global("sum");
+    auto interp_sum = interp_state.get_global("sum");
+    REQUIRE(std::dynamic_pointer_cast<NumberValue>(vm_sum)->value ==
+            std::dynamic_pointer_cast<NumberValue>(interp_sum)->value);
+
+    auto vm_product = vm_state.get_global("product");
+    auto interp_product = interp_state.get_global("product");
+    REQUIRE(std::dynamic_pointer_cast<NumberValue>(vm_product)->value ==
+            std::dynamic_pointer_cast<NumberValue>(interp_product)->value);
+}
+
+TEST_CASE("VM - class init is called on declaration", "[vm][class][init]") {
+    State state(ExecutionMode::VM);
+    const char* script = R"(
+        class Person {
+            var age : number;
+            var name : string;
+
+            func init() -> nil {
+                age = 18;
+                name = "Default";
+            }
+
+            func getAge() -> number {
+                return this.age;
+            }
+        }
+
+        var p : Person;
+        var a = p.getAge();
+    )";
+
+    auto ret = state.do_string(script);
+    if(!ret) {
+        REQUIRE(state.get_error() == "");
+    }
+
+    auto p_val = state.get_global("p");
+    REQUIRE(p_val);
+    REQUIRE(p_val->type() == Type::INSTANCE);
+
+    auto instance = std::dynamic_pointer_cast<InstanceValue>(p_val);
+    REQUIRE(instance);
+
+    auto age_val = instance->get("age");
+    auto name_val = instance->get("name");
+
+    REQUIRE(age_val->type() == Type::NUMBER);
+    REQUIRE(name_val->type() == Type::STRING);
+
+    auto age_num = std::dynamic_pointer_cast<NumberValue>(age_val);
+    auto name_str = std::dynamic_pointer_cast<StringValue>(name_val);
+
+    REQUIRE(age_num->value == 18.0);
+    REQUIRE(name_str->value == "Default");
+
+    auto a_val = state.get_global("a");
+    REQUIRE(a_val);
+    REQUIRE(a_val->type() == Type::NUMBER);
+    auto a_num = std::dynamic_pointer_cast<NumberValue>(a_val);
+    REQUIRE(a_num->value == 18.0);
+}