init

2026-01-13 22:52:55 +08:00
commit 211a837468
18 changed files with 2831 additions and 0 deletions
--- a/lexer.cpp
+++ b/lexer.cpp
@@ -0,0 +1,235 @@
+#include "lexer.h"
+#include <cctype>
+#include <unordered_map>
+
+namespace camellya {
+
+Lexer::Lexer(std::string source) : source_(std::move(source)) {}
+
+// Scans the whole source, appends a terminating END_OF_FILE token and
+// returns a copy of the accumulated token list.
+std::vector<Token> Lexer::tokenize() {
+    // Each iteration records where the next lexeme begins, then scans it.
+    for (; !is_at_end(); scan_token()) {
+        start_ = current_;
+    }
+
+    // The terminator has an empty lexeme and carries the final line/column.
+    tokens_.emplace_back(TokenType::END_OF_FILE, "", line_, column_);
+    return tokens_;
+}
+
+// Consumes the current character, moving both the read cursor and the
+// column counter forward by one, and returns the consumed character.
+// No bounds check is performed here.
+char Lexer::advance() {
+    const char consumed = source_[current_];
+    ++current_;
+    ++column_;
+    return consumed;
+}
+
+// Returns the character under the cursor without consuming it, or '\0'
+// once the end of the source has been reached.
+char Lexer::peek() const {
+    return is_at_end() ? '\0' : source_[current_];
+}
+
+// Returns the character one past the cursor without consuming anything,
+// or '\0' when that position lies beyond the end of the source.
+char Lexer::peek_next() const {
+    const auto lookahead = current_ + 1;
+    return lookahead < source_.length() ? source_[lookahead] : '\0';
+}
+
+// Conditionally consumes one character: if the character under the cursor
+// equals `expected`, the cursor and column advance and true is returned;
+// otherwise nothing is consumed and false is returned.
+bool Lexer::match(char expected) {
+    if (is_at_end() || source_[current_] != expected) {
+        return false;
+    }
+
+    ++current_;
+    ++column_;
+    return true;
+}
+
+// Consumes spaces, tabs, carriage returns and newlines until a different
+// character or the end of the source is reached. Every newline bumps line_
+// and restarts the column count.
+void Lexer::skip_whitespace() {
+    for (;;) {
+        if (is_at_end()) {
+            return;
+        }
+
+        const char ch = peek();
+        if (ch == '\n') {
+            ++line_;
+            column_ = 0;  // advance() below moves it to 1
+        } else if (ch != ' ' && ch != '\r' && ch != '\t') {
+            return;  // first non-whitespace character: leave it for the caller
+        }
+        advance();
+    }
+}
+
+// Skips a `//` line comment up to, but not including, the terminating
+// newline (or up to the end of the source).
+//
+// scan_token() calls this after it has already consumed the first '/', so
+// the cursor normally sits on the second slash. Requiring two more slashes
+// at that point would only recognise `///`, leaving ordinary `//` comments
+// to be lexed as code. Both cursor positions are therefore accepted:
+//   - the previous character is '/' and the current one is '/', or
+//   - the current and the next characters are both '/'.
+// If neither holds, nothing is consumed.
+void Lexer::skip_comment() {
+    const bool on_second_slash =
+        current_ > 0 && source_[current_ - 1] == '/' && peek() == '/';
+    const bool on_first_slash = peek() == '/' && peek_next() == '/';
+    if (!on_second_slash && !on_first_slash) {
+        return;
+    }
+
+    // The newline itself is left in place so skip_whitespace() counts it.
+    while (peek() != '\n' && !is_at_end()) {
+        advance();
+    }
+}
+
+// Scans a single token: skips leading whitespace, marks the lexeme start,
+// consumes one character and dispatches on it. Punctuation and operators
+// are emitted directly; strings, numbers and identifiers are delegated to
+// their scan_* helpers. Any other character becomes an INVALID token.
+// Returns without emitting anything if only whitespace remained.
+void Lexer::scan_token() {
+    skip_whitespace();
+
+    if (is_at_end()) return;
+
+    start_ = current_;
+    char c = advance();
+
+    switch (c) {
+        case '(': add_token(TokenType::LEFT_PAREN); break;
+        case ')': add_token(TokenType::RIGHT_PAREN); break;
+        case '{': add_token(TokenType::LEFT_BRACE); break;
+        case '}': add_token(TokenType::RIGHT_BRACE); break;
+        case '[': add_token(TokenType::LEFT_BRACKET); break;
+        case ']': add_token(TokenType::RIGHT_BRACKET); break;
+        case ',': add_token(TokenType::COMMA); break;
+        case '.': add_token(TokenType::DOT); break;
+        case ';': add_token(TokenType::SEMICOLON); break;
+        case ':': add_token(TokenType::COLON); break;
+        case '+': add_token(TokenType::PLUS); break;
+        case '*': add_token(TokenType::STAR); break;
+        case '%': add_token(TokenType::PERCENT); break;
+        case '-':
+            // "->" is a single ARROW token; a lone '-' is MINUS.
+            if (match('>')) {
+                add_token(TokenType::ARROW);
+            } else {
+                add_token(TokenType::MINUS);
+            }
+            break;
+        case '!':
+            add_token(match('=') ? TokenType::BANG_EQUAL : TokenType::BANG);
+            break;
+        case '=':
+            add_token(match('=') ? TokenType::EQUAL_EQUAL : TokenType::EQUAL);
+            break;
+        case '<':
+            add_token(match('=') ? TokenType::LESS_EQUAL : TokenType::LESS);
+            break;
+        case '>':
+            add_token(match('=') ? TokenType::GREATER_EQUAL : TokenType::GREATER);
+            break;
+        case '/':
+            // The first '/' is already consumed; a second one starts a line
+            // comment, which produces no token.
+            if (peek() == '/') {
+                skip_comment();
+            } else {
+                add_token(TokenType::SLASH);
+            }
+            break;
+        case '"':
+            scan_string();
+            break;
+        default: {
+            // The <cctype> classifiers have undefined behavior for negative
+            // arguments, so bytes >= 0x80 (negative where char is signed)
+            // must be converted through unsigned char first.
+            const auto uc = static_cast<unsigned char>(c);
+            if (std::isdigit(uc)) {
+                scan_number();
+            } else if (std::isalpha(uc) || c == '_') {
+                scan_identifier();
+            } else {
+                add_token(TokenType::INVALID);
+            }
+            break;
+        }
+    }
+}
+
+// Appends a token of kind `type` with no literal value. The lexeme is the
+// source text in [start_, current_); the reported column is the current
+// column minus the lexeme length.
+void Lexer::add_token(TokenType type) {
+    const std::string lexeme = source_.substr(start_, current_ - start_);
+    const auto width = static_cast<int>(lexeme.length());
+    tokens_.emplace_back(type, lexeme, line_, column_ - width);
+}
+
+// Appends a token of kind `type` that carries a literal value (number or
+// string). The lexeme is the source text in [start_, current_); the
+// reported column is the current column minus the lexeme length.
+void Lexer::add_token(TokenType type, std::variant<std::monostate, double, std::string> literal) {
+    std::string text = source_.substr(start_, current_ - start_);
+    // Computed up front: `text` must not be read after it has been moved,
+    // and argument evaluation order is unspecified.
+    const auto column = column_ - static_cast<int>(text.length());
+    // `literal` is a by-value sink parameter and `text` is a local, so both
+    // are moved into the token instead of being copied.
+    tokens_.emplace_back(type, std::move(text), std::move(literal), line_, column);
+}
+
+// Scans a double-quoted string literal. scan_token() calls this after it
+// has consumed the opening quote. Escape sequences \n, \t, \r, \\ and \"
+// are decoded; any other escaped character is kept as-is. Strings may span
+// lines. On success a STRING_LITERAL token carrying the decoded value is
+// emitted; if the source ends before the closing quote, an INVALID token is
+// emitted instead.
+void Lexer::scan_string() {
+    std::string value;
+
+    while (peek() != '"' && !is_at_end()) {
+        if (peek() == '\\' && peek_next() != '\0') {
+            advance(); // consume backslash
+            if (peek() == '\n') {
+                // A backslash followed by a newline still ends a source
+                // line; count it so later tokens report the right position.
+                line_++;
+                column_ = 0;
+            }
+            char escaped = advance();
+            switch (escaped) {
+                case 'n': value += '\n'; break;
+                case 't': value += '\t'; break;
+                case 'r': value += '\r'; break;
+                case '\\': value += '\\'; break;
+                case '"': value += '"'; break;
+                default: value += escaped; break;
+            }
+        } else {
+            if (peek() == '\n') {
+                line_++;
+                column_ = 0;
+            }
+            value += advance();
+        }
+    }
+
+    if (is_at_end()) {
+        // Unterminated string: report everything from the opening quote on.
+        add_token(TokenType::INVALID);
+        return;
+    }
+
+    advance(); // closing "
+    add_token(TokenType::STRING_LITERAL, value);
+}
+
+// Scans a numeric literal: a run of digits, optionally followed by '.' and
+// at least one more digit. scan_token() has already consumed the first
+// digit. Emits a NUMBER_LITERAL token carrying the parsed double.
+void Lexer::scan_number() {
+    // std::isdigit has undefined behavior for negative arguments, so the
+    // char is converted through unsigned char before classification.
+    const auto is_digit = [](char ch) {
+        return std::isdigit(static_cast<unsigned char>(ch)) != 0;
+    };
+
+    while (is_digit(peek())) {
+        advance();
+    }
+
+    // A '.' only belongs to the number when a digit follows it; otherwise it
+    // is left for the next token (e.g. a DOT).
+    if (peek() == '.' && is_digit(peek_next())) {
+        advance(); // consume '.'
+        while (is_digit(peek())) {
+            advance();
+        }
+    }
+
+    std::string text = source_.substr(start_, current_ - start_);
+    // NOTE(review): std::stod throws std::out_of_range when the literal does
+    // not fit in a double, and it honors the C locale's decimal point.
+    double value = std::stod(text);
+    add_token(TokenType::NUMBER_LITERAL, value);
+}
+
+// Scans an identifier or keyword: consumes letters, digits and underscores
+// following the first character (already consumed by scan_token()), then
+// emits either the matching keyword token or IDENTIFIER.
+void Lexer::scan_identifier() {
+    // std::isalnum has undefined behavior for negative arguments, so the
+    // char is converted through unsigned char before classification.
+    const auto is_ident_char = [](char ch) {
+        return ch == '_' || std::isalnum(static_cast<unsigned char>(ch)) != 0;
+    };
+
+    while (is_ident_char(peek())) {
+        advance();
+    }
+
+    std::string text = source_.substr(start_, current_ - start_);
+    TokenType type = get_keyword_type(text);
+    add_token(type);
+}
+
+// Maps a scanned word to its keyword token type. Words that are not in the
+// keyword table are reported as IDENTIFIER. The comparison is exact
+// (case-sensitive).
+TokenType Lexer::get_keyword_type(const std::string& text) const {
+    // Built once on first use and shared by every call.
+    static const std::unordered_map<std::string, TokenType> keywords = {
+        // declarations and type names
+        {"class", TokenType::CLASS},
+        {"func", TokenType::FUNC},
+        {"number", TokenType::NUMBER},
+        {"string", TokenType::STRING},
+        {"bool", TokenType::BOOL},
+        {"list", TokenType::LIST},
+        {"map", TokenType::MAP},
+        // control flow
+        {"if", TokenType::IF},
+        {"else", TokenType::ELSE},
+        {"while", TokenType::WHILE},
+        {"for", TokenType::FOR},
+        {"return", TokenType::RETURN},
+        {"var", TokenType::VAR},
+        // literals and operators spelled as words
+        {"true", TokenType::TRUE},
+        {"false", TokenType::FALSE},
+        {"nil", TokenType::NIL},
+        {"and", TokenType::AND},
+        {"or", TokenType::OR},
+        {"this", TokenType::THIS},
+    };
+
+    const auto entry = keywords.find(text);
+    return entry == keywords.end() ? TokenType::IDENTIFIER : entry->second;
+}
+
+} // namespace camellya