commit 211a837468 (2026-01-13 22:52:55 +08:00)
18 changed files with 2831 additions and 0 deletions

lexer.cpp (new file)
#include "lexer.h"
#include <cctype>
#include <unordered_map>
namespace camellya {
Lexer::Lexer(std::string source) : source_(std::move(source)) {}
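
// Drive the scanner: produce tokens until the source is exhausted, then
// append an END_OF_FILE sentinel so consumers never run off the stream.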
std::vector<Token> Lexer::tokenize() {
    while (!is_at_end()) {
        scan_token();  // scan_token() resets start_ after skipping whitespace
    }
    tokens_.emplace_back(TokenType::END_OF_FILE, "", line_, column_);
    return tokens_;
}
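
// Low-level cursor helpers: advance() consumes a character and bumps the
// column; peek()/peek_next() look ahead without consuming; match() is a
// conditional advance used for two-character operators.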
char Lexer::advance() {
    column_++;
    return source_[current_++];
}

char Lexer::peek() const {
    if (is_at_end()) return '\0';
    return source_[current_];
}

char Lexer::peek_next() const {
    if (current_ + 1 >= source_.length()) return '\0';
    return source_[current_ + 1];
}

bool Lexer::match(char expected) {
    if (is_at_end()) return false;
    if (source_[current_] != expected) return false;
    current_++;
    column_++;
    return true;
}
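
// Skip spaces, carriage returns, and tabs. A newline bumps the line
// counter and resets the column to 0 so the next advance() reports 1.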
void Lexer::skip_whitespace() {
    while (!is_at_end()) {
        char c = peek();
        switch (c) {
            case ' ':
            case '\r':
            case '\t':
                advance();
                break;
            case '\n':
                line_++;
                column_ = 0;
                advance();
                break;
            default:
                return;
        }
    }
}
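
// Consume a line comment up to (but not including) the newline; the
// terminating newline is left for skip_whitespace() to count.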
void Lexer::skip_comment() {
    // Both leading slashes have already been consumed by the caller.
    while (peek() != '\n' && !is_at_end()) {
        advance();
    }
}
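
// Scan a single token starting at the current cursor position. Single-
// character tokens are dispatched directly; two-character operators use
// match(); literals and identifiers fall through to dedicated scanners.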
void Lexer::scan_token() {
    skip_whitespace();
    if (is_at_end()) return;
    start_ = current_;
    char c = advance();
    switch (c) {
        case '(': add_token(TokenType::LEFT_PAREN); break;
        case ')': add_token(TokenType::RIGHT_PAREN); break;
        case '{': add_token(TokenType::LEFT_BRACE); break;
        case '}': add_token(TokenType::RIGHT_BRACE); break;
        case '[': add_token(TokenType::LEFT_BRACKET); break;
        case ']': add_token(TokenType::RIGHT_BRACKET); break;
        case ',': add_token(TokenType::COMMA); break;
        case '.': add_token(TokenType::DOT); break;
        case ';': add_token(TokenType::SEMICOLON); break;
        case ':': add_token(TokenType::COLON); break;
        case '+': add_token(TokenType::PLUS); break;
        case '*': add_token(TokenType::STAR); break;
        case '%': add_token(TokenType::PERCENT); break;
        case '-':
            if (match('>')) {
                add_token(TokenType::ARROW);
            } else {
                add_token(TokenType::MINUS);
            }
            break;
        case '!':
            add_token(match('=') ? TokenType::BANG_EQUAL : TokenType::BANG);
            break;
        case '=':
            add_token(match('=') ? TokenType::EQUAL_EQUAL : TokenType::EQUAL);
            break;
        case '<':
            add_token(match('=') ? TokenType::LESS_EQUAL : TokenType::LESS);
            break;
        case '>':
            add_token(match('=') ? TokenType::GREATER_EQUAL : TokenType::GREATER);
            break;
        case '/':
            if (match('/')) {
                skip_comment();
            } else {
                add_token(TokenType::SLASH);
            }
            break;
        case '"':
            scan_string();
            break;
        default:
            // Cast to unsigned char before the <cctype> calls: passing a
            // negative char is undefined behavior.
            if (std::isdigit(static_cast<unsigned char>(c))) {
                scan_number();
            } else if (std::isalpha(static_cast<unsigned char>(c)) || c == '_') {
                scan_identifier();
            } else {
                add_token(TokenType::INVALID);
            }
            break;
    }
}
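
// Materialize the lexeme [start_, current_) into a token. The start
// column is recovered by walking column_ back over the lexeme's length,
// which assumes the lexeme does not span a newline.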
void Lexer::add_token(TokenType type) {
    std::string text = source_.substr(start_, current_ - start_);
    tokens_.emplace_back(type, text, line_, column_ - static_cast<int>(text.length()));
}

void Lexer::add_token(TokenType type, std::variant<std::monostate, double, std::string> literal) {
    std::string text = source_.substr(start_, current_ - start_);
    tokens_.emplace_back(type, text, literal, line_, column_ - static_cast<int>(text.length()));
}
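
// Scan a double-quoted string literal, resolving \n, \t, \r, \\ and \"
// escapes; any other escaped character is kept verbatim. Reaching end of
// input before the closing quote yields an INVALID token.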
void Lexer::scan_string() {
    std::string value;
    while (peek() != '"' && !is_at_end()) {
        if (peek() == '\n') {
            line_++;
            column_ = 0;
        }
        if (peek() == '\\' && peek_next() != '\0') {
            advance();  // consume backslash
            char escaped = advance();
            switch (escaped) {
                case 'n': value += '\n'; break;
                case 't': value += '\t'; break;
                case 'r': value += '\r'; break;
                case '\\': value += '\\'; break;
                case '"': value += '"'; break;
                default: value += escaped; break;
            }
        } else {
            value += advance();
        }
    }
    if (is_at_end()) {
        add_token(TokenType::INVALID);
        return;
    }
    advance();  // closing "
    add_token(TokenType::STRING_LITERAL, value);
}
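
// Scan a numeric literal: an integer part with an optional fractional
// part. A trailing '.' with no following digit is left for the DOT token.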
void Lexer::scan_number() {
    while (std::isdigit(static_cast<unsigned char>(peek()))) {
        advance();
    }
    if (peek() == '.' && std::isdigit(static_cast<unsigned char>(peek_next()))) {
        advance();  // consume '.'
        while (std::isdigit(static_cast<unsigned char>(peek()))) {
            advance();
        }
    }
    std::string text = source_.substr(start_, current_ - start_);
    double value = std::stod(text);
    add_token(TokenType::NUMBER_LITERAL, value);
}
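
// Scan an identifier ([A-Za-z_][A-Za-z0-9_]*) and classify it as a
// keyword via the lookup table below, defaulting to IDENTIFIER.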
void Lexer::scan_identifier() {
    while (std::isalnum(static_cast<unsigned char>(peek())) || peek() == '_') {
        advance();
    }
    std::string text = source_.substr(start_, current_ - start_);
    TokenType type = get_keyword_type(text);
    add_token(type);
}
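
// Keyword table: identifiers that are reserved words in Camellya. The
// map is built once (static const) and shared across all calls.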
TokenType Lexer::get_keyword_type(const std::string& text) const {
    static const std::unordered_map<std::string, TokenType> keywords = {
        {"class", TokenType::CLASS},
        {"func", TokenType::FUNC},
        {"number", TokenType::NUMBER},
        {"string", TokenType::STRING},
        {"bool", TokenType::BOOL},
        {"list", TokenType::LIST},
        {"map", TokenType::MAP},
        {"if", TokenType::IF},
        {"else", TokenType::ELSE},
        {"while", TokenType::WHILE},
        {"for", TokenType::FOR},
        {"return", TokenType::RETURN},
        {"var", TokenType::VAR},
        {"true", TokenType::TRUE},
        {"false", TokenType::FALSE},
        {"nil", TokenType::NIL},
        {"and", TokenType::AND},
        {"or", TokenType::OR},
        {"this", TokenType::THIS},
    };
    auto it = keywords.find(text);
    if (it != keywords.end()) {
        return it->second;
    }
    return TokenType::IDENTIFIER;
}
} // namespace camellya
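
// Example usage (illustrative sketch; assumes lexer.h declares Token with
// `type` and `lexeme` members, which is not shown in this commit):
//
//   camellya::Lexer lexer("var x = 1.5 + 2; // trailing comment");
//   std::vector<camellya::Token> tokens = lexer.tokenize();
//   // Expected stream: VAR, IDENTIFIER "x", EQUAL, NUMBER_LITERAL 1.5,
//   // PLUS, NUMBER_LITERAL 2, SEMICOLON, END_OF_FILE (comment skipped).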