#include "lexer.h" #include #include namespace camellya { Lexer::Lexer(std::string source) : source_(std::move(source)) {} std::vector Lexer::tokenize() { while (!is_at_end()) { start_ = current_; scan_token(); } tokens_.emplace_back(TokenType::END_OF_FILE, "", line_, column_); return tokens_; } char Lexer::advance() { column_++; return source_[current_++]; } char Lexer::peek() const { if (is_at_end()) return '\0'; return source_[current_]; } char Lexer::peek_next() const { if (current_ + 1 >= source_.length()) return '\0'; return source_[current_ + 1]; } bool Lexer::match(char expected) { if (is_at_end()) return false; if (source_[current_] != expected) return false; current_++; column_++; return true; } void Lexer::skip_whitespace() { while (!is_at_end()) { char c = peek(); switch (c) { case ' ': case '\r': case '\t': advance(); break; case '\n': line_++; column_ = 0; advance(); break; default: return; } } } void Lexer::skip_comment() { if (peek() == '/' && peek_next() == '/') { while (peek() != '\n' && !is_at_end()) { advance(); } } } void Lexer::scan_token() { skip_whitespace(); if (is_at_end()) return; start_ = current_; int start_column = column_; char c = advance(); switch (c) { case '(': add_token(TokenType::LEFT_PAREN); break; case ')': add_token(TokenType::RIGHT_PAREN); break; case '{': add_token(TokenType::LEFT_BRACE); break; case '}': add_token(TokenType::RIGHT_BRACE); break; case '[': add_token(TokenType::LEFT_BRACKET); break; case ']': add_token(TokenType::RIGHT_BRACKET); break; case ',': add_token(TokenType::COMMA); break; case '.': add_token(TokenType::DOT); break; case ';': add_token(TokenType::SEMICOLON); break; case ':': add_token(TokenType::COLON); break; case '+': add_token(TokenType::PLUS); break; case '*': add_token(TokenType::STAR); break; case '%': add_token(TokenType::PERCENT); break; case '-': if (match('>')) { add_token(TokenType::ARROW); } else { add_token(TokenType::MINUS); } break; case '!': add_token(match('=') ? TokenType::BANG_EQUAL : TokenType::BANG); break; case '=': add_token(match('=') ? TokenType::EQUAL_EQUAL : TokenType::EQUAL); break; case '<': add_token(match('=') ? TokenType::LESS_EQUAL : TokenType::LESS); break; case '>': add_token(match('=') ? 
TokenType::GREATER_EQUAL : TokenType::GREATER); break; case '/': if (peek() == '/') { skip_comment(); } else { add_token(TokenType::SLASH); } break; case '"': scan_string(); break; default: if (std::isdigit(c)) { scan_number(); } else if (std::isalpha(c) || c == '_') { scan_identifier(); } else { add_token(TokenType::INVALID); } break; } } void Lexer::add_token(TokenType type) { std::string text = source_.substr(start_, current_ - start_); tokens_.emplace_back(type, text, line_, column_ - static_cast(text.length())); } void Lexer::add_token(TokenType type, std::variant literal) { std::string text = source_.substr(start_, current_ - start_); tokens_.emplace_back(type, text, literal, line_, column_ - static_cast(text.length())); } void Lexer::scan_string() { std::string value; while (peek() != '"' && !is_at_end()) { if (peek() == '\n') { line_++; column_ = 0; } if (peek() == '\\' && peek_next() != '\0') { advance(); // consume backslash char escaped = advance(); switch (escaped) { case 'n': value += '\n'; break; case 't': value += '\t'; break; case 'r': value += '\r'; break; case '\\': value += '\\'; break; case '"': value += '"'; break; default: value += escaped; break; } } else { value += advance(); } } if (is_at_end()) { add_token(TokenType::INVALID); return; } advance(); // closing " add_token(TokenType::STRING_LITERAL, value); } void Lexer::scan_number() { while (std::isdigit(peek())) { advance(); } if (peek() == '.' && std::isdigit(peek_next())) { advance(); // consume '.' while (std::isdigit(peek())) { advance(); } } std::string text = source_.substr(start_, current_ - start_); double value = std::stod(text); add_token(TokenType::NUMBER_LITERAL, value); } void Lexer::scan_identifier() { while (std::isalnum(peek()) || peek() == '_') { advance(); } std::string text = source_.substr(start_, current_ - start_); TokenType type = get_keyword_type(text); add_token(type); } TokenType Lexer::get_keyword_type(const std::string& text) const { static const std::unordered_map keywords = { {"class", TokenType::CLASS}, {"func", TokenType::FUNC}, {"number", TokenType::NUMBER}, {"string", TokenType::STRING}, {"bool", TokenType::BOOL}, {"list", TokenType::LIST}, {"map", TokenType::MAP}, {"if", TokenType::IF}, {"else", TokenType::ELSE}, {"while", TokenType::WHILE}, {"for", TokenType::FOR}, {"return", TokenType::RETURN}, {"var", TokenType::VAR}, {"true", TokenType::TRUE}, {"false", TokenType::FALSE}, {"nil", TokenType::NIL}, {"and", TokenType::AND}, {"or", TokenType::OR}, {"this", TokenType::THIS}, }; auto it = keywords.find(text); if (it != keywords.end()) { return it->second; } return TokenType::IDENTIFIER; } } // namespace camellya
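
// Usage sketch, kept as a comment so the translation unit stays unchanged.
// It assumes the Token type declared in lexer.h exposes its TokenType and
// lexeme text; the exact member names are illustrative, not confirmed.
//
//     camellya::Lexer lexer("var answer = 42; // the answer");
//     std::vector<camellya::Token> tokens = lexer.tokenize();
//
// Expected token stream: VAR, IDENTIFIER("answer"), EQUAL,
// NUMBER_LITERAL(42.0), SEMICOLON, END_OF_FILE -- the trailing line comment
// is discarded by skip_comment().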