// Listing metadata: 206 lines, 5.1 KiB, C++
#include "scanner.hh"
#include "token.hh"
#include "error_handler.hh"
#include "object.hh"

#include <stdexcept>
#include <string>
#include <vector>
|
|
|
|
// Shared error reporter, declared here and defined elsewhere. The scanner
// calls error_handler.error(line, message) when it meets malformed input.
extern ErrorHandler error_handler;
|
|
|
|
// Creates a scanner over `code`. Scanning begins at offset 0 on line 1 with an
// empty token list, and the reserved-word table used by identifier() is
// populated here.
Scanner::Scanner(const std::string &code)
    : code{code}, tokens{}, start{0}, current{0}, line{1} {
    // Reserved words mapped to their dedicated token types. Any other
    // identifier-shaped lexeme is treated as a plain identifier.
    keywords = {
        {"and", TokenType::AND},
        {"class", TokenType::CLASS},
        {"else", TokenType::ELSE},
        {"false", TokenType::FALSE},
        {"for", TokenType::FOR},
        {"fun", TokenType::FUN},
        {"if", TokenType::IF},
        {"nil", TokenType::NIL},
        {"or", TokenType::OR},
        {"print", TokenType::PRINT},
        {"return", TokenType::RETURN},
        {"super", TokenType::SUPER},
        {"this", TokenType::THIS},
        {"true", TokenType::TRUE},
        {"var", TokenType::VAR},
        {"while", TokenType::WHILE},
    };
}
|
|
|
|
std::vector<Token> Scanner::scan_tokens() {
|
|
while (!is_at_end()) {
|
|
start = current;
|
|
scan_token();
|
|
}
|
|
|
|
tokens.push_back(Token(TokenType::EOFILE, "", Object{}, line));
|
|
return tokens;
|
|
}
|
|
|
|
bool Scanner::is_at_end() {
|
|
return current >= code.size();
|
|
}
|
|
|
|
// Returns true when `c` is one of the ASCII decimal digits '0' through '9'.
bool Scanner::is_digit(char c) {
    return '0' <= c && c <= '9';
}
|
|
|
|
// Returns true when `c` is an ASCII letter (either case) or an underscore.
bool Scanner::is_alpha(char c) {
    if (c == '_') {
        return true;
    }

    const bool lower_case = 'a' <= c && c <= 'z';
    const bool upper_case = 'A' <= c && c <= 'Z';
    return lower_case || upper_case;
}
|
|
|
|
bool Scanner::is_alphanumeric(char c) {
|
|
return is_alpha(c) || is_digit(c);
|
|
}
|
|
|
|
void Scanner::scan_token() {
|
|
char c = advance();
|
|
switch (c) {
|
|
case '(': add_token(TokenType::LEFT_PAREN); break;
|
|
case ')': add_token(TokenType::RIGHT_PAREN); break;
|
|
case '{': add_token(TokenType::LEFT_BRACE); break;
|
|
case '}': add_token(TokenType::RIGHT_BRACE); break;
|
|
case ',': add_token(TokenType::COMMA); break;
|
|
case '.': add_token(TokenType::DOT); break;
|
|
case '-': add_token(TokenType::MINUS); break;
|
|
case '+': add_token(TokenType::PLUS); break;
|
|
case ';': add_token(TokenType::SEMICOLON); break;
|
|
case '*': add_token(TokenType::STAR); break;
|
|
case '!':
|
|
add_token(match('=') ? TokenType::BANG_EQUAL : TokenType::BANG);
|
|
break;
|
|
case '=':
|
|
add_token(match('=') ? TokenType::EQUAL_EQUAL : TokenType::EQUAL);
|
|
break;
|
|
case '<':
|
|
add_token(match('=') ? TokenType::LESS_EQUAL : TokenType::LESS);
|
|
break;
|
|
case '>':
|
|
add_token(match('=') ? TokenType::GREATER_EQUAL : TokenType::GREATER);
|
|
break;
|
|
case '/':
|
|
if (match('/')) {
|
|
while (peek() != '\n' && !is_at_end()) {
|
|
advance();
|
|
}
|
|
} else {
|
|
add_token(TokenType::SLASH);
|
|
}
|
|
|
|
break;
|
|
case ' ':
|
|
case '\r':
|
|
case '\t':
|
|
break;
|
|
case '\n':
|
|
++line;
|
|
break;
|
|
case '"':
|
|
string();
|
|
break;
|
|
default:
|
|
if (is_digit(c)) {
|
|
number();
|
|
} else if(is_alpha(c)) {
|
|
identifier();
|
|
} else {
|
|
error_handler.error(line, "Unexpected character.");
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
char Scanner::advance() {
|
|
return code.at(current++);
|
|
}
|
|
|
|
bool Scanner::match(char expected) {
|
|
if (is_at_end() || peek() != expected) {
|
|
return false;
|
|
}
|
|
|
|
current++;
|
|
return true;
|
|
}
|
|
|
|
char Scanner::peek() {
|
|
if (is_at_end()) {
|
|
return '\0';
|
|
}
|
|
|
|
return code.at(current);
|
|
}
|
|
|
|
char Scanner::peek_next() {
|
|
if (current + 1 >= code.size()) {
|
|
return '\0';
|
|
}
|
|
|
|
return code.at(current + 1);
|
|
}
|
|
|
|
// Convenience overload for tokens without a literal value: forwards to the
// two-argument overload with a default-constructed Object.
void Scanner::add_token(TokenType type) {
    add_token(type, Object());
}
|
|
|
|
// Appends a token of `type` carrying `literal`. The lexeme is the source text
// in [start, current), and the token is tagged with the current line.
void Scanner::add_token(TokenType type, Object literal) {
    const auto lexeme_length = current - start;
    std::string lexeme = code.substr(start, lexeme_length);
    tokens.emplace_back(type, lexeme, literal, line);
}
|
|
|
|
void Scanner::string() {
|
|
while (peek() != '"' && !is_at_end()) {
|
|
if (peek() == '\n') line++;
|
|
advance();
|
|
}
|
|
|
|
if (is_at_end()) {
|
|
error_handler.error(line, "Unterminated string.");
|
|
return;
|
|
}
|
|
|
|
// The closing ".
|
|
advance();
|
|
|
|
// Trim the surrounding quotes.
|
|
size_t no_quote_start = start + 1;
|
|
size_t no_quote_end = current - 1;
|
|
std::string value = code.substr(no_quote_start, no_quote_end - no_quote_start);
|
|
add_token(TokenType::STRING, Object(StringObjectType::LITERAL, value));
|
|
}
|
|
|
|
void Scanner::number() {
|
|
while (is_digit(peek())) {
|
|
advance();
|
|
}
|
|
|
|
if (peek() == '.' && is_digit(peek_next())) {
|
|
// Consume the "."
|
|
advance();
|
|
|
|
while (is_digit(peek())) {
|
|
advance();
|
|
}
|
|
}
|
|
|
|
std::string value = code.substr(start, current - start);
|
|
add_token(TokenType::NUMBER, Object(std::stod(value)));
|
|
}
|
|
|
|
void Scanner::identifier() {
|
|
while (is_alphanumeric(peek())) {
|
|
advance();
|
|
}
|
|
|
|
std::string value = code.substr(start, current - start);
|
|
auto it = keywords.find(value);
|
|
if (it != keywords.end()) {
|
|
TokenType type = it->second;
|
|
add_token(type);
|
|
} else {
|
|
add_token(TokenType::IDENTIFIER, Object(StringObjectType::IDENTIFIER, value));
|
|
}
|
|
}
|