Store the current character and add more handlers after the state transition
This commit is contained in:
Abdelrahman Said 2023-07-30 19:18:10 +01:00
parent bf958056bd
commit 7cb4b28da8

View File

@ -58,6 +58,7 @@ struct lexer_s {
bool has_extra_token;
token_t extra_token;
dstr_t *error_message;
char current_char;
};
INTERNAL lexer_input_t char_type(char input);
@ -78,6 +79,7 @@ INTERNAL void set_token(token_t *token, u64 line, u64 column, token_type type,
INTERNAL void finalise_state_transition(lexer_t *lexer);
INTERNAL void post_keyword(lexer_t *lexer);
INTERNAL void set_numerical_token(lexer_t *lexer);
INTERNAL void handle_string_end(lexer_t *lexer);
INTERNAL lexer_state_t state_table[COUNT_LEXER_STATES][COUNT_LEXER_INPUTS] = {
#include "lexer_state_transitions.table"
@ -136,8 +138,6 @@ lex_result_t get_next_token(lexer_t *lexer, const char *text) {
dstr_clear(lexer->current_string);
char c;
while (lexer->cursor < lexer->text_length) {
if (lexer->has_extra_token) {
lexer->has_extra_token = false;
@ -148,15 +148,15 @@ lex_result_t get_next_token(lexer_t *lexer, const char *text) {
};
}
c = lexer->text[(lexer->cursor)++];
lexer->current_char = lexer->text[(lexer->cursor)++];
lexer_input_t current_input = char_type(c);
lexer_input_t input = char_type(lexer->current_char);
lexer->next = state_table[lexer->current][current_input];
lexer->next = state_table[lexer->current][input];
finalise_state_transition(lexer);
if (current_input == LEXER_INPUT_NEWLINE) {
if (input == LEXER_INPUT_NEWLINE) {
++(lexer->line);
lexer->column = 0;
} else {
@ -437,16 +437,53 @@ void set_token(token_t *token, u64 line, u64 column, token_type type,
void finalise_state_transition(lexer_t *lexer) {
switch (lexer->next) {
case LEXER_STATE_OBJECT_START:
lexer->token_ready = true;
set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACE,
(token_value_t){0});
stack_push(&(lexer->stack), LEXER_STATE_OBJECT);
lexer->next = LEXER_STATE_OBJECT;
break;
case LEXER_STATE_OBJECT_END:
break;
case LEXER_STATE_ARRAY_START:
lexer->token_ready = true;
set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACKET,
(token_value_t){0});
stack_push(&(lexer->stack), LEXER_STATE_ARRAY);
lexer->next = LEXER_STATE_ARRAY;
break;
case LEXER_STATE_ARRAY_END:
break;
case LEXER_STATE_KEY:
if (lexer->current == LEXER_STATE_OBJECT) {
stack_push(&(lexer->stack), LEXER_STATE_KEY);
lexer->next = LEXER_STATE_STRING;
}
break;
case LEXER_STATE_KEY_END:
lexer->next = LEXER_STATE_VALUE;
break;
case LEXER_STATE_STRING:
case LEXER_STATE_ESCAPE_SEQUENCE:
case LEXER_STATE_UNICODE_HEX1:
case LEXER_STATE_UNICODE_HEX2:
case LEXER_STATE_UNICODE_HEX3:
case LEXER_STATE_UNICODE_HEX4:
dstr_append(&(lexer->current_string), lexer->current_char);
break;
case LEXER_STATE_STRING_END:
handle_string_end(lexer);
break;
case LEXER_STATE_TRUE:
case LEXER_STATE_FALSE:
@ -471,7 +508,16 @@ void finalise_state_transition(lexer_t *lexer) {
break;
case LEXER_STATE_NUMBER_END:
set_numerical_token(lexer);
switch (lexer->current) {
case LEXER_STATE_NUMBER:
case LEXER_STATE_FRACTION:
case LEXER_STATE_POWER:
set_numerical_token(lexer);
break;
default:
break;
}
break;
}
@ -513,14 +559,14 @@ void post_keyword(lexer_t *lexer) {
break;
default:
lexer->current = LEXER_STATE_ERROR;
lexer->next = LEXER_STATE_ERROR;
return;
}
lexer->token_ready = true;
lexer->current = LEXER_STATE_KEYWORD_END;
lexer->next = LEXER_STATE_KEYWORD_END;
}
void set_numerical_token(lexer_t *lexer) {
@ -532,6 +578,21 @@ void set_numerical_token(lexer_t *lexer) {
set_token(&(lexer->token), lexer->line, column, token.type, token.value);
}
/**
 * Emit the token for a string literal that has just been closed.
 *
 * The entry on top of the lexer's state stack selects the token type:
 * LEXER_STATE_KEY produces TK_STR_KEY and LEXER_STATE_VALUE produces
 * TK_STR_VAL, with the text taken from lexer->current_string.
 *
 * lexer->token_ready is raised only after a token has actually been written,
 * so a caller never consumes a stale lexer->token. When the stack is empty or
 * its top entry is neither a key nor a value, the lexer is left untouched.
 *
 * @param lexer Lexer whose current_string holds the finished string.
 */
void handle_string_end(lexer_t *lexer) {
  // An empty stack has no top entry; indexing [size - 1] would read out of
  // bounds.
  if (lexer->stack.size == 0) {
    return;
  }

  lexer_state_t string_type = lexer->stack.stack[lexer->stack.size - 1];

  // Only key and value contexts map to a string token.
  if (string_type != LEXER_STATE_KEY && string_type != LEXER_STATE_VALUE) {
    return;
  }

  // NOTE(review): assumes lexer->column is at least the string's length (the
  // string sits on a single line); otherwise this subtraction wraps - confirm.
  u64 column = lexer->column - dstr_length(lexer->current_string);

  // NOTE(review): ownership of the buffer returned by dstr_to_cstr is not
  // visible from here - confirm who releases it.
  token_value_t value = {.string = dstr_to_cstr(lexer->current_string)};

  set_token(&(lexer->token), lexer->line, column,
            string_type == LEXER_STATE_KEY ? TK_STR_KEY : TK_STR_VAL, value);

  lexer->token_ready = true;
}
void lexer_state_machine(lexer_t *lexer, char input) {
switch (lexer->current) {
case LEXER_STATE_START: