From 7cb4b28da8df46443bb1aa336ccd94e497df8c48 Mon Sep 17 00:00:00 2001 From: Abdelrahman Date: Sun, 30 Jul 2023 19:18:10 +0100 Subject: [PATCH] Store the current character and add more handlers after the state transition --- src/lexer/lexer.c | 79 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 70 insertions(+), 9 deletions(-) diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index cca1c17..9e570cd 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -58,6 +58,7 @@ struct lexer_s { bool has_extra_token; token_t extra_token; dstr_t *error_message; + char current_char; }; INTERNAL lexer_input_t char_type(char input); @@ -78,6 +79,7 @@ INTERNAL void set_token(token_t *token, u64 line, u64 column, token_type type, INTERNAL void finalise_state_transition(lexer_t *lexer); INTERNAL void post_keyword(lexer_t *lexer); INTERNAL void set_numerical_token(lexer_t *lexer); +INTERNAL void handle_string_end(lexer_t *lexer); INTERNAL lexer_state_t state_table[COUNT_LEXER_STATES][COUNT_LEXER_INPUTS] = { #include "lexer_state_transitions.table" @@ -136,8 +138,6 @@ lex_result_t get_next_token(lexer_t *lexer, const char *text) { dstr_clear(lexer->current_string); - char c; - while (lexer->cursor < lexer->text_length) { if (lexer->has_extra_token) { lexer->has_extra_token = false; @@ -148,15 +148,15 @@ lex_result_t get_next_token(lexer_t *lexer, const char *text) { }; } - c = lexer->text[(lexer->cursor)++]; + lexer->current_char = lexer->text[(lexer->cursor)++]; - lexer_input_t current_input = char_type(c); + lexer_input_t input = char_type(lexer->current_char); - lexer->next = state_table[lexer->current][current_input]; + lexer->next = state_table[lexer->current][input]; finalise_state_transition(lexer); - if (current_input == LEXER_INPUT_NEWLINE) { + if (input == LEXER_INPUT_NEWLINE) { ++(lexer->line); lexer->column = 0; } else { @@ -437,16 +437,53 @@ void set_token(token_t *token, u64 line, u64 column, token_type type, void finalise_state_transition(lexer_t *lexer) { switch (lexer->next) { case LEXER_STATE_OBJECT_START: + lexer->token_ready = true; + set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACE, + (token_value_t){0}); + stack_push(&(lexer->stack), LEXER_STATE_OBJECT); lexer->next = LEXER_STATE_OBJECT; + break; + case LEXER_STATE_OBJECT_END: break; case LEXER_STATE_ARRAY_START: + lexer->token_ready = true; + set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACKET, + (token_value_t){0}); + stack_push(&(lexer->stack), LEXER_STATE_ARRAY); lexer->next = LEXER_STATE_ARRAY; + break; + case LEXER_STATE_ARRAY_END: + break; + case LEXER_STATE_KEY: + if (lexer->current == LEXER_STATE_OBJECT) { + stack_push(&(lexer->stack), LEXER_STATE_KEY); + + lexer->next = LEXER_STATE_STRING; + } + + break; + case LEXER_STATE_KEY_END: + lexer->next = LEXER_STATE_VALUE; + + break; + case LEXER_STATE_STRING: + case LEXER_STATE_ESCAPE_SEQUENCE: + case LEXER_STATE_UNICODE_HEX1: + case LEXER_STATE_UNICODE_HEX2: + case LEXER_STATE_UNICODE_HEX3: + case LEXER_STATE_UNICODE_HEX4: + dstr_append(&(lexer->current_string), lexer->current_char); + + break; + case LEXER_STATE_STRING_END: + handle_string_end(lexer); + break; case LEXER_STATE_TRUE: case LEXER_STATE_FALSE: @@ -471,7 +508,16 @@ void finalise_state_transition(lexer_t *lexer) { break; case LEXER_STATE_NUMBER_END: - set_numerical_token(lexer); + switch (lexer->current) { + case LEXER_STATE_NUMBER: + case LEXER_STATE_FRACTION: + case LEXER_STATE_POWER: + set_numerical_token(lexer); + + break; + default: + break; + } break; } @@ -513,14 +559,14 @@ void post_keyword(lexer_t *lexer) { break; default: - lexer->current = LEXER_STATE_ERROR; + lexer->next = LEXER_STATE_ERROR; return; } lexer->token_ready = true; - lexer->current = LEXER_STATE_KEYWORD_END; + lexer->next = LEXER_STATE_KEYWORD_END; } void set_numerical_token(lexer_t *lexer) { @@ -532,6 +578,21 @@ void set_numerical_token(lexer_t *lexer) { set_token(&(lexer->token), lexer->line, column, token.type, token.value); } +void handle_string_end(lexer_t *lexer) { + lexer_state_t string_type = lexer->stack.stack[lexer->stack.size - 1]; + + lexer->token_ready = true; + token_t *token = &(lexer->token); + u64 column = lexer->column - dstr_length(lexer->current_string); + token_value_t value = {.string = dstr_to_cstr(lexer->current_string)}; + + if (string_type == LEXER_STATE_KEY) { + set_token(token, lexer->line, column, TK_STR_KEY, value); + } else if (string_type == LEXER_STATE_VALUE) { + set_token(token, lexer->line, column, TK_STR_VAL, value); + } +} + void lexer_state_machine(lexer_t *lexer, char input) { switch (lexer->current) { case LEXER_STATE_START: