From b59c59e7e1560d5e824dfb4767c0918a91b0420d Mon Sep 17 00:00:00 2001 From: Abdelrahman Date: Sun, 30 Jul 2023 22:44:11 +0100 Subject: [PATCH] Refactor the lexer to use the state transitions table --- src/lexer/lexer.c | 870 ++++++++-------------------------------------- 1 file changed, 154 insertions(+), 716 deletions(-) diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 9e570cd..2c02a7f 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -59,6 +59,7 @@ struct lexer_s { token_t extra_token; dstr_t *error_message; char current_char; + lexer_input_t current_input; }; INTERNAL lexer_input_t char_type(char input); @@ -66,20 +67,16 @@ INTERNAL lexer_input_t char_type(char input); INTERNAL void stack_push(state_stack_t *stack, lexer_state_t value); INTERNAL lexer_state_t stack_pop(state_stack_t *stack); -INTERNAL void append_to_lex_str(lexer_string_t *str, char input); -INTERNAL void clear_lex_str(lexer_string_t *str); -INTERNAL bool strequal(const char *const first, const char *const second); -INTERNAL bool is_valid_hex_char(const char input); -INTERNAL bool ishex(const char input); - INTERNAL token_t dstr_to_numerical_token(const dstr_t *str); INTERNAL void set_token(token_t *token, u64 line, u64 column, token_type type, token_value_t value); INTERNAL void finalise_state_transition(lexer_t *lexer); +INTERNAL void handle_object_end(lexer_t *lexer); +INTERNAL void handle_array_end(lexer_t *lexer); +INTERNAL void handle_string_end(lexer_t *lexer); INTERNAL void post_keyword(lexer_t *lexer); INTERNAL void set_numerical_token(lexer_t *lexer); -INTERNAL void handle_string_end(lexer_t *lexer); INTERNAL lexer_state_t state_table[COUNT_LEXER_STATES][COUNT_LEXER_INPUTS] = { #include "lexer_state_transitions.table" @@ -150,13 +147,13 @@ lex_result_t get_next_token(lexer_t *lexer, const char *text) { lexer->current_char = lexer->text[(lexer->cursor)++]; - lexer_input_t input = char_type(lexer->current_char); + lexer->current_input = char_type(lexer->current_char); - lexer->next = state_table[lexer->current][input]; + lexer->next = state_table[lexer->current][lexer->current_input]; finalise_state_transition(lexer); - if (input == LEXER_INPUT_NEWLINE) { + if (lexer->current_input == LEXER_INPUT_NEWLINE) { ++(lexer->line); lexer->column = 0; } else { @@ -347,67 +344,6 @@ lexer_state_t stack_pop(state_stack_t *stack) { return state; } -void append_to_lex_str(lexer_string_t *lex_str, char input) { - u64 capacity = 0; - char *str = NULL; - - switch (lex_str->type) { - case LEXER_STRING_KEYWORD: - capacity = MAX_KEYWORD_LENGTH; - str = lex_str->keyword.str; - - break; - case LEXER_STRING_UNICODE: - capacity = UNICODE_LENGTH; - str = lex_str->unicode.codepoint; - - break; - } - - if (lex_str->size + 1 > capacity) { - return; - } - - assert(str != NULL); - - str[(lex_str->size)++] = input; -} - -void clear_lex_str(lexer_string_t *lex_str) { - u64 capacity = 1; - char *str = NULL; - - switch (lex_str->type) { - case LEXER_STRING_KEYWORD: - capacity += MAX_KEYWORD_LENGTH; - str = lex_str->keyword.str; - - break; - case LEXER_STRING_UNICODE: - capacity += UNICODE_LENGTH; - str = lex_str->unicode.codepoint; - - break; - } - - assert(str != NULL); - - memset(str, 0, capacity); - lex_str->size = 0; -} - -bool strequal(const char *const first, const char *const second) { - return strcmp(first, second) == 0; -} - -bool is_valid_hex_char(const char input) { - return (input >= 'A' && input <= 'F') || (input >= 'a' && input <= 'e'); -} - -bool ishex(const char input) { - return isdigit(input) || is_valid_hex_char(input); -} - token_t dstr_to_numerical_token(const dstr_t *str) { token_t token = {0}; @@ -447,6 +383,24 @@ void finalise_state_transition(lexer_t *lexer) { break; case LEXER_STATE_OBJECT_END: + switch (lexer->current) { + case LEXER_STATE_NUMBER: + case LEXER_STATE_FRACTION: + case LEXER_STATE_POWER: + case LEXER_STATE_NUMBER_END: + if (dstr_length(lexer->current_string) > 0) { + set_numerical_token(lexer); + } + + break; + default: + break; + } + + if (lexer->current_input == LEXER_INPUT_CLOSE_BRACE) { + handle_object_end(lexer); + } + break; case LEXER_STATE_ARRAY_START: lexer->token_ready = true; @@ -459,6 +413,24 @@ void finalise_state_transition(lexer_t *lexer) { break; case LEXER_STATE_ARRAY_END: + switch (lexer->current) { + case LEXER_STATE_NUMBER: + case LEXER_STATE_FRACTION: + case LEXER_STATE_POWER: + case LEXER_STATE_NUMBER_END: + if (dstr_length(lexer->current_string) > 0) { + set_numerical_token(lexer); + } + + break; + default: + break; + } + + if (lexer->current_input == LEXER_INPUT_CLOSE_BRACKET) { + handle_array_end(lexer); + } + break; case LEXER_STATE_KEY: if (lexer->current == LEXER_STATE_OBJECT) { @@ -473,16 +445,31 @@ void finalise_state_transition(lexer_t *lexer) { break; case LEXER_STATE_STRING: + if (lexer->current == LEXER_STATE_VALUE || + lexer->current == LEXER_STATE_ARRAY) { + stack_push(&(lexer->stack), LEXER_STATE_VALUE); + break; + } + + // break left out intentionally case LEXER_STATE_ESCAPE_SEQUENCE: case LEXER_STATE_UNICODE_HEX1: case LEXER_STATE_UNICODE_HEX2: case LEXER_STATE_UNICODE_HEX3: case LEXER_STATE_UNICODE_HEX4: + case LEXER_STATE_DECIMAL: + case LEXER_STATE_NUMBER: + case LEXER_STATE_FRACTION: + case LEXER_STATE_EXPONENT: + case LEXER_STATE_EXP_SIGN: + case LEXER_STATE_POWER: dstr_append(&(lexer->current_string), lexer->current_char); break; case LEXER_STATE_STRING_END: - handle_string_end(lexer); + if (lexer->current_input == LEXER_INPUT_DOUBLE_QUOTE) { + handle_string_end(lexer); + } break; case LEXER_STATE_TRUE: @@ -497,7 +484,9 @@ void finalise_state_transition(lexer_t *lexer) { case LEXER_STATE_FRACTION: case LEXER_STATE_POWER: case LEXER_STATE_NUMBER_END: - set_numerical_token(lexer); + if (dstr_length(lexer->current_string) > 0) { + set_numerical_token(lexer); + } break; default: @@ -519,19 +508,110 @@ void finalise_state_transition(lexer_t *lexer) { break; } + break; + case LEXER_STATE_ERROR: + case LEXER_STATE_START: + case LEXER_STATE_VALUE: + case LEXER_STATE_OBJECT: + case LEXER_STATE_ARRAY: + case LEXER_STATE_LAST_COLLECTION: + case LEXER_STATE_T: + case LEXER_STATE_TR: + case LEXER_STATE_TRU: + case LEXER_STATE_F: + case LEXER_STATE_FA: + case LEXER_STATE_FAL: + case LEXER_STATE_FALS: + case LEXER_STATE_N: + case LEXER_STATE_NU: + case LEXER_STATE_NUL: + case LEXER_STATE_KEYWORD_END: + case COUNT_LEXER_STATES: break; } lexer->current = lexer->next; } +void handle_object_end(lexer_t *lexer) { + lexer_state_t last = lexer->stack.stack[lexer->stack.size - 1]; + + if (last != LEXER_STATE_OBJECT) { + lexer->next = LEXER_STATE_ERROR; + + return; + } + + if (lexer->stack.size > 0) { + stack_pop(&(lexer->stack)); + } else { + lexer->next = LEXER_STATE_LAST_COLLECTION; + } + + token_t *token; + + if (lexer->token_ready) { + lexer->has_extra_token = true; + token = &(lexer->extra_token); + } else { + lexer->token_ready = true; + token = &(lexer->token); + } + + set_token(token, lexer->line, lexer->column, TK_R_BRACE, (token_value_t){0}); +} + +void handle_array_end(lexer_t *lexer) { + lexer_state_t last = lexer->stack.stack[lexer->stack.size - 1]; + + if (last != LEXER_STATE_ARRAY) { + lexer->next = LEXER_STATE_ERROR; + + return; + } + + if (lexer->stack.size > 0) { + stack_pop(&(lexer->stack)); + } else { + lexer->next = LEXER_STATE_LAST_COLLECTION; + } + + token_t *token; + + if (lexer->token_ready) { + lexer->has_extra_token = true; + token = &(lexer->extra_token); + } else { + lexer->token_ready = true; + token = &(lexer->token); + } + + set_token(token, lexer->line, lexer->column, TK_R_BRACKET, + (token_value_t){0}); +} + +void handle_string_end(lexer_t *lexer) { + lexer_state_t string_type = stack_pop(&(lexer->stack)); + + lexer->token_ready = true; + token_t *token = &(lexer->token); + u64 column = lexer->column - dstr_length(lexer->current_string); + token_value_t value = {.string = dstr_to_cstr(lexer->current_string)}; + + if (string_type == LEXER_STATE_KEY) { + set_token(token, lexer->line, column, TK_STR_KEY, value); + } else if (string_type == LEXER_STATE_VALUE) { + set_token(token, lexer->line, column, TK_STR_VAL, value); + } +} + void post_keyword(lexer_t *lexer) { u64 keyword_char_count; u64 column; token_t *token = &(lexer->token); - switch (lexer->current) { + switch (lexer->next) { case LEXER_STATE_NULL: keyword_char_count = 4; @@ -577,645 +657,3 @@ void set_numerical_token(lexer_t *lexer) { set_token(&(lexer->token), lexer->line, column, token.type, token.value); } - -void handle_string_end(lexer_t *lexer) { - lexer_state_t string_type = lexer->stack.stack[lexer->stack.size - 1]; - - lexer->token_ready = true; - token_t *token = &(lexer->token); - u64 column = lexer->column - dstr_length(lexer->current_string); - token_value_t value = {.string = dstr_to_cstr(lexer->current_string)}; - - if (string_type == LEXER_STATE_KEY) { - set_token(token, lexer->line, column, TK_STR_KEY, value); - } else if (string_type == LEXER_STATE_VALUE) { - set_token(token, lexer->line, column, TK_STR_VAL, value); - } -} - -void lexer_state_machine(lexer_t *lexer, char input) { - switch (lexer->current) { - case LEXER_STATE_START: - lexer->current = handle_lexer_start(lexer, input); - break; - case LEXER_STATE_VALUE: - lexer->current = handle_value(lexer, input); - break; - case LEXER_STATE_OBJECT_START: - stack_push(&(lexer->stack), LEXER_STATE_OBJECT); - // break is left out intentionally here to utilise the fallthrough behaviour - // of the switch statement - case LEXER_STATE_OBJECT: - lexer->current = handle_object(lexer, input); - break; - case LEXER_STATE_ARRAY_START: - stack_push(&(lexer->stack), LEXER_STATE_ARRAY); - // break is left out intentionally here to utilise the fallthrough behaviour - // of the switch statement - case LEXER_STATE_ARRAY: - lexer->current = handle_array(lexer, input); - break; - case LEXER_STATE_OBJECT_END: - case LEXER_STATE_ARRAY_END: - if (lexer->stack.size > 1) { - stack_pop(&(lexer->stack)); - - lexer->current = lexer->stack.stack[lexer->stack.size - 1]; - } else { - lexer->current = LEXER_STATE_LAST_COLLECTION; - } - - handle_input_after_collection_end(lexer, input); - - break; - case LEXER_STATE_KEY: - lexer->current = handle_key(lexer, input); - break; - case LEXER_STATE_DECIMAL: - lexer->current = handle_decimal(lexer, input); - break; - case LEXER_STATE_NUMBER: - lexer->current = handle_number(lexer, input); - break; - case LEXER_STATE_FRACTION: - lexer->current = handle_fraction(lexer, input); - break; - case LEXER_STATE_EXPONENT: - lexer->current = handle_exponent(lexer, input); - break; - case LEXER_STATE_EXP_SIGN: - lexer->current = handle_exp_sign(lexer, input); - break; - case LEXER_STATE_POWER: - lexer->current = handle_power(lexer, input); - break; - case LEXER_STATE_NUMBER_END: - lexer->current = handle_number_end(lexer, input); - break; - case LEXER_STATE_STRING: - lexer->current = handle_string(lexer, input); - break; - case LEXER_STATE_STRING_END: - lexer->current = handle_string_end(lexer, input); - break; - case LEXER_STATE_ESCAPE_SEQUENCE: - lexer->current = handle_escape_sequence(lexer, input); - break; - case LEXER_STATE_UNICODE_HEX: - lexer->current = handle_unicode_sequence(lexer, input); - break; - case LEXER_STATE_TRUE: - lexer->current = handle_true(lexer, input); - break; - case LEXER_STATE_FALSE: - lexer->current = handle_false(lexer, input); - break; - case LEXER_STATE_NULL: - lexer->current = handle_null(lexer, input); - break; - case LEXER_STATE_KEYWORD_END: - lexer->current = handle_keyword_end(lexer, input); - break; - case LEXER_STATE_LAST_COLLECTION: - lexer->current = handle_last_collection(input); - break; - case LEXER_STATE_ERROR: - case COUNT_LEXER_STATES: - lexer->current = LEXER_STATE_ERROR; - break; - } -} - -lexer_state_t handle_lexer_start(lexer_t *lexer, char input) { - if (isspace(input)) { - return LEXER_STATE_START; - } - - switch (input) { - case '{': - lexer->token_ready = true; - set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACE, - (token_value_t){0}); - - return LEXER_STATE_OBJECT_START; - case '[': - lexer->token_ready = true; - set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACKET, - (token_value_t){0}); - - return LEXER_STATE_ARRAY_START; - } - - return LEXER_STATE_ERROR; -} - -lexer_state_t handle_last_collection(char input) { - if (isspace(input)) { - return LEXER_STATE_LAST_COLLECTION; - } - - return LEXER_STATE_ERROR; -} - -lexer_state_t handle_collection_end(lexer_t *lexer, char input) { - // No need to ignore space as this is only called when input is } or ] - - lexer->current = lexer->stack.stack[lexer->stack.size - 1]; - - bool object_end = lexer->current == LEXER_STATE_OBJECT && input == '}'; - - if (object_end) { - token_t *token; - - if (lexer->token_ready) { - lexer->has_extra_token = true; - token = &(lexer->extra_token); - } else { - lexer->token_ready = true; - token = &(lexer->token); - } - - set_token(token, lexer->line, lexer->column, TK_R_BRACE, - (token_value_t){0}); - - return LEXER_STATE_OBJECT_END; - } - - bool array_end = lexer->current == LEXER_STATE_ARRAY && input == ']'; - - if (array_end) { - token_t *token; - - if (lexer->token_ready) { - lexer->has_extra_token = true; - token = &(lexer->extra_token); - } else { - lexer->token_ready = true; - token = &(lexer->token); - } - - set_token(token, lexer->line, lexer->column, TK_R_BRACKET, - (token_value_t){0}); - - return LEXER_STATE_ARRAY_END; - } - - return LEXER_STATE_ERROR; -} - -void handle_input_after_collection_end(lexer_t *lexer, char input) { - switch (input) { - case '}': - lexer->token_ready = true; - set_token(&(lexer->token), lexer->line, lexer->column, TK_R_BRACE, - (token_value_t){0}); - - break; - case ']': - lexer->token_ready = true; - set_token(&(lexer->token), lexer->line, lexer->column, TK_R_BRACKET, - (token_value_t){0}); - - break; - } -} - -lexer_state_t handle_object(lexer_t *lexer, char input) { - if (isspace(input)) { - return LEXER_STATE_OBJECT; - } else if (input == '"') { - stack_push(&(lexer->stack), LEXER_STATE_KEY); - - return LEXER_STATE_KEY; - } else if (input == '}') { - return handle_collection_end(lexer, input); - } - - return LEXER_STATE_ERROR; -} - -lexer_state_t handle_array(lexer_t *lexer, char input) { - if (isspace(input)) { - return LEXER_STATE_ARRAY; - } else if (input == ']') { - return handle_collection_end(lexer, input); - } - - return handle_value(lexer, input); -} - -lexer_state_t handle_key(lexer_t *lexer, char input) { - dstr_append(&(lexer->current_string), input); - - return LEXER_STATE_STRING; -} - -lexer_state_t handle_value(lexer_t *lexer, char input) { - if (isspace(input)) { - return LEXER_STATE_VALUE; - } else if ((isdigit(input) && input != '0') || input == '-') { - dstr_append(&(lexer->current_string), input); - - return LEXER_STATE_NUMBER; - } - - switch (input) { - case '"': - stack_push(&(lexer->stack), LEXER_STATE_VALUE); - - return LEXER_STATE_STRING; - case '0': - dstr_append(&(lexer->current_string), input); - - return LEXER_STATE_DECIMAL; - case '{': - lexer->token_ready = true; - set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACE, - (token_value_t){0}); - - return LEXER_STATE_OBJECT_START; - case '[': - lexer->token_ready = true; - set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACKET, - (token_value_t){0}); - - return LEXER_STATE_ARRAY_START; - case 't': - case 'f': - case 'n': - append_to_lex_str(&(lexer->keyword), input); - - return handle_keyword(input); - } - - return LEXER_STATE_ERROR; -} - -lexer_state_t handle_string(lexer_t *lexer, char input) { - switch (input) { - case '\\': - dstr_append(&(lexer->current_string), input); - - return LEXER_STATE_ESCAPE_SEQUENCE; - case '"': { - lexer_state_t string_type = lexer->stack.stack[lexer->stack.size - 1]; - - lexer->token_ready = true; - token_t *token = &(lexer->token); - u64 column = lexer->column - dstr_length(lexer->current_string); - token_value_t value = {.string = dstr_to_cstr(lexer->current_string)}; - - if (string_type == LEXER_STATE_KEY) { - set_token(token, lexer->line, column, TK_STR_KEY, value); - } else if (string_type == LEXER_STATE_VALUE) { - set_token(token, lexer->line, column, TK_STR_VAL, value); - } - - return LEXER_STATE_STRING_END; - } - } - - dstr_append(&(lexer->current_string), input); - - return LEXER_STATE_STRING; -} - -lexer_state_t handle_string_end(lexer_t *lexer, char input) { - if (isspace(input)) { - return LEXER_STATE_STRING_END; - } - - lexer->current = stack_pop(&(lexer->stack)); - - bool key_end = lexer->current == LEXER_STATE_KEY && input == ':'; - - if (key_end) { - return LEXER_STATE_VALUE; - } - - bool value_end = lexer->current == LEXER_STATE_VALUE && input == ','; - - if (value_end) { - return lexer->stack.stack[lexer->stack.size - 1]; - } - - bool collection_end = input == '}' || input == ']'; - - return collection_end ? handle_collection_end(lexer, input) - : LEXER_STATE_ERROR; -} - -lexer_state_t handle_escape_sequence(lexer_t *lexer, char input) { - dstr_append(&(lexer->current_string), input); - - switch (input) { - case '"': - case '/': - case '\\': - case 'b': - case 'f': - case 'n': - case 'r': - case 't': - return LEXER_STATE_STRING; - case 'u': - return LEXER_STATE_UNICODE_HEX; - } - - return LEXER_STATE_ERROR; -} - -lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input) { - append_to_lex_str(&(lexer->codepoint), input); - dstr_append(&(lexer->current_string), input); - - if (!ishex(input)) { - clear_lex_str(&(lexer->codepoint)); - - return LEXER_STATE_ERROR; - } else if (lexer->codepoint.size == UNICODE_LENGTH) { - clear_lex_str(&(lexer->codepoint)); - - return LEXER_STATE_STRING; - } - - return LEXER_STATE_UNICODE_HEX; -} - -lexer_state_t handle_decimal(lexer_t *lexer, char input) { - dstr_append(&(lexer->current_string), input); - - if (input == '.') { - return LEXER_STATE_FRACTION; - } - - return LEXER_STATE_ERROR; -} - -lexer_state_t handle_number(lexer_t *lexer, char input) { - if (isdigit(input)) { - dstr_append(&(lexer->current_string), input); - - return LEXER_STATE_NUMBER; - } else if (input == '.') { - dstr_append(&(lexer->current_string), input); - - return LEXER_STATE_FRACTION; - } else if (input == '}' || input == ']') { - lexer->token_ready = true; - u64 column = lexer->column - dstr_length(lexer->current_string); - - token_t token = dstr_to_numerical_token(lexer->current_string); - - set_token(&(lexer->token), lexer->line, column, token.type, token.value); - - return handle_collection_end(lexer, input); - } else if (input == ',') { - lexer->token_ready = true; - u64 column = lexer->column - dstr_length(lexer->current_string); - - token_t token = dstr_to_numerical_token(lexer->current_string); - - set_token(&(lexer->token), lexer->line, column, token.type, token.value); - - return lexer->stack.stack[lexer->stack.size - 1]; - } else if (isspace(input)) { - lexer->token_ready = true; - u64 column = lexer->column - dstr_length(lexer->current_string); - - token_t token = dstr_to_numerical_token(lexer->current_string); - - set_token(&(lexer->token), lexer->line, column, token.type, token.value); - - return LEXER_STATE_NUMBER_END; - } - - return LEXER_STATE_ERROR; -} - -lexer_state_t handle_fraction(lexer_t *lexer, char input) { - if (isdigit(input)) { - dstr_append(&(lexer->current_string), input); - - return LEXER_STATE_FRACTION; - } else if (input == '}' || input == ']') { - lexer->token_ready = true; - u64 column = lexer->column - dstr_length(lexer->current_string); - - token_t token = dstr_to_numerical_token(lexer->current_string); - - set_token(&(lexer->token), lexer->line, column, token.type, token.value); - - return handle_collection_end(lexer, input); - } else if (input == 'e' || input == 'E') { - dstr_append(&(lexer->current_string), input); - - return LEXER_STATE_EXPONENT; - } else if (input == ',') { - lexer->token_ready = true; - u64 column = lexer->column - dstr_length(lexer->current_string); - - token_t token = dstr_to_numerical_token(lexer->current_string); - - set_token(&(lexer->token), lexer->line, column, token.type, token.value); - - return lexer->stack.stack[lexer->stack.size - 1]; - } else if (isspace(input)) { - lexer->token_ready = true; - u64 column = lexer->column - dstr_length(lexer->current_string); - - token_t token = dstr_to_numerical_token(lexer->current_string); - - set_token(&(lexer->token), lexer->line, column, token.type, token.value); - - return LEXER_STATE_NUMBER_END; - } - - return LEXER_STATE_ERROR; -} - -lexer_state_t handle_exponent(lexer_t *lexer, char input) { - dstr_append(&(lexer->current_string), input); - - if (isdigit(input)) { - return LEXER_STATE_POWER; - } else if (input == '+' || input == '-') { - return LEXER_STATE_EXP_SIGN; - } - - return LEXER_STATE_ERROR; -} - -lexer_state_t handle_exp_sign(lexer_t *lexer, char input) { - dstr_append(&(lexer->current_string), input); - - if (isdigit(input)) { - return LEXER_STATE_POWER; - } - - return LEXER_STATE_ERROR; -} - -lexer_state_t handle_power(lexer_t *lexer, char input) { - if (isdigit(input)) { - dstr_append(&(lexer->current_string), input); - - return LEXER_STATE_POWER; - } else if (input == '}' || input == ']') { - lexer->token_ready = true; - u64 column = lexer->column - dstr_length(lexer->current_string); - - token_t token = dstr_to_numerical_token(lexer->current_string); - - set_token(&(lexer->token), lexer->line, column, token.type, token.value); - - return handle_collection_end(lexer, input); - } else if (input == ',') { - lexer->token_ready = true; - u64 column = lexer->column - dstr_length(lexer->current_string); - - token_t token = dstr_to_numerical_token(lexer->current_string); - - set_token(&(lexer->token), lexer->line, column, token.type, token.value); - - return lexer->stack.stack[lexer->stack.size - 1]; - } else if (isspace(input)) { - lexer->token_ready = true; - u64 column = lexer->column - dstr_length(lexer->current_string); - - token_t token = dstr_to_numerical_token(lexer->current_string); - - set_token(&(lexer->token), lexer->line, column, token.type, token.value); - - return LEXER_STATE_NUMBER_END; - } - - return LEXER_STATE_ERROR; -} - -lexer_state_t handle_number_end(lexer_t *lexer, char input) { - if (isspace(input)) { - return LEXER_STATE_NUMBER_END; - } else if (input == ',') { - lexer->token_ready = true; - u64 column = lexer->column - dstr_length(lexer->current_string); - - token_t token = dstr_to_numerical_token(lexer->current_string); - - set_token(&(lexer->token), lexer->line, column, token.type, token.value); - - return lexer->stack.stack[lexer->stack.size - 1]; - } - - bool collection_end = input == '}' || input == ']'; - - return collection_end ? handle_collection_end(lexer, input) - : LEXER_STATE_ERROR; -} - -lexer_state_t handle_keyword(char input) { - switch (input) { - case 't': - return LEXER_STATE_TRUE; - case 'f': - return LEXER_STATE_FALSE; - case 'n': - return LEXER_STATE_NULL; - } - - return LEXER_STATE_ERROR; -} - -lexer_state_t handle_true(lexer_t *lexer, char input) { - char current[MAX_KEYWORD_LENGTH + 1]; - strcpy(current, lexer->keyword.keyword.str); - - append_to_lex_str(&(lexer->keyword), input); - - bool return_state_true = (strequal(current, "t") && input == 'r') || - (strequal(current, "tr") && input == 'u'); - - bool return_state_end = strequal(current, "tru") && input == 'e'; - - if (return_state_true) { - return LEXER_STATE_TRUE; - } else if (return_state_end) { - return LEXER_STATE_KEYWORD_END; - } - - return LEXER_STATE_ERROR; -} - -lexer_state_t handle_false(lexer_t *lexer, char input) { - char current[MAX_KEYWORD_LENGTH + 1]; - strcpy(current, lexer->keyword.keyword.str); - - append_to_lex_str(&(lexer->keyword), input); - - bool return_state_false = (strequal(current, "f") && input == 'a') || - (strequal(current, "fa") && input == 'l') || - (strequal(current, "fal") && input == 's'); - - bool return_state_end = strequal(current, "fals") && input == 'e'; - - if (return_state_false) { - return LEXER_STATE_FALSE; - } else if (return_state_end) { - return LEXER_STATE_KEYWORD_END; - } - - return LEXER_STATE_ERROR; -} - -lexer_state_t handle_null(lexer_t *lexer, char input) { - char current[MAX_KEYWORD_LENGTH + 1]; - strcpy(current, lexer->keyword.keyword.str); - - append_to_lex_str(&(lexer->keyword), input); - - bool return_state_null = (strequal(current, "n") && input == 'u') || - (strequal(current, "nu") && input == 'l'); - - bool return_state_end = strequal(current, "nul") && input == 'l'; - - if (return_state_null) { - return LEXER_STATE_NULL; - } else if (return_state_end) { - return LEXER_STATE_KEYWORD_END; - } - - return LEXER_STATE_ERROR; -} - -lexer_state_t handle_keyword_end(lexer_t *lexer, char input) { - const char *keyword = lexer->keyword.keyword.str; - - if (lexer->keyword.size > 0) { - lexer->token_ready = true; - token_t *token = &(lexer->token); - u64 column = lexer->column - lexer->keyword.size; - - if (strequal(keyword, "null")) { - set_token(token, lexer->line, column, TK_NULL, (token_value_t){0}); - } else if (strequal(keyword, "true")) { - set_token(token, lexer->line, column, TK_BOOL, - (token_value_t){.boolean = true}); - } else if (strequal(keyword, "false")) { - set_token(token, lexer->line, column, TK_BOOL, - (token_value_t){.boolean = false}); - } - - clear_lex_str(&(lexer->keyword)); - } - - if (isspace(input)) { - return LEXER_STATE_KEYWORD_END; - } else if (input == ',') { - return lexer->stack.stack[lexer->stack.size - 1]; - } - - bool collection_end = input == '}' || input == ']'; - - return collection_end ? handle_collection_end(lexer, input) - : LEXER_STATE_ERROR; -}