#include "json/lexer.h" #include "aliases.h" #include "json/dstring.h" #include #include #include #include #include #include #define MAX_KEYWORD_LENGTH 5 #define UNICODE_LENGTH 4 #define MAX_STACK_CAPACITY 1024 #define STRING_BUF_START_CAPACITY 1024 typedef enum { // GENERAL STATES LEXER_STATE_START, LEXER_STATE_ERROR, LEXER_STATE_VALUE, // COLLECTION STATES LEXER_STATE_OBJECT_START, LEXER_STATE_OBJECT, LEXER_STATE_OBJECT_END, LEXER_STATE_ARRAY_START, LEXER_STATE_ARRAY, LEXER_STATE_ARRAY_END, LEXER_STATE_LAST_COLLECTION, // OBJECT STATES LEXER_STATE_KEY, // NUMBER STATES LEXER_STATE_DECIMAL, LEXER_STATE_NUMBER, LEXER_STATE_FRACTION, LEXER_STATE_EXPONENT, LEXER_STATE_EXP_SIGN, LEXER_STATE_POWER, LEXER_STATE_NUMBER_END, // STRING STATES LEXER_STATE_STRING, LEXER_STATE_STRING_END, LEXER_STATE_ESCAPE_SEQUENCE, LEXER_STATE_UNICODE_HEX, // KEYWORD STATES LEXER_STATE_TRUE, LEXER_STATE_FALSE, LEXER_STATE_NULL, LEXER_STATE_KEYWORD_END, COUNT_LEXER_STATES, } lexer_state_t; typedef struct { lexer_state_t stack[MAX_STACK_CAPACITY]; u64 size; } state_stack_t; typedef enum { LEXER_STRING_KEYWORD, LEXER_STRING_UNICODE, } lex_str_type; typedef struct { char str[MAX_KEYWORD_LENGTH + 1]; } keyword_t; typedef struct { char codepoint[UNICODE_LENGTH]; } unicode_t; typedef struct { lex_str_type type; u64 size; union { keyword_t keyword; unicode_t unicode; }; } lexer_string_t; struct lexer_s { u64 cursor; u64 line; u64 column; u64 text_length; const char *text; lexer_state_t current; state_stack_t stack; lexer_string_t keyword; lexer_string_t codepoint; dstr_t *current_string; bool token_ready; token_t token; bool has_extra_token; token_t extra_token; dstr_t *error_message; }; INTERNAL void stack_push(state_stack_t *stack, lexer_state_t value); INTERNAL lexer_state_t stack_pop(state_stack_t *stack); INTERNAL void append_to_lex_str(lexer_string_t *str, char input); INTERNAL void clear_lex_str(lexer_string_t *str); INTERNAL bool strequal(const char *const first, const char *const second); INTERNAL bool is_valid_hex_char(const char input); INTERNAL bool ishex(const char input); INTERNAL token_t dstr_to_numerical_token(const dstr_t *str); INTERNAL void set_token(token_t *token, u64 line, u64 column, token_type type, token_value_t value); INTERNAL void lexer_state_machine(lexer_t *lexer, char input); INTERNAL lexer_state_t handle_lexer_start(lexer_t *lexer, char input); INTERNAL lexer_state_t handle_last_collection(char input); INTERNAL lexer_state_t handle_collection_end(lexer_t *lexer, char input); INTERNAL void handle_input_after_collection_end(lexer_t *lexer, char input); INTERNAL lexer_state_t handle_object(lexer_t *lexer, char input); INTERNAL lexer_state_t handle_array(lexer_t *lexer, char input); INTERNAL lexer_state_t handle_key(lexer_t *lexer, char input); INTERNAL lexer_state_t handle_value(lexer_t *lexer, char input); INTERNAL lexer_state_t handle_string(lexer_t *lexer, char input); INTERNAL lexer_state_t handle_string_end(lexer_t *lexer, char input); INTERNAL lexer_state_t handle_escape_sequence(lexer_t *lexer, char input); INTERNAL lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input); INTERNAL lexer_state_t handle_decimal(lexer_t *lexer, char input); INTERNAL lexer_state_t handle_number(lexer_t *lexer, char input); INTERNAL lexer_state_t handle_fraction(lexer_t *lexer, char input); INTERNAL lexer_state_t handle_exponent(lexer_t *lexer, char input); INTERNAL lexer_state_t handle_exp_sign(lexer_t *lexer, char input); INTERNAL lexer_state_t handle_power(lexer_t *lexer, char input); INTERNAL 
INTERNAL lexer_state_t handle_number_end(lexer_t *lexer, char input);
INTERNAL lexer_state_t handle_keyword(char input);
INTERNAL lexer_state_t handle_true(lexer_t *lexer, char input);
INTERNAL lexer_state_t handle_false(lexer_t *lexer, char input);
INTERNAL lexer_state_t handle_null(lexer_t *lexer, char input);
INTERNAL lexer_state_t handle_keyword_end(lexer_t *lexer, char input);

void lexer_init(lexer_t **lexer) {
  if (*lexer) {
    lexer_free(lexer);
  }

  *lexer = (lexer_t *)malloc(sizeof(lexer_t));

  if (!(*lexer)) {
    return;
  }

  (*lexer)->cursor = 0;
  (*lexer)->line = 1;
  (*lexer)->column = 0;
  (*lexer)->text_length = 0;
  (*lexer)->text = "";
  (*lexer)->current = LEXER_STATE_START;
  (*lexer)->keyword.type = LEXER_STRING_KEYWORD;
  (*lexer)->codepoint.type = LEXER_STRING_UNICODE;
  (*lexer)->current_string = dstr_with_capacity(STRING_BUF_START_CAPACITY);
  (*lexer)->error_message = dstr_with_capacity(STRING_BUF_START_CAPACITY);
  (*lexer)->token_ready = false;
  (*lexer)->token = (token_t){};
  (*lexer)->has_extra_token = false;
  (*lexer)->extra_token = (token_t){};

  if (!((*lexer)->current_string) || !((*lexer)->error_message)) {
    lexer_free(lexer);
  }
}

void lexer_free(lexer_t **lexer) {
  if (!(*lexer)) {
    return;
  }

  dstr_free(&((*lexer)->current_string));
  dstr_free(&((*lexer)->error_message));
  free(*lexer);
  *lexer = NULL;
}

lex_result_t get_next_token(lexer_t *lexer, const char *text) {
  if (text != NULL) {
    lexer->cursor = 0;
    lexer->text = text;
    lexer->text_length = strlen(text);
  }

  dstr_clear(lexer->current_string);

  char c;
  while (lexer->cursor < lexer->text_length) {
    if (lexer->has_extra_token) {
      lexer->has_extra_token = false;
      return (lex_result_t){
          (lex_err_t){.errno = LEX_ERR_NONE, .msg = ""},
          lexer->extra_token,
      };
    }

    c = lexer->text[(lexer->cursor)++];

    lexer_state_machine(lexer, c);

    if (c == '\n') {
      ++(lexer->line);
      lexer->column = 0;
    } else {
      ++(lexer->column);
    }

    if (lexer->current == LEXER_STATE_ERROR) {
      char msg[STRING_BUF_START_CAPACITY + 1];
      memset(msg, 0, STRING_BUF_START_CAPACITY + 1);

      u64 slice_length = 20;
      char slice[slice_length];
      snprintf(slice, slice_length, "%s", &(lexer->text[lexer->cursor - 1]));

      snprintf(
          msg, STRING_BUF_START_CAPACITY,
          "\n(%llu:%llu) Encountered an error while parsing the following:\n%s",
          (unsigned long long)lexer->line, (unsigned long long)lexer->column,
          slice);

      dstr_update(&(lexer->error_message), msg);

      return (lex_result_t){
          (lex_err_t){.errno = LEX_ERR_INVALID,
                      .msg = dstr_to_cstr(lexer->error_message)},
          (token_t){},
      };
    } else if (lexer->token_ready) {
      lexer->token_ready = false;

      return (lex_result_t){
          (lex_err_t){.errno = LEX_ERR_NONE, .msg = ""},
          lexer->token,
      };
    }
  }

  return (lex_result_t){
      (lex_err_t){.errno = LEX_ERR_NONE, .msg = ""},
      (token_t){},
  };
}

void print_token(token_t token) {
  i32 num_padding = 4;
  printf("{LINE: %*llu, COLUMN: %*llu, TYPE: ", num_padding,
         (unsigned long long)token.line, num_padding,
         (unsigned long long)token.column);

  i32 token_type_padding = 15;
  switch (token.type) {
  case TK_NO_TOKEN:
    break;
  case TK_L_BRACE:
    printf("%*s, VALUE: N/A", token_type_padding, "TK_L_BRACE");
    break;
  case TK_R_BRACE:
    printf("%*s, VALUE: N/A", token_type_padding, "TK_R_BRACE");
    break;
  case TK_L_BRACKET:
    printf("%*s, VALUE: N/A", token_type_padding, "TK_L_BRACKET");
    break;
  case TK_R_BRACKET:
    printf("%*s, VALUE: N/A", token_type_padding, "TK_R_BRACKET");
    break;
  case TK_NULL:
    printf("%*s, VALUE: N/A", token_type_padding, "TK_NULL");
    break;
  case TK_BOOL:
    printf("%*s, VALUE: %s", token_type_padding, "TK_BOOL",
           token.value.boolean ? "true" : "false");
    break;
"true" : "false"); break; case TK_STR_KEY: printf("%*s, VALUE: %s", token_type_padding, "TK_STR_KEY", token.value.string); break; case TK_STR_VAL: printf("%*s, VALUE: %s", token_type_padding, "TK_STR_VAL", token.value.string); break; case TK_INTEGER: printf("%*s, VALUE: %lld", token_type_padding, "TK_INTEGER", (long long)token.value.num_int); break; case TK_DOUBLE: printf("%*s, VALUE: %f", token_type_padding, "TK_DOUBLE", token.value.num_frac); break; } printf("}\n"); } void stack_push(state_stack_t *stack, lexer_state_t state) { if (stack->size + 1 >= MAX_STACK_CAPACITY) { return; } stack->stack[(stack->size)++] = state; } lexer_state_t stack_pop(state_stack_t *stack) { if (stack->size == 0) { return LEXER_STATE_ERROR; } lexer_state_t state = stack->stack[--(stack->size)]; return state; } void append_to_lex_str(lexer_string_t *lex_str, char input) { u64 capacity = 0; char *str = NULL; switch (lex_str->type) { case LEXER_STRING_KEYWORD: capacity = MAX_KEYWORD_LENGTH; str = lex_str->keyword.str; break; case LEXER_STRING_UNICODE: capacity = UNICODE_LENGTH; str = lex_str->unicode.codepoint; break; } if (lex_str->size + 1 > capacity) { return; } assert(str != NULL); str[(lex_str->size)++] = input; } void clear_lex_str(lexer_string_t *lex_str) { u64 capacity = 1; char *str = NULL; switch (lex_str->type) { case LEXER_STRING_KEYWORD: capacity += MAX_KEYWORD_LENGTH; str = lex_str->keyword.str; break; case LEXER_STRING_UNICODE: capacity += UNICODE_LENGTH; str = lex_str->unicode.codepoint; break; } assert(str != NULL); memset(str, 0, capacity); lex_str->size = 0; } bool strequal(const char *const first, const char *const second) { return strcmp(first, second) == 0; } bool is_valid_hex_char(const char input) { switch (input) { case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': return true; } return false; } bool ishex(const char input) { return isdigit(input) || is_valid_hex_char(input); } token_t dstr_to_numerical_token(const dstr_t *str) { token_t token = {}; bool is_double = dstr_find(str, ".") != -1; token.type = is_double ? 
  if (is_double) {
    token.value.num_frac = strtod(dstr_to_cstr(str), NULL);
  } else {
    token.value.num_int = atol(dstr_to_cstr(str));
  }

  return token;
}

void set_token(token_t *token, u64 line, u64 column, token_type type,
               token_value_t value) {
  *token = (token_t){
      .line = line,
      .column = column,
      .type = type,
      .value = value,
  };
}

void lexer_state_machine(lexer_t *lexer, char input) {
  switch (lexer->current) {
  case LEXER_STATE_START:
    lexer->current = handle_lexer_start(lexer, input);
    break;
  case LEXER_STATE_VALUE:
    lexer->current = handle_value(lexer, input);
    break;
  case LEXER_STATE_OBJECT_START:
    stack_push(&(lexer->stack), LEXER_STATE_OBJECT);
    // break is left out intentionally here to utilise the fallthrough
    // behaviour of the switch statement
  case LEXER_STATE_OBJECT:
    lexer->current = handle_object(lexer, input);
    break;
  case LEXER_STATE_ARRAY_START:
    stack_push(&(lexer->stack), LEXER_STATE_ARRAY);
    // break is left out intentionally here to utilise the fallthrough
    // behaviour of the switch statement
  case LEXER_STATE_ARRAY:
    lexer->current = handle_array(lexer, input);
    break;
  case LEXER_STATE_OBJECT_END:
  case LEXER_STATE_ARRAY_END:
    if (lexer->stack.size > 1) {
      stack_pop(&(lexer->stack));
      lexer->current = lexer->stack.stack[lexer->stack.size - 1];
    } else {
      lexer->current = LEXER_STATE_LAST_COLLECTION;
    }
    handle_input_after_collection_end(lexer, input);
    break;
  case LEXER_STATE_KEY:
    lexer->current = handle_key(lexer, input);
    break;
  case LEXER_STATE_DECIMAL:
    lexer->current = handle_decimal(lexer, input);
    break;
  case LEXER_STATE_NUMBER:
    lexer->current = handle_number(lexer, input);
    break;
  case LEXER_STATE_FRACTION:
    lexer->current = handle_fraction(lexer, input);
    break;
  case LEXER_STATE_EXPONENT:
    lexer->current = handle_exponent(lexer, input);
    break;
  case LEXER_STATE_EXP_SIGN:
    lexer->current = handle_exp_sign(lexer, input);
    break;
  case LEXER_STATE_POWER:
    lexer->current = handle_power(lexer, input);
    break;
  case LEXER_STATE_NUMBER_END:
    lexer->current = handle_number_end(lexer, input);
    break;
  case LEXER_STATE_STRING:
    lexer->current = handle_string(lexer, input);
    break;
  case LEXER_STATE_STRING_END:
    lexer->current = handle_string_end(lexer, input);
    break;
  case LEXER_STATE_ESCAPE_SEQUENCE:
    lexer->current = handle_escape_sequence(lexer, input);
    break;
  case LEXER_STATE_UNICODE_HEX:
    lexer->current = handle_unicode_sequence(lexer, input);
    break;
  case LEXER_STATE_TRUE:
    lexer->current = handle_true(lexer, input);
    break;
  case LEXER_STATE_FALSE:
    lexer->current = handle_false(lexer, input);
    break;
  case LEXER_STATE_NULL:
    lexer->current = handle_null(lexer, input);
    break;
  case LEXER_STATE_KEYWORD_END:
    lexer->current = handle_keyword_end(lexer, input);
    break;
  case LEXER_STATE_LAST_COLLECTION:
    lexer->current = handle_last_collection(input);
    break;
  case LEXER_STATE_ERROR:
  case COUNT_LEXER_STATES:
    lexer->current = LEXER_STATE_ERROR;
    break;
  }
}

lexer_state_t handle_lexer_start(lexer_t *lexer, char input) {
  if (isspace(input)) {
    return LEXER_STATE_START;
  }

  switch (input) {
  case '{':
    lexer->token_ready = true;
    set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACE,
              (token_value_t){0});
    return LEXER_STATE_OBJECT_START;
  case '[':
    lexer->token_ready = true;
    set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACKET,
              (token_value_t){0});
    return LEXER_STATE_ARRAY_START;
  }

  return LEXER_STATE_ERROR;
}

lexer_state_t handle_last_collection(char input) {
  if (isspace(input)) {
    return LEXER_STATE_LAST_COLLECTION;
  }

  return LEXER_STATE_ERROR;
}
lexer_state_t handle_collection_end(lexer_t *lexer, char input) {
  // No need to ignore space as this is only called when input is } or ]
  lexer->current = lexer->stack.stack[lexer->stack.size - 1];

  bool object_end = lexer->current == LEXER_STATE_OBJECT && input == '}';
  if (object_end) {
    token_t *token;

    if (lexer->token_ready) {
      lexer->has_extra_token = true;
      token = &(lexer->extra_token);
    } else {
      lexer->token_ready = true;
      token = &(lexer->token);
    }

    set_token(token, lexer->line, lexer->column, TK_R_BRACE,
              (token_value_t){0});

    return LEXER_STATE_OBJECT_END;
  }

  bool array_end = lexer->current == LEXER_STATE_ARRAY && input == ']';
  if (array_end) {
    token_t *token;

    if (lexer->token_ready) {
      lexer->has_extra_token = true;
      token = &(lexer->extra_token);
    } else {
      lexer->token_ready = true;
      token = &(lexer->token);
    }

    set_token(token, lexer->line, lexer->column, TK_R_BRACKET,
              (token_value_t){0});

    return LEXER_STATE_ARRAY_END;
  }

  return LEXER_STATE_ERROR;
}

void handle_input_after_collection_end(lexer_t *lexer, char input) {
  switch (input) {
  case '}':
    lexer->token_ready = true;
    set_token(&(lexer->token), lexer->line, lexer->column, TK_R_BRACE,
              (token_value_t){0});
    break;
  case ']':
    lexer->token_ready = true;
    set_token(&(lexer->token), lexer->line, lexer->column, TK_R_BRACKET,
              (token_value_t){0});
    break;
  }
}

lexer_state_t handle_object(lexer_t *lexer, char input) {
  if (isspace(input)) {
    return LEXER_STATE_OBJECT;
  } else if (input == '"') {
    stack_push(&(lexer->stack), LEXER_STATE_KEY);
    return LEXER_STATE_KEY;
  } else if (input == '}') {
    return handle_collection_end(lexer, input);
  }

  return LEXER_STATE_ERROR;
}

lexer_state_t handle_array(lexer_t *lexer, char input) {
  if (isspace(input)) {
    return LEXER_STATE_ARRAY;
  } else if (input == ']') {
    return handle_collection_end(lexer, input);
  }

  return handle_value(lexer, input);
}

lexer_state_t handle_key(lexer_t *lexer, char input) {
  dstr_append(&(lexer->current_string), input);
  return LEXER_STATE_STRING;
}

lexer_state_t handle_value(lexer_t *lexer, char input) {
  if (isspace(input)) {
    return LEXER_STATE_VALUE;
  } else if ((isdigit(input) && input != '0') || input == '-') {
    dstr_append(&(lexer->current_string), input);
    return LEXER_STATE_NUMBER;
  }

  switch (input) {
  case '"':
    stack_push(&(lexer->stack), LEXER_STATE_VALUE);
    return LEXER_STATE_STRING;
  case '0':
    dstr_append(&(lexer->current_string), input);
    return LEXER_STATE_DECIMAL;
  case '{':
    lexer->token_ready = true;
    set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACE,
              (token_value_t){0});
    return LEXER_STATE_OBJECT_START;
  case '[':
    lexer->token_ready = true;
    set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACKET,
              (token_value_t){0});
    return LEXER_STATE_ARRAY_START;
  case 't':
  case 'f':
  case 'n':
    append_to_lex_str(&(lexer->keyword), input);
    return handle_keyword(input);
  }

  return LEXER_STATE_ERROR;
}

lexer_state_t handle_string(lexer_t *lexer, char input) {
  switch (input) {
  case '\\':
    dstr_append(&(lexer->current_string), input);
    return LEXER_STATE_ESCAPE_SEQUENCE;
  case '"': {
    lexer_state_t string_type = lexer->stack.stack[lexer->stack.size - 1];

    lexer->token_ready = true;
    token_t *token = &(lexer->token);
    u64 column = lexer->column - dstr_length(lexer->current_string);
    token_value_t value = {.string = dstr_to_cstr(lexer->current_string)};

    if (string_type == LEXER_STATE_KEY) {
      set_token(token, lexer->line, column, TK_STR_KEY, value);
    } else if (string_type == LEXER_STATE_VALUE) {
      set_token(token, lexer->line, column, TK_STR_VAL, value);
    }

    return LEXER_STATE_STRING_END;
  }
  }

  dstr_append(&(lexer->current_string), input);

  return LEXER_STATE_STRING;
}
lexer_state_t handle_string_end(lexer_t *lexer, char input) {
  if (isspace(input)) {
    return LEXER_STATE_STRING_END;
  }

  lexer->current = stack_pop(&(lexer->stack));

  bool key_end = lexer->current == LEXER_STATE_KEY && input == ':';
  if (key_end) {
    return LEXER_STATE_VALUE;
  }

  bool value_end = lexer->current == LEXER_STATE_VALUE && input == ',';
  if (value_end) {
    return lexer->stack.stack[lexer->stack.size - 1];
  }

  bool collection_end = input == '}' || input == ']';

  return collection_end ? handle_collection_end(lexer, input)
                        : LEXER_STATE_ERROR;
}

lexer_state_t handle_escape_sequence(lexer_t *lexer, char input) {
  dstr_append(&(lexer->current_string), input);

  switch (input) {
  case '"':
  case '/':
  case '\\':
  case 'b':
  case 'f':
  case 'n':
  case 'r':
  case 't':
    return LEXER_STATE_STRING;
  case 'u':
    return LEXER_STATE_UNICODE_HEX;
  }

  return LEXER_STATE_ERROR;
}

lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input) {
  append_to_lex_str(&(lexer->codepoint), input);
  dstr_append(&(lexer->current_string), input);

  if (!ishex(input)) {
    clear_lex_str(&(lexer->codepoint));
    return LEXER_STATE_ERROR;
  } else if (lexer->codepoint.size == UNICODE_LENGTH) {
    clear_lex_str(&(lexer->codepoint));
    return LEXER_STATE_STRING;
  }

  return LEXER_STATE_UNICODE_HEX;
}

lexer_state_t handle_decimal(lexer_t *lexer, char input) {
  dstr_append(&(lexer->current_string), input);

  if (input == '.') {
    return LEXER_STATE_FRACTION;
  }

  return LEXER_STATE_ERROR;
}

lexer_state_t handle_number(lexer_t *lexer, char input) {
  if (isdigit(input)) {
    dstr_append(&(lexer->current_string), input);
    return LEXER_STATE_NUMBER;
  } else if (input == '.') {
    dstr_append(&(lexer->current_string), input);
    return LEXER_STATE_FRACTION;
  } else if (input == '}' || input == ']') {
    // TODO (Abdelrahman): Set the token type correctly based on whether the
    // number is an integer or a double
    lexer->token_ready = true;

    u64 column = lexer->column - dstr_length(lexer->current_string);
    token_t token = dstr_to_numerical_token(lexer->current_string);

    set_token(&(lexer->token), lexer->line, column, token.type, token.value);

    return handle_collection_end(lexer, input);
  } else if (input == ',') {
    // TODO (Abdelrahman): Set the token type correctly based on whether the
    // number is an integer or a double
    lexer->token_ready = true;

    u64 column = lexer->column - dstr_length(lexer->current_string);
    token_t token = dstr_to_numerical_token(lexer->current_string);

    set_token(&(lexer->token), lexer->line, column, token.type, token.value);

    return lexer->stack.stack[lexer->stack.size - 1];
  } else if (isspace(input)) {
    // TODO (Abdelrahman): Set the token type correctly based on whether the
    // number is an integer or a double
    lexer->token_ready = true;

    u64 column = lexer->column - dstr_length(lexer->current_string);
    token_t token = dstr_to_numerical_token(lexer->current_string);

    set_token(&(lexer->token), lexer->line, column, token.type, token.value);

    return LEXER_STATE_NUMBER_END;
  }

  return LEXER_STATE_ERROR;
}

lexer_state_t handle_fraction(lexer_t *lexer, char input) {
  if (isdigit(input)) {
    dstr_append(&(lexer->current_string), input);
    return LEXER_STATE_FRACTION;
  } else if (input == '}' || input == ']') {
    // TODO (Abdelrahman): Set the token type correctly based on whether the
    // number is an integer or a double
    lexer->token_ready = true;

    u64 column = lexer->column - dstr_length(lexer->current_string);
    token_t token = dstr_to_numerical_token(lexer->current_string);

    set_token(&(lexer->token), lexer->line, column, token.type, token.value);

    return handle_collection_end(lexer, input);
  } else if (input == 'e' || input == 'E') {
    dstr_append(&(lexer->current_string), input);
    return LEXER_STATE_EXPONENT;
  } else if (input == ',') {
    // TODO (Abdelrahman): Set the token type correctly based on whether the
    // number is an integer or a double
    lexer->token_ready = true;

    u64 column = lexer->column - dstr_length(lexer->current_string);
    token_t token = dstr_to_numerical_token(lexer->current_string);

    set_token(&(lexer->token), lexer->line, column, token.type, token.value);

    return lexer->stack.stack[lexer->stack.size - 1];
  } else if (isspace(input)) {
    // TODO (Abdelrahman): Set the token type correctly based on whether the
    // number is an integer or a double
    lexer->token_ready = true;

    u64 column = lexer->column - dstr_length(lexer->current_string);
    token_t token = dstr_to_numerical_token(lexer->current_string);

    set_token(&(lexer->token), lexer->line, column, token.type, token.value);

    return LEXER_STATE_NUMBER_END;
  }

  return LEXER_STATE_ERROR;
}

lexer_state_t handle_exponent(lexer_t *lexer, char input) {
  dstr_append(&(lexer->current_string), input);

  if (isdigit(input)) {
    return LEXER_STATE_POWER;
  } else if (input == '+' || input == '-') {
    return LEXER_STATE_EXP_SIGN;
  }

  return LEXER_STATE_ERROR;
}

lexer_state_t handle_exp_sign(lexer_t *lexer, char input) {
  dstr_append(&(lexer->current_string), input);

  if (isdigit(input)) {
    return LEXER_STATE_POWER;
  }

  return LEXER_STATE_ERROR;
}

lexer_state_t handle_power(lexer_t *lexer, char input) {
  if (isdigit(input)) {
    dstr_append(&(lexer->current_string), input);
    return LEXER_STATE_POWER;
  } else if (input == '}' || input == ']') {
    // TODO (Abdelrahman): Set the token type correctly based on whether the
    // number is an integer or a double
    lexer->token_ready = true;

    u64 column = lexer->column - dstr_length(lexer->current_string);
    token_t token = dstr_to_numerical_token(lexer->current_string);

    set_token(&(lexer->token), lexer->line, column, token.type, token.value);

    return handle_collection_end(lexer, input);
  } else if (input == ',') {
    // TODO (Abdelrahman): Set the token type correctly based on whether the
    // number is an integer or a double
    lexer->token_ready = true;

    u64 column = lexer->column - dstr_length(lexer->current_string);
    token_t token = dstr_to_numerical_token(lexer->current_string);

    set_token(&(lexer->token), lexer->line, column, token.type, token.value);

    return lexer->stack.stack[lexer->stack.size - 1];
  } else if (isspace(input)) {
    // TODO (Abdelrahman): Set the token type correctly based on whether the
    // number is an integer or a double
    lexer->token_ready = true;

    u64 column = lexer->column - dstr_length(lexer->current_string);
    token_t token = dstr_to_numerical_token(lexer->current_string);

    set_token(&(lexer->token), lexer->line, column, token.type, token.value);

    return LEXER_STATE_NUMBER_END;
  }

  return LEXER_STATE_ERROR;
}

lexer_state_t handle_number_end(lexer_t *lexer, char input) {
  if (isspace(input)) {
    return LEXER_STATE_NUMBER_END;
  } else if (input == ',') {
    // TODO (Abdelrahman): Set the token type correctly based on whether the
    // number is an integer or a double
    lexer->token_ready = true;

    u64 column = lexer->column - dstr_length(lexer->current_string);
    token_t token = dstr_to_numerical_token(lexer->current_string);

    set_token(&(lexer->token), lexer->line, column, token.type, token.value);

    return lexer->stack.stack[lexer->stack.size - 1];
  }

  bool collection_end = input == '}' || input == ']';

  return collection_end ? handle_collection_end(lexer, input)
                        : LEXER_STATE_ERROR;
}
lexer_state_t handle_keyword(char input) {
  switch (input) {
  case 't':
    return LEXER_STATE_TRUE;
  case 'f':
    return LEXER_STATE_FALSE;
  case 'n':
    return LEXER_STATE_NULL;
  }

  return LEXER_STATE_ERROR;
}

lexer_state_t handle_true(lexer_t *lexer, char input) {
  char current[MAX_KEYWORD_LENGTH + 1];
  strcpy(current, lexer->keyword.keyword.str);

  append_to_lex_str(&(lexer->keyword), input);

  bool return_state_true = (strequal(current, "t") && input == 'r') ||
                           (strequal(current, "tr") && input == 'u');
  bool return_state_end = strequal(current, "tru") && input == 'e';

  if (return_state_true) {
    return LEXER_STATE_TRUE;
  } else if (return_state_end) {
    return LEXER_STATE_KEYWORD_END;
  }

  return LEXER_STATE_ERROR;
}

lexer_state_t handle_false(lexer_t *lexer, char input) {
  char current[MAX_KEYWORD_LENGTH + 1];
  strcpy(current, lexer->keyword.keyword.str);

  append_to_lex_str(&(lexer->keyword), input);

  bool return_state_false = (strequal(current, "f") && input == 'a') ||
                            (strequal(current, "fa") && input == 'l') ||
                            (strequal(current, "fal") && input == 's');
  bool return_state_end = strequal(current, "fals") && input == 'e';

  if (return_state_false) {
    return LEXER_STATE_FALSE;
  } else if (return_state_end) {
    return LEXER_STATE_KEYWORD_END;
  }

  return LEXER_STATE_ERROR;
}

lexer_state_t handle_null(lexer_t *lexer, char input) {
  char current[MAX_KEYWORD_LENGTH + 1];
  strcpy(current, lexer->keyword.keyword.str);

  append_to_lex_str(&(lexer->keyword), input);

  bool return_state_null = (strequal(current, "n") && input == 'u') ||
                           (strequal(current, "nu") && input == 'l');
  bool return_state_end = strequal(current, "nul") && input == 'l';

  if (return_state_null) {
    return LEXER_STATE_NULL;
  } else if (return_state_end) {
    return LEXER_STATE_KEYWORD_END;
  }

  return LEXER_STATE_ERROR;
}

lexer_state_t handle_keyword_end(lexer_t *lexer, char input) {
  const char *keyword = lexer->keyword.keyword.str;

  if (lexer->keyword.size > 0) {
    lexer->token_ready = true;
    token_t *token = &(lexer->token);
    u64 column = lexer->column - lexer->keyword.size;

    if (strequal(keyword, "null")) {
      set_token(token, lexer->line, column, TK_NULL, (token_value_t){0});
    } else if (strequal(keyword, "true")) {
      set_token(token, lexer->line, column, TK_BOOL,
                (token_value_t){.boolean = true});
    } else if (strequal(keyword, "false")) {
      set_token(token, lexer->line, column, TK_BOOL,
                (token_value_t){.boolean = false});
    }

    clear_lex_str(&(lexer->keyword));
  }

  if (isspace(input)) {
    return LEXER_STATE_KEYWORD_END;
  } else if (input == ',') {
    return lexer->stack.stack[lexer->stack.size - 1];
  }

  bool collection_end = input == '}' || input == ']';

  return collection_end ? handle_collection_end(lexer, input)
                        : LEXER_STATE_ERROR;
}
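/*
 * Minimal usage sketch of the token-pulling loop, compiled only when the
 * (hypothetical) LEXER_USAGE_EXAMPLE macro is defined. Assumptions not
 * visible in this file: lex_result_t exposes its two members as `err` and
 * `token` (only their order and types are shown above), and TK_NO_TOKEN is
 * the zero/default token type produced by `(token_t){}`.
 */
#ifdef LEXER_USAGE_EXAMPLE
int main(void) {
  lexer_t *lexer = NULL;
  lexer_init(&lexer);

  if (!lexer) {
    return 1;
  }

  // Trailing whitespace gives the lexer one more character to flush the
  // closing-brace token queued in extra_token.
  const char *json = "{\"key\": 1}\n";

  // The first call passes the text; later calls pass NULL to keep scanning
  // the same buffer from the saved cursor.
  lex_result_t result = get_next_token(lexer, json);
  while (result.err.errno == LEX_ERR_NONE && result.token.type != TK_NO_TOKEN) {
    print_token(result.token);
    result = get_next_token(lexer, NULL);
  }

  if (result.err.errno != LEX_ERR_NONE) {
    fprintf(stderr, "%s\n", result.err.msg);
  }

  lexer_free(&lexer);

  return 0;
}
#endif // LEXER_USAGE_EXAMPLE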