Ensure lexer returns correct token for numerical values
This commit is contained in:
		| @@ -118,12 +118,12 @@ | ||||
|       "-x", | ||||
|       "c", | ||||
|       "-o", | ||||
|       "/tmp/main-92ad35.o", | ||||
|       "/tmp/main-4f432a.o", | ||||
|       "src/main.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", | ||||
|     "output": "/tmp/main-92ad35.o" | ||||
|     "output": "/tmp/main-4f432a.o" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
| @@ -187,12 +187,12 @@ | ||||
|       "-x", | ||||
|       "c", | ||||
|       "-o", | ||||
|       "/tmp/dstring-0fd2b9.o", | ||||
|       "/tmp/dstring-841d2e.o", | ||||
|       "src/dstring/dstring.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/dstring/dstring.c", | ||||
|     "output": "/tmp/dstring-0fd2b9.o" | ||||
|     "output": "/tmp/dstring-841d2e.o" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
| @@ -256,11 +256,11 @@ | ||||
|       "-x", | ||||
|       "c", | ||||
|       "-o", | ||||
|       "/tmp/lexer-77effd.o", | ||||
|       "/tmp/lexer-5cd579.o", | ||||
|       "src/lexer/lexer.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer.c", | ||||
|     "output": "/tmp/lexer-77effd.o" | ||||
|     "output": "/tmp/lexer-5cd579.o" | ||||
|   } | ||||
| ] | ||||
|   | ||||
| @@ -90,6 +90,8 @@ struct lexer { | ||||
|   dstr_t *current_string; | ||||
|   bool token_ready; | ||||
|   token_t token; | ||||
|   bool has_extra_token; | ||||
|   token_t extra_token; | ||||
| }; | ||||
|  | ||||
| void stack_push(state_stack_t *stack, lexer_state_t value); | ||||
| @@ -97,11 +99,13 @@ lexer_state_t stack_pop(state_stack_t *stack); | ||||
|  | ||||
| void append_to_lex_str(lexer_string_t *str, char input); | ||||
| void clear_lex_str(lexer_string_t *str); | ||||
| bool strequal(const char *first, const char *second); | ||||
| bool strequal(const char *const first, const char *const second); | ||||
| bool is_valid_hex_char(const char input); | ||||
| bool ishex(const char input); | ||||
|  | ||||
| void set_token(lexer_t *lexer, token_type_t type, token_value_t value); | ||||
| token_t dstr_to_numerical_token(const dstr_t *str); | ||||
| void set_token(token_t *token, u64 line, u64 column, token_type_t type, | ||||
|                token_value_t value); | ||||
|  | ||||
| void lexer_state_machine(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_lexer_start(lexer_t *lexer, char input); | ||||
| @@ -151,6 +155,8 @@ void lexer_init(lexer_t **lexer) { | ||||
|   (*lexer)->current_string = dstr_with_capacity(STRING_BUF_START_CAPACITY); | ||||
|   (*lexer)->token_ready = false; | ||||
|   (*lexer)->token = (token_t){0}; | ||||
|   (*lexer)->has_extra_token = false; | ||||
|   (*lexer)->extra_token = (token_t){0}; | ||||
|  | ||||
|   if (!((*lexer)->current_string)) { | ||||
|     lexer_free(lexer); | ||||
| @@ -178,19 +184,23 @@ token_t get_next_token(lexer_t *lexer, const char *text) { | ||||
|   char c; | ||||
|  | ||||
|   while (lexer->cursor < lexer->text_length) { | ||||
|     if (lexer->has_extra_token) { | ||||
|       lexer->has_extra_token = false; | ||||
|  | ||||
|       return lexer->extra_token; | ||||
|     } | ||||
|  | ||||
|     c = lexer->text[(lexer->cursor)++]; | ||||
|  | ||||
|     lexer_state_machine(lexer, c); | ||||
|  | ||||
|     if (c == '\n') { | ||||
|       ++(lexer->line); | ||||
|       lexer->column = 0; | ||||
|       continue; | ||||
|     } else { | ||||
|       ++(lexer->column); | ||||
|     } | ||||
|  | ||||
|     lexer_state_machine(lexer, c); | ||||
|  | ||||
|     // Track the position in the text | ||||
|     ++(lexer->column); | ||||
|  | ||||
|     if (lexer->current == LEXER_STATE_ERROR) { | ||||
|     } else if (lexer->token_ready) { | ||||
|       lexer->token_ready = false; | ||||
| @@ -269,9 +279,10 @@ void print_token(token_t token) { | ||||
|     printf("%15s, VALUE: %s", "TK_STR_VAL", token.value.string); | ||||
|     break; | ||||
|   case TK_INTEGER: | ||||
|     printf("%15s, VALUE: %ld", "TK_INTEGER", token.value.num_int); | ||||
|     break; | ||||
|   case TK_DOUBLE: | ||||
|     printf("%15s, VALUE: %s", "TK_DOUBLE", token.value.string); | ||||
|     printf("%15s, VALUE: %f", "TK_DOUBLE", token.value.num_frac); | ||||
|     break; | ||||
|   } | ||||
|  | ||||
| @@ -345,7 +356,7 @@ void clear_lex_str(lexer_string_t *lex_str) { | ||||
|   lex_str->size = 0; | ||||
| } | ||||
|  | ||||
| bool strequal(const char *first, const char *second) { | ||||
| bool strequal(const char *const first, const char *const second) { | ||||
|   return strcmp(first, second) == 0; | ||||
| } | ||||
|  | ||||
| @@ -373,12 +384,27 @@ bool ishex(const char input) { | ||||
|   return isdigit(input) || is_valid_hex_char(input); | ||||
| } | ||||
|  | ||||
| void set_token(lexer_t *lexer, token_type_t type, token_value_t value) { | ||||
|   lexer->token_ready = true; | ||||
| token_t dstr_to_numerical_token(const dstr_t *str) { | ||||
|   token_t token = {0}; | ||||
|  | ||||
|   lexer->token = (token_t){ | ||||
|       .line = lexer->line, | ||||
|       .column = lexer->column, | ||||
|   bool is_double = dstr_find(str, ".") != -1; | ||||
|  | ||||
|   token.type = is_double ? TK_DOUBLE : TK_INTEGER; | ||||
|  | ||||
|   if (is_double) { | ||||
|     token.value.num_frac = strtod(dstr_to_cstr(str), NULL); | ||||
|   } else { | ||||
|     token.value.num_int = atol(dstr_to_cstr(str)); | ||||
|   } | ||||
|  | ||||
|   return token; | ||||
| } | ||||
|  | ||||
| void set_token(token_t *token, u64 line, u64 column, token_type_t type, | ||||
|                token_value_t value) { | ||||
|   *token = (token_t){ | ||||
|       .line = line, | ||||
|       .column = column, | ||||
|       .type = type, | ||||
|       .value = value, | ||||
|   }; | ||||
| @@ -484,11 +510,15 @@ lexer_state_t handle_lexer_start(lexer_t *lexer, char input) { | ||||
|  | ||||
|   switch (input) { | ||||
|   case '{': | ||||
|     set_token(lexer, TK_L_BRACE, (token_value_t){0}); | ||||
|     lexer->token_ready = true; | ||||
|     set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACE, | ||||
|               (token_value_t){0}); | ||||
|  | ||||
|     return LEXER_STATE_OBJECT_START; | ||||
|   case '[': | ||||
|     set_token(lexer, TK_L_BRACKET, (token_value_t){0}); | ||||
|     lexer->token_ready = true; | ||||
|     set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACKET, | ||||
|               (token_value_t){0}); | ||||
|  | ||||
|     return LEXER_STATE_ARRAY_START; | ||||
|   } | ||||
| @@ -512,7 +542,18 @@ lexer_state_t handle_collection_end(lexer_t *lexer, char input) { | ||||
|   bool object_end = lexer->current == LEXER_STATE_OBJECT && input == '}'; | ||||
|  | ||||
|   if (object_end) { | ||||
|     set_token(lexer, TK_R_BRACE, (token_value_t){0}); | ||||
|     token_t *token; | ||||
|  | ||||
|     if (lexer->token_ready) { | ||||
|       lexer->has_extra_token = true; | ||||
|       token = &(lexer->extra_token); | ||||
|     } else { | ||||
|       lexer->token_ready = true; | ||||
|       token = &(lexer->token); | ||||
|     } | ||||
|  | ||||
|     set_token(token, lexer->line, lexer->column, TK_R_BRACE, | ||||
|               (token_value_t){0}); | ||||
|  | ||||
|     return LEXER_STATE_OBJECT_END; | ||||
|   } | ||||
| @@ -520,7 +561,18 @@ lexer_state_t handle_collection_end(lexer_t *lexer, char input) { | ||||
|   bool array_end = lexer->current == LEXER_STATE_ARRAY && input == ']'; | ||||
|  | ||||
|   if (array_end) { | ||||
|     set_token(lexer, TK_R_BRACKET, (token_value_t){0}); | ||||
|     token_t *token; | ||||
|  | ||||
|     if (lexer->token_ready) { | ||||
|       lexer->has_extra_token = true; | ||||
|       token = &(lexer->extra_token); | ||||
|     } else { | ||||
|       lexer->token_ready = true; | ||||
|       token = &(lexer->token); | ||||
|     } | ||||
|  | ||||
|     set_token(token, lexer->line, lexer->column, TK_R_BRACKET, | ||||
|               (token_value_t){0}); | ||||
|  | ||||
|     return LEXER_STATE_ARRAY_END; | ||||
|   } | ||||
| @@ -531,11 +583,15 @@ lexer_state_t handle_collection_end(lexer_t *lexer, char input) { | ||||
| void handle_input_after_collection_end(lexer_t *lexer, char input) { | ||||
|   switch (input) { | ||||
|   case '}': | ||||
|     set_token(lexer, TK_R_BRACE, (token_value_t){0}); | ||||
|     lexer->token_ready = true; | ||||
|     set_token(&(lexer->token), lexer->line, lexer->column, TK_R_BRACE, | ||||
|               (token_value_t){0}); | ||||
|  | ||||
|     break; | ||||
|   case ']': | ||||
|     set_token(lexer, TK_R_BRACKET, (token_value_t){0}); | ||||
|     lexer->token_ready = true; | ||||
|     set_token(&(lexer->token), lexer->line, lexer->column, TK_R_BRACKET, | ||||
|               (token_value_t){0}); | ||||
|  | ||||
|     break; | ||||
|   } | ||||
| @@ -590,11 +646,15 @@ lexer_state_t handle_value(lexer_t *lexer, char input) { | ||||
|  | ||||
|     return LEXER_STATE_DECIMAL; | ||||
|   case '{': | ||||
|     set_token(lexer, TK_L_BRACE, (token_value_t){0}); | ||||
|     lexer->token_ready = true; | ||||
|     set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACE, | ||||
|               (token_value_t){0}); | ||||
|  | ||||
|     return LEXER_STATE_OBJECT_START; | ||||
|   case '[': | ||||
|     set_token(lexer, TK_L_BRACKET, (token_value_t){0}); | ||||
|     lexer->token_ready = true; | ||||
|     set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACKET, | ||||
|               (token_value_t){0}); | ||||
|  | ||||
|     return LEXER_STATE_ARRAY_START; | ||||
|   case 't': | ||||
| @@ -617,12 +677,15 @@ lexer_state_t handle_string(lexer_t *lexer, char input) { | ||||
|   case '"': { | ||||
|     lexer_state_t string_type = lexer->stack.stack[lexer->stack.size - 1]; | ||||
|  | ||||
|     lexer->token_ready = true; | ||||
|     token_t *token = &(lexer->token); | ||||
|     u64 column = lexer->column - dstr_length(lexer->current_string); | ||||
|     token_value_t value = {.string = dstr_to_cstr(lexer->current_string)}; | ||||
|  | ||||
|     if (string_type == LEXER_STATE_KEY) { | ||||
|       set_token(lexer, TK_STR_KEY, | ||||
|                 (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|       set_token(token, lexer->line, column, TK_STR_KEY, value); | ||||
|     } else if (string_type == LEXER_STATE_VALUE) { | ||||
|       set_token(lexer, TK_STR_VAL, | ||||
|                 (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|       set_token(token, lexer->line, column, TK_STR_VAL, value); | ||||
|     } | ||||
|  | ||||
|     return LEXER_STATE_STRING_END; | ||||
| @@ -718,22 +781,34 @@ lexer_state_t handle_number(lexer_t *lexer, char input) { | ||||
|   } else if (input == '}' || input == ']') { | ||||
|     // Build the token via dstr_to_numerical_token, which picks TK_DOUBLE when | ||||
|     // the text contains a '.' and TK_INTEGER otherwise | ||||
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|     lexer->token_ready = true; | ||||
|     u64 column = lexer->column - dstr_length(lexer->current_string); | ||||
|  | ||||
|     token_t token = dstr_to_numerical_token(lexer->current_string); | ||||
|  | ||||
|     set_token(&(lexer->token), lexer->line, column, token.type, token.value); | ||||
|  | ||||
|     return handle_collection_end(lexer, input); | ||||
|   } else if (input == ',') { | ||||
|     // Build the token via dstr_to_numerical_token, which picks TK_DOUBLE when | ||||
|     // the text contains a '.' and TK_INTEGER otherwise | ||||
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|     lexer->token_ready = true; | ||||
|     u64 column = lexer->column - dstr_length(lexer->current_string); | ||||
|  | ||||
|     token_t token = dstr_to_numerical_token(lexer->current_string); | ||||
|  | ||||
|     set_token(&(lexer->token), lexer->line, column, token.type, token.value); | ||||
|  | ||||
|     return lexer->stack.stack[lexer->stack.size - 1]; | ||||
|   } else if (isspace(input)) { | ||||
|     // Build the token via dstr_to_numerical_token, which picks TK_DOUBLE when | ||||
|     // the text contains a '.' and TK_INTEGER otherwise | ||||
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|     lexer->token_ready = true; | ||||
|     u64 column = lexer->column - dstr_length(lexer->current_string); | ||||
|  | ||||
|     token_t token = dstr_to_numerical_token(lexer->current_string); | ||||
|  | ||||
|     set_token(&(lexer->token), lexer->line, column, token.type, token.value); | ||||
|  | ||||
|     return LEXER_STATE_NUMBER_END; | ||||
|   } | ||||
| @@ -749,8 +824,12 @@ lexer_state_t handle_fraction(lexer_t *lexer, char input) { | ||||
|   } else if (input == '}' || input == ']') { | ||||
|     // Build the token via dstr_to_numerical_token, which picks TK_DOUBLE when | ||||
|     // the text contains a '.' and TK_INTEGER otherwise | ||||
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|     lexer->token_ready = true; | ||||
|     u64 column = lexer->column - dstr_length(lexer->current_string); | ||||
|  | ||||
|     token_t token = dstr_to_numerical_token(lexer->current_string); | ||||
|  | ||||
|     set_token(&(lexer->token), lexer->line, column, token.type, token.value); | ||||
|  | ||||
|     return handle_collection_end(lexer, input); | ||||
|   } else if (input == 'e' || input == 'E') { | ||||
| @@ -760,15 +839,23 @@ lexer_state_t handle_fraction(lexer_t *lexer, char input) { | ||||
|   } else if (input == ',') { | ||||
|     // Build the token via dstr_to_numerical_token, which picks TK_DOUBLE when | ||||
|     // the text contains a '.' and TK_INTEGER otherwise | ||||
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|     lexer->token_ready = true; | ||||
|     u64 column = lexer->column - dstr_length(lexer->current_string); | ||||
|  | ||||
|     token_t token = dstr_to_numerical_token(lexer->current_string); | ||||
|  | ||||
|     set_token(&(lexer->token), lexer->line, column, token.type, token.value); | ||||
|  | ||||
|     return lexer->stack.stack[lexer->stack.size - 1]; | ||||
|   } else if (isspace(input)) { | ||||
|     // Build the token via dstr_to_numerical_token, which picks TK_DOUBLE when | ||||
|     // the text contains a '.' and TK_INTEGER otherwise | ||||
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|     lexer->token_ready = true; | ||||
|     u64 column = lexer->column - dstr_length(lexer->current_string); | ||||
|  | ||||
|     token_t token = dstr_to_numerical_token(lexer->current_string); | ||||
|  | ||||
|     set_token(&(lexer->token), lexer->line, column, token.type, token.value); | ||||
|  | ||||
|     return LEXER_STATE_NUMBER_END; | ||||
|   } | ||||
| @@ -806,22 +893,34 @@ lexer_state_t handle_power(lexer_t *lexer, char input) { | ||||
|   } else if (input == '}' || input == ']') { | ||||
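|     // Build the token via dstr_to_numerical_token, which picks TK_DOUBLE when | ||||
|     // the text contains a '.' and TK_INTEGER otherwise. | ||||
|     // NOTE(review): presumably the text here can hold an exponent without a | ||||
|     // '.' (e.g. 1e5); that would take the atol branch - confirm and handle | ||||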
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|     lexer->token_ready = true; | ||||
|     u64 column = lexer->column - dstr_length(lexer->current_string); | ||||
|  | ||||
|     token_t token = dstr_to_numerical_token(lexer->current_string); | ||||
|  | ||||
|     set_token(&(lexer->token), lexer->line, column, token.type, token.value); | ||||
|  | ||||
|     return handle_collection_end(lexer, input); | ||||
|   } else if (input == ',') { | ||||
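|     // Build the token via dstr_to_numerical_token, which picks TK_DOUBLE when | ||||
|     // the text contains a '.' and TK_INTEGER otherwise. | ||||
|     // NOTE(review): presumably the text here can hold an exponent without a | ||||
|     // '.' (e.g. 1e5); that would take the atol branch - confirm and handle | ||||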
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|     lexer->token_ready = true; | ||||
|     u64 column = lexer->column - dstr_length(lexer->current_string); | ||||
|  | ||||
|     token_t token = dstr_to_numerical_token(lexer->current_string); | ||||
|  | ||||
|     set_token(&(lexer->token), lexer->line, column, token.type, token.value); | ||||
|  | ||||
|     return lexer->stack.stack[lexer->stack.size - 1]; | ||||
|   } else if (isspace(input)) { | ||||
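|     // Build the token via dstr_to_numerical_token, which picks TK_DOUBLE when | ||||
|     // the text contains a '.' and TK_INTEGER otherwise. | ||||
|     // NOTE(review): presumably the text here can hold an exponent without a | ||||
|     // '.' (e.g. 1e5); that would take the atol branch - confirm and handle | ||||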
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|     lexer->token_ready = true; | ||||
|     u64 column = lexer->column - dstr_length(lexer->current_string); | ||||
|  | ||||
|     token_t token = dstr_to_numerical_token(lexer->current_string); | ||||
|  | ||||
|     set_token(&(lexer->token), lexer->line, column, token.type, token.value); | ||||
|  | ||||
|     return LEXER_STATE_NUMBER_END; | ||||
|   } | ||||
| @@ -835,8 +934,12 @@ lexer_state_t handle_number_end(lexer_t *lexer, char input) { | ||||
|   } else if (input == ',') { | ||||
|     // Build the token via dstr_to_numerical_token, which picks TK_DOUBLE when | ||||
|     // the text contains a '.' and TK_INTEGER otherwise | ||||
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|     lexer->token_ready = true; | ||||
|     u64 column = lexer->column - dstr_length(lexer->current_string); | ||||
|  | ||||
|     token_t token = dstr_to_numerical_token(lexer->current_string); | ||||
|  | ||||
|     set_token(&(lexer->token), lexer->line, column, token.type, token.value); | ||||
|  | ||||
|     return lexer->stack.stack[lexer->stack.size - 1]; | ||||
|   } | ||||
| @@ -924,15 +1027,21 @@ lexer_state_t handle_null(lexer_t *lexer, char input) { | ||||
| lexer_state_t handle_keyword_end(lexer_t *lexer, char input) { | ||||
|   const char *keyword = lexer->keyword.keyword.str; | ||||
|  | ||||
|   if (strequal(keyword, "null")) { | ||||
|     set_token(lexer, TK_NULL, (token_value_t){0}); | ||||
|   } else if (strequal(keyword, "true")) { | ||||
|     set_token(lexer, TK_TRUE, (token_value_t){0}); | ||||
|   } else if (strequal(keyword, "false")) { | ||||
|     set_token(lexer, TK_FALSE, (token_value_t){0}); | ||||
|   } | ||||
|   if (lexer->keyword.size > 0) { | ||||
|     lexer->token_ready = true; | ||||
|     token_t *token = &(lexer->token); | ||||
|     u64 column = lexer->column - lexer->keyword.size; | ||||
|  | ||||
|   clear_lex_str(&(lexer->keyword)); | ||||
|     if (strequal(keyword, "null")) { | ||||
|       set_token(token, lexer->line, column, TK_NULL, (token_value_t){0}); | ||||
|     } else if (strequal(keyword, "true")) { | ||||
|       set_token(token, lexer->line, column, TK_TRUE, (token_value_t){0}); | ||||
|     } else if (strequal(keyword, "false")) { | ||||
|       set_token(token, lexer->line, column, TK_FALSE, (token_value_t){0}); | ||||
|     } | ||||
|  | ||||
|     clear_lex_str(&(lexer->keyword)); | ||||
|   } | ||||
|  | ||||
|   if (isspace(input)) { | ||||
|     return LEXER_STATE_KEYWORD_END; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user