Store the current character and add more handlers after the state
transition
This commit is contained in:
		| @@ -58,6 +58,7 @@ struct lexer_s { | ||||
|   bool has_extra_token; | ||||
|   token_t extra_token; | ||||
|   dstr_t *error_message; | ||||
|   char current_char; | ||||
| }; | ||||
|  | ||||
| INTERNAL lexer_input_t char_type(char input); | ||||
| @@ -78,6 +79,7 @@ INTERNAL void set_token(token_t *token, u64 line, u64 column, token_type type, | ||||
| INTERNAL void finalise_state_transition(lexer_t *lexer); | ||||
| INTERNAL void post_keyword(lexer_t *lexer); | ||||
| INTERNAL void set_numerical_token(lexer_t *lexer); | ||||
| INTERNAL void handle_string_end(lexer_t *lexer); | ||||
|  | ||||
| INTERNAL lexer_state_t state_table[COUNT_LEXER_STATES][COUNT_LEXER_INPUTS] = { | ||||
| #include "lexer_state_transitions.table" | ||||
| @@ -136,8 +138,6 @@ lex_result_t get_next_token(lexer_t *lexer, const char *text) { | ||||
|  | ||||
|   dstr_clear(lexer->current_string); | ||||
|  | ||||
|   char c; | ||||
|  | ||||
|   while (lexer->cursor < lexer->text_length) { | ||||
|     if (lexer->has_extra_token) { | ||||
|       lexer->has_extra_token = false; | ||||
| @@ -148,15 +148,15 @@ lex_result_t get_next_token(lexer_t *lexer, const char *text) { | ||||
|       }; | ||||
|     } | ||||
|  | ||||
|     c = lexer->text[(lexer->cursor)++]; | ||||
|     lexer->current_char = lexer->text[(lexer->cursor)++]; | ||||
|  | ||||
|     lexer_input_t current_input = char_type(c); | ||||
|     lexer_input_t input = char_type(lexer->current_char); | ||||
|  | ||||
|     lexer->next = state_table[lexer->current][current_input]; | ||||
|     lexer->next = state_table[lexer->current][input]; | ||||
|  | ||||
|     finalise_state_transition(lexer); | ||||
|  | ||||
|     if (current_input == LEXER_INPUT_NEWLINE) { | ||||
|     if (input == LEXER_INPUT_NEWLINE) { | ||||
|       ++(lexer->line); | ||||
|       lexer->column = 0; | ||||
|     } else { | ||||
| @@ -437,16 +437,53 @@ void set_token(token_t *token, u64 line, u64 column, token_type type, | ||||
| void finalise_state_transition(lexer_t *lexer) { | ||||
|   switch (lexer->next) { | ||||
|   case LEXER_STATE_OBJECT_START: | ||||
|     lexer->token_ready = true; | ||||
|     set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACE, | ||||
|               (token_value_t){0}); | ||||
|  | ||||
|     stack_push(&(lexer->stack), LEXER_STATE_OBJECT); | ||||
|  | ||||
|     lexer->next = LEXER_STATE_OBJECT; | ||||
|  | ||||
|     break; | ||||
|   case LEXER_STATE_OBJECT_END: | ||||
|     break; | ||||
|   case LEXER_STATE_ARRAY_START: | ||||
|     lexer->token_ready = true; | ||||
|     set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACKET, | ||||
|               (token_value_t){0}); | ||||
|  | ||||
|     stack_push(&(lexer->stack), LEXER_STATE_ARRAY); | ||||
|  | ||||
|     lexer->next = LEXER_STATE_ARRAY; | ||||
|  | ||||
|     break; | ||||
|   case LEXER_STATE_ARRAY_END: | ||||
|     break; | ||||
|   case LEXER_STATE_KEY: | ||||
|     if (lexer->current == LEXER_STATE_OBJECT) { | ||||
|       stack_push(&(lexer->stack), LEXER_STATE_KEY); | ||||
|  | ||||
|       lexer->next = LEXER_STATE_STRING; | ||||
|     } | ||||
|  | ||||
|     break; | ||||
|   case LEXER_STATE_KEY_END: | ||||
|     lexer->next = LEXER_STATE_VALUE; | ||||
|  | ||||
|     break; | ||||
|   case LEXER_STATE_STRING: | ||||
|   case LEXER_STATE_ESCAPE_SEQUENCE: | ||||
|   case LEXER_STATE_UNICODE_HEX1: | ||||
|   case LEXER_STATE_UNICODE_HEX2: | ||||
|   case LEXER_STATE_UNICODE_HEX3: | ||||
|   case LEXER_STATE_UNICODE_HEX4: | ||||
|     dstr_append(&(lexer->current_string), lexer->current_char); | ||||
|  | ||||
|     break; | ||||
|   case LEXER_STATE_STRING_END: | ||||
|     handle_string_end(lexer); | ||||
|  | ||||
|     break; | ||||
|   case LEXER_STATE_TRUE: | ||||
|   case LEXER_STATE_FALSE: | ||||
| @@ -471,7 +508,16 @@ void finalise_state_transition(lexer_t *lexer) { | ||||
|  | ||||
|     break; | ||||
|   case LEXER_STATE_NUMBER_END: | ||||
|     set_numerical_token(lexer); | ||||
|     switch (lexer->current) { | ||||
|     case LEXER_STATE_NUMBER: | ||||
|     case LEXER_STATE_FRACTION: | ||||
|     case LEXER_STATE_POWER: | ||||
|       set_numerical_token(lexer); | ||||
|  | ||||
|       break; | ||||
|     default: | ||||
|       break; | ||||
|     } | ||||
|  | ||||
|     break; | ||||
|   } | ||||
| @@ -513,14 +559,14 @@ void post_keyword(lexer_t *lexer) { | ||||
|  | ||||
|     break; | ||||
|   default: | ||||
|     lexer->current = LEXER_STATE_ERROR; | ||||
|     lexer->next = LEXER_STATE_ERROR; | ||||
|  | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   lexer->token_ready = true; | ||||
|  | ||||
|   lexer->current = LEXER_STATE_KEYWORD_END; | ||||
|   lexer->next = LEXER_STATE_KEYWORD_END; | ||||
| } | ||||
|  | ||||
| void set_numerical_token(lexer_t *lexer) { | ||||
| @@ -532,6 +578,21 @@ void set_numerical_token(lexer_t *lexer) { | ||||
|   set_token(&(lexer->token), lexer->line, column, token.type, token.value); | ||||
| } | ||||
|  | ||||
| void handle_string_end(lexer_t *lexer) { | ||||
|   lexer_state_t string_type = lexer->stack.stack[lexer->stack.size - 1]; | ||||
|  | ||||
|   lexer->token_ready = true; | ||||
|   token_t *token = &(lexer->token); | ||||
|   u64 column = lexer->column - dstr_length(lexer->current_string); | ||||
|   token_value_t value = {.string = dstr_to_cstr(lexer->current_string)}; | ||||
|  | ||||
|   if (string_type == LEXER_STATE_KEY) { | ||||
|     set_token(token, lexer->line, column, TK_STR_KEY, value); | ||||
|   } else if (string_type == LEXER_STATE_VALUE) { | ||||
|     set_token(token, lexer->line, column, TK_STR_VAL, value); | ||||
|   } | ||||
| } | ||||
|  | ||||
| void lexer_state_machine(lexer_t *lexer, char input) { | ||||
|   switch (lexer->current) { | ||||
|   case LEXER_STATE_START: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user