Store the current character and add more handlers after the state
transition
This commit is contained in:
parent
bf958056bd
commit
7cb4b28da8
@ -58,6 +58,7 @@ struct lexer_s {
|
||||
bool has_extra_token;
|
||||
token_t extra_token;
|
||||
dstr_t *error_message;
|
||||
char current_char;
|
||||
};
|
||||
|
||||
INTERNAL lexer_input_t char_type(char input);
|
||||
@ -78,6 +79,7 @@ INTERNAL void set_token(token_t *token, u64 line, u64 column, token_type type,
|
||||
INTERNAL void finalise_state_transition(lexer_t *lexer);
|
||||
INTERNAL void post_keyword(lexer_t *lexer);
|
||||
INTERNAL void set_numerical_token(lexer_t *lexer);
|
||||
INTERNAL void handle_string_end(lexer_t *lexer);
|
||||
|
||||
INTERNAL lexer_state_t state_table[COUNT_LEXER_STATES][COUNT_LEXER_INPUTS] = {
|
||||
#include "lexer_state_transitions.table"
|
||||
@ -136,8 +138,6 @@ lex_result_t get_next_token(lexer_t *lexer, const char *text) {
|
||||
|
||||
dstr_clear(lexer->current_string);
|
||||
|
||||
char c;
|
||||
|
||||
while (lexer->cursor < lexer->text_length) {
|
||||
if (lexer->has_extra_token) {
|
||||
lexer->has_extra_token = false;
|
||||
@ -148,15 +148,15 @@ lex_result_t get_next_token(lexer_t *lexer, const char *text) {
|
||||
};
|
||||
}
|
||||
|
||||
c = lexer->text[(lexer->cursor)++];
|
||||
lexer->current_char = lexer->text[(lexer->cursor)++];
|
||||
|
||||
lexer_input_t current_input = char_type(c);
|
||||
lexer_input_t input = char_type(lexer->current_char);
|
||||
|
||||
lexer->next = state_table[lexer->current][current_input];
|
||||
lexer->next = state_table[lexer->current][input];
|
||||
|
||||
finalise_state_transition(lexer);
|
||||
|
||||
if (current_input == LEXER_INPUT_NEWLINE) {
|
||||
if (input == LEXER_INPUT_NEWLINE) {
|
||||
++(lexer->line);
|
||||
lexer->column = 0;
|
||||
} else {
|
||||
@ -437,16 +437,53 @@ void set_token(token_t *token, u64 line, u64 column, token_type type,
|
||||
void finalise_state_transition(lexer_t *lexer) {
|
||||
switch (lexer->next) {
|
||||
case LEXER_STATE_OBJECT_START:
|
||||
lexer->token_ready = true;
|
||||
set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACE,
|
||||
(token_value_t){0});
|
||||
|
||||
stack_push(&(lexer->stack), LEXER_STATE_OBJECT);
|
||||
|
||||
lexer->next = LEXER_STATE_OBJECT;
|
||||
|
||||
break;
|
||||
case LEXER_STATE_OBJECT_END:
|
||||
break;
|
||||
case LEXER_STATE_ARRAY_START:
|
||||
lexer->token_ready = true;
|
||||
set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACKET,
|
||||
(token_value_t){0});
|
||||
|
||||
stack_push(&(lexer->stack), LEXER_STATE_ARRAY);
|
||||
|
||||
lexer->next = LEXER_STATE_ARRAY;
|
||||
|
||||
break;
|
||||
case LEXER_STATE_ARRAY_END:
|
||||
break;
|
||||
case LEXER_STATE_KEY:
|
||||
if (lexer->current == LEXER_STATE_OBJECT) {
|
||||
stack_push(&(lexer->stack), LEXER_STATE_KEY);
|
||||
|
||||
lexer->next = LEXER_STATE_STRING;
|
||||
}
|
||||
|
||||
break;
|
||||
case LEXER_STATE_KEY_END:
|
||||
lexer->next = LEXER_STATE_VALUE;
|
||||
|
||||
break;
|
||||
case LEXER_STATE_STRING:
|
||||
case LEXER_STATE_ESCAPE_SEQUENCE:
|
||||
case LEXER_STATE_UNICODE_HEX1:
|
||||
case LEXER_STATE_UNICODE_HEX2:
|
||||
case LEXER_STATE_UNICODE_HEX3:
|
||||
case LEXER_STATE_UNICODE_HEX4:
|
||||
dstr_append(&(lexer->current_string), lexer->current_char);
|
||||
|
||||
break;
|
||||
case LEXER_STATE_STRING_END:
|
||||
handle_string_end(lexer);
|
||||
|
||||
break;
|
||||
case LEXER_STATE_TRUE:
|
||||
case LEXER_STATE_FALSE:
|
||||
@ -471,7 +508,16 @@ void finalise_state_transition(lexer_t *lexer) {
|
||||
|
||||
break;
|
||||
case LEXER_STATE_NUMBER_END:
|
||||
set_numerical_token(lexer);
|
||||
switch (lexer->current) {
|
||||
case LEXER_STATE_NUMBER:
|
||||
case LEXER_STATE_FRACTION:
|
||||
case LEXER_STATE_POWER:
|
||||
set_numerical_token(lexer);
|
||||
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
@ -513,14 +559,14 @@ void post_keyword(lexer_t *lexer) {
|
||||
|
||||
break;
|
||||
default:
|
||||
lexer->current = LEXER_STATE_ERROR;
|
||||
lexer->next = LEXER_STATE_ERROR;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
lexer->token_ready = true;
|
||||
|
||||
lexer->current = LEXER_STATE_KEYWORD_END;
|
||||
lexer->next = LEXER_STATE_KEYWORD_END;
|
||||
}
|
||||
|
||||
void set_numerical_token(lexer_t *lexer) {
|
||||
@ -532,6 +578,21 @@ void set_numerical_token(lexer_t *lexer) {
|
||||
set_token(&(lexer->token), lexer->line, column, token.type, token.value);
|
||||
}
|
||||
|
||||
/**
 * Finish the string currently being lexed and publish it as a token.
 *
 * The entry on top of the lexer's state stack selects the token type:
 * LEXER_STATE_KEY produces TK_STR_KEY and LEXER_STATE_VALUE produces
 * TK_STR_VAL. The token's column is the lexer's current column minus the
 * length of the accumulated string, and its value is the C-string view of
 * lexer->current_string.
 *
 * lexer->token_ready is raised only when a token has actually been written.
 * If the state stack is empty there is no context to classify the string, so
 * the lexer is moved to LEXER_STATE_ERROR (same convention as post_keyword).
 * If the top of the stack is neither a key nor a value, no token is produced
 * and the lexer state is left untouched.
 *
 * @param lexer Lexer whose current string has just been terminated.
 */
void handle_string_end(lexer_t *lexer) {
  /* Guard the stack peek: with size == 0 the index size - 1 is out of bounds. */
  if (lexer->stack.size == 0) {
    lexer->next = LEXER_STATE_ERROR;

    return;
  }

  lexer_state_t string_type = lexer->stack.stack[lexer->stack.size - 1];

  token_type type;

  if (string_type == LEXER_STATE_KEY) {
    type = TK_STR_KEY;
  } else if (string_type == LEXER_STATE_VALUE) {
    type = TK_STR_VAL;
  } else {
    /* Not a key or value string: do not advertise a token that was never
     * written. NOTE(review): confirm whether this case should be an error. */
    return;
  }

  u64 column = lexer->column - dstr_length(lexer->current_string);
  token_value_t value = {.string = dstr_to_cstr(lexer->current_string)};

  set_token(&(lexer->token), lexer->line, column, type, value);

  /* Flag the token as ready only after it has been fully populated. */
  lexer->token_ready = true;
}
|
||||
|
||||
void lexer_state_machine(lexer_t *lexer, char input) {
|
||||
switch (lexer->current) {
|
||||
case LEXER_STATE_START:
|
||||
|
Loading…
Reference in New Issue
Block a user