1025 lines
27 KiB
C
1025 lines
27 KiB
C
#include "aliases.h"
|
|
#include "lexer_data.h"
|
|
#include <ctype.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#define STRING_BUF_LENGTH 100
|
|
|
|
INTERNAL void fill_table(void);
|
|
INTERNAL void write_table(FILE *fp);
|
|
|
|
INTERNAL void set_column_width(lexer_state_t state);
|
|
INTERNAL const char *get_input_string(lexer_input_t input);
|
|
INTERNAL const char *get_state_string(lexer_state_t state);
|
|
|
|
INTERNAL lexer_state_t lexer_state_machine(lexer_state_t state,
|
|
lexer_input_t input);
|
|
INTERNAL lexer_state_t handle_lexer_start(lexer_input_t input);
|
|
INTERNAL lexer_state_t handle_last_collection(char input);
|
|
INTERNAL lexer_state_t handle_object(lexer_input_t input);
|
|
INTERNAL lexer_state_t handle_array(lexer_input_t input);
|
|
INTERNAL lexer_state_t handle_key(lexer_input_t input);
|
|
INTERNAL lexer_state_t handle_value(lexer_input_t input);
|
|
INTERNAL lexer_state_t handle_string(lexer_input_t input);
|
|
INTERNAL lexer_state_t handle_string_end(lexer_input_t input);
|
|
INTERNAL lexer_state_t handle_escape_sequence(lexer_input_t input);
|
|
INTERNAL lexer_state_t handle_unicode_hex(lexer_input_t input,
|
|
lexer_state_t return_state);
|
|
INTERNAL lexer_state_t handle_decimal(lexer_input_t input);
|
|
INTERNAL lexer_state_t handle_number(lexer_input_t input);
|
|
INTERNAL lexer_state_t handle_fraction(lexer_input_t input);
|
|
INTERNAL lexer_state_t handle_exponent(lexer_input_t input);
|
|
INTERNAL lexer_state_t handle_exp_sign(lexer_input_t input);
|
|
INTERNAL lexer_state_t handle_power(lexer_input_t input);
|
|
INTERNAL lexer_state_t handle_number_end(lexer_input_t input);
|
|
INTERNAL lexer_state_t handle_true(lexer_input_t input,
|
|
lexer_state_t start_state);
|
|
INTERNAL lexer_state_t handle_false(lexer_input_t input,
|
|
lexer_state_t start_state);
|
|
INTERNAL lexer_state_t handle_null(lexer_input_t input,
|
|
lexer_state_t start_state);
|
|
INTERNAL lexer_state_t handle_keyword_end(lexer_input_t input);
|
|
|
|
INTERNAL i32 column_width = 0;
|
|
INTERNAL lexer_state_t state_table[COUNT_LEXER_STATES][COUNT_LEXER_INPUTS] = {
|
|
0};
|
|
INTERNAL const char *filename = "./include/lexer/lexer_state_transitions.table";
|
|
|
|
int main(void) {
|
|
FILE *fp = fopen(filename, "w");
|
|
if (!fp) {
|
|
printf("Failed to open file\n");
|
|
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
fill_table();
|
|
|
|
write_table(fp);
|
|
|
|
fclose(fp);
|
|
|
|
return EXIT_SUCCESS;
|
|
}
|
|
|
|
INTERNAL void fill_table(void) {
|
|
lexer_state_t state;
|
|
lexer_input_t input;
|
|
|
|
for (u64 i = 0; i < COUNT_LEXER_STATES; ++i) {
|
|
state = (lexer_state_t)i;
|
|
|
|
for (u64 j = 0; j < COUNT_LEXER_INPUTS; ++j) {
|
|
input = (lexer_input_t)j;
|
|
|
|
state_table[i][j] = (lexer_state_t)lexer_state_machine(state, input);
|
|
}
|
|
}
|
|
}
|
|
|
|
void write_table(FILE *fp) {
|
|
const char *array_open = "{ ";
|
|
const u64 array_open_length = strlen(array_open);
|
|
|
|
const char *array_close = "},";
|
|
const u64 array_close_length = strlen(array_close);
|
|
|
|
const char *comment = "//";
|
|
const u64 comment_length = strlen(comment);
|
|
|
|
const char *table_header_end = " \n";
|
|
const u64 table_header_end_length = strlen(table_header_end);
|
|
|
|
char output[STRING_BUF_LENGTH] = {0};
|
|
u64 length = 0;
|
|
|
|
for (u64 i = 0; i < COUNT_LEXER_STATES; ++i) {
|
|
set_column_width((lexer_state_t)i);
|
|
}
|
|
|
|
fwrite(comment, sizeof(char), comment_length, fp);
|
|
|
|
i32 column_white_space = 0;
|
|
|
|
for (u64 i = 0; i < COUNT_LEXER_INPUTS; ++i) {
|
|
const char *input_str = get_input_string((lexer_input_t)i);
|
|
column_white_space = (column_width - strlen(input_str)) / 2;
|
|
|
|
sprintf(output, "%*s%*s| ", column_width - column_white_space, input_str,
|
|
column_white_space, " ");
|
|
length = strlen(output);
|
|
fwrite(output, sizeof(char), length, fp);
|
|
}
|
|
|
|
fwrite(table_header_end, sizeof(char), table_header_end_length, fp);
|
|
|
|
for (u64 i = 0; i < COUNT_LEXER_STATES; ++i) {
|
|
fwrite(array_open, sizeof(char), array_open_length, fp);
|
|
|
|
for (u64 j = 0; j < COUNT_LEXER_INPUTS; ++j) {
|
|
sprintf(output, "%*s, ", column_width,
|
|
get_state_string((lexer_state_t)state_table[i][j]));
|
|
length = strlen(output);
|
|
fwrite(output, sizeof(char), length, fp);
|
|
}
|
|
|
|
fwrite(array_close, sizeof(char), array_close_length, fp);
|
|
|
|
sprintf(output, " // %s\n", get_state_string((lexer_state_t)i));
|
|
length = strlen(output);
|
|
fwrite(output, sizeof(char), length, fp);
|
|
|
|
memset(output, 0, STRING_BUF_LENGTH);
|
|
}
|
|
}
|
|
|
|
/*
 * Widens column_width, if necessary, so that it can hold the name of
 * `state`.
 *
 * The name comes from get_state_string(), which keeps a single copy of the
 * state-name table. The previous switch had no `break` statements, so each
 * call fell through every later case. Called once per state, as
 * write_table() does, the resulting width is unchanged: the length of the
 * longest state name.
 *
 * Values outside the valid state range are ignored.
 */
INTERNAL void set_column_width(lexer_state_t state) {
  if ((u64)state >= (u64)COUNT_LEXER_STATES) {
    return;
  }

  const i32 length = (i32)strlen(get_state_string(state));

  if (length > column_width) {
    column_width = length;
  }
}
|
|
|
|
/*
 * Returns the display name of `input` (the enumerator name without the
 * LEXER_INPUT_ prefix).
 *
 * Returns "" for COUNT_LEXER_INPUTS and for any value that is not a listed
 * enumerator. The switch deliberately has no `default` so the compiler can
 * still warn when a new enumerator is left unhandled; the trailing return
 * keeps control from falling off the end of a non-void function.
 */
INTERNAL const char *get_input_string(lexer_input_t input) {
  switch (input) {
  case LEXER_INPUT_WHITE_SPACE:
    return "WHITE_SPACE";
  case LEXER_INPUT_OPEN_BRACE:
    return "OPEN_BRACE";
  case LEXER_INPUT_CLOSE_BRACE:
    return "CLOSE_BRACE";
  case LEXER_INPUT_OPEN_BRACKET:
    return "OPEN_BRACKET";
  case LEXER_INPUT_CLOSE_BRACKET:
    return "CLOSE_BRACKET";
  case LEXER_INPUT_COMMA:
    return "COMMA";
  case LEXER_INPUT_COLON:
    return "COLON";
  case LEXER_INPUT_DOUBLE_QUOTE:
    return "DOUBLE_QUOTE";
  case LEXER_INPUT_BACK_SLASH:
    return "BACK_SLASH";
  case LEXER_INPUT_FORWARD_SLASH:
    return "FORWARD_SLASH";
  case LEXER_INPUT_LOWER_A:
    return "LOWER_A";
  case LEXER_INPUT_LOWER_B:
    return "LOWER_B";
  case LEXER_INPUT_LOWER_C:
    return "LOWER_C";
  case LEXER_INPUT_LOWER_D:
    return "LOWER_D";
  case LEXER_INPUT_LOWER_E:
    return "LOWER_E";
  case LEXER_INPUT_LOWER_F:
    return "LOWER_F";
  case LEXER_INPUT_LOWER_L:
    return "LOWER_L";
  case LEXER_INPUT_LOWER_N:
    return "LOWER_N";
  case LEXER_INPUT_LOWER_R:
    return "LOWER_R";
  case LEXER_INPUT_LOWER_S:
    return "LOWER_S";
  case LEXER_INPUT_LOWER_T:
    return "LOWER_T";
  case LEXER_INPUT_LOWER_U:
    return "LOWER_U";
  case LEXER_INPUT_UPPER_A:
    return "UPPER_A";
  case LEXER_INPUT_UPPER_B:
    return "UPPER_B";
  case LEXER_INPUT_UPPER_C:
    return "UPPER_C";
  case LEXER_INPUT_UPPER_D:
    return "UPPER_D";
  case LEXER_INPUT_UPPER_E:
    return "UPPER_E";
  case LEXER_INPUT_UPPER_F:
    return "UPPER_F";
  case LEXER_INPUT_MINUS:
    return "MINUS";
  case LEXER_INPUT_PLUS:
    return "PLUS";
  case LEXER_INPUT_DECIMAL:
    return "DECIMAL";
  case LEXER_INPUT_ZERO:
    return "ZERO";
  case LEXER_INPUT_NON_ZERO:
    return "NON_ZERO";
  case LEXER_INPUT_OTHER:
    return "OTHER";

  case COUNT_LEXER_INPUTS:
    break;
  }

  return "";
}
|
|
|
|
/*
 * Returns the enumerator name of `state` as a string literal.
 *
 * Returns "" for COUNT_LEXER_STATES and for any value that is not a listed
 * enumerator. The switch deliberately has no `default` so the compiler can
 * still warn when a new enumerator is left unhandled; the trailing return
 * keeps control from falling off the end of a non-void function.
 */
INTERNAL const char *get_state_string(lexer_state_t state) {
  switch (state) {
  case LEXER_STATE_ERROR:
    return "LEXER_STATE_ERROR";
  case LEXER_STATE_START:
    return "LEXER_STATE_START";
  case LEXER_STATE_VALUE:
    return "LEXER_STATE_VALUE";
  case LEXER_STATE_VALUE_END:
    return "LEXER_STATE_VALUE_END";
  case LEXER_STATE_OBJECT_START:
    return "LEXER_STATE_OBJECT_START";
  case LEXER_STATE_OBJECT:
    return "LEXER_STATE_OBJECT";
  case LEXER_STATE_OBJECT_END:
    return "LEXER_STATE_OBJECT_END";
  case LEXER_STATE_ARRAY_START:
    return "LEXER_STATE_ARRAY_START";
  case LEXER_STATE_ARRAY:
    return "LEXER_STATE_ARRAY";
  case LEXER_STATE_ARRAY_END:
    return "LEXER_STATE_ARRAY_END";
  case LEXER_STATE_LAST_COLLECTION:
    return "LEXER_STATE_LAST_COLLECTION";
  case LEXER_STATE_KEY:
    return "LEXER_STATE_KEY";
  case LEXER_STATE_KEY_END:
    return "LEXER_STATE_KEY_END";
  case LEXER_STATE_DECIMAL:
    return "LEXER_STATE_DECIMAL";
  case LEXER_STATE_NUMBER:
    return "LEXER_STATE_NUMBER";
  case LEXER_STATE_FRACTION:
    return "LEXER_STATE_FRACTION";
  case LEXER_STATE_EXPONENT:
    return "LEXER_STATE_EXPONENT";
  case LEXER_STATE_EXP_SIGN:
    return "LEXER_STATE_EXP_SIGN";
  case LEXER_STATE_POWER:
    return "LEXER_STATE_POWER";
  case LEXER_STATE_NUMBER_END:
    return "LEXER_STATE_NUMBER_END";
  case LEXER_STATE_STRING:
    return "LEXER_STATE_STRING";
  case LEXER_STATE_STRING_END:
    return "LEXER_STATE_STRING_END";
  case LEXER_STATE_ESCAPE_SEQUENCE:
    return "LEXER_STATE_ESCAPE_SEQUENCE";
  case LEXER_STATE_UNICODE_HEX1:
    return "LEXER_STATE_UNICODE_HEX1";
  case LEXER_STATE_UNICODE_HEX2:
    return "LEXER_STATE_UNICODE_HEX2";
  case LEXER_STATE_UNICODE_HEX3:
    return "LEXER_STATE_UNICODE_HEX3";
  case LEXER_STATE_UNICODE_HEX4:
    return "LEXER_STATE_UNICODE_HEX4";
  case LEXER_STATE_T:
    return "LEXER_STATE_T";
  case LEXER_STATE_TR:
    return "LEXER_STATE_TR";
  case LEXER_STATE_TRU:
    return "LEXER_STATE_TRU";
  case LEXER_STATE_TRUE:
    return "LEXER_STATE_TRUE";
  case LEXER_STATE_F:
    return "LEXER_STATE_F";
  case LEXER_STATE_FA:
    return "LEXER_STATE_FA";
  case LEXER_STATE_FAL:
    return "LEXER_STATE_FAL";
  case LEXER_STATE_FALS:
    return "LEXER_STATE_FALS";
  case LEXER_STATE_FALSE:
    return "LEXER_STATE_FALSE";
  case LEXER_STATE_N:
    return "LEXER_STATE_N";
  case LEXER_STATE_NU:
    return "LEXER_STATE_NU";
  case LEXER_STATE_NUL:
    return "LEXER_STATE_NUL";
  case LEXER_STATE_NULL:
    return "LEXER_STATE_NULL";
  case LEXER_STATE_KEYWORD_END:
    return "LEXER_STATE_KEYWORD_END";

  case COUNT_LEXER_STATES:
    break;
  }

  return "";
}
|
|
|
|
/*
 * Computes the next lexer state for the pair (`state`, `input`) by
 * delegating to the handler for `state`.
 *
 * States that have no handler here (TRUE, FALSE, NULL, the *_START and *_END
 * collection states, KEY_END, VALUE_END, ERROR, COUNT_LEXER_STATES) map
 * every input to LEXER_STATE_ERROR. So does any value outside the
 * enumeration, which previously fell off the end of the function.
 */
lexer_state_t lexer_state_machine(lexer_state_t state, lexer_input_t input) {
  switch (state) {
  case LEXER_STATE_START:
    return handle_lexer_start(input);
  case LEXER_STATE_VALUE:
    return handle_value(input);
  case LEXER_STATE_OBJECT:
    return handle_object(input);
  case LEXER_STATE_ARRAY:
    return handle_array(input);
  case LEXER_STATE_KEY:
    return handle_key(input);
  case LEXER_STATE_DECIMAL:
    return handle_decimal(input);
  case LEXER_STATE_NUMBER:
    return handle_number(input);
  case LEXER_STATE_FRACTION:
    return handle_fraction(input);
  case LEXER_STATE_EXPONENT:
    return handle_exponent(input);
  case LEXER_STATE_EXP_SIGN:
    return handle_exp_sign(input);
  case LEXER_STATE_POWER:
    return handle_power(input);
  case LEXER_STATE_NUMBER_END:
    return handle_number_end(input);
  case LEXER_STATE_STRING:
    return handle_string(input);
  case LEXER_STATE_STRING_END:
    return handle_string_end(input);
  case LEXER_STATE_ESCAPE_SEQUENCE:
    return handle_escape_sequence(input);

  /* Each hex-digit state names the state to enter after a valid digit. */
  case LEXER_STATE_UNICODE_HEX1:
    return handle_unicode_hex(input, LEXER_STATE_UNICODE_HEX2);
  case LEXER_STATE_UNICODE_HEX2:
    return handle_unicode_hex(input, LEXER_STATE_UNICODE_HEX3);
  case LEXER_STATE_UNICODE_HEX3:
    return handle_unicode_hex(input, LEXER_STATE_UNICODE_HEX4);
  case LEXER_STATE_UNICODE_HEX4:
    return handle_unicode_hex(input, LEXER_STATE_STRING);

  /* Keyword handlers receive the current state to pick the expected letter. */
  case LEXER_STATE_T:
    return handle_true(input, LEXER_STATE_T);
  case LEXER_STATE_TR:
    return handle_true(input, LEXER_STATE_TR);
  case LEXER_STATE_TRU:
    return handle_true(input, LEXER_STATE_TRU);
  case LEXER_STATE_F:
    return handle_false(input, LEXER_STATE_F);
  case LEXER_STATE_FA:
    return handle_false(input, LEXER_STATE_FA);
  case LEXER_STATE_FAL:
    return handle_false(input, LEXER_STATE_FAL);
  case LEXER_STATE_FALS:
    return handle_false(input, LEXER_STATE_FALS);
  case LEXER_STATE_N:
    return handle_null(input, LEXER_STATE_N);
  case LEXER_STATE_NU:
    return handle_null(input, LEXER_STATE_NU);
  case LEXER_STATE_NUL:
    return handle_null(input, LEXER_STATE_NUL);
  case LEXER_STATE_KEYWORD_END:
    return handle_keyword_end(input);
  case LEXER_STATE_LAST_COLLECTION:
    return handle_last_collection(input);

  /* No handler: every input leads to the error state. */
  case LEXER_STATE_TRUE:
  case LEXER_STATE_FALSE:
  case LEXER_STATE_NULL:
  case LEXER_STATE_OBJECT_START:
  case LEXER_STATE_ARRAY_START:
  case LEXER_STATE_OBJECT_END:
  case LEXER_STATE_ARRAY_END:
  case LEXER_STATE_KEY_END:
  case LEXER_STATE_VALUE_END:
  case LEXER_STATE_ERROR:
  case COUNT_LEXER_STATES:
    break;
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
/*
 * Transition for LEXER_STATE_START: whitespace stays in START, '{' and '['
 * open an object or array, and anything else is an error.
 */
lexer_state_t handle_lexer_start(lexer_input_t input) {
  if (input == LEXER_INPUT_WHITE_SPACE) {
    return LEXER_STATE_START;
  }
  if (input == LEXER_INPUT_OPEN_BRACE) {
    return LEXER_STATE_OBJECT_START;
  }
  if (input == LEXER_INPUT_OPEN_BRACKET) {
    return LEXER_STATE_ARRAY_START;
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
lexer_state_t handle_last_collection(char input) {
|
|
if (input == LEXER_INPUT_WHITE_SPACE) {
|
|
return LEXER_STATE_LAST_COLLECTION;
|
|
}
|
|
|
|
return LEXER_STATE_ERROR;
|
|
}
|
|
|
|
/*
 * Transition for LEXER_STATE_OBJECT: whitespace stays in OBJECT, a double
 * quote starts a key, '}' ends the object, and anything else is an error.
 */
lexer_state_t handle_object(lexer_input_t input) {
  lexer_state_t next = LEXER_STATE_ERROR;

  if (input == LEXER_INPUT_WHITE_SPACE) {
    next = LEXER_STATE_OBJECT;
  } else if (input == LEXER_INPUT_DOUBLE_QUOTE) {
    next = LEXER_STATE_KEY;
  } else if (input == LEXER_INPUT_CLOSE_BRACE) {
    next = LEXER_STATE_OBJECT_END;
  }

  return next;
}
|
|
|
|
/*
 * Transition for LEXER_STATE_ARRAY: whitespace stays in ARRAY and ']' ends
 * the array; every other input is treated as the start of a value and
 * delegated to handle_value().
 */
lexer_state_t handle_array(lexer_input_t input) {
  if (input == LEXER_INPUT_WHITE_SPACE) {
    return LEXER_STATE_ARRAY;
  }
  if (input == LEXER_INPUT_CLOSE_BRACKET) {
    return LEXER_STATE_ARRAY_END;
  }

  return handle_value(input);
}
|
|
|
|
/*
 * Transition for LEXER_STATE_KEY: moves to LEXER_STATE_STRING whatever the
 * input is.
 *
 * NOTE(review): `input` is ignored, so a '"' or '\' seen in this state is
 * not treated specially here — confirm this matches how the lexer consumes
 * the first character of a key.
 */
lexer_state_t handle_key(lexer_input_t input) {
  (void)input;

  return LEXER_STATE_STRING;
}
|
|
|
|
/*
 * Transition for LEXER_STATE_VALUE: picks the state that starts the value
 * introduced by `input`. Whitespace stays in VALUE; inputs that cannot start
 * a value lead to LEXER_STATE_ERROR.
 */
lexer_state_t handle_value(lexer_input_t input) {
  lexer_state_t next;

  switch (input) {
  case LEXER_INPUT_WHITE_SPACE:
    next = LEXER_STATE_VALUE;
    break;
  case LEXER_INPUT_MINUS:
  case LEXER_INPUT_NON_ZERO:
    next = LEXER_STATE_NUMBER;
    break;
  case LEXER_INPUT_ZERO:
    next = LEXER_STATE_DECIMAL;
    break;
  case LEXER_INPUT_DOUBLE_QUOTE:
    next = LEXER_STATE_STRING;
    break;
  case LEXER_INPUT_OPEN_BRACE:
    next = LEXER_STATE_OBJECT_START;
    break;
  case LEXER_INPUT_OPEN_BRACKET:
    next = LEXER_STATE_ARRAY_START;
    break;
  case LEXER_INPUT_LOWER_T:
    next = LEXER_STATE_T;
    break;
  case LEXER_INPUT_LOWER_F:
    next = LEXER_STATE_F;
    break;
  case LEXER_INPUT_LOWER_N:
    next = LEXER_STATE_N;
    break;
  default:
    next = LEXER_STATE_ERROR;
    break;
  }

  return next;
}
|
|
|
|
/*
 * Transition for LEXER_STATE_STRING: a backslash starts an escape sequence,
 * a double quote ends the string, and every other input stays in STRING.
 */
lexer_state_t handle_string(lexer_input_t input) {
  if (input == LEXER_INPUT_BACK_SLASH) {
    return LEXER_STATE_ESCAPE_SEQUENCE;
  }
  if (input == LEXER_INPUT_DOUBLE_QUOTE) {
    return LEXER_STATE_STRING_END;
  }

  return LEXER_STATE_STRING;
}
|
|
|
|
/*
 * Transition for LEXER_STATE_STRING_END: whitespace stays in STRING_END,
 * ':' ends a key, ',' ends a value, '}' and ']' end the enclosing object or
 * array, and anything else is an error.
 */
lexer_state_t handle_string_end(lexer_input_t input) {
  lexer_state_t next;

  switch (input) {
  case LEXER_INPUT_WHITE_SPACE:
    next = LEXER_STATE_STRING_END;
    break;
  case LEXER_INPUT_COLON:
    next = LEXER_STATE_KEY_END;
    break;
  case LEXER_INPUT_COMMA:
    next = LEXER_STATE_VALUE_END;
    break;
  case LEXER_INPUT_CLOSE_BRACE:
    next = LEXER_STATE_OBJECT_END;
    break;
  case LEXER_INPUT_CLOSE_BRACKET:
    next = LEXER_STATE_ARRAY_END;
    break;
  default:
    next = LEXER_STATE_ERROR;
    break;
  }

  return next;
}
|
|
|
|
/*
 * Transition for LEXER_STATE_ESCAPE_SEQUENCE (the input following a
 * backslash): the single-character escapes return to STRING, 'u' starts a
 * four-digit hex escape, and anything else is an error.
 */
lexer_state_t handle_escape_sequence(lexer_input_t input) {
  lexer_state_t next = LEXER_STATE_ERROR;

  switch (input) {
  case LEXER_INPUT_DOUBLE_QUOTE:
  case LEXER_INPUT_FORWARD_SLASH:
  case LEXER_INPUT_BACK_SLASH:
  case LEXER_INPUT_LOWER_B:
  case LEXER_INPUT_LOWER_F:
  case LEXER_INPUT_LOWER_N:
  case LEXER_INPUT_LOWER_R:
  case LEXER_INPUT_LOWER_T:
    next = LEXER_STATE_STRING;
    break;
  case LEXER_INPUT_LOWER_U:
    next = LEXER_STATE_UNICODE_HEX1;
    break;
  default:
    break;
  }

  return next;
}
|
|
|
|
/*
 * Shared transition for the LEXER_STATE_UNICODE_HEX1..4 states.
 *
 * Returns `return_state` when `input` is a hex digit (a-f, A-F, zero or
 * non-zero digit) and LEXER_STATE_ERROR otherwise.
 */
INTERNAL lexer_state_t handle_unicode_hex(lexer_input_t input,
                                          lexer_state_t return_state) {
  lexer_state_t next = LEXER_STATE_ERROR;

  switch (input) {
  case LEXER_INPUT_LOWER_A:
  case LEXER_INPUT_LOWER_B:
  case LEXER_INPUT_LOWER_C:
  case LEXER_INPUT_LOWER_D:
  case LEXER_INPUT_LOWER_E:
  case LEXER_INPUT_LOWER_F:
  case LEXER_INPUT_UPPER_A:
  case LEXER_INPUT_UPPER_B:
  case LEXER_INPUT_UPPER_C:
  case LEXER_INPUT_UPPER_D:
  case LEXER_INPUT_UPPER_E:
  case LEXER_INPUT_UPPER_F:
  case LEXER_INPUT_ZERO:
  case LEXER_INPUT_NON_ZERO:
    next = return_state;
    break;
  default:
    break;
  }

  return next;
}
|
|
|
|
/*
 * Transition for LEXER_STATE_DECIMAL, the state entered after a leading
 * zero (see handle_value()).
 *
 * A leading zero may not be followed by more digits, but it is a complete
 * JSON number on its own. Besides '.', this state therefore accepts an
 * exponent marker and the same terminators as the other number states, so
 * inputs such as `[0]`, `{"a": 0}` and `0e5` no longer go to ERROR.
 */
lexer_state_t handle_decimal(lexer_input_t input) {
  switch (input) {
  case LEXER_INPUT_DECIMAL:
    return LEXER_STATE_FRACTION;
  case LEXER_INPUT_LOWER_E:
  case LEXER_INPUT_UPPER_E:
    return LEXER_STATE_EXPONENT;
  case LEXER_INPUT_CLOSE_BRACE:
    return LEXER_STATE_OBJECT_END;
  case LEXER_INPUT_CLOSE_BRACKET:
    return LEXER_STATE_ARRAY_END;
  case LEXER_INPUT_COMMA:
    return LEXER_STATE_VALUE_END;
  case LEXER_INPUT_WHITE_SPACE:
    return LEXER_STATE_NUMBER_END;
  default:
    /* Includes further digits: "01" is not a valid JSON number. */
    return LEXER_STATE_ERROR;
  }
}
|
|
|
|
/*
 * Transition for LEXER_STATE_NUMBER (integer part of a number).
 *
 * Digits stay in NUMBER, '.' starts the fraction, 'e'/'E' starts the
 * exponent, '}' / ']' / ',' end the enclosing construct, whitespace moves to
 * NUMBER_END, and anything else is an error.
 *
 * The exponent transition was missing, so valid JSON such as `1e5` or
 * `12E-3` went to ERROR.
 */
lexer_state_t handle_number(lexer_input_t input) {
  switch (input) {
  case LEXER_INPUT_ZERO:
  case LEXER_INPUT_NON_ZERO:
    return LEXER_STATE_NUMBER;
  case LEXER_INPUT_DECIMAL:
    return LEXER_STATE_FRACTION;
  case LEXER_INPUT_LOWER_E:
  case LEXER_INPUT_UPPER_E:
    return LEXER_STATE_EXPONENT;
  case LEXER_INPUT_CLOSE_BRACE:
    return LEXER_STATE_OBJECT_END;
  case LEXER_INPUT_CLOSE_BRACKET:
    return LEXER_STATE_ARRAY_END;
  case LEXER_INPUT_COMMA:
    return LEXER_STATE_VALUE_END;
  case LEXER_INPUT_WHITE_SPACE:
    return LEXER_STATE_NUMBER_END;
  default:
    return LEXER_STATE_ERROR;
  }
}
|
|
|
|
/*
 * Transition for LEXER_STATE_FRACTION: digits stay in FRACTION, 'e'/'E'
 * starts the exponent, '}' / ']' / ',' end the enclosing construct,
 * whitespace moves to NUMBER_END, and anything else is an error.
 */
lexer_state_t handle_fraction(lexer_input_t input) {
  lexer_state_t next;

  switch (input) {
  case LEXER_INPUT_ZERO:
  case LEXER_INPUT_NON_ZERO:
    next = LEXER_STATE_FRACTION;
    break;
  case LEXER_INPUT_LOWER_E:
  case LEXER_INPUT_UPPER_E:
    next = LEXER_STATE_EXPONENT;
    break;
  case LEXER_INPUT_CLOSE_BRACE:
    next = LEXER_STATE_OBJECT_END;
    break;
  case LEXER_INPUT_CLOSE_BRACKET:
    next = LEXER_STATE_ARRAY_END;
    break;
  case LEXER_INPUT_COMMA:
    next = LEXER_STATE_VALUE_END;
    break;
  case LEXER_INPUT_WHITE_SPACE:
    next = LEXER_STATE_NUMBER_END;
    break;
  default:
    next = LEXER_STATE_ERROR;
    break;
  }

  return next;
}
|
|
|
|
/*
 * Transition for LEXER_STATE_EXPONENT (just after 'e'/'E'): a digit moves
 * to POWER, a sign moves to EXP_SIGN, and anything else is an error.
 */
lexer_state_t handle_exponent(lexer_input_t input) {
  if (input == LEXER_INPUT_ZERO || input == LEXER_INPUT_NON_ZERO) {
    return LEXER_STATE_POWER;
  }
  if (input == LEXER_INPUT_PLUS || input == LEXER_INPUT_MINUS) {
    return LEXER_STATE_EXP_SIGN;
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
/*
 * Transition for LEXER_STATE_EXP_SIGN (just after the exponent sign): a
 * digit moves to POWER; anything else is an error.
 */
lexer_state_t handle_exp_sign(lexer_input_t input) {
  const int is_digit =
      input == LEXER_INPUT_ZERO || input == LEXER_INPUT_NON_ZERO;

  return is_digit ? LEXER_STATE_POWER : LEXER_STATE_ERROR;
}
|
|
|
|
/*
 * Transition for LEXER_STATE_POWER (exponent digits): digits stay in POWER,
 * '}' / ']' / ',' end the enclosing construct, whitespace moves to
 * NUMBER_END, and anything else is an error.
 */
lexer_state_t handle_power(lexer_input_t input) {
  lexer_state_t next;

  switch (input) {
  case LEXER_INPUT_ZERO:
  case LEXER_INPUT_NON_ZERO:
    next = LEXER_STATE_POWER;
    break;
  case LEXER_INPUT_CLOSE_BRACE:
    next = LEXER_STATE_OBJECT_END;
    break;
  case LEXER_INPUT_CLOSE_BRACKET:
    next = LEXER_STATE_ARRAY_END;
    break;
  case LEXER_INPUT_COMMA:
    next = LEXER_STATE_VALUE_END;
    break;
  case LEXER_INPUT_WHITE_SPACE:
    next = LEXER_STATE_NUMBER_END;
    break;
  default:
    next = LEXER_STATE_ERROR;
    break;
  }

  return next;
}
|
|
|
|
/*
 * Transition for LEXER_STATE_NUMBER_END (whitespace after a number):
 * whitespace stays in NUMBER_END, '}' / ']' / ',' end the enclosing
 * construct, and anything else is an error.
 */
lexer_state_t handle_number_end(lexer_input_t input) {
  if (input == LEXER_INPUT_WHITE_SPACE) {
    return LEXER_STATE_NUMBER_END;
  }
  if (input == LEXER_INPUT_CLOSE_BRACE) {
    return LEXER_STATE_OBJECT_END;
  }
  if (input == LEXER_INPUT_CLOSE_BRACKET) {
    return LEXER_STATE_ARRAY_END;
  }
  if (input == LEXER_INPUT_COMMA) {
    return LEXER_STATE_VALUE_END;
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
/*
 * Transition for the partial-keyword states of `true`.
 *
 * `start_state` is the current state (T, TR or TRU). The next state is
 * returned when `input` is the letter expected after it ('r', 'u', 'e');
 * any other combination yields LEXER_STATE_ERROR.
 */
lexer_state_t handle_true(lexer_input_t input, lexer_state_t start_state) {
  if (start_state == LEXER_STATE_T && input == LEXER_INPUT_LOWER_R) {
    return LEXER_STATE_TR;
  }
  if (start_state == LEXER_STATE_TR && input == LEXER_INPUT_LOWER_U) {
    return LEXER_STATE_TRU;
  }
  if (start_state == LEXER_STATE_TRU && input == LEXER_INPUT_LOWER_E) {
    return LEXER_STATE_TRUE;
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
/*
 * Transition for the partial-keyword states of `false`.
 *
 * `start_state` is the current state (F, FA, FAL or FALS). The next state is
 * returned when `input` is the letter expected after it ('a', 'l', 's',
 * 'e'); any other combination yields LEXER_STATE_ERROR.
 */
lexer_state_t handle_false(lexer_input_t input, lexer_state_t start_state) {
  if (start_state == LEXER_STATE_F && input == LEXER_INPUT_LOWER_A) {
    return LEXER_STATE_FA;
  }
  if (start_state == LEXER_STATE_FA && input == LEXER_INPUT_LOWER_L) {
    return LEXER_STATE_FAL;
  }
  if (start_state == LEXER_STATE_FAL && input == LEXER_INPUT_LOWER_S) {
    return LEXER_STATE_FALS;
  }
  if (start_state == LEXER_STATE_FALS && input == LEXER_INPUT_LOWER_E) {
    return LEXER_STATE_FALSE;
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
/*
 * Transition for the partial-keyword states of `null`.
 *
 * `start_state` is the current state (N, NU or NUL). The next state is
 * returned when `input` is the letter expected after it ('u', 'l', 'l');
 * any other combination yields LEXER_STATE_ERROR.
 */
lexer_state_t handle_null(lexer_input_t input, lexer_state_t start_state) {
  if (start_state == LEXER_STATE_N && input == LEXER_INPUT_LOWER_U) {
    return LEXER_STATE_NU;
  }
  if (start_state == LEXER_STATE_NU && input == LEXER_INPUT_LOWER_L) {
    return LEXER_STATE_NUL;
  }
  if (start_state == LEXER_STATE_NUL && input == LEXER_INPUT_LOWER_L) {
    return LEXER_STATE_NULL;
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
/*
 * Transition for LEXER_STATE_KEYWORD_END: whitespace stays in KEYWORD_END,
 * '}' / ']' / ',' end the enclosing construct, and anything else is an
 * error.
 */
lexer_state_t handle_keyword_end(lexer_input_t input) {
  if (input == LEXER_INPUT_WHITE_SPACE) {
    return LEXER_STATE_KEYWORD_END;
  }
  if (input == LEXER_INPUT_CLOSE_BRACE) {
    return LEXER_STATE_OBJECT_END;
  }
  if (input == LEXER_INPUT_CLOSE_BRACKET) {
    return LEXER_STATE_ARRAY_END;
  }
  if (input == LEXER_INPUT_COMMA) {
    return LEXER_STATE_VALUE_END;
  }

  return LEXER_STATE_ERROR;
}
|