#include "aliases.h" #include "lexer_data.h" #include #include #include #include #define STRING_BUF_LENGTH 100 INTERNAL void fill_table(void); INTERNAL void write_table(FILE *fp); INTERNAL void set_column_width(lexer_state_t state); INTERNAL const char *get_input_string(lexer_input_t input); INTERNAL const char *get_state_string(lexer_state_t state); INTERNAL lexer_state_t lexer_state_machine(lexer_state_t state, lexer_input_t input); INTERNAL lexer_state_t handle_lexer_start(lexer_input_t input); INTERNAL lexer_state_t handle_last_collection(char input); INTERNAL lexer_state_t handle_object(lexer_input_t input); INTERNAL lexer_state_t handle_array(lexer_input_t input); INTERNAL lexer_state_t handle_key(lexer_input_t input); INTERNAL lexer_state_t handle_value(lexer_input_t input); INTERNAL lexer_state_t handle_string(lexer_input_t input); INTERNAL lexer_state_t handle_string_end(lexer_input_t input); INTERNAL lexer_state_t handle_escape_sequence(lexer_input_t input); INTERNAL lexer_state_t handle_unicode_hex(lexer_input_t input, lexer_state_t return_state); INTERNAL lexer_state_t handle_decimal(lexer_input_t input); INTERNAL lexer_state_t handle_number(lexer_input_t input); INTERNAL lexer_state_t handle_fraction(lexer_input_t input); INTERNAL lexer_state_t handle_exponent(lexer_input_t input); INTERNAL lexer_state_t handle_exp_sign(lexer_input_t input); INTERNAL lexer_state_t handle_power(lexer_input_t input); INTERNAL lexer_state_t handle_number_end(lexer_input_t input); INTERNAL lexer_state_t handle_true(lexer_input_t input, lexer_state_t start_state); INTERNAL lexer_state_t handle_false(lexer_input_t input, lexer_state_t start_state); INTERNAL lexer_state_t handle_null(lexer_input_t input, lexer_state_t start_state); INTERNAL lexer_state_t handle_keyword_end(lexer_input_t input); INTERNAL i32 column_width = 0; INTERNAL lexer_state_t state_table[COUNT_LEXER_STATES][COUNT_LEXER_INPUTS] = { 0}; INTERNAL const char *input_strings[COUNT_LEXER_STATES] = { [LEXER_INPUT_WHITE_SPACE] = "WHITE_SPACE", [LEXER_INPUT_OPEN_BRACE] = "OPEN_BRACE", [LEXER_INPUT_CLOSE_BRACE] = "CLOSE_BRACE", [LEXER_INPUT_OPEN_BRACKET] = "OPEN_BRACKET", [LEXER_INPUT_CLOSE_BRACKET] = "CLOSE_BRACKET", [LEXER_INPUT_COMMA] = "COMMA", [LEXER_INPUT_COLON] = "COLON", [LEXER_INPUT_DOUBLE_QUOTE] = "DOUBLE_QUOTE", [LEXER_INPUT_BACK_SLASH] = "BACK_SLASH", [LEXER_INPUT_FORWARD_SLASH] = "FORWARD_SLASH", [LEXER_INPUT_LOWER_A] = "LOWER_A", [LEXER_INPUT_LOWER_B] = "LOWER_B", [LEXER_INPUT_LOWER_C] = "LOWER_C", [LEXER_INPUT_LOWER_D] = "LOWER_D", [LEXER_INPUT_LOWER_E] = "LOWER_E", [LEXER_INPUT_LOWER_F] = "LOWER_F", [LEXER_INPUT_LOWER_L] = "LOWER_L", [LEXER_INPUT_LOWER_N] = "LOWER_N", [LEXER_INPUT_LOWER_R] = "LOWER_R", [LEXER_INPUT_LOWER_S] = "LOWER_S", [LEXER_INPUT_LOWER_T] = "LOWER_T", [LEXER_INPUT_LOWER_U] = "LOWER_U", [LEXER_INPUT_UPPER_A] = "UPPER_A", [LEXER_INPUT_UPPER_B] = "UPPER_B", [LEXER_INPUT_UPPER_C] = "UPPER_C", [LEXER_INPUT_UPPER_D] = "UPPER_D", [LEXER_INPUT_UPPER_E] = "UPPER_E", [LEXER_INPUT_UPPER_F] = "UPPER_F", [LEXER_INPUT_MINUS] = "MINUS", [LEXER_INPUT_PLUS] = "PLUS", [LEXER_INPUT_DECIMAL] = "DECIMAL", [LEXER_INPUT_ZERO] = "ZERO", [LEXER_INPUT_NON_ZERO] = "NON_ZERO", [LEXER_INPUT_OTHER] = "OTHER", }; INTERNAL const char *state_strings[COUNT_LEXER_STATES] = { [LEXER_STATE_ERROR] = "LEXER_STATE_ERROR", [LEXER_STATE_START] = "LEXER_STATE_START", [LEXER_STATE_VALUE] = "LEXER_STATE_VALUE", [LEXER_STATE_VALUE_END] = "LEXER_STATE_VALUE_END", [LEXER_STATE_OBJECT_START] = "LEXER_STATE_OBJECT_START", [LEXER_STATE_OBJECT] = "LEXER_STATE_OBJECT", [LEXER_STATE_OBJECT_END] = "LEXER_STATE_OBJECT_END", [LEXER_STATE_ARRAY_START] = "LEXER_STATE_ARRAY_START", [LEXER_STATE_ARRAY] = "LEXER_STATE_ARRAY", [LEXER_STATE_ARRAY_END] = "LEXER_STATE_ARRAY_END", [LEXER_STATE_LAST_COLLECTION] = "LEXER_STATE_LAST_COLLECTION", [LEXER_STATE_KEY] = "LEXER_STATE_KEY", [LEXER_STATE_KEY_END] = "LEXER_STATE_KEY_END", [LEXER_STATE_DECIMAL] = "LEXER_STATE_DECIMAL", [LEXER_STATE_NUMBER] = "LEXER_STATE_NUMBER", [LEXER_STATE_FRACTION] = "LEXER_STATE_FRACTION", [LEXER_STATE_EXPONENT] = "LEXER_STATE_EXPONENT", [LEXER_STATE_EXP_SIGN] = "LEXER_STATE_EXP_SIGN", [LEXER_STATE_POWER] = "LEXER_STATE_POWER", [LEXER_STATE_NUMBER_END] = "LEXER_STATE_NUMBER_END", [LEXER_STATE_STRING] = "LEXER_STATE_STRING", [LEXER_STATE_STRING_END] = "LEXER_STATE_STRING_END", [LEXER_STATE_ESCAPE_SEQUENCE] = "LEXER_STATE_ESCAPE_SEQUENCE", [LEXER_STATE_UNICODE_HEX1] = "LEXER_STATE_UNICODE_HEX1", [LEXER_STATE_UNICODE_HEX2] = "LEXER_STATE_UNICODE_HEX2", [LEXER_STATE_UNICODE_HEX3] = "LEXER_STATE_UNICODE_HEX3", [LEXER_STATE_UNICODE_HEX4] = "LEXER_STATE_UNICODE_HEX4", [LEXER_STATE_T] = "LEXER_STATE_T", [LEXER_STATE_TR] = "LEXER_STATE_TR", [LEXER_STATE_TRU] = "LEXER_STATE_TRU", [LEXER_STATE_TRUE] = "LEXER_STATE_TRUE", [LEXER_STATE_F] = "LEXER_STATE_F", [LEXER_STATE_FA] = "LEXER_STATE_FA", [LEXER_STATE_FAL] = "LEXER_STATE_FAL", [LEXER_STATE_FALS] = "LEXER_STATE_FALS", [LEXER_STATE_FALSE] = "LEXER_STATE_FALSE", [LEXER_STATE_N] = "LEXER_STATE_N", [LEXER_STATE_NU] = "LEXER_STATE_NU", [LEXER_STATE_NUL] = "LEXER_STATE_NUL", [LEXER_STATE_NULL] = "LEXER_STATE_NULL", [LEXER_STATE_KEYWORD_END] = "LEXER_STATE_KEYWORD_END", }; INTERNAL const char *filename = "./include/lexer/lexer_state_transitions.table"; int main(void) { FILE *fp = fopen(filename, "w"); if (!fp) { printf("Failed to open file\n"); return EXIT_FAILURE; } fill_table(); write_table(fp); fclose(fp); return EXIT_SUCCESS; } INTERNAL void fill_table(void) { lexer_state_t state; lexer_input_t input; for (u64 i = 0; i < COUNT_LEXER_STATES; ++i) { state = (lexer_state_t)i; for (u64 j = 0; j < COUNT_LEXER_INPUTS; ++j) { input = (lexer_input_t)j; state_table[i][j] = (lexer_state_t)lexer_state_machine(state, input); } } } void write_table(FILE *fp) { const char *array_open = "{ "; const u64 array_open_length = strlen(array_open); const char *array_close = "},"; const u64 array_close_length = strlen(array_close); const char *comment = "//"; const u64 comment_length = strlen(comment); const char *table_header_end = " \n"; const u64 table_header_end_length = strlen(table_header_end); char output[STRING_BUF_LENGTH] = {0}; u64 length = 0; for (u64 i = 0; i < COUNT_LEXER_STATES; ++i) { set_column_width((lexer_state_t)i); } fwrite(comment, sizeof(char), comment_length, fp); i32 column_white_space = 0; for (u64 i = 0; i < COUNT_LEXER_INPUTS; ++i) { const char *input_str = get_input_string((lexer_input_t)i); column_white_space = (column_width - strlen(input_str)) / 2; sprintf(output, "%*s%*s| ", column_width - column_white_space, input_str, column_white_space, " "); length = strlen(output); fwrite(output, sizeof(char), length, fp); } fwrite(table_header_end, sizeof(char), table_header_end_length, fp); for (u64 i = 0; i < COUNT_LEXER_STATES; ++i) { fwrite(array_open, sizeof(char), array_open_length, fp); for (u64 j = 0; j < COUNT_LEXER_INPUTS; ++j) { sprintf(output, "%*s, ", column_width, get_state_string((lexer_state_t)state_table[i][j])); length = strlen(output); fwrite(output, sizeof(char), length, fp); } fwrite(array_close, sizeof(char), array_close_length, fp); sprintf(output, " // %s\n", get_state_string((lexer_state_t)i)); length = strlen(output); fwrite(output, sizeof(char), length, fp); memset(output, 0, STRING_BUF_LENGTH); } } INTERNAL void set_column_width(lexer_state_t state) { if (state >= COUNT_LEXER_STATES) { return; } const char *output = state_strings[state]; i32 length = strlen(output); column_width = length > column_width ? length : column_width; } INTERNAL const char *get_input_string(lexer_input_t input) { if (input >= COUNT_LEXER_INPUTS) { return ""; } return input_strings[input]; } INTERNAL const char *get_state_string(lexer_state_t state) { if (state >= COUNT_LEXER_STATES) { return ""; } return state_strings[state]; } lexer_state_t lexer_state_machine(lexer_state_t state, lexer_input_t input) { switch (state) { case LEXER_STATE_START: return handle_lexer_start(input); case LEXER_STATE_VALUE: return handle_value(input); case LEXER_STATE_OBJECT: return handle_object(input); case LEXER_STATE_ARRAY: return handle_array(input); case LEXER_STATE_KEY: return handle_key(input); case LEXER_STATE_DECIMAL: return handle_decimal(input); case LEXER_STATE_NUMBER: return handle_number(input); case LEXER_STATE_FRACTION: return handle_fraction(input); case LEXER_STATE_EXPONENT: return handle_exponent(input); case LEXER_STATE_EXP_SIGN: return handle_exp_sign(input); case LEXER_STATE_POWER: return handle_power(input); case LEXER_STATE_NUMBER_END: return handle_number_end(input); case LEXER_STATE_STRING: return handle_string(input); case LEXER_STATE_STRING_END: return handle_string_end(input); case LEXER_STATE_ESCAPE_SEQUENCE: return handle_escape_sequence(input); case LEXER_STATE_UNICODE_HEX1: return handle_unicode_hex(input, LEXER_STATE_UNICODE_HEX2); case LEXER_STATE_UNICODE_HEX2: return handle_unicode_hex(input, LEXER_STATE_UNICODE_HEX3); case LEXER_STATE_UNICODE_HEX3: return handle_unicode_hex(input, LEXER_STATE_UNICODE_HEX4); case LEXER_STATE_UNICODE_HEX4: return handle_unicode_hex(input, LEXER_STATE_STRING); case LEXER_STATE_T: return handle_true(input, LEXER_STATE_T); case LEXER_STATE_TR: return handle_true(input, LEXER_STATE_TR); case LEXER_STATE_TRU: return handle_true(input, LEXER_STATE_TRU); case LEXER_STATE_F: return handle_false(input, LEXER_STATE_F); case LEXER_STATE_FA: return handle_false(input, LEXER_STATE_FA); case LEXER_STATE_FAL: return handle_false(input, LEXER_STATE_FAL); case LEXER_STATE_FALS: return handle_false(input, LEXER_STATE_FALS); case LEXER_STATE_N: return handle_null(input, LEXER_STATE_N); case LEXER_STATE_NU: return handle_null(input, LEXER_STATE_NU); case LEXER_STATE_NUL: return handle_null(input, LEXER_STATE_NUL); case LEXER_STATE_KEYWORD_END: return handle_keyword_end(input); case LEXER_STATE_LAST_COLLECTION: return handle_last_collection(input); case LEXER_STATE_TRUE: case LEXER_STATE_FALSE: case LEXER_STATE_NULL: case LEXER_STATE_OBJECT_START: case LEXER_STATE_ARRAY_START: case LEXER_STATE_OBJECT_END: case LEXER_STATE_ARRAY_END: case LEXER_STATE_KEY_END: case LEXER_STATE_VALUE_END: case LEXER_STATE_ERROR: case COUNT_LEXER_STATES: return LEXER_STATE_ERROR; } } lexer_state_t handle_lexer_start(lexer_input_t input) { switch (input) { case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_START; case LEXER_INPUT_OPEN_BRACE: return LEXER_STATE_OBJECT_START; case LEXER_INPUT_OPEN_BRACKET: return LEXER_STATE_ARRAY_START; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_last_collection(char input) { if (input == LEXER_INPUT_WHITE_SPACE) { return LEXER_STATE_LAST_COLLECTION; } return LEXER_STATE_ERROR; } lexer_state_t handle_object(lexer_input_t input) { switch (input) { case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_OBJECT; case LEXER_INPUT_DOUBLE_QUOTE: return LEXER_STATE_KEY; case LEXER_INPUT_CLOSE_BRACE: return LEXER_STATE_OBJECT_END; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_array(lexer_input_t input) { switch (input) { case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_ARRAY; case LEXER_INPUT_CLOSE_BRACKET: return LEXER_STATE_ARRAY_END; default: return handle_value(input); } } lexer_state_t handle_key(lexer_input_t input) { return LEXER_STATE_STRING; } lexer_state_t handle_value(lexer_input_t input) { switch (input) { case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_VALUE; case LEXER_INPUT_NON_ZERO: case LEXER_INPUT_MINUS: return LEXER_STATE_NUMBER; case LEXER_INPUT_ZERO: return LEXER_STATE_DECIMAL; case LEXER_INPUT_DOUBLE_QUOTE: return LEXER_STATE_STRING; case LEXER_INPUT_OPEN_BRACE: return LEXER_STATE_OBJECT_START; case LEXER_INPUT_OPEN_BRACKET: return LEXER_STATE_ARRAY_START; case LEXER_INPUT_LOWER_T: return LEXER_STATE_T; case LEXER_INPUT_LOWER_F: return LEXER_STATE_F; case LEXER_INPUT_LOWER_N: return LEXER_STATE_N; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_string(lexer_input_t input) { switch (input) { case LEXER_INPUT_BACK_SLASH: return LEXER_STATE_ESCAPE_SEQUENCE; case LEXER_INPUT_DOUBLE_QUOTE: return LEXER_STATE_STRING_END; default: return LEXER_STATE_STRING; } } lexer_state_t handle_string_end(lexer_input_t input) { switch (input) { case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_STRING_END; case LEXER_INPUT_COLON: return LEXER_STATE_KEY_END; case LEXER_INPUT_COMMA: return LEXER_STATE_VALUE_END; case LEXER_INPUT_CLOSE_BRACE: return LEXER_STATE_OBJECT_END; case LEXER_INPUT_CLOSE_BRACKET: return LEXER_STATE_ARRAY_END; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_escape_sequence(lexer_input_t input) { switch (input) { case LEXER_INPUT_DOUBLE_QUOTE: case LEXER_INPUT_FORWARD_SLASH: case LEXER_INPUT_BACK_SLASH: case LEXER_INPUT_LOWER_B: case LEXER_INPUT_LOWER_F: case LEXER_INPUT_LOWER_N: case LEXER_INPUT_LOWER_R: case LEXER_INPUT_LOWER_T: return LEXER_STATE_STRING; case LEXER_INPUT_LOWER_U: return LEXER_STATE_UNICODE_HEX1; default: return LEXER_STATE_ERROR; } } INTERNAL lexer_state_t handle_unicode_hex(lexer_input_t input, lexer_state_t return_state) { switch (input) { case LEXER_INPUT_LOWER_A: case LEXER_INPUT_LOWER_B: case LEXER_INPUT_LOWER_C: case LEXER_INPUT_LOWER_D: case LEXER_INPUT_LOWER_E: case LEXER_INPUT_LOWER_F: case LEXER_INPUT_UPPER_A: case LEXER_INPUT_UPPER_B: case LEXER_INPUT_UPPER_C: case LEXER_INPUT_UPPER_D: case LEXER_INPUT_UPPER_E: case LEXER_INPUT_UPPER_F: case LEXER_INPUT_ZERO: case LEXER_INPUT_NON_ZERO: return return_state; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_decimal(lexer_input_t input) { if (input == LEXER_INPUT_DECIMAL) { return LEXER_STATE_FRACTION; } return LEXER_STATE_ERROR; } lexer_state_t handle_number(lexer_input_t input) { switch (input) { case LEXER_INPUT_ZERO: case LEXER_INPUT_NON_ZERO: return LEXER_STATE_NUMBER; case LEXER_INPUT_DECIMAL: return LEXER_STATE_FRACTION; case LEXER_INPUT_CLOSE_BRACE: return LEXER_STATE_OBJECT_END; case LEXER_INPUT_CLOSE_BRACKET: return LEXER_STATE_ARRAY_END; case LEXER_INPUT_COMMA: return LEXER_STATE_VALUE_END; case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_NUMBER_END; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_fraction(lexer_input_t input) { switch (input) { case LEXER_INPUT_ZERO: case LEXER_INPUT_NON_ZERO: return LEXER_STATE_FRACTION; case LEXER_INPUT_CLOSE_BRACE: return LEXER_STATE_OBJECT_END; case LEXER_INPUT_CLOSE_BRACKET: return LEXER_STATE_ARRAY_END; case LEXER_INPUT_LOWER_E: case LEXER_INPUT_UPPER_E: return LEXER_STATE_EXPONENT; case LEXER_INPUT_COMMA: return LEXER_STATE_VALUE_END; case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_NUMBER_END; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_exponent(lexer_input_t input) { switch (input) { case LEXER_INPUT_ZERO: case LEXER_INPUT_NON_ZERO: return LEXER_STATE_POWER; case LEXER_INPUT_PLUS: case LEXER_INPUT_MINUS: return LEXER_STATE_EXP_SIGN; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_exp_sign(lexer_input_t input) { switch (input) { case LEXER_INPUT_ZERO: case LEXER_INPUT_NON_ZERO: return LEXER_STATE_POWER; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_power(lexer_input_t input) { switch (input) { case LEXER_INPUT_ZERO: case LEXER_INPUT_NON_ZERO: return LEXER_STATE_POWER; case LEXER_INPUT_CLOSE_BRACE: return LEXER_STATE_OBJECT_END; case LEXER_INPUT_CLOSE_BRACKET: return LEXER_STATE_ARRAY_END; case LEXER_INPUT_COMMA: return LEXER_STATE_VALUE_END; case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_NUMBER_END; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_number_end(lexer_input_t input) { switch (input) { case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_NUMBER_END; case LEXER_INPUT_CLOSE_BRACE: return LEXER_STATE_OBJECT_END; case LEXER_INPUT_CLOSE_BRACKET: return LEXER_STATE_ARRAY_END; case LEXER_INPUT_COMMA: return LEXER_STATE_VALUE_END; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_true(lexer_input_t input, lexer_state_t start_state) { switch (start_state) { case LEXER_STATE_T: return input == LEXER_INPUT_LOWER_R ? LEXER_STATE_TR : LEXER_STATE_ERROR; case LEXER_STATE_TR: return input == LEXER_INPUT_LOWER_U ? LEXER_STATE_TRU : LEXER_STATE_ERROR; case LEXER_STATE_TRU: return input == LEXER_INPUT_LOWER_E ? LEXER_STATE_TRUE : LEXER_STATE_ERROR; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_false(lexer_input_t input, lexer_state_t start_state) { switch (start_state) { case LEXER_STATE_F: return input == LEXER_INPUT_LOWER_A ? LEXER_STATE_FA : LEXER_STATE_ERROR; case LEXER_STATE_FA: return input == LEXER_INPUT_LOWER_L ? LEXER_STATE_FAL : LEXER_STATE_ERROR; case LEXER_STATE_FAL: return input == LEXER_INPUT_LOWER_S ? LEXER_STATE_FALS : LEXER_STATE_ERROR; case LEXER_STATE_FALS: return input == LEXER_INPUT_LOWER_E ? LEXER_STATE_FALSE : LEXER_STATE_ERROR; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_null(lexer_input_t input, lexer_state_t start_state) { switch (start_state) { case LEXER_STATE_N: return input == LEXER_INPUT_LOWER_U ? LEXER_STATE_NU : LEXER_STATE_ERROR; case LEXER_STATE_NU: return input == LEXER_INPUT_LOWER_L ? LEXER_STATE_NUL : LEXER_STATE_ERROR; case LEXER_STATE_NUL: return input == LEXER_INPUT_LOWER_L ? LEXER_STATE_NULL : LEXER_STATE_ERROR; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_keyword_end(lexer_input_t input) { switch (input) { case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_KEYWORD_END; case LEXER_INPUT_CLOSE_BRACE: return LEXER_STATE_OBJECT_END; case LEXER_INPUT_CLOSE_BRACKET: return LEXER_STATE_ARRAY_END; case LEXER_INPUT_COMMA: return LEXER_STATE_VALUE_END; default: return LEXER_STATE_ERROR; } }