From 002a8e7064d1cf8ca0bcebbf2cf0224271420103 Mon Sep 17 00:00:00 2001 From: Abdelrahman Date: Sat, 29 Jul 2023 22:07:03 +0100 Subject: [PATCH] Get the input and state strings from tables instead of switch cases --- generate_state_table.c | 584 +++++++---------------------------------- 1 file changed, 92 insertions(+), 492 deletions(-) diff --git a/generate_state_table.c b/generate_state_table.c index 0200aef..b2d6b0a 100644 --- a/generate_state_table.c +++ b/generate_state_table.c @@ -45,6 +45,85 @@ INTERNAL lexer_state_t handle_keyword_end(lexer_input_t input); INTERNAL i32 column_width = 0; INTERNAL lexer_state_t state_table[COUNT_LEXER_STATES][COUNT_LEXER_INPUTS] = { 0}; +INTERNAL const char *input_strings[] = { + [LEXER_INPUT_WHITE_SPACE] = "WHITE_SPACE", + [LEXER_INPUT_OPEN_BRACE] = "OPEN_BRACE", + [LEXER_INPUT_CLOSE_BRACE] = "CLOSE_BRACE", + [LEXER_INPUT_OPEN_BRACKET] = "OPEN_BRACKET", + [LEXER_INPUT_CLOSE_BRACKET] = "CLOSE_BRACKET", + [LEXER_INPUT_COMMA] = "COMMA", + [LEXER_INPUT_COLON] = "COLON", + [LEXER_INPUT_DOUBLE_QUOTE] = "DOUBLE_QUOTE", + [LEXER_INPUT_BACK_SLASH] = "BACK_SLASH", + [LEXER_INPUT_FORWARD_SLASH] = "FORWARD_SLASH", + [LEXER_INPUT_LOWER_A] = "LOWER_A", + [LEXER_INPUT_LOWER_B] = "LOWER_B", + [LEXER_INPUT_LOWER_C] = "LOWER_C", + [LEXER_INPUT_LOWER_D] = "LOWER_D", + [LEXER_INPUT_LOWER_E] = "LOWER_E", + [LEXER_INPUT_LOWER_F] = "LOWER_F", + [LEXER_INPUT_LOWER_L] = "LOWER_L", + [LEXER_INPUT_LOWER_N] = "LOWER_N", + [LEXER_INPUT_LOWER_R] = "LOWER_R", + [LEXER_INPUT_LOWER_S] = "LOWER_S", + [LEXER_INPUT_LOWER_T] = "LOWER_T", + [LEXER_INPUT_LOWER_U] = "LOWER_U", + [LEXER_INPUT_UPPER_A] = "UPPER_A", + [LEXER_INPUT_UPPER_B] = "UPPER_B", + [LEXER_INPUT_UPPER_C] = "UPPER_C", + [LEXER_INPUT_UPPER_D] = "UPPER_D", + [LEXER_INPUT_UPPER_E] = "UPPER_E", + [LEXER_INPUT_UPPER_F] = "UPPER_F", + [LEXER_INPUT_MINUS] = "MINUS", + [LEXER_INPUT_PLUS] = "PLUS", + [LEXER_INPUT_DECIMAL] = "DECIMAL", + [LEXER_INPUT_ZERO] = "ZERO", + [LEXER_INPUT_NON_ZERO] = "NON_ZERO", + [LEXER_INPUT_OTHER] = "OTHER", +}; +INTERNAL const char *state_strings[] = { + [LEXER_STATE_ERROR] = "LEXER_STATE_ERROR", + [LEXER_STATE_START] = "LEXER_STATE_START", + [LEXER_STATE_VALUE] = "LEXER_STATE_VALUE", + [LEXER_STATE_VALUE_END] = "LEXER_STATE_VALUE_END", + [LEXER_STATE_OBJECT_START] = "LEXER_STATE_OBJECT_START", + [LEXER_STATE_OBJECT] = "LEXER_STATE_OBJECT", + [LEXER_STATE_OBJECT_END] = "LEXER_STATE_OBJECT_END", + [LEXER_STATE_ARRAY_START] = "LEXER_STATE_ARRAY_START", + [LEXER_STATE_ARRAY] = "LEXER_STATE_ARRAY", + [LEXER_STATE_ARRAY_END] = "LEXER_STATE_ARRAY_END", + [LEXER_STATE_LAST_COLLECTION] = "LEXER_STATE_LAST_COLLECTION", + [LEXER_STATE_KEY] = "LEXER_STATE_KEY", + [LEXER_STATE_KEY_END] = "LEXER_STATE_KEY_END", + [LEXER_STATE_DECIMAL] = "LEXER_STATE_DECIMAL", + [LEXER_STATE_NUMBER] = "LEXER_STATE_NUMBER", + [LEXER_STATE_FRACTION] = "LEXER_STATE_FRACTION", + [LEXER_STATE_EXPONENT] = "LEXER_STATE_EXPONENT", + [LEXER_STATE_EXP_SIGN] = "LEXER_STATE_EXP_SIGN", + [LEXER_STATE_POWER] = "LEXER_STATE_POWER", + [LEXER_STATE_NUMBER_END] = "LEXER_STATE_NUMBER_END", + [LEXER_STATE_STRING] = "LEXER_STATE_STRING", + [LEXER_STATE_STRING_END] = "LEXER_STATE_STRING_END", + [LEXER_STATE_ESCAPE_SEQUENCE] = "LEXER_STATE_ESCAPE_SEQUENCE", + [LEXER_STATE_UNICODE_HEX1] = "LEXER_STATE_UNICODE_HEX1", + [LEXER_STATE_UNICODE_HEX2] = "LEXER_STATE_UNICODE_HEX2", + [LEXER_STATE_UNICODE_HEX3] = "LEXER_STATE_UNICODE_HEX3", + [LEXER_STATE_UNICODE_HEX4] = "LEXER_STATE_UNICODE_HEX4", + [LEXER_STATE_T] = "LEXER_STATE_T", + [LEXER_STATE_TR] = "LEXER_STATE_TR", + [LEXER_STATE_TRU] = "LEXER_STATE_TRU", + [LEXER_STATE_TRUE] = "LEXER_STATE_TRUE", + [LEXER_STATE_F] = "LEXER_STATE_F", + [LEXER_STATE_FA] = "LEXER_STATE_FA", + [LEXER_STATE_FAL] = "LEXER_STATE_FAL", + [LEXER_STATE_FALS] = "LEXER_STATE_FALS", + [LEXER_STATE_FALSE] = "LEXER_STATE_FALSE", + [LEXER_STATE_N] = "LEXER_STATE_N", + [LEXER_STATE_NU] = "LEXER_STATE_NU", + [LEXER_STATE_NUL] = "LEXER_STATE_NUL", + [LEXER_STATE_NULL] = "LEXER_STATE_NULL", + [LEXER_STATE_KEYWORD_END] = "LEXER_STATE_KEYWORD_END", +}; INTERNAL const char *filename = "./include/lexer/lexer_state_transitions.table"; int main(void) { @@ -136,509 +215,30 @@ void write_table(FILE *fp) { } INTERNAL void set_column_width(lexer_state_t state) { - const char *output = ""; - i32 length = 0; - - switch (state) { - case LEXER_STATE_ERROR: - output = "LEXER_STATE_ERROR"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_START: - output = "LEXER_STATE_START"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_VALUE: - output = "LEXER_STATE_VALUE"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_VALUE_END: - output = "LEXER_STATE_VALUE_END"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_OBJECT_START: - output = "LEXER_STATE_OBJECT_START"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_OBJECT: - output = "LEXER_STATE_OBJECT"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_OBJECT_END: - output = "LEXER_STATE_OBJECT_END"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_ARRAY_START: - output = "LEXER_STATE_ARRAY_START"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_ARRAY: - output = "LEXER_STATE_ARRAY"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_ARRAY_END: - output = "LEXER_STATE_ARRAY_END"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_LAST_COLLECTION: - output = "LEXER_STATE_LAST_COLLECTION"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_KEY: - output = "LEXER_STATE_KEY"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_KEY_END: - output = "LEXER_STATE_KEY_END"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_DECIMAL: - output = "LEXER_STATE_DECIMAL"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_NUMBER: - output = "LEXER_STATE_NUMBER"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_FRACTION: - output = "LEXER_STATE_FRACTION"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_EXPONENT: - output = "LEXER_STATE_EXPONENT"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_EXP_SIGN: - output = "LEXER_STATE_EXP_SIGN"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_POWER: - output = "LEXER_STATE_POWER"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_NUMBER_END: - output = "LEXER_STATE_NUMBER_END"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_STRING: - output = "LEXER_STATE_STRING"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_STRING_END: - output = "LEXER_STATE_STRING_END"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_ESCAPE_SEQUENCE: - output = "LEXER_STATE_ESCAPE_SEQUENCE"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_UNICODE_HEX1: - output = "LEXER_STATE_UNICODE_HEX1"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_UNICODE_HEX2: - output = "LEXER_STATE_UNICODE_HEX2"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_UNICODE_HEX3: - output = "LEXER_STATE_UNICODE_HEX3"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_UNICODE_HEX4: - output = "LEXER_STATE_UNICODE_HEX4"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_T: - output = "LEXER_STATE_T"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_TR: - output = "LEXER_STATE_TR"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_TRU: - output = "LEXER_STATE_TRU"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_TRUE: - output = "LEXER_STATE_TRUE"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_F: - output = "LEXER_STATE_F"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_FA: - output = "LEXER_STATE_FA"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_FAL: - output = "LEXER_STATE_FAL"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_FALS: - output = "LEXER_STATE_FALS"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_FALSE: - output = "LEXER_STATE_FALSE"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_N: - output = "LEXER_STATE_N"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_NU: - output = "LEXER_STATE_NU"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_NUL: - output = "LEXER_STATE_NUL"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_NULL: - output = "LEXER_STATE_NULL"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case LEXER_STATE_KEYWORD_END: - output = "LEXER_STATE_KEYWORD_END"; - length = strlen(output); - - column_width = length > column_width ? length : column_width; - - case COUNT_LEXER_STATES: - break; + if (state >= COUNT_LEXER_STATES) { + return; } + + const char *output = state_strings[state]; + i32 length = strlen(output); + + column_width = length > column_width ? length : column_width; } INTERNAL const char *get_input_string(lexer_input_t input) { - switch (input) { - case LEXER_INPUT_WHITE_SPACE: - return "WHITE_SPACE"; - case LEXER_INPUT_OPEN_BRACE: - return "OPEN_BRACE"; - case LEXER_INPUT_CLOSE_BRACE: - return "CLOSE_BRACE"; - case LEXER_INPUT_OPEN_BRACKET: - return "OPEN_BRACKET"; - case LEXER_INPUT_CLOSE_BRACKET: - return "CLOSE_BRACKET"; - case LEXER_INPUT_COMMA: - return "COMMA"; - case LEXER_INPUT_COLON: - return "COLON"; - case LEXER_INPUT_DOUBLE_QUOTE: - return "DOUBLE_QUOTE"; - case LEXER_INPUT_BACK_SLASH: - return "BACK_SLASH"; - case LEXER_INPUT_FORWARD_SLASH: - return "FORWARD_SLASH"; - case LEXER_INPUT_LOWER_A: - return "LOWER_A"; - case LEXER_INPUT_LOWER_B: - return "LOWER_B"; - case LEXER_INPUT_LOWER_C: - return "LOWER_C"; - case LEXER_INPUT_LOWER_D: - return "LOWER_D"; - case LEXER_INPUT_LOWER_E: - return "LOWER_E"; - case LEXER_INPUT_LOWER_F: - return "LOWER_F"; - case LEXER_INPUT_LOWER_L: - return "LOWER_L"; - case LEXER_INPUT_LOWER_N: - return "LOWER_N"; - case LEXER_INPUT_LOWER_R: - return "LOWER_R"; - case LEXER_INPUT_LOWER_S: - return "LOWER_S"; - case LEXER_INPUT_LOWER_T: - return "LOWER_T"; - case LEXER_INPUT_LOWER_U: - return "LOWER_U"; - case LEXER_INPUT_UPPER_A: - return "UPPER_A"; - case LEXER_INPUT_UPPER_B: - return "UPPER_B"; - case LEXER_INPUT_UPPER_C: - return "UPPER_C"; - case LEXER_INPUT_UPPER_D: - return "UPPER_D"; - case LEXER_INPUT_UPPER_E: - return "UPPER_E"; - case LEXER_INPUT_UPPER_F: - return "UPPER_F"; - case LEXER_INPUT_MINUS: - return "MINUS"; - case LEXER_INPUT_PLUS: - return "PLUS"; - case LEXER_INPUT_DECIMAL: - return "DECIMAL"; - case LEXER_INPUT_ZERO: - return "ZERO"; - case LEXER_INPUT_NON_ZERO: - return "NON_ZERO"; - case LEXER_INPUT_OTHER: - return "OTHER"; - - case COUNT_LEXER_INPUTS: + if (input >= COUNT_LEXER_INPUTS) { return ""; } + + return input_strings[input]; } INTERNAL const char *get_state_string(lexer_state_t state) { - const char *output = ""; - - switch (state) { - case LEXER_STATE_ERROR: - output = "LEXER_STATE_ERROR"; - - return output; - case LEXER_STATE_START: - output = "LEXER_STATE_START"; - - return output; - case LEXER_STATE_VALUE: - output = "LEXER_STATE_VALUE"; - - return output; - case LEXER_STATE_VALUE_END: - output = "LEXER_STATE_VALUE_END"; - - return output; - case LEXER_STATE_OBJECT_START: - output = "LEXER_STATE_OBJECT_START"; - - return output; - case LEXER_STATE_OBJECT: - output = "LEXER_STATE_OBJECT"; - - return output; - case LEXER_STATE_OBJECT_END: - output = "LEXER_STATE_OBJECT_END"; - - return output; - case LEXER_STATE_ARRAY_START: - output = "LEXER_STATE_ARRAY_START"; - - return output; - case LEXER_STATE_ARRAY: - output = "LEXER_STATE_ARRAY"; - - return output; - case LEXER_STATE_ARRAY_END: - output = "LEXER_STATE_ARRAY_END"; - - return output; - case LEXER_STATE_LAST_COLLECTION: - output = "LEXER_STATE_LAST_COLLECTION"; - - return output; - case LEXER_STATE_KEY: - output = "LEXER_STATE_KEY"; - - return output; - case LEXER_STATE_KEY_END: - output = "LEXER_STATE_KEY_END"; - - return output; - case LEXER_STATE_DECIMAL: - output = "LEXER_STATE_DECIMAL"; - - return output; - case LEXER_STATE_NUMBER: - output = "LEXER_STATE_NUMBER"; - - return output; - case LEXER_STATE_FRACTION: - output = "LEXER_STATE_FRACTION"; - - return output; - case LEXER_STATE_EXPONENT: - output = "LEXER_STATE_EXPONENT"; - - return output; - case LEXER_STATE_EXP_SIGN: - output = "LEXER_STATE_EXP_SIGN"; - - return output; - case LEXER_STATE_POWER: - output = "LEXER_STATE_POWER"; - - return output; - case LEXER_STATE_NUMBER_END: - output = "LEXER_STATE_NUMBER_END"; - - return output; - case LEXER_STATE_STRING: - output = "LEXER_STATE_STRING"; - - return output; - case LEXER_STATE_STRING_END: - output = "LEXER_STATE_STRING_END"; - - return output; - case LEXER_STATE_ESCAPE_SEQUENCE: - output = "LEXER_STATE_ESCAPE_SEQUENCE"; - - return output; - case LEXER_STATE_UNICODE_HEX1: - output = "LEXER_STATE_UNICODE_HEX1"; - - return output; - case LEXER_STATE_UNICODE_HEX2: - output = "LEXER_STATE_UNICODE_HEX2"; - - return output; - case LEXER_STATE_UNICODE_HEX3: - output = "LEXER_STATE_UNICODE_HEX3"; - - return output; - case LEXER_STATE_UNICODE_HEX4: - output = "LEXER_STATE_UNICODE_HEX4"; - - return output; - case LEXER_STATE_T: - output = "LEXER_STATE_T"; - - return output; - case LEXER_STATE_TR: - output = "LEXER_STATE_TR"; - - return output; - case LEXER_STATE_TRU: - output = "LEXER_STATE_TRU"; - - return output; - case LEXER_STATE_TRUE: - output = "LEXER_STATE_TRUE"; - - return output; - case LEXER_STATE_F: - output = "LEXER_STATE_F"; - - return output; - case LEXER_STATE_FA: - output = "LEXER_STATE_FA"; - - return output; - case LEXER_STATE_FAL: - output = "LEXER_STATE_FAL"; - - return output; - case LEXER_STATE_FALS: - output = "LEXER_STATE_FALS"; - - return output; - case LEXER_STATE_FALSE: - output = "LEXER_STATE_FALSE"; - - return output; - case LEXER_STATE_N: - output = "LEXER_STATE_N"; - - return output; - case LEXER_STATE_NU: - output = "LEXER_STATE_NU"; - - return output; - case LEXER_STATE_NUL: - output = "LEXER_STATE_NUL"; - - return output; - case LEXER_STATE_NULL: - output = "LEXER_STATE_NULL"; - - return output; - case LEXER_STATE_KEYWORD_END: - output = "LEXER_STATE_KEYWORD_END"; - - return output; - - case COUNT_LEXER_STATES: + if (state >= COUNT_LEXER_STATES) { return ""; } + + return state_strings[state]; } lexer_state_t lexer_state_machine(lexer_state_t state, lexer_input_t input) {