#include "aliases.h" #include "lexer_data.h" #include #include #include #include #define STRING_BUF_LENGTH 100 INTERNAL void write_table(void); INTERNAL void clear_file(void); INTERNAL lexer_input_t char_type(char input); INTERNAL void lexer_state_machine(lexer_input_t input); INTERNAL lexer_state_t handle_lexer_start(lexer_input_t input); INTERNAL lexer_state_t handle_last_collection(char input); INTERNAL lexer_state_t handle_collection_end(lexer_input_t input); INTERNAL void handle_input_after_collection_end(lexer_input_t input); INTERNAL lexer_state_t handle_object(lexer_input_t input); INTERNAL lexer_state_t handle_array(lexer_input_t input); INTERNAL lexer_state_t handle_key(lexer_input_t input); INTERNAL lexer_state_t handle_value(lexer_input_t input); INTERNAL lexer_state_t handle_string(lexer_input_t input); INTERNAL lexer_state_t handle_string_end(lexer_input_t input); INTERNAL lexer_state_t handle_escape_sequence(lexer_input_t input); INTERNAL lexer_state_t handle_unicode_hex(lexer_input_t input, lexer_state_t return_state); INTERNAL lexer_state_t handle_decimal(lexer_input_t input); INTERNAL lexer_state_t handle_number(lexer_input_t input); INTERNAL lexer_state_t handle_fraction(lexer_input_t input); INTERNAL lexer_state_t handle_exponent(lexer_input_t input); INTERNAL lexer_state_t handle_exp_sign(lexer_input_t input); INTERNAL lexer_state_t handle_power(lexer_input_t input); INTERNAL lexer_state_t handle_number_end(lexer_input_t input); INTERNAL lexer_state_t handle_true(lexer_input_t input, lexer_state_t start_state); INTERNAL lexer_state_t handle_false(lexer_input_t input, lexer_state_t start_state); INTERNAL lexer_state_t handle_null(lexer_input_t input, lexer_state_t start_state); INTERNAL lexer_state_t handle_keyword_end(lexer_input_t input); INTERNAL lexer_state_t current_state = LEXER_STATE_START; INTERNAL lexer_state_t state_table[COUNT_LEXER_STATES][COUNT_LEXER_INPUTS] = { 0}; INTERNAL const char *filename = "./include/lexer/lexer_state_transitions.table"; int main(void) { clear_file(); write_table(); return EXIT_SUCCESS; } void clear_file(void) { FILE *fp = fopen(filename, "w"); if (!fp) { printf("Failed to open file\n"); return; } fclose(fp); } void write_table(void) { FILE *fp = fopen(filename, "a"); if (!fp) { printf("Failed to open file\n"); return; } char output[STRING_BUF_LENGTH] = {0}; u64 length = 0; for (u64 i = 0; i < COUNT_LEXER_STATES; ++i) { sprintf(output, "{ "); length = strlen(output); fwrite(output, 1, length, fp); for (u64 j = 0; j < COUNT_LEXER_INPUTS; ++j) { sprintf(output, "%lld, ", (unsigned long long)state_table[i][j]); length = strlen(output); fwrite(output, 1, length, fp); } sprintf(output, "},\n"); length = strlen(output); fwrite(output, 1, length, fp); memset(output, 0, STRING_BUF_LENGTH); } fclose(fp); } INTERNAL lexer_input_t char_type(char input) { if (isspace(input)) { return LEXER_INPUT_WHITE_SPACE; } else if (input >= '1' && input <= '9') { return LEXER_INPUT_NON_ZERO; } switch (input) { case '{': return LEXER_INPUT_OPEN_BRACE; case '}': return LEXER_INPUT_CLOSE_BRACE; case '[': return LEXER_INPUT_OPEN_BRACKET; case ']': return LEXER_INPUT_CLOSE_BRACKET; case ',': return LEXER_INPUT_COMMA; case ':': return LEXER_INPUT_COLON; case '"': return LEXER_INPUT_DOUBLE_QUOTE; case '\\': return LEXER_INPUT_BACK_SLASH; case '/': return LEXER_INPUT_FORWARD_SLASH; case 'a': return LEXER_INPUT_LOWER_A; case 'b': return LEXER_INPUT_LOWER_B; case 'c': return LEXER_INPUT_LOWER_C; case 'd': return LEXER_INPUT_LOWER_D; case 'e': return LEXER_INPUT_LOWER_E; case 'f': return LEXER_INPUT_LOWER_F; case 'l': return LEXER_INPUT_LOWER_L; case 'n': return LEXER_INPUT_LOWER_N; case 'r': return LEXER_INPUT_LOWER_R; case 's': return LEXER_INPUT_LOWER_S; case 't': return LEXER_INPUT_LOWER_T; case 'u': return LEXER_INPUT_LOWER_U; case 'A': return LEXER_INPUT_UPPER_A; case 'B': return LEXER_INPUT_UPPER_B; case 'C': return LEXER_INPUT_UPPER_C; case 'D': return LEXER_INPUT_UPPER_D; case 'E': return LEXER_INPUT_UPPER_E; case 'F': return LEXER_INPUT_UPPER_F; case '-': return LEXER_INPUT_MINUS; case '+': return LEXER_INPUT_PLUS; case '.': return LEXER_INPUT_DECIMAL; case '0': return LEXER_INPUT_ZERO; default: return LEXER_INPUT_OTHER; } } void lexer_state_machine(lexer_input_t input) { switch (current_state) { case LEXER_STATE_START: current_state = handle_lexer_start(input); break; case LEXER_STATE_VALUE: current_state = handle_value(input); break; case LEXER_STATE_OBJECT_START: case LEXER_STATE_OBJECT: current_state = handle_object(input); break; case LEXER_STATE_ARRAY_START: case LEXER_STATE_ARRAY: current_state = handle_array(input); break; case LEXER_STATE_OBJECT_END: case LEXER_STATE_ARRAY_END: handle_input_after_collection_end(input); break; case LEXER_STATE_KEY: current_state = handle_key(input); break; case LEXER_STATE_DECIMAL: current_state = handle_decimal(input); break; case LEXER_STATE_NUMBER: current_state = handle_number(input); break; case LEXER_STATE_FRACTION: current_state = handle_fraction(input); break; case LEXER_STATE_EXPONENT: current_state = handle_exponent(input); break; case LEXER_STATE_EXP_SIGN: current_state = handle_exp_sign(input); break; case LEXER_STATE_POWER: current_state = handle_power(input); break; case LEXER_STATE_NUMBER_END: current_state = handle_number_end(input); break; case LEXER_STATE_STRING: current_state = handle_string(input); break; case LEXER_STATE_STRING_END: current_state = handle_string_end(input); break; case LEXER_STATE_ESCAPE_SEQUENCE: current_state = handle_escape_sequence(input); break; case LEXER_STATE_UNICODE_HEX1: current_state = handle_unicode_hex(input, LEXER_STATE_UNICODE_HEX2); break; case LEXER_STATE_UNICODE_HEX2: current_state = handle_unicode_hex(input, LEXER_STATE_UNICODE_HEX3); break; case LEXER_STATE_UNICODE_HEX3: current_state = handle_unicode_hex(input, LEXER_STATE_UNICODE_HEX4); break; case LEXER_STATE_UNICODE_HEX4: current_state = handle_unicode_hex(input, LEXER_STATE_STRING); break; case LEXER_STATE_T: current_state = handle_true(input, LEXER_STATE_T); break; case LEXER_STATE_TR: current_state = handle_true(input, LEXER_STATE_TR); break; case LEXER_STATE_TRU: current_state = handle_true(input, LEXER_STATE_TRU); break; case LEXER_STATE_TRUE: current_state = handle_true(input, LEXER_STATE_TRUE); break; case LEXER_STATE_F: current_state = handle_false(input, LEXER_STATE_F); break; case LEXER_STATE_FA: current_state = handle_false(input, LEXER_STATE_FA); break; case LEXER_STATE_FAL: current_state = handle_false(input, LEXER_STATE_FAL); break; case LEXER_STATE_FALS: current_state = handle_false(input, LEXER_STATE_FALS); break; case LEXER_STATE_FALSE: current_state = handle_false(input, LEXER_STATE_FALSE); break; case LEXER_STATE_N: current_state = handle_null(input, LEXER_STATE_N); break; case LEXER_STATE_NU: current_state = handle_null(input, LEXER_STATE_NU); break; case LEXER_STATE_NUL: current_state = handle_null(input, LEXER_STATE_NUL); break; case LEXER_STATE_NULL: current_state = handle_null(input, LEXER_STATE_NULL); break; case LEXER_STATE_KEYWORD_END: current_state = handle_keyword_end(input); break; case LEXER_STATE_LAST_COLLECTION: current_state = handle_last_collection(input); break; case LEXER_STATE_ERROR: case COUNT_LEXER_STATES: current_state = LEXER_STATE_ERROR; break; } } lexer_state_t handle_lexer_start(lexer_input_t input) { switch (input) { case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_START; case LEXER_INPUT_OPEN_BRACE: return LEXER_STATE_OBJECT_START; case LEXER_INPUT_OPEN_BRACKET: return LEXER_STATE_ARRAY_START; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_last_collection(char input) { if (input == LEXER_INPUT_WHITE_SPACE) { return LEXER_STATE_LAST_COLLECTION; } return LEXER_STATE_ERROR; } // TODO (Abdelrahman): Figure out how to handle this lexer_state_t handle_collection_end(lexer_input_t input) { // No need to ignore space as this is only called when input is } or ] lexer->current = lexer->stack.stack[lexer->stack.size - 1]; bool object_end = lexer->current == LEXER_STATE_OBJECT && input == '}'; if (object_end) { token_t *token; if (lexer->token_ready) { lexer->has_extra_token = true; token = &(lexer->extra_token); } else { lexer->token_ready = true; token = &(lexer->token); } set_token(token, lexer->line, lexer->column, TK_R_BRACE, (token_value_t){0}); return LEXER_STATE_OBJECT_END; } bool array_end = lexer->current == LEXER_STATE_ARRAY && input == ']'; if (array_end) { token_t *token; if (lexer->token_ready) { lexer->has_extra_token = true; token = &(lexer->extra_token); } else { lexer->token_ready = true; token = &(lexer->token); } set_token(token, lexer->line, lexer->column, TK_R_BRACKET, (token_value_t){0}); return LEXER_STATE_ARRAY_END; } return LEXER_STATE_ERROR; } // TODO (Abdelrahman): Figure out how to handle this void handle_input_after_collection_end(lexer_input_t input) { switch (input) { case '}': lexer->token_ready = true; set_token(&(lexer->token), lexer->line, lexer->column, TK_R_BRACE, (token_value_t){0}); break; case ']': lexer->token_ready = true; set_token(&(lexer->token), lexer->line, lexer->column, TK_R_BRACKET, (token_value_t){0}); break; } } lexer_state_t handle_object(lexer_input_t input) { switch (input) { case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_OBJECT; case LEXER_INPUT_DOUBLE_QUOTE: return LEXER_STATE_KEY; case LEXER_INPUT_CLOSE_BRACE: return handle_collection_end(input); default: return LEXER_STATE_ERROR; } } lexer_state_t handle_array(lexer_input_t input) { switch (input) { case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_ARRAY; case LEXER_INPUT_CLOSE_BRACKET: return handle_collection_end(input); default: return handle_value(input); } } lexer_state_t handle_key(lexer_input_t input) { return LEXER_STATE_STRING; } lexer_state_t handle_value(lexer_input_t input) { switch (input) { case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_VALUE; case LEXER_INPUT_NON_ZERO: case LEXER_INPUT_MINUS: return LEXER_STATE_NUMBER; case LEXER_INPUT_ZERO: return LEXER_STATE_DECIMAL; case LEXER_INPUT_DOUBLE_QUOTE: return LEXER_STATE_STRING; case LEXER_INPUT_OPEN_BRACE: return LEXER_STATE_OBJECT_START; case LEXER_INPUT_OPEN_BRACKET: return LEXER_STATE_ARRAY_START; case LEXER_INPUT_LOWER_T: return LEXER_STATE_T; case LEXER_INPUT_LOWER_F: return LEXER_STATE_F; case LEXER_INPUT_LOWER_N: return LEXER_STATE_N; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_string(lexer_input_t input) { switch (input) { case LEXER_INPUT_BACK_SLASH: return LEXER_STATE_ESCAPE_SEQUENCE; case LEXER_INPUT_DOUBLE_QUOTE: return LEXER_STATE_STRING_END; default: return LEXER_STATE_STRING; } } // TODO (Abdelrahman): Figure out how to handle this lexer_state_t handle_string_end(lexer_input_t input) { switch (input) { case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_STRING_END; } lexer->current = stack_pop(&(lexer->stack)); bool key_end = lexer->current == LEXER_STATE_KEY && input == ':'; if (key_end) { return LEXER_STATE_VALUE; } bool value_end = lexer->current == LEXER_STATE_VALUE && input == ','; if (value_end) { return lexer->stack.stack[lexer->stack.size - 1]; } bool collection_end = input == '}' || input == ']'; return collection_end ? handle_collection_end(lexer, input) : LEXER_STATE_ERROR; } lexer_state_t handle_escape_sequence(lexer_input_t input) { switch (input) { case LEXER_INPUT_DOUBLE_QUOTE: case LEXER_INPUT_FORWARD_SLASH: case LEXER_INPUT_BACK_SLASH: case LEXER_INPUT_LOWER_B: case LEXER_INPUT_LOWER_F: case LEXER_INPUT_LOWER_N: case LEXER_INPUT_LOWER_R: case LEXER_INPUT_LOWER_T: return LEXER_STATE_STRING; case LEXER_INPUT_LOWER_U: return LEXER_STATE_UNICODE_HEX1; default: return LEXER_STATE_ERROR; } } INTERNAL lexer_state_t handle_unicode_hex(lexer_input_t input, lexer_state_t return_state) { switch (input) { case LEXER_INPUT_LOWER_A: case LEXER_INPUT_LOWER_B: case LEXER_INPUT_LOWER_C: case LEXER_INPUT_LOWER_D: case LEXER_INPUT_LOWER_E: case LEXER_INPUT_LOWER_F: case LEXER_INPUT_UPPER_A: case LEXER_INPUT_UPPER_B: case LEXER_INPUT_UPPER_C: case LEXER_INPUT_UPPER_D: case LEXER_INPUT_UPPER_E: case LEXER_INPUT_UPPER_F: case LEXER_INPUT_ZERO: case LEXER_INPUT_NON_ZERO: return return_state; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_decimal(lexer_input_t input) { if (input == LEXER_INPUT_DECIMAL) { return LEXER_STATE_FRACTION; } return LEXER_STATE_ERROR; } lexer_state_t handle_number(lexer_input_t input) { switch (input) { case LEXER_INPUT_ZERO: case LEXER_INPUT_NON_ZERO: return LEXER_STATE_NUMBER; case LEXER_INPUT_DECIMAL: return LEXER_STATE_FRACTION; case LEXER_INPUT_CLOSE_BRACE: case LEXER_INPUT_CLOSE_BRACKET: return handle_collection_end(input); case LEXER_INPUT_COMMA: // TODO (Abdelrahman): Figure out how to handle this return lexer->stack.stack[lexer->stack.size - 1]; case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_NUMBER_END; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_fraction(lexer_input_t input) { switch (input) { case LEXER_INPUT_ZERO: case LEXER_INPUT_NON_ZERO: return LEXER_STATE_FRACTION; case LEXER_INPUT_CLOSE_BRACE: case LEXER_INPUT_CLOSE_BRACKET: return handle_collection_end(input); case LEXER_INPUT_LOWER_E: case LEXER_INPUT_UPPER_E: return LEXER_STATE_EXPONENT; case LEXER_INPUT_COMMA: // TODO (Abdelrahman): Figure out how to handle this return lexer->stack.stack[lexer->stack.size - 1]; case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_NUMBER_END; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_exponent(lexer_input_t input) { switch (input) { case LEXER_INPUT_ZERO: case LEXER_INPUT_NON_ZERO: return LEXER_STATE_POWER; case LEXER_INPUT_PLUS: case LEXER_INPUT_MINUS: return LEXER_STATE_EXP_SIGN; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_exp_sign(lexer_input_t input) { switch (input) { case LEXER_INPUT_ZERO: case LEXER_INPUT_NON_ZERO: return LEXER_STATE_POWER; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_power(lexer_input_t input) { switch (input) { case LEXER_INPUT_ZERO: case LEXER_INPUT_NON_ZERO: return LEXER_STATE_POWER; case LEXER_INPUT_CLOSE_BRACE: case LEXER_INPUT_CLOSE_BRACKET: return handle_collection_end(input); case LEXER_INPUT_COMMA: // TODO (Abdelrahman): Figure out how to handle this return lexer->stack.stack[lexer->stack.size - 1]; case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_NUMBER_END; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_number_end(lexer_input_t input) { switch (input) { case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_NUMBER_END; case LEXER_INPUT_CLOSE_BRACE: case LEXER_INPUT_CLOSE_BRACKET: return handle_collection_end(input); case LEXER_INPUT_COMMA: // TODO (Abdelrahman): Figure out how to handle this return lexer->stack.stack[lexer->stack.size - 1]; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_true(lexer_input_t input, lexer_state_t start_state) { switch (start_state) { case LEXER_STATE_T: return input == LEXER_INPUT_LOWER_R ? LEXER_STATE_TR : LEXER_STATE_ERROR; case LEXER_STATE_TR: return input == LEXER_INPUT_LOWER_U ? LEXER_STATE_TRU : LEXER_STATE_ERROR; case LEXER_STATE_TRU: return input == LEXER_INPUT_LOWER_E ? LEXER_STATE_TRUE : LEXER_STATE_ERROR; case LEXER_STATE_TRUE: return LEXER_STATE_KEYWORD_END; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_false(lexer_input_t input, lexer_state_t start_state) { switch (start_state) { case LEXER_STATE_F: return input == LEXER_INPUT_LOWER_A ? LEXER_STATE_FA : LEXER_STATE_ERROR; case LEXER_STATE_FA: return input == LEXER_INPUT_LOWER_L ? LEXER_STATE_FAL : LEXER_STATE_ERROR; case LEXER_STATE_FAL: return input == LEXER_INPUT_LOWER_S ? LEXER_STATE_FALS : LEXER_STATE_ERROR; case LEXER_STATE_FALS: return input == LEXER_INPUT_LOWER_E ? LEXER_STATE_FALSE : LEXER_STATE_ERROR; case LEXER_STATE_FALSE: return LEXER_STATE_KEYWORD_END; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_null(lexer_input_t input, lexer_state_t start_state) { switch (start_state) { case LEXER_STATE_N: return input == LEXER_INPUT_LOWER_U ? LEXER_STATE_NU : LEXER_STATE_ERROR; case LEXER_STATE_NU: return input == LEXER_INPUT_LOWER_L ? LEXER_STATE_NUL : LEXER_STATE_ERROR; case LEXER_STATE_NUL: return input == LEXER_INPUT_LOWER_L ? LEXER_STATE_NULL : LEXER_STATE_ERROR; case LEXER_STATE_NULL: return LEXER_STATE_KEYWORD_END; default: return LEXER_STATE_ERROR; } } lexer_state_t handle_keyword_end(lexer_input_t input) { switch (input) { case LEXER_INPUT_WHITE_SPACE: return LEXER_STATE_KEYWORD_END; case LEXER_INPUT_CLOSE_BRACE: case LEXER_INPUT_CLOSE_BRACKET: return handle_collection_end(input); case LEXER_INPUT_COMMA: // TODO (Abdelrahman): Figure out how to handle this return lexer->stack.stack[lexer->stack.size - 1]; default: return LEXER_STATE_ERROR; } }