Add the get_next_token function
This commit is contained in:
		| @@ -14,9 +14,9 @@ | ||||
|       "main", | ||||
|       "src/main.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", | ||||
|     "output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main" | ||||
|     "directory": "/Users/abdelrahman/dev/personal/say-it-in-json", | ||||
|     "file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/main.c", | ||||
|     "output": "/Users/abdelrahman/dev/personal/say-it-in-json/main" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
| @@ -33,9 +33,9 @@ | ||||
|       "main", | ||||
|       "src/dstring/dstring.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/dstring/dstring.c", | ||||
|     "output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main" | ||||
|     "directory": "/Users/abdelrahman/dev/personal/say-it-in-json", | ||||
|     "file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/dstring/dstring.c", | ||||
|     "output": "/Users/abdelrahman/dev/personal/say-it-in-json/main" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
| @@ -52,215 +52,8 @@ | ||||
|       "main", | ||||
|       "src/lexer/lexer.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer.c", | ||||
|     "output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
|       "/usr/bin/clang-16", | ||||
|       "-cc1", | ||||
|       "-triple", | ||||
|       "x86_64-redhat-linux-gnu", | ||||
|       "-emit-obj", | ||||
|       "-mrelax-all", | ||||
|       "-disable-free", | ||||
|       "-clear-ast-before-backend", | ||||
|       "-disable-llvm-verifier", | ||||
|       "-discard-value-names", | ||||
|       "-main-file-name", | ||||
|       "-mrelocation-model", | ||||
|       "static", | ||||
|       "-mframe-pointer=all", | ||||
|       "-fmath-errno", | ||||
|       "-ffp-contract=on", | ||||
|       "-fno-rounding-math", | ||||
|       "-mconstructor-aliases", | ||||
|       "-funwind-tables=2", | ||||
|       "-target-cpu", | ||||
|       "x86-64", | ||||
|       "-tune-cpu", | ||||
|       "generic", | ||||
|       "-mllvm", | ||||
|       "-treat-scalable-fixed-error-as-warning", | ||||
|       "-debug-info-kind=constructor", | ||||
|       "-dwarf-version=4", | ||||
|       "-debugger-tuning=gdb", | ||||
|       "-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", | ||||
|       "-resource-dir", | ||||
|       "/usr/lib64/clang/16", | ||||
|       "-I", | ||||
|       "include", | ||||
|       "-I", | ||||
|       "include/dstring", | ||||
|       "-I", | ||||
|       "include/lexer", | ||||
|       "-internal-isystem", | ||||
|       "/usr/lib64/clang/16/include", | ||||
|       "-internal-isystem", | ||||
|       "/usr/local/include", | ||||
|       "-internal-isystem", | ||||
|       "/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include", | ||||
|       "-internal-externc-isystem", | ||||
|       "/include", | ||||
|       "-internal-externc-isystem", | ||||
|       "/usr/include", | ||||
|       "-Wall", | ||||
|       "-Werror", | ||||
|       "-pedantic", | ||||
|       "-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", | ||||
|       "-ferror-limit", | ||||
|       "19", | ||||
|       "-fgnuc-version=4.2.1", | ||||
|       "-fcolor-diagnostics", | ||||
|       "-faddrsig", | ||||
|       "-D__GCC_HAVE_DWARF2_CFI_ASM=1", | ||||
|       "-x", | ||||
|       "c", | ||||
|       "-o", | ||||
|       "/tmp/main-1df523.o", | ||||
|       "src/main.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", | ||||
|     "output": "/tmp/main-1df523.o" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
|       "/usr/bin/clang-16", | ||||
|       "-cc1", | ||||
|       "-triple", | ||||
|       "x86_64-redhat-linux-gnu", | ||||
|       "-emit-obj", | ||||
|       "-mrelax-all", | ||||
|       "-disable-free", | ||||
|       "-clear-ast-before-backend", | ||||
|       "-disable-llvm-verifier", | ||||
|       "-discard-value-names", | ||||
|       "-main-file-name", | ||||
|       "-mrelocation-model", | ||||
|       "static", | ||||
|       "-mframe-pointer=all", | ||||
|       "-fmath-errno", | ||||
|       "-ffp-contract=on", | ||||
|       "-fno-rounding-math", | ||||
|       "-mconstructor-aliases", | ||||
|       "-funwind-tables=2", | ||||
|       "-target-cpu", | ||||
|       "x86-64", | ||||
|       "-tune-cpu", | ||||
|       "generic", | ||||
|       "-mllvm", | ||||
|       "-treat-scalable-fixed-error-as-warning", | ||||
|       "-debug-info-kind=constructor", | ||||
|       "-dwarf-version=4", | ||||
|       "-debugger-tuning=gdb", | ||||
|       "-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", | ||||
|       "-resource-dir", | ||||
|       "/usr/lib64/clang/16", | ||||
|       "-I", | ||||
|       "include", | ||||
|       "-I", | ||||
|       "include/dstring", | ||||
|       "-I", | ||||
|       "include/lexer", | ||||
|       "-internal-isystem", | ||||
|       "/usr/lib64/clang/16/include", | ||||
|       "-internal-isystem", | ||||
|       "/usr/local/include", | ||||
|       "-internal-isystem", | ||||
|       "/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include", | ||||
|       "-internal-externc-isystem", | ||||
|       "/include", | ||||
|       "-internal-externc-isystem", | ||||
|       "/usr/include", | ||||
|       "-Wall", | ||||
|       "-Werror", | ||||
|       "-pedantic", | ||||
|       "-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", | ||||
|       "-ferror-limit", | ||||
|       "19", | ||||
|       "-fgnuc-version=4.2.1", | ||||
|       "-fcolor-diagnostics", | ||||
|       "-faddrsig", | ||||
|       "-D__GCC_HAVE_DWARF2_CFI_ASM=1", | ||||
|       "-x", | ||||
|       "c", | ||||
|       "-o", | ||||
|       "/tmp/dstring-3eff44.o", | ||||
|       "src/dstring/dstring.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/dstring/dstring.c", | ||||
|     "output": "/tmp/dstring-3eff44.o" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
|       "/usr/bin/clang-16", | ||||
|       "-cc1", | ||||
|       "-triple", | ||||
|       "x86_64-redhat-linux-gnu", | ||||
|       "-emit-obj", | ||||
|       "-mrelax-all", | ||||
|       "-disable-free", | ||||
|       "-clear-ast-before-backend", | ||||
|       "-disable-llvm-verifier", | ||||
|       "-discard-value-names", | ||||
|       "-main-file-name", | ||||
|       "-mrelocation-model", | ||||
|       "static", | ||||
|       "-mframe-pointer=all", | ||||
|       "-fmath-errno", | ||||
|       "-ffp-contract=on", | ||||
|       "-fno-rounding-math", | ||||
|       "-mconstructor-aliases", | ||||
|       "-funwind-tables=2", | ||||
|       "-target-cpu", | ||||
|       "x86-64", | ||||
|       "-tune-cpu", | ||||
|       "generic", | ||||
|       "-mllvm", | ||||
|       "-treat-scalable-fixed-error-as-warning", | ||||
|       "-debug-info-kind=constructor", | ||||
|       "-dwarf-version=4", | ||||
|       "-debugger-tuning=gdb", | ||||
|       "-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", | ||||
|       "-resource-dir", | ||||
|       "/usr/lib64/clang/16", | ||||
|       "-I", | ||||
|       "include", | ||||
|       "-I", | ||||
|       "include/dstring", | ||||
|       "-I", | ||||
|       "include/lexer", | ||||
|       "-internal-isystem", | ||||
|       "/usr/lib64/clang/16/include", | ||||
|       "-internal-isystem", | ||||
|       "/usr/local/include", | ||||
|       "-internal-isystem", | ||||
|       "/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include", | ||||
|       "-internal-externc-isystem", | ||||
|       "/include", | ||||
|       "-internal-externc-isystem", | ||||
|       "/usr/include", | ||||
|       "-Wall", | ||||
|       "-Werror", | ||||
|       "-pedantic", | ||||
|       "-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", | ||||
|       "-ferror-limit", | ||||
|       "19", | ||||
|       "-fgnuc-version=4.2.1", | ||||
|       "-fcolor-diagnostics", | ||||
|       "-faddrsig", | ||||
|       "-D__GCC_HAVE_DWARF2_CFI_ASM=1", | ||||
|       "-x", | ||||
|       "c", | ||||
|       "-o", | ||||
|       "/tmp/lexer-b7cbfb.o", | ||||
|       "src/lexer/lexer.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer.c", | ||||
|     "output": "/tmp/lexer-b7cbfb.o" | ||||
|     "directory": "/Users/abdelrahman/dev/personal/say-it-in-json", | ||||
|     "file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/lexer/lexer.c", | ||||
|     "output": "/Users/abdelrahman/dev/personal/say-it-in-json/main" | ||||
|   } | ||||
| ] | ||||
|   | ||||
| @@ -15,8 +15,6 @@ typedef enum { | ||||
|   TK_R_BRACE, | ||||
|   TK_L_BRACKET, | ||||
|   TK_R_BRACKET, | ||||
|   TK_COLON, | ||||
|   TK_COMMA, | ||||
|   TK_NULL, | ||||
|   TK_TRUE, | ||||
|   TK_FALSE, | ||||
| @@ -47,5 +45,6 @@ void lexer_free(lexer_t **lexer); | ||||
| token_t get_next_token(lexer_t *lexer, const char *text); | ||||
|  | ||||
| bool validate_json(char *json); | ||||
| void print_token(token_t token); | ||||
|  | ||||
| #endif // !LEXER_STATES_H | ||||
|   | ||||
| @@ -81,13 +81,15 @@ struct lexer { | ||||
|   u64 cursor; | ||||
|   u64 line; | ||||
|   u64 column; | ||||
|   const char *text; | ||||
|   u64 text_length; | ||||
|   const char *text; | ||||
|   lexer_state_t current; | ||||
|   state_stack_t stack; | ||||
|   lexer_string_t keyword; | ||||
|   lexer_string_t codepoint; | ||||
|   dstr_t *current_string; | ||||
|   bool token_ready; | ||||
|   token_t token; | ||||
| }; | ||||
|  | ||||
| void stack_push(state_stack_t *stack, lexer_state_t value); | ||||
| @@ -99,10 +101,13 @@ bool strequal(const char *first, const char *second); | ||||
| bool is_valid_hex_char(const char input); | ||||
| bool ishex(const char input); | ||||
|  | ||||
| void set_token(lexer_t *lexer, token_type_t type, token_value_t value); | ||||
|  | ||||
| void lexer_state_machine(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_lexer_start(char input); | ||||
| lexer_state_t handle_lexer_start(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_last_collection(char input); | ||||
| lexer_state_t handle_collection_end(lexer_t *lexer, char input); | ||||
| void handle_input_after_collection_end(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_object(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_array(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_key(lexer_t *lexer, char input); | ||||
| @@ -124,9 +129,6 @@ lexer_state_t handle_false(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_null(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_keyword_end(lexer_t *lexer, char input); | ||||
|  | ||||
| // TODO (Abdelrahman): The printf functions in the state handlers are the exit | ||||
| // points for the tokenisation function. Replace them once ready. | ||||
|  | ||||
| void lexer_init(lexer_t **lexer) { | ||||
|   if (*lexer) { | ||||
|     lexer_free(lexer); | ||||
| @@ -141,10 +143,14 @@ void lexer_init(lexer_t **lexer) { | ||||
|   (*lexer)->cursor = 0; | ||||
|   (*lexer)->line = 1; | ||||
|   (*lexer)->column = 0; | ||||
|   (*lexer)->text_length = 0; | ||||
|   (*lexer)->text = ""; | ||||
|   (*lexer)->current = LEXER_STATE_START; | ||||
|   (*lexer)->keyword.type = LEXER_STRING_KEYWORD; | ||||
|   (*lexer)->codepoint.type = LEXER_STRING_UNICODE; | ||||
|   (*lexer)->current_string = dstr_with_capacity(STRING_BUF_START_CAPACITY); | ||||
|   (*lexer)->token_ready = false; | ||||
|   (*lexer)->token = (token_t){0}; | ||||
|  | ||||
|   if (!((*lexer)->current_string)) { | ||||
|     lexer_free(lexer); | ||||
| @@ -167,10 +173,12 @@ token_t get_next_token(lexer_t *lexer, const char *text) { | ||||
|     lexer->text_length = strlen(text); | ||||
|   } | ||||
|  | ||||
|   dstr_clear(lexer->current_string); | ||||
|  | ||||
|   char c; | ||||
|  | ||||
|   for (; lexer->cursor < lexer->text_length; ++(lexer->cursor)) { | ||||
|     c = lexer->text[lexer->cursor]; | ||||
|   while (lexer->cursor < lexer->text_length) { | ||||
|     c = lexer->text[(lexer->cursor)++]; | ||||
|  | ||||
|     if (c == '\n') { | ||||
|       ++(lexer->line); | ||||
| @@ -184,6 +192,10 @@ token_t get_next_token(lexer_t *lexer, const char *text) { | ||||
|     ++(lexer->column); | ||||
|  | ||||
|     if (lexer->current == LEXER_STATE_ERROR) { | ||||
|     } else if (lexer->token_ready) { | ||||
|       lexer->token_ready = false; | ||||
|  | ||||
|       return lexer->token; | ||||
|     } | ||||
|   } | ||||
|  | ||||
| @@ -223,6 +235,49 @@ bool validate_json(char *json) { | ||||
|   return lexer.current == LEXER_STATE_LAST_COLLECTION || lexer.stack.size == 0; | ||||
| } | ||||
|  | ||||
| void print_token(token_t token) { | ||||
|   printf("{LINE: %4llu, COLUMN: %4llu, TYPE: ", token.line, token.column); | ||||
|  | ||||
|   switch (token.type) { | ||||
|   case TK_NO_TOKEN: | ||||
|     break; | ||||
|   case TK_L_BRACE: | ||||
|     printf("%15s, VALUE: N/A", "TK_L_BRACE"); | ||||
|     break; | ||||
|   case TK_R_BRACE: | ||||
|     printf("%15s, VALUE: N/A", "TK_R_BRACE"); | ||||
|     break; | ||||
|   case TK_L_BRACKET: | ||||
|     printf("%15s, VALUE: N/A", "TK_L_BRACKET"); | ||||
|     break; | ||||
|   case TK_R_BRACKET: | ||||
|     printf("%15s, VALUE: N/A", "TK_R_BRACKET"); | ||||
|     break; | ||||
|   case TK_NULL: | ||||
|     printf("%15s, VALUE: N/A", "TK_NULL"); | ||||
|     break; | ||||
|   case TK_TRUE: | ||||
|     printf("%15s, VALUE: N/A", "TK_TRUE"); | ||||
|     break; | ||||
|   case TK_FALSE: | ||||
|     printf("%15s, VALUE: N/A", "TK_FALSE"); | ||||
|     break; | ||||
|   case TK_STR_KEY: | ||||
|     printf("%15s, VALUE: %s", "TK_STR_KEY", token.value.string); | ||||
|     break; | ||||
|   case TK_STR_VAL: | ||||
|     printf("%15s, VALUE: %s", "TK_STR_VAL", token.value.string); | ||||
|     break; | ||||
|   case TK_INTEGER: | ||||
|     break; | ||||
|   case TK_DOUBLE: | ||||
|     printf("%15s, VALUE: %s", "TK_DOUBLE", token.value.string); | ||||
|     break; | ||||
|   } | ||||
|  | ||||
|   printf("}\n"); | ||||
| } | ||||
|  | ||||
| void stack_push(state_stack_t *stack, lexer_state_t state) { | ||||
|   if (stack->size + 1 >= MAX_STACK_CAPACITY) { | ||||
|     return; | ||||
| @@ -318,10 +373,21 @@ bool ishex(const char input) { | ||||
|   return isdigit(input) || is_valid_hex_char(input); | ||||
| } | ||||
|  | ||||
| void set_token(lexer_t *lexer, token_type_t type, token_value_t value) { | ||||
|   lexer->token_ready = true; | ||||
|  | ||||
|   lexer->token = (token_t){ | ||||
|       .line = lexer->line, | ||||
|       .column = lexer->column, | ||||
|       .type = type, | ||||
|       .value = value, | ||||
|   }; | ||||
| } | ||||
|  | ||||
| void lexer_state_machine(lexer_t *lexer, char input) { | ||||
|   switch (lexer->current) { | ||||
|   case LEXER_STATE_START: | ||||
|     lexer->current = handle_lexer_start(input); | ||||
|     lexer->current = handle_lexer_start(lexer, input); | ||||
|     break; | ||||
|   case LEXER_STATE_VALUE: | ||||
|     lexer->current = handle_value(lexer, input); | ||||
| @@ -350,6 +416,8 @@ void lexer_state_machine(lexer_t *lexer, char input) { | ||||
|       lexer->current = LEXER_STATE_LAST_COLLECTION; | ||||
|     } | ||||
|  | ||||
|     handle_input_after_collection_end(lexer, input); | ||||
|  | ||||
|     break; | ||||
|   case LEXER_STATE_KEY: | ||||
|     lexer->current = handle_key(lexer, input); | ||||
| @@ -409,17 +477,19 @@ void lexer_state_machine(lexer_t *lexer, char input) { | ||||
|   } | ||||
| } | ||||
|  | ||||
| lexer_state_t handle_lexer_start(char input) { | ||||
| lexer_state_t handle_lexer_start(lexer_t *lexer, char input) { | ||||
|   if (isspace(input)) { | ||||
|     return LEXER_STATE_START; | ||||
|   } | ||||
|  | ||||
|   switch (input) { | ||||
|   case '{': | ||||
|     printf("TK_L_BRACE\n"); | ||||
|     set_token(lexer, TK_L_BRACE, (token_value_t){0}); | ||||
|  | ||||
|     return LEXER_STATE_OBJECT_START; | ||||
|   case '[': | ||||
|     printf("TK_L_BRACKET\n"); | ||||
|     set_token(lexer, TK_L_BRACKET, (token_value_t){0}); | ||||
|  | ||||
|     return LEXER_STATE_ARRAY_START; | ||||
|   } | ||||
|  | ||||
| @@ -442,20 +512,35 @@ lexer_state_t handle_collection_end(lexer_t *lexer, char input) { | ||||
|   bool object_end = lexer->current == LEXER_STATE_OBJECT && input == '}'; | ||||
|  | ||||
|   if (object_end) { | ||||
|     printf("TK_R_BRACE\n"); | ||||
|     set_token(lexer, TK_R_BRACE, (token_value_t){0}); | ||||
|  | ||||
|     return LEXER_STATE_OBJECT_END; | ||||
|   } | ||||
|  | ||||
|   bool array_end = lexer->current == LEXER_STATE_ARRAY && input == ']'; | ||||
|  | ||||
|   if (array_end) { | ||||
|     printf("TK_R_BRACKET\n"); | ||||
|     set_token(lexer, TK_R_BRACKET, (token_value_t){0}); | ||||
|  | ||||
|     return LEXER_STATE_ARRAY_END; | ||||
|   } | ||||
|  | ||||
|   return LEXER_STATE_ERROR; | ||||
| } | ||||
|  | ||||
| void handle_input_after_collection_end(lexer_t *lexer, char input) { | ||||
|   switch (input) { | ||||
|   case '}': | ||||
|     set_token(lexer, TK_R_BRACE, (token_value_t){0}); | ||||
|  | ||||
|     break; | ||||
|   case ']': | ||||
|     set_token(lexer, TK_R_BRACKET, (token_value_t){0}); | ||||
|  | ||||
|     break; | ||||
|   } | ||||
| } | ||||
|  | ||||
| lexer_state_t handle_object(lexer_t *lexer, char input) { | ||||
|   if (isspace(input)) { | ||||
|     return LEXER_STATE_OBJECT; | ||||
| @@ -464,7 +549,6 @@ lexer_state_t handle_object(lexer_t *lexer, char input) { | ||||
|  | ||||
|     return LEXER_STATE_KEY; | ||||
|   } else if (input == '}') { | ||||
|     printf("TK_R_BRACE\n"); | ||||
|     return handle_collection_end(lexer, input); | ||||
|   } | ||||
|  | ||||
| @@ -475,7 +559,6 @@ lexer_state_t handle_array(lexer_t *lexer, char input) { | ||||
|   if (isspace(input)) { | ||||
|     return LEXER_STATE_ARRAY; | ||||
|   } else if (input == ']') { | ||||
|     printf("TK_R_BRACKET\n"); | ||||
|     return handle_collection_end(lexer, input); | ||||
|   } | ||||
|  | ||||
| @@ -507,10 +590,12 @@ lexer_state_t handle_value(lexer_t *lexer, char input) { | ||||
|  | ||||
|     return LEXER_STATE_DECIMAL; | ||||
|   case '{': | ||||
|     printf("TK_L_BRACE\n"); | ||||
|     set_token(lexer, TK_L_BRACE, (token_value_t){0}); | ||||
|  | ||||
|     return LEXER_STATE_OBJECT_START; | ||||
|   case '[': | ||||
|     printf("TK_L_BRACKET\n"); | ||||
|     set_token(lexer, TK_L_BRACKET, (token_value_t){0}); | ||||
|  | ||||
|     return LEXER_STATE_ARRAY_START; | ||||
|   case 't': | ||||
|   case 'f': | ||||
| @@ -529,10 +614,20 @@ lexer_state_t handle_string(lexer_t *lexer, char input) { | ||||
|     dstr_append(&(lexer->current_string), input); | ||||
|  | ||||
|     return LEXER_STATE_ESCAPE_SEQUENCE; | ||||
|   case '"': | ||||
|     printf("TK_STRING: %s\n", dstr_to_cstr(lexer->current_string)); | ||||
|   case '"': { | ||||
|     lexer_state_t string_type = lexer->stack.stack[lexer->stack.size - 1]; | ||||
|  | ||||
|     if (string_type == LEXER_STATE_KEY) { | ||||
|       set_token(lexer, TK_STR_KEY, | ||||
|                 (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|     } else if (string_type == LEXER_STATE_VALUE) { | ||||
|       set_token(lexer, TK_STR_VAL, | ||||
|                 (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|     } | ||||
|  | ||||
|     return LEXER_STATE_STRING_END; | ||||
|   } | ||||
|   } | ||||
|  | ||||
|   dstr_append(&(lexer->current_string), input); | ||||
|  | ||||
| @@ -544,21 +639,17 @@ lexer_state_t handle_string_end(lexer_t *lexer, char input) { | ||||
|     return LEXER_STATE_STRING_END; | ||||
|   } | ||||
|  | ||||
|   dstr_clear(lexer->current_string); | ||||
|  | ||||
|   lexer->current = stack_pop(&(lexer->stack)); | ||||
|  | ||||
|   bool key_end = lexer->current == LEXER_STATE_KEY && input == ':'; | ||||
|  | ||||
|   if (key_end) { | ||||
|     printf("TK_COLON\n"); | ||||
|     return LEXER_STATE_VALUE; | ||||
|   } | ||||
|  | ||||
|   bool value_end = lexer->current == LEXER_STATE_VALUE && input == ','; | ||||
|  | ||||
|   if (value_end) { | ||||
|     printf("TK_COMMA\n"); | ||||
|     return lexer->stack.stack[lexer->stack.size - 1]; | ||||
|   } | ||||
|  | ||||
| @@ -625,16 +716,25 @@ lexer_state_t handle_number(lexer_t *lexer, char input) { | ||||
|  | ||||
|     return LEXER_STATE_FRACTION; | ||||
|   } else if (input == '}' || input == ']') { | ||||
|     printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string)); | ||||
|     dstr_clear(lexer->current_string); | ||||
|     // TODO (Abdelrahman): Set the token type correctly based on whether the | ||||
|     // number is an integer or a double | ||||
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|  | ||||
|     return handle_collection_end(lexer, input); | ||||
|   } else if (input == ',') { | ||||
|     printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string)); | ||||
|     dstr_clear(lexer->current_string); | ||||
|     // TODO (Abdelrahman): Set the token type correctly based on whether the | ||||
|     // number is an integer or a double | ||||
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|  | ||||
|     return lexer->stack.stack[lexer->stack.size - 1]; | ||||
|   } else if (isspace(input)) { | ||||
|     // TODO (Abdelrahman): Set the token type correctly based on whether the | ||||
|     // number is an integer or a double | ||||
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|  | ||||
|     return LEXER_STATE_NUMBER_END; | ||||
|   } | ||||
|  | ||||
| @@ -647,8 +747,10 @@ lexer_state_t handle_fraction(lexer_t *lexer, char input) { | ||||
|  | ||||
|     return LEXER_STATE_FRACTION; | ||||
|   } else if (input == '}' || input == ']') { | ||||
|     printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string)); | ||||
|     dstr_clear(lexer->current_string); | ||||
|     // TODO (Abdelrahman): Set the token type correctly based on whether the | ||||
|     // number is an integer or a double | ||||
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|  | ||||
|     return handle_collection_end(lexer, input); | ||||
|   } else if (input == 'e' || input == 'E') { | ||||
| @@ -656,11 +758,18 @@ lexer_state_t handle_fraction(lexer_t *lexer, char input) { | ||||
|  | ||||
|     return LEXER_STATE_EXPONENT; | ||||
|   } else if (input == ',') { | ||||
|     printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string)); | ||||
|     dstr_clear(lexer->current_string); | ||||
|     // TODO (Abdelrahman): Set the token type correctly based on whether the | ||||
|     // number is an integer or a double | ||||
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|  | ||||
|     return lexer->stack.stack[lexer->stack.size - 1]; | ||||
|   } else if (isspace(input)) { | ||||
|     // TODO (Abdelrahman): Set the token type correctly based on whether the | ||||
|     // number is an integer or a double | ||||
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|  | ||||
|     return LEXER_STATE_NUMBER_END; | ||||
|   } | ||||
|  | ||||
| @@ -695,16 +804,25 @@ lexer_state_t handle_power(lexer_t *lexer, char input) { | ||||
|  | ||||
|     return LEXER_STATE_POWER; | ||||
|   } else if (input == '}' || input == ']') { | ||||
|     printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string)); | ||||
|     dstr_clear(lexer->current_string); | ||||
|     // TODO (Abdelrahman): Set the token type correctly based on whether the | ||||
|     // number is an integer or a double | ||||
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|  | ||||
|     return handle_collection_end(lexer, input); | ||||
|   } else if (input == ',') { | ||||
|     printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string)); | ||||
|     dstr_clear(lexer->current_string); | ||||
|     // TODO (Abdelrahman): Set the token type correctly based on whether the | ||||
|     // number is an integer or a double | ||||
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|  | ||||
|     return lexer->stack.stack[lexer->stack.size - 1]; | ||||
|   } else if (isspace(input)) { | ||||
|     // TODO (Abdelrahman): Set the token type correctly based on whether the | ||||
|     // number is an integer or a double | ||||
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|  | ||||
|     return LEXER_STATE_NUMBER_END; | ||||
|   } | ||||
|  | ||||
| @@ -712,12 +830,14 @@ lexer_state_t handle_power(lexer_t *lexer, char input) { | ||||
| } | ||||
|  | ||||
| lexer_state_t handle_number_end(lexer_t *lexer, char input) { | ||||
|   printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string)); | ||||
|   dstr_clear(lexer->current_string); | ||||
|  | ||||
|   if (isspace(input)) { | ||||
|     return LEXER_STATE_NUMBER_END; | ||||
|   } else if (input == ',') { | ||||
|     // TODO (Abdelrahman): Set the token type correctly based on whether the | ||||
|     // number is an integer or a double | ||||
|     set_token(lexer, TK_DOUBLE, | ||||
|               (token_value_t){.string = dstr_to_cstr(lexer->current_string)}); | ||||
|  | ||||
|     return lexer->stack.stack[lexer->stack.size - 1]; | ||||
|   } | ||||
|  | ||||
| @@ -802,7 +922,16 @@ lexer_state_t handle_null(lexer_t *lexer, char input) { | ||||
| } | ||||
|  | ||||
| lexer_state_t handle_keyword_end(lexer_t *lexer, char input) { | ||||
|   printf("TK_KEYWORD: %s\n", lexer->keyword.keyword.str); | ||||
|   const char *keyword = lexer->keyword.keyword.str; | ||||
|  | ||||
|   if (strequal(keyword, "null")) { | ||||
|     set_token(lexer, TK_NULL, (token_value_t){0}); | ||||
|   } else if (strequal(keyword, "true")) { | ||||
|     set_token(lexer, TK_TRUE, (token_value_t){0}); | ||||
|   } else if (strequal(keyword, "false")) { | ||||
|     set_token(lexer, TK_FALSE, (token_value_t){0}); | ||||
|   } | ||||
|  | ||||
|   clear_lex_str(&(lexer->keyword)); | ||||
|  | ||||
|   if (isspace(input)) { | ||||
|   | ||||
| @@ -35,7 +35,12 @@ int main(int argc, char *argv[]) { | ||||
|     return EXIT_FAILURE; | ||||
|   } | ||||
|  | ||||
|   get_next_token(lexer, json); | ||||
| 	token_t token = get_next_token(lexer, json); | ||||
| 	while (token.type != TK_NO_TOKEN) { | ||||
| 		print_token(token); | ||||
|  | ||||
| 		token = get_next_token(lexer, NULL); | ||||
| 	} | ||||
|  | ||||
|   lexer_free(&lexer); | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user