get_next_token now returns a result struct that carries both a possible error and the token
This commit is contained in:
		| @@ -15,9 +15,9 @@ | ||||
|       "main", | ||||
|       "src/main.c" | ||||
|     ], | ||||
|     "directory": "/Users/abdelrahman/dev/personal/say-it-in-json", | ||||
|     "file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/main.c", | ||||
|     "output": "/Users/abdelrahman/dev/personal/say-it-in-json/main" | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", | ||||
|     "output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
| @@ -35,9 +35,9 @@ | ||||
|       "main", | ||||
|       "src/dstring/dstring.c" | ||||
|     ], | ||||
|     "directory": "/Users/abdelrahman/dev/personal/say-it-in-json", | ||||
|     "file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/dstring/dstring.c", | ||||
|     "output": "/Users/abdelrahman/dev/personal/say-it-in-json/main" | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/dstring/dstring.c", | ||||
|     "output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
| @@ -55,9 +55,9 @@ | ||||
|       "main", | ||||
|       "src/json_entities/json_entities.c" | ||||
|     ], | ||||
|     "directory": "/Users/abdelrahman/dev/personal/say-it-in-json", | ||||
|     "file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/json_entities/json_entities.c", | ||||
|     "output": "/Users/abdelrahman/dev/personal/say-it-in-json/main" | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/json_entities/json_entities.c", | ||||
|     "output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
| @@ -75,8 +75,292 @@ | ||||
|       "main", | ||||
|       "src/lexer/lexer.c" | ||||
|     ], | ||||
|     "directory": "/Users/abdelrahman/dev/personal/say-it-in-json", | ||||
|     "file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/lexer/lexer.c", | ||||
|     "output": "/Users/abdelrahman/dev/personal/say-it-in-json/main" | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer.c", | ||||
|     "output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
|       "/usr/bin/clang-16", | ||||
|       "-cc1", | ||||
|       "-triple", | ||||
|       "x86_64-redhat-linux-gnu", | ||||
|       "-emit-obj", | ||||
|       "-mrelax-all", | ||||
|       "-disable-free", | ||||
|       "-clear-ast-before-backend", | ||||
|       "-disable-llvm-verifier", | ||||
|       "-discard-value-names", | ||||
|       "-main-file-name", | ||||
|       "-mrelocation-model", | ||||
|       "static", | ||||
|       "-mframe-pointer=all", | ||||
|       "-fmath-errno", | ||||
|       "-ffp-contract=on", | ||||
|       "-fno-rounding-math", | ||||
|       "-mconstructor-aliases", | ||||
|       "-funwind-tables=2", | ||||
|       "-target-cpu", | ||||
|       "x86-64", | ||||
|       "-tune-cpu", | ||||
|       "generic", | ||||
|       "-mllvm", | ||||
|       "-treat-scalable-fixed-error-as-warning", | ||||
|       "-debug-info-kind=constructor", | ||||
|       "-dwarf-version=4", | ||||
|       "-debugger-tuning=gdb", | ||||
|       "-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", | ||||
|       "-resource-dir", | ||||
|       "/usr/lib64/clang/16", | ||||
|       "-I", | ||||
|       "include", | ||||
|       "-I", | ||||
|       "include/dstring", | ||||
|       "-I", | ||||
|       "include/json_entities", | ||||
|       "-I", | ||||
|       "include/lexer", | ||||
|       "-internal-isystem", | ||||
|       "/usr/lib64/clang/16/include", | ||||
|       "-internal-isystem", | ||||
|       "/usr/local/include", | ||||
|       "-internal-isystem", | ||||
|       "/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include", | ||||
|       "-internal-externc-isystem", | ||||
|       "/include", | ||||
|       "-internal-externc-isystem", | ||||
|       "/usr/include", | ||||
|       "-Wall", | ||||
|       "-Werror", | ||||
|       "-pedantic", | ||||
|       "-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", | ||||
|       "-ferror-limit", | ||||
|       "19", | ||||
|       "-fgnuc-version=4.2.1", | ||||
|       "-fcolor-diagnostics", | ||||
|       "-faddrsig", | ||||
|       "-D__GCC_HAVE_DWARF2_CFI_ASM=1", | ||||
|       "-x", | ||||
|       "c", | ||||
|       "-o", | ||||
|       "/tmp/main-8f77b1.o", | ||||
|       "src/main.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", | ||||
|     "output": "/tmp/main-8f77b1.o" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
|       "/usr/bin/clang-16", | ||||
|       "-cc1", | ||||
|       "-triple", | ||||
|       "x86_64-redhat-linux-gnu", | ||||
|       "-emit-obj", | ||||
|       "-mrelax-all", | ||||
|       "-disable-free", | ||||
|       "-clear-ast-before-backend", | ||||
|       "-disable-llvm-verifier", | ||||
|       "-discard-value-names", | ||||
|       "-main-file-name", | ||||
|       "-mrelocation-model", | ||||
|       "static", | ||||
|       "-mframe-pointer=all", | ||||
|       "-fmath-errno", | ||||
|       "-ffp-contract=on", | ||||
|       "-fno-rounding-math", | ||||
|       "-mconstructor-aliases", | ||||
|       "-funwind-tables=2", | ||||
|       "-target-cpu", | ||||
|       "x86-64", | ||||
|       "-tune-cpu", | ||||
|       "generic", | ||||
|       "-mllvm", | ||||
|       "-treat-scalable-fixed-error-as-warning", | ||||
|       "-debug-info-kind=constructor", | ||||
|       "-dwarf-version=4", | ||||
|       "-debugger-tuning=gdb", | ||||
|       "-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", | ||||
|       "-resource-dir", | ||||
|       "/usr/lib64/clang/16", | ||||
|       "-I", | ||||
|       "include", | ||||
|       "-I", | ||||
|       "include/dstring", | ||||
|       "-I", | ||||
|       "include/json_entities", | ||||
|       "-I", | ||||
|       "include/lexer", | ||||
|       "-internal-isystem", | ||||
|       "/usr/lib64/clang/16/include", | ||||
|       "-internal-isystem", | ||||
|       "/usr/local/include", | ||||
|       "-internal-isystem", | ||||
|       "/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include", | ||||
|       "-internal-externc-isystem", | ||||
|       "/include", | ||||
|       "-internal-externc-isystem", | ||||
|       "/usr/include", | ||||
|       "-Wall", | ||||
|       "-Werror", | ||||
|       "-pedantic", | ||||
|       "-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", | ||||
|       "-ferror-limit", | ||||
|       "19", | ||||
|       "-fgnuc-version=4.2.1", | ||||
|       "-fcolor-diagnostics", | ||||
|       "-faddrsig", | ||||
|       "-D__GCC_HAVE_DWARF2_CFI_ASM=1", | ||||
|       "-x", | ||||
|       "c", | ||||
|       "-o", | ||||
|       "/tmp/dstring-e11abe.o", | ||||
|       "src/dstring/dstring.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/dstring/dstring.c", | ||||
|     "output": "/tmp/dstring-e11abe.o" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
|       "/usr/bin/clang-16", | ||||
|       "-cc1", | ||||
|       "-triple", | ||||
|       "x86_64-redhat-linux-gnu", | ||||
|       "-emit-obj", | ||||
|       "-mrelax-all", | ||||
|       "-disable-free", | ||||
|       "-clear-ast-before-backend", | ||||
|       "-disable-llvm-verifier", | ||||
|       "-discard-value-names", | ||||
|       "-main-file-name", | ||||
|       "-mrelocation-model", | ||||
|       "static", | ||||
|       "-mframe-pointer=all", | ||||
|       "-fmath-errno", | ||||
|       "-ffp-contract=on", | ||||
|       "-fno-rounding-math", | ||||
|       "-mconstructor-aliases", | ||||
|       "-funwind-tables=2", | ||||
|       "-target-cpu", | ||||
|       "x86-64", | ||||
|       "-tune-cpu", | ||||
|       "generic", | ||||
|       "-mllvm", | ||||
|       "-treat-scalable-fixed-error-as-warning", | ||||
|       "-debug-info-kind=constructor", | ||||
|       "-dwarf-version=4", | ||||
|       "-debugger-tuning=gdb", | ||||
|       "-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", | ||||
|       "-resource-dir", | ||||
|       "/usr/lib64/clang/16", | ||||
|       "-I", | ||||
|       "include", | ||||
|       "-I", | ||||
|       "include/dstring", | ||||
|       "-I", | ||||
|       "include/json_entities", | ||||
|       "-I", | ||||
|       "include/lexer", | ||||
|       "-internal-isystem", | ||||
|       "/usr/lib64/clang/16/include", | ||||
|       "-internal-isystem", | ||||
|       "/usr/local/include", | ||||
|       "-internal-isystem", | ||||
|       "/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include", | ||||
|       "-internal-externc-isystem", | ||||
|       "/include", | ||||
|       "-internal-externc-isystem", | ||||
|       "/usr/include", | ||||
|       "-Wall", | ||||
|       "-Werror", | ||||
|       "-pedantic", | ||||
|       "-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", | ||||
|       "-ferror-limit", | ||||
|       "19", | ||||
|       "-fgnuc-version=4.2.1", | ||||
|       "-fcolor-diagnostics", | ||||
|       "-faddrsig", | ||||
|       "-D__GCC_HAVE_DWARF2_CFI_ASM=1", | ||||
|       "-x", | ||||
|       "c", | ||||
|       "-o", | ||||
|       "/tmp/json_entities-caa676.o", | ||||
|       "src/json_entities/json_entities.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/json_entities/json_entities.c", | ||||
|     "output": "/tmp/json_entities-caa676.o" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
|       "/usr/bin/clang-16", | ||||
|       "-cc1", | ||||
|       "-triple", | ||||
|       "x86_64-redhat-linux-gnu", | ||||
|       "-emit-obj", | ||||
|       "-mrelax-all", | ||||
|       "-disable-free", | ||||
|       "-clear-ast-before-backend", | ||||
|       "-disable-llvm-verifier", | ||||
|       "-discard-value-names", | ||||
|       "-main-file-name", | ||||
|       "-mrelocation-model", | ||||
|       "static", | ||||
|       "-mframe-pointer=all", | ||||
|       "-fmath-errno", | ||||
|       "-ffp-contract=on", | ||||
|       "-fno-rounding-math", | ||||
|       "-mconstructor-aliases", | ||||
|       "-funwind-tables=2", | ||||
|       "-target-cpu", | ||||
|       "x86-64", | ||||
|       "-tune-cpu", | ||||
|       "generic", | ||||
|       "-mllvm", | ||||
|       "-treat-scalable-fixed-error-as-warning", | ||||
|       "-debug-info-kind=constructor", | ||||
|       "-dwarf-version=4", | ||||
|       "-debugger-tuning=gdb", | ||||
|       "-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", | ||||
|       "-resource-dir", | ||||
|       "/usr/lib64/clang/16", | ||||
|       "-I", | ||||
|       "include", | ||||
|       "-I", | ||||
|       "include/dstring", | ||||
|       "-I", | ||||
|       "include/json_entities", | ||||
|       "-I", | ||||
|       "include/lexer", | ||||
|       "-internal-isystem", | ||||
|       "/usr/lib64/clang/16/include", | ||||
|       "-internal-isystem", | ||||
|       "/usr/local/include", | ||||
|       "-internal-isystem", | ||||
|       "/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include", | ||||
|       "-internal-externc-isystem", | ||||
|       "/include", | ||||
|       "-internal-externc-isystem", | ||||
|       "/usr/include", | ||||
|       "-Wall", | ||||
|       "-Werror", | ||||
|       "-pedantic", | ||||
|       "-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", | ||||
|       "-ferror-limit", | ||||
|       "19", | ||||
|       "-fgnuc-version=4.2.1", | ||||
|       "-fcolor-diagnostics", | ||||
|       "-faddrsig", | ||||
|       "-D__GCC_HAVE_DWARF2_CFI_ASM=1", | ||||
|       "-x", | ||||
|       "c", | ||||
|       "-o", | ||||
|       "/tmp/lexer-7fcb6e.o", | ||||
|       "src/lexer/lexer.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer.c", | ||||
|     "output": "/tmp/lexer-7fcb6e.o" | ||||
|   } | ||||
| ] | ||||
|   | ||||
| @@ -10,7 +10,7 @@ | ||||
| typedef const char *str_view_t; | ||||
|  | ||||
| typedef enum { | ||||
| 	TK_NO_TOKEN, | ||||
|   TK_NO_TOKEN, | ||||
|   TK_L_BRACE, | ||||
|   TK_R_BRACE, | ||||
|   TK_L_BRACKET, | ||||
| @@ -22,7 +22,7 @@ typedef enum { | ||||
|   TK_STR_VAL, | ||||
|   TK_INTEGER, | ||||
|   TK_DOUBLE, | ||||
| } token_type_t; | ||||
| } token_type; | ||||
|  | ||||
| typedef union { | ||||
|   void *no_val; | ||||
| @@ -34,15 +34,30 @@ typedef union { | ||||
| typedef struct { | ||||
|   u64 line; | ||||
|   u64 column; | ||||
|   token_type_t type; | ||||
|   token_type type; | ||||
|   token_value_t value; | ||||
| } token_t; | ||||
|  | ||||
/*
 * Error category attached to every result returned by get_next_token().
 *
 * LEX_ERR_NONE    - no error; it is the first enumerator (value 0), which is
 *                   what lets callers test the code directly for truthiness.
 * LEX_ERR_INVALID - reported when the lexer state machine ends up in
 *                   LEXER_STATE_ERROR while consuming the input.
 */
| typedef enum { | ||||
|   LEX_ERR_NONE, | ||||
|   LEX_ERR_INVALID, | ||||
| } lex_err_type; | ||||
|  | ||||
/*
 * Error descriptor: an error code plus a human-readable message.
 *
 * errno - the error category; LEX_ERR_NONE when the call succeeded.
 * msg   - NUL-terminated message; get_next_token() sets it to "" when there
 *         is no error and to the lexer's formatted error_message otherwise.
 *         Presumably the text is owned by the lexer and stays valid only
 *         until the next error or lexer_free() - TODO confirm against
 *         dstr_to_cstr().
 *
 * NOTE(review): `errno` may also be defined as a macro by <errno.h>. If that
 * header is in scope wherever this struct is declared or used, the member
 * name would be macro-expanded - consider renaming (e.g. `code`) in a
 * follow-up that updates all callers.
 */
| typedef struct { | ||||
|   lex_err_type errno; | ||||
|   str_view_t msg; | ||||
| } lex_err_t; | ||||
|  | ||||
/*
 * Return value of get_next_token(): an error descriptor paired with a token.
 *
 * error - its code is LEX_ERR_NONE on success; check it before using `token`.
 * token - the token that was produced. get_next_token() returns a zeroed
 *         token alongside an error, and also when the input is exhausted
 *         (in that case with no error), so its type is then TK_NO_TOKEN.
 */
| typedef struct { | ||||
|   lex_err_t error; | ||||
|   token_t token; | ||||
| } lex_result_t; | ||||
|  | ||||
| typedef struct lexer lexer_t; | ||||
|  | ||||
| void lexer_init(lexer_t **lexer); | ||||
| void lexer_free(lexer_t **lexer); | ||||
| token_t get_next_token(lexer_t *lexer, const char *text); | ||||
| lex_result_t get_next_token(lexer_t *lexer, const char *text); | ||||
|  | ||||
| bool validate_json(char *json); | ||||
| void print_token(token_t token); | ||||
|   | ||||
| @@ -92,6 +92,7 @@ struct lexer { | ||||
|   token_t token; | ||||
|   bool has_extra_token; | ||||
|   token_t extra_token; | ||||
|   dstr_t *error_message; | ||||
| }; | ||||
|  | ||||
| void stack_push(state_stack_t *stack, lexer_state_t value); | ||||
| @@ -104,7 +105,7 @@ bool is_valid_hex_char(const char input); | ||||
| bool ishex(const char input); | ||||
|  | ||||
| token_t dstr_to_numerical_token(const dstr_t *str); | ||||
| void set_token(token_t *token, u64 line, u64 column, token_type_t type, | ||||
| void set_token(token_t *token, u64 line, u64 column, token_type type, | ||||
|                token_value_t value); | ||||
|  | ||||
| void lexer_state_machine(lexer_t *lexer, char input); | ||||
| @@ -153,6 +154,7 @@ void lexer_init(lexer_t **lexer) { | ||||
|   (*lexer)->keyword.type = LEXER_STRING_KEYWORD; | ||||
|   (*lexer)->codepoint.type = LEXER_STRING_UNICODE; | ||||
|   (*lexer)->current_string = dstr_with_capacity(STRING_BUF_START_CAPACITY); | ||||
|   (*lexer)->error_message = dstr_with_capacity(STRING_BUF_START_CAPACITY); | ||||
|   (*lexer)->token_ready = false; | ||||
|   (*lexer)->token = (token_t){0}; | ||||
|   (*lexer)->has_extra_token = false; | ||||
| @@ -168,11 +170,14 @@ void lexer_free(lexer_t **lexer) { | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   dstr_free(&((*lexer)->current_string)); | ||||
|   dstr_free(&((*lexer)->error_message)); | ||||
|  | ||||
|   free(*lexer); | ||||
|   *lexer = NULL; | ||||
| } | ||||
|  | ||||
| token_t get_next_token(lexer_t *lexer, const char *text) { | ||||
| lex_result_t get_next_token(lexer_t *lexer, const char *text) { | ||||
|   if (text != NULL) { | ||||
|     lexer->cursor = 0; | ||||
|     lexer->text = text; | ||||
| @@ -187,7 +192,10 @@ token_t get_next_token(lexer_t *lexer, const char *text) { | ||||
|     if (lexer->has_extra_token) { | ||||
|       lexer->has_extra_token = false; | ||||
|  | ||||
|       return lexer->extra_token; | ||||
|       return (lex_result_t){ | ||||
|           (lex_err_t){.errno = LEX_ERR_NONE, .msg = ""}, | ||||
|           lexer->extra_token, | ||||
|       }; | ||||
|     } | ||||
|  | ||||
|     c = lexer->text[(lexer->cursor)++]; | ||||
| @@ -202,14 +210,40 @@ token_t get_next_token(lexer_t *lexer, const char *text) { | ||||
|     } | ||||
|  | ||||
|     if (lexer->current == LEXER_STATE_ERROR) { | ||||
|       char msg[STRING_BUF_START_CAPACITY + 1]; | ||||
|       memset(msg, 0, STRING_BUF_START_CAPACITY + 1); | ||||
|  | ||||
|       u64 slice_length = 20; | ||||
|       char slice[slice_length]; | ||||
|       snprintf(slice, slice_length, "%s", &(lexer->text[lexer->cursor - 1])); | ||||
|  | ||||
|       snprintf( | ||||
|           msg, STRING_BUF_START_CAPACITY, | ||||
|           "\n(%llu:%llu) Encountered an error while parsing the following:\n%s", | ||||
|           (unsigned long long)lexer->line, (unsigned long long)lexer->column, | ||||
|           slice); | ||||
|  | ||||
|       dstr_update(&(lexer->error_message), msg); | ||||
|  | ||||
|       return (lex_result_t){ | ||||
|           (lex_err_t){.errno = LEX_ERR_INVALID, | ||||
|                       .msg = dstr_to_cstr(lexer->error_message)}, | ||||
|           (token_t){0}, | ||||
|       }; | ||||
|     } else if (lexer->token_ready) { | ||||
|       lexer->token_ready = false; | ||||
|  | ||||
|       return lexer->token; | ||||
|       return (lex_result_t){ | ||||
|           (lex_err_t){.errno = LEX_ERR_NONE, .msg = ""}, | ||||
|           lexer->token, | ||||
|       }; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   return (token_t){0}; | ||||
|   return (lex_result_t){ | ||||
|       (lex_err_t){.errno = LEX_ERR_NONE, .msg = ""}, | ||||
|       (token_t){0}, | ||||
|   }; | ||||
| } | ||||
|  | ||||
| bool validate_json(char *json) { | ||||
| @@ -401,7 +435,7 @@ token_t dstr_to_numerical_token(const dstr_t *str) { | ||||
|   return token; | ||||
| } | ||||
|  | ||||
| void set_token(token_t *token, u64 line, u64 column, token_type_t type, | ||||
| void set_token(token_t *token, u64 line, u64 column, token_type type, | ||||
|                token_value_t value) { | ||||
|   *token = (token_t){ | ||||
|       .line = line, | ||||
|   | ||||
							
								
								
									
										18
									
								
								src/main.c
									
									
									
									
									
								
							
							
						
						
									
										18
									
								
								src/main.c
									
									
									
									
									
								
							| @@ -35,11 +35,21 @@ int main(int argc, char *argv[]) { | ||||
|     return EXIT_FAILURE; | ||||
|   } | ||||
|  | ||||
|   token_t token = get_next_token(lexer, json); | ||||
|   while (token.type != TK_NO_TOKEN) { | ||||
|     print_token(token); | ||||
|   lex_result_t result = get_next_token(lexer, json); | ||||
|  | ||||
|     token = get_next_token(lexer, NULL); | ||||
|   if (result.error.errno) { | ||||
|     printf("%s\n", result.error.msg); | ||||
|   } else { | ||||
|     while (result.token.type != TK_NO_TOKEN) { | ||||
|       print_token(result.token); | ||||
|  | ||||
|       result = get_next_token(lexer, NULL); | ||||
|  | ||||
|       if (result.error.errno) { | ||||
|         printf("%s\n", result.error.msg); | ||||
|         break; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   lexer_free(&lexer); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user