From 1e510411dcfddb6b14b926262aa5c9130e752d28 Mon Sep 17 00:00:00 2001 From: Abdelrahman Said Date: Tue, 27 Jun 2023 22:23:16 +0100 Subject: [PATCH] get_next_token returns result which includes possible error and token --- compile_commands.json | 308 ++++++++++++++++++++++++++++++++++++++++-- include/lexer/lexer.h | 23 +++- src/lexer/lexer.c | 46 ++++++- src/main.c | 18 ++- 4 files changed, 369 insertions(+), 26 deletions(-) diff --git a/compile_commands.json b/compile_commands.json index 78e10ee..692279d 100644 --- a/compile_commands.json +++ b/compile_commands.json @@ -15,9 +15,9 @@ "main", "src/main.c" ], - "directory": "/Users/abdelrahman/dev/personal/say-it-in-json", - "file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/main.c", - "output": "/Users/abdelrahman/dev/personal/say-it-in-json/main" + "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", + "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", + "output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main" }, { "arguments": [ @@ -35,9 +35,9 @@ "main", "src/dstring/dstring.c" ], - "directory": "/Users/abdelrahman/dev/personal/say-it-in-json", - "file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/dstring/dstring.c", - "output": "/Users/abdelrahman/dev/personal/say-it-in-json/main" + "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", + "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/dstring/dstring.c", + "output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main" }, { "arguments": [ @@ -55,9 +55,9 @@ "main", "src/json_entities/json_entities.c" ], - "directory": "/Users/abdelrahman/dev/personal/say-it-in-json", - "file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/json_entities/json_entities.c", - "output": "/Users/abdelrahman/dev/personal/say-it-in-json/main" + "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", + "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/json_entities/json_entities.c", + "output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main" }, { "arguments": [ @@ -75,8 +75,292 @@ "main", "src/lexer/lexer.c" ], - "directory": "/Users/abdelrahman/dev/personal/say-it-in-json", - "file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/lexer/lexer.c", - "output": "/Users/abdelrahman/dev/personal/say-it-in-json/main" + "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", + "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer.c", + "output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main" + }, + { + "arguments": [ + "/usr/bin/clang-16", + "-cc1", + "-triple", + "x86_64-redhat-linux-gnu", + "-emit-obj", + "-mrelax-all", + "-disable-free", + "-clear-ast-before-backend", + "-disable-llvm-verifier", + "-discard-value-names", + "-main-file-name", + "-mrelocation-model", + "static", + "-mframe-pointer=all", + "-fmath-errno", + "-ffp-contract=on", + "-fno-rounding-math", + "-mconstructor-aliases", + "-funwind-tables=2", + "-target-cpu", + "x86-64", + "-tune-cpu", + "generic", + "-mllvm", + "-treat-scalable-fixed-error-as-warning", + "-debug-info-kind=constructor", + "-dwarf-version=4", + "-debugger-tuning=gdb", + "-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", + "-resource-dir", + "/usr/lib64/clang/16", + "-I", + "include", + "-I", + "include/dstring", + "-I", + "include/json_entities", + "-I", + "include/lexer", + "-internal-isystem", + "/usr/lib64/clang/16/include", + "-internal-isystem", + "/usr/local/include", + "-internal-isystem", + "/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include", + "-internal-externc-isystem", + "/include", + "-internal-externc-isystem", + "/usr/include", + "-Wall", + "-Werror", + "-pedantic", + "-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", + "-ferror-limit", + "19", + "-fgnuc-version=4.2.1", + "-fcolor-diagnostics", + "-faddrsig", + "-D__GCC_HAVE_DWARF2_CFI_ASM=1", + "-x", + "c", + "-o", + "/tmp/main-8f77b1.o", + "src/main.c" + ], + "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", + "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", + "output": "/tmp/main-8f77b1.o" + }, + { + "arguments": [ + "/usr/bin/clang-16", + "-cc1", + "-triple", + "x86_64-redhat-linux-gnu", + "-emit-obj", + "-mrelax-all", + "-disable-free", + "-clear-ast-before-backend", + "-disable-llvm-verifier", + "-discard-value-names", + "-main-file-name", + "-mrelocation-model", + "static", + "-mframe-pointer=all", + "-fmath-errno", + "-ffp-contract=on", + "-fno-rounding-math", + "-mconstructor-aliases", + "-funwind-tables=2", + "-target-cpu", + "x86-64", + "-tune-cpu", + "generic", + "-mllvm", + "-treat-scalable-fixed-error-as-warning", + "-debug-info-kind=constructor", + "-dwarf-version=4", + "-debugger-tuning=gdb", + "-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", + "-resource-dir", + "/usr/lib64/clang/16", + "-I", + "include", + "-I", + "include/dstring", + "-I", + "include/json_entities", + "-I", + "include/lexer", + "-internal-isystem", + "/usr/lib64/clang/16/include", + "-internal-isystem", + "/usr/local/include", + "-internal-isystem", + "/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include", + "-internal-externc-isystem", + "/include", + "-internal-externc-isystem", + "/usr/include", + "-Wall", + "-Werror", + "-pedantic", + "-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", + "-ferror-limit", + "19", + "-fgnuc-version=4.2.1", + "-fcolor-diagnostics", + "-faddrsig", + "-D__GCC_HAVE_DWARF2_CFI_ASM=1", + "-x", + "c", + "-o", + "/tmp/dstring-e11abe.o", + "src/dstring/dstring.c" + ], + "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", + "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/dstring/dstring.c", + "output": "/tmp/dstring-e11abe.o" + }, + { + "arguments": [ + "/usr/bin/clang-16", + "-cc1", + "-triple", + "x86_64-redhat-linux-gnu", + "-emit-obj", + "-mrelax-all", + "-disable-free", + "-clear-ast-before-backend", + "-disable-llvm-verifier", + "-discard-value-names", + "-main-file-name", + "-mrelocation-model", + "static", + "-mframe-pointer=all", + "-fmath-errno", + "-ffp-contract=on", + "-fno-rounding-math", + "-mconstructor-aliases", + "-funwind-tables=2", + "-target-cpu", + "x86-64", + "-tune-cpu", + "generic", + "-mllvm", + "-treat-scalable-fixed-error-as-warning", + "-debug-info-kind=constructor", + "-dwarf-version=4", + "-debugger-tuning=gdb", + "-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", + "-resource-dir", + "/usr/lib64/clang/16", + "-I", + "include", + "-I", + "include/dstring", + "-I", + "include/json_entities", + "-I", + "include/lexer", + "-internal-isystem", + "/usr/lib64/clang/16/include", + "-internal-isystem", + "/usr/local/include", + "-internal-isystem", + "/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include", + "-internal-externc-isystem", + "/include", + "-internal-externc-isystem", + "/usr/include", + "-Wall", + "-Werror", + "-pedantic", + "-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", + "-ferror-limit", + "19", + "-fgnuc-version=4.2.1", + "-fcolor-diagnostics", + "-faddrsig", + "-D__GCC_HAVE_DWARF2_CFI_ASM=1", + "-x", + "c", + "-o", + "/tmp/json_entities-caa676.o", + "src/json_entities/json_entities.c" + ], + "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", + "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/json_entities/json_entities.c", + "output": "/tmp/json_entities-caa676.o" + }, + { + "arguments": [ + "/usr/bin/clang-16", + "-cc1", + "-triple", + "x86_64-redhat-linux-gnu", + "-emit-obj", + "-mrelax-all", + "-disable-free", + "-clear-ast-before-backend", + "-disable-llvm-verifier", + "-discard-value-names", + "-main-file-name", + "-mrelocation-model", + "static", + "-mframe-pointer=all", + "-fmath-errno", + "-ffp-contract=on", + "-fno-rounding-math", + "-mconstructor-aliases", + "-funwind-tables=2", + "-target-cpu", + "x86-64", + "-tune-cpu", + "generic", + "-mllvm", + "-treat-scalable-fixed-error-as-warning", + "-debug-info-kind=constructor", + "-dwarf-version=4", + "-debugger-tuning=gdb", + "-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", + "-resource-dir", + "/usr/lib64/clang/16", + "-I", + "include", + "-I", + "include/dstring", + "-I", + "include/json_entities", + "-I", + "include/lexer", + "-internal-isystem", + "/usr/lib64/clang/16/include", + "-internal-isystem", + "/usr/local/include", + "-internal-isystem", + "/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include", + "-internal-externc-isystem", + "/include", + "-internal-externc-isystem", + "/usr/include", + "-Wall", + "-Werror", + "-pedantic", + "-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", + "-ferror-limit", + "19", + "-fgnuc-version=4.2.1", + "-fcolor-diagnostics", + "-faddrsig", + "-D__GCC_HAVE_DWARF2_CFI_ASM=1", + "-x", + "c", + "-o", + "/tmp/lexer-7fcb6e.o", + "src/lexer/lexer.c" + ], + "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", + "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer.c", + "output": "/tmp/lexer-7fcb6e.o" } ] diff --git a/include/lexer/lexer.h b/include/lexer/lexer.h index acc0b11..5306c7c 100644 --- a/include/lexer/lexer.h +++ b/include/lexer/lexer.h @@ -10,7 +10,7 @@ typedef const char *str_view_t; typedef enum { - TK_NO_TOKEN, + TK_NO_TOKEN, TK_L_BRACE, TK_R_BRACE, TK_L_BRACKET, @@ -22,7 +22,7 @@ typedef enum { TK_STR_VAL, TK_INTEGER, TK_DOUBLE, -} token_type_t; +} token_type; typedef union { void *no_val; @@ -34,15 +34,30 @@ typedef union { typedef struct { u64 line; u64 column; - token_type_t type; + token_type type; token_value_t value; } token_t; +typedef enum { + LEX_ERR_NONE, + LEX_ERR_INVALID, +} lex_err_type; + +typedef struct { + lex_err_type errno; + str_view_t msg; +} lex_err_t; + +typedef struct { + lex_err_t error; + token_t token; +} lex_result_t; + typedef struct lexer lexer_t; void lexer_init(lexer_t **lexer); void lexer_free(lexer_t **lexer); -token_t get_next_token(lexer_t *lexer, const char *text); +lex_result_t get_next_token(lexer_t *lexer, const char *text); bool validate_json(char *json); void print_token(token_t token); diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 9751e7c..14fabfc 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -92,6 +92,7 @@ struct lexer { token_t token; bool has_extra_token; token_t extra_token; + dstr_t *error_message; }; void stack_push(state_stack_t *stack, lexer_state_t value); @@ -104,7 +105,7 @@ bool is_valid_hex_char(const char input); bool ishex(const char input); token_t dstr_to_numerical_token(const dstr_t *str); -void set_token(token_t *token, u64 line, u64 column, token_type_t type, +void set_token(token_t *token, u64 line, u64 column, token_type type, token_value_t value); void lexer_state_machine(lexer_t *lexer, char input); @@ -153,6 +154,7 @@ void lexer_init(lexer_t **lexer) { (*lexer)->keyword.type = LEXER_STRING_KEYWORD; (*lexer)->codepoint.type = LEXER_STRING_UNICODE; (*lexer)->current_string = dstr_with_capacity(STRING_BUF_START_CAPACITY); + (*lexer)->error_message = dstr_with_capacity(STRING_BUF_START_CAPACITY); (*lexer)->token_ready = false; (*lexer)->token = (token_t){0}; (*lexer)->has_extra_token = false; @@ -168,11 +170,14 @@ void lexer_free(lexer_t **lexer) { return; } + dstr_free(&((*lexer)->current_string)); + dstr_free(&((*lexer)->error_message)); + free(*lexer); *lexer = NULL; } -token_t get_next_token(lexer_t *lexer, const char *text) { +lex_result_t get_next_token(lexer_t *lexer, const char *text) { if (text != NULL) { lexer->cursor = 0; lexer->text = text; @@ -187,7 +192,10 @@ token_t get_next_token(lexer_t *lexer, const char *text) { if (lexer->has_extra_token) { lexer->has_extra_token = false; - return lexer->extra_token; + return (lex_result_t){ + (lex_err_t){.errno = LEX_ERR_NONE, .msg = ""}, + lexer->extra_token, + }; } c = lexer->text[(lexer->cursor)++]; @@ -202,14 +210,40 @@ token_t get_next_token(lexer_t *lexer, const char *text) { } if (lexer->current == LEXER_STATE_ERROR) { + char msg[STRING_BUF_START_CAPACITY + 1]; + memset(msg, 0, STRING_BUF_START_CAPACITY + 1); + + u64 slice_length = 20; + char slice[slice_length]; + snprintf(slice, slice_length, "%s", &(lexer->text[lexer->cursor - 1])); + + snprintf( + msg, STRING_BUF_START_CAPACITY, + "\n(%llu:%llu) Encountered an error while parsing the following:\n%s", + (unsigned long long)lexer->line, (unsigned long long)lexer->column, + slice); + + dstr_update(&(lexer->error_message), msg); + + return (lex_result_t){ + (lex_err_t){.errno = LEX_ERR_INVALID, + .msg = dstr_to_cstr(lexer->error_message)}, + (token_t){0}, + }; } else if (lexer->token_ready) { lexer->token_ready = false; - return lexer->token; + return (lex_result_t){ + (lex_err_t){.errno = LEX_ERR_NONE, .msg = ""}, + lexer->token, + }; } } - return (token_t){0}; + return (lex_result_t){ + (lex_err_t){.errno = LEX_ERR_NONE, .msg = ""}, + (token_t){0}, + }; } bool validate_json(char *json) { @@ -401,7 +435,7 @@ token_t dstr_to_numerical_token(const dstr_t *str) { return token; } -void set_token(token_t *token, u64 line, u64 column, token_type_t type, +void set_token(token_t *token, u64 line, u64 column, token_type type, token_value_t value) { *token = (token_t){ .line = line, diff --git a/src/main.c b/src/main.c index f73c74c..1a66783 100644 --- a/src/main.c +++ b/src/main.c @@ -35,11 +35,21 @@ int main(int argc, char *argv[]) { return EXIT_FAILURE; } - token_t token = get_next_token(lexer, json); - while (token.type != TK_NO_TOKEN) { - print_token(token); + lex_result_t result = get_next_token(lexer, json); - token = get_next_token(lexer, NULL); + if (result.error.errno) { + printf("%s\n", result.error.msg); + } else { + while (result.token.type != TK_NO_TOKEN) { + print_token(result.token); + + result = get_next_token(lexer, NULL); + + if (result.error.errno) { + printf("%s\n", result.error.msg); + break; + } + } } lexer_free(&lexer);