From e4d161d79d26513b8f06fc0a5d22aff899347b15 Mon Sep 17 00:00:00 2001 From: Abdelrahman Said Date: Tue, 20 Jun 2023 21:56:59 +0100 Subject: [PATCH] Plan work for get_next_token function --- compile_commands.json | 225 ++++++++++++++++++++++++++++++++++++++++-- src/lexer/lexer.c | 22 +++++ 2 files changed, 238 insertions(+), 9 deletions(-) diff --git a/compile_commands.json b/compile_commands.json index 3552923..0c2bf13 100644 --- a/compile_commands.json +++ b/compile_commands.json @@ -14,9 +14,9 @@ "main", "src/main.c" ], - "directory": "/Users/abdelrahman/dev/personal/say-it-in-json", - "file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/main.c", - "output": "/Users/abdelrahman/dev/personal/say-it-in-json/main" + "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", + "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", + "output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main" }, { "arguments": [ @@ -33,9 +33,9 @@ "main", "src/dstring/dstring.c" ], - "directory": "/Users/abdelrahman/dev/personal/say-it-in-json", - "file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/dstring/dstring.c", - "output": "/Users/abdelrahman/dev/personal/say-it-in-json/main" + "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", + "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/dstring/dstring.c", + "output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main" }, { "arguments": [ @@ -52,8 +52,215 @@ "main", "src/lexer/lexer.c" ], - "directory": "/Users/abdelrahman/dev/personal/say-it-in-json", - "file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/lexer/lexer.c", - "output": "/Users/abdelrahman/dev/personal/say-it-in-json/main" + "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", + "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer.c", + "output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main" + }, + { + "arguments": [ + "/usr/bin/clang-16", + "-cc1", + "-triple", + "x86_64-redhat-linux-gnu", + "-emit-obj", + "-mrelax-all", + "-disable-free", + "-clear-ast-before-backend", + "-disable-llvm-verifier", + "-discard-value-names", + "-main-file-name", + "-mrelocation-model", + "static", + "-mframe-pointer=all", + "-fmath-errno", + "-ffp-contract=on", + "-fno-rounding-math", + "-mconstructor-aliases", + "-funwind-tables=2", + "-target-cpu", + "x86-64", + "-tune-cpu", + "generic", + "-mllvm", + "-treat-scalable-fixed-error-as-warning", + "-debug-info-kind=constructor", + "-dwarf-version=4", + "-debugger-tuning=gdb", + "-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", + "-resource-dir", + "/usr/lib64/clang/16", + "-I", + "include", + "-I", + "include/dstring", + "-I", + "include/lexer", + "-internal-isystem", + "/usr/lib64/clang/16/include", + "-internal-isystem", + "/usr/local/include", + "-internal-isystem", + "/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include", + "-internal-externc-isystem", + "/include", + "-internal-externc-isystem", + "/usr/include", + "-Wall", + "-Werror", + "-pedantic", + "-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", + "-ferror-limit", + "19", + "-fgnuc-version=4.2.1", + "-fcolor-diagnostics", + "-faddrsig", + "-D__GCC_HAVE_DWARF2_CFI_ASM=1", + "-x", + "c", + "-o", + "/tmp/main-368473.o", + "src/main.c" + ], + "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", + "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", + "output": "/tmp/main-368473.o" + }, + { + "arguments": [ + "/usr/bin/clang-16", + "-cc1", + "-triple", + "x86_64-redhat-linux-gnu", + "-emit-obj", + "-mrelax-all", + "-disable-free", + "-clear-ast-before-backend", + "-disable-llvm-verifier", + "-discard-value-names", + "-main-file-name", + "-mrelocation-model", + "static", + "-mframe-pointer=all", + "-fmath-errno", + "-ffp-contract=on", + "-fno-rounding-math", + "-mconstructor-aliases", + "-funwind-tables=2", + "-target-cpu", + "x86-64", + "-tune-cpu", + "generic", + "-mllvm", + "-treat-scalable-fixed-error-as-warning", + "-debug-info-kind=constructor", + "-dwarf-version=4", + "-debugger-tuning=gdb", + "-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", + "-resource-dir", + "/usr/lib64/clang/16", + "-I", + "include", + "-I", + "include/dstring", + "-I", + "include/lexer", + "-internal-isystem", + "/usr/lib64/clang/16/include", + "-internal-isystem", + "/usr/local/include", + "-internal-isystem", + "/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include", + "-internal-externc-isystem", + "/include", + "-internal-externc-isystem", + "/usr/include", + "-Wall", + "-Werror", + "-pedantic", + "-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", + "-ferror-limit", + "19", + "-fgnuc-version=4.2.1", + "-fcolor-diagnostics", + "-faddrsig", + "-D__GCC_HAVE_DWARF2_CFI_ASM=1", + "-x", + "c", + "-o", + "/tmp/dstring-ea8650.o", + "src/dstring/dstring.c" + ], + "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", + "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/dstring/dstring.c", + "output": "/tmp/dstring-ea8650.o" + }, + { + "arguments": [ + "/usr/bin/clang-16", + "-cc1", + "-triple", + "x86_64-redhat-linux-gnu", + "-emit-obj", + "-mrelax-all", + "-disable-free", + "-clear-ast-before-backend", + "-disable-llvm-verifier", + "-discard-value-names", + "-main-file-name", + "-mrelocation-model", + "static", + "-mframe-pointer=all", + "-fmath-errno", + "-ffp-contract=on", + "-fno-rounding-math", + "-mconstructor-aliases", + "-funwind-tables=2", + "-target-cpu", + "x86-64", + "-tune-cpu", + "generic", + "-mllvm", + "-treat-scalable-fixed-error-as-warning", + "-debug-info-kind=constructor", + "-dwarf-version=4", + "-debugger-tuning=gdb", + "-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", + "-resource-dir", + "/usr/lib64/clang/16", + "-I", + "include", + "-I", + "include/dstring", + "-I", + "include/lexer", + "-internal-isystem", + "/usr/lib64/clang/16/include", + "-internal-isystem", + "/usr/local/include", + "-internal-isystem", + "/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include", + "-internal-externc-isystem", + "/include", + "-internal-externc-isystem", + "/usr/include", + "-Wall", + "-Werror", + "-pedantic", + "-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json", + "-ferror-limit", + "19", + "-fgnuc-version=4.2.1", + "-fcolor-diagnostics", + "-faddrsig", + "-D__GCC_HAVE_DWARF2_CFI_ASM=1", + "-x", + "c", + "-o", + "/tmp/lexer-403cee.o", + "src/lexer/lexer.c" + ], + "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", + "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer.c", + "output": "/tmp/lexer-403cee.o" } ] diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 02be623..dd930da 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -121,6 +121,9 @@ lexer_state_t handle_false(lexer_t *lexer, char input); lexer_state_t handle_null(lexer_t *lexer, char input); lexer_state_t handle_keyword_end(lexer_t *lexer, char input); +// TODO (Abdelrahman): The printf functions in the state handlers are the exit +// points for the tokenisation function. Replace them once ready. + bool validate_json(char *json) { lexer_t lexer = {0}; lexer.line = 1; @@ -347,8 +350,10 @@ lexer_state_t handle_lexer_start(char input) { switch (input) { case '{': + printf("TK_L_BRACE\n"); return LEXER_STATE_OBJECT_START; case '[': + printf("TK_L_BRACKET\n"); return LEXER_STATE_ARRAY_START; } @@ -371,12 +376,14 @@ lexer_state_t handle_collection_end(lexer_t *lexer, char input) { bool object_end = lexer->current == LEXER_STATE_OBJECT && input == '}'; if (object_end) { + printf("TK_R_BRACE\n"); return LEXER_STATE_OBJECT_END; } bool array_end = lexer->current == LEXER_STATE_ARRAY && input == ']'; if (array_end) { + printf("TK_R_BRACKET\n"); return LEXER_STATE_ARRAY_END; } @@ -391,6 +398,7 @@ lexer_state_t handle_object(lexer_t *lexer, char input) { return LEXER_STATE_KEY; } else if (input == '}') { + printf("TK_R_BRACE\n"); return handle_collection_end(lexer, input); } @@ -401,6 +409,7 @@ lexer_state_t handle_array(lexer_t *lexer, char input) { if (isspace(input)) { return LEXER_STATE_ARRAY; } else if (input == ']') { + printf("TK_R_BRACKET\n"); return handle_collection_end(lexer, input); } @@ -432,8 +441,10 @@ lexer_state_t handle_value(lexer_t *lexer, char input) { return LEXER_STATE_DECIMAL; case '{': + printf("TK_L_BRACE\n"); return LEXER_STATE_OBJECT_START; case '[': + printf("TK_L_BRACKET\n"); return LEXER_STATE_ARRAY_START; case 't': case 'f': @@ -453,6 +464,7 @@ lexer_state_t handle_string(lexer_t *lexer, char input) { return LEXER_STATE_ESCAPE_SEQUENCE; case '"': + printf("TK_STRING: %s\n", dstr_to_cstr(lexer->current_string)); return LEXER_STATE_STRING_END; } @@ -473,12 +485,14 @@ lexer_state_t handle_string_end(lexer_t *lexer, char input) { bool key_end = lexer->current == LEXER_STATE_KEY && input == ':'; if (key_end) { + printf("TK_COLON\n"); return LEXER_STATE_VALUE; } bool value_end = lexer->current == LEXER_STATE_VALUE && input == ','; if (value_end) { + printf("TK_COMMA\n"); return lexer->stack.stack[lexer->stack.size - 1]; } @@ -545,10 +559,12 @@ lexer_state_t handle_number(lexer_t *lexer, char input) { return LEXER_STATE_FRACTION; } else if (input == '}' || input == ']') { + printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string)); dstr_clear(lexer->current_string); return handle_collection_end(lexer, input); } else if (input == ',') { + printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string)); dstr_clear(lexer->current_string); return lexer->stack.stack[lexer->stack.size - 1]; @@ -565,6 +581,7 @@ lexer_state_t handle_fraction(lexer_t *lexer, char input) { return LEXER_STATE_FRACTION; } else if (input == '}' || input == ']') { + printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string)); dstr_clear(lexer->current_string); return handle_collection_end(lexer, input); @@ -573,6 +590,7 @@ lexer_state_t handle_fraction(lexer_t *lexer, char input) { return LEXER_STATE_EXPONENT; } else if (input == ',') { + printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string)); dstr_clear(lexer->current_string); return lexer->stack.stack[lexer->stack.size - 1]; @@ -611,10 +629,12 @@ lexer_state_t handle_power(lexer_t *lexer, char input) { return LEXER_STATE_POWER; } else if (input == '}' || input == ']') { + printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string)); dstr_clear(lexer->current_string); return handle_collection_end(lexer, input); } else if (input == ',') { + printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string)); dstr_clear(lexer->current_string); return lexer->stack.stack[lexer->stack.size - 1]; @@ -626,6 +646,7 @@ lexer_state_t handle_power(lexer_t *lexer, char input) { } lexer_state_t handle_number_end(lexer_t *lexer, char input) { + printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string)); dstr_clear(lexer->current_string); if (isspace(input)) { @@ -715,6 +736,7 @@ lexer_state_t handle_null(lexer_t *lexer, char input) { } lexer_state_t handle_keyword_end(lexer_t *lexer, char input) { + printf("TK_KEYWORD: %s\n", lexer->keyword.keyword.str); clear_lex_str(&(lexer->keyword)); if (isspace(input)) {