From 1b56aaa185873e24206e446e980df25fc80e4503 Mon Sep 17 00:00:00 2001 From: Abdelrahman Said Date: Tue, 20 Jun 2023 23:37:56 +0100 Subject: [PATCH] Started implementing the get_next_token function --- compile_commands.json | 12 ++++---- include/lexer/lexer.h | 6 ++++ src/lexer/lexer.c | 66 +++++++++++++++++++++++++++++++++++++++++++ src/main.c | 16 ++++++++++- 4 files changed, 93 insertions(+), 7 deletions(-) diff --git a/compile_commands.json b/compile_commands.json index 0c2bf13..327951b 100644 --- a/compile_commands.json +++ b/compile_commands.json @@ -118,12 +118,12 @@ "-x", "c", "-o", - "/tmp/main-368473.o", + "/tmp/main-1df523.o", "src/main.c" ], "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", - "output": "/tmp/main-368473.o" + "output": "/tmp/main-1df523.o" }, { "arguments": [ @@ -187,12 +187,12 @@ "-x", "c", "-o", - "/tmp/dstring-ea8650.o", + "/tmp/dstring-3eff44.o", "src/dstring/dstring.c" ], "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/dstring/dstring.c", - "output": "/tmp/dstring-ea8650.o" + "output": "/tmp/dstring-3eff44.o" }, { "arguments": [ @@ -256,11 +256,11 @@ "-x", "c", "-o", - "/tmp/lexer-403cee.o", + "/tmp/lexer-b7cbfb.o", "src/lexer/lexer.c" ], "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer.c", - "output": "/tmp/lexer-403cee.o" + "output": "/tmp/lexer-b7cbfb.o" } ] diff --git a/include/lexer/lexer.h b/include/lexer/lexer.h index 808db9d..3e25a72 100644 --- a/include/lexer/lexer.h +++ b/include/lexer/lexer.h @@ -33,12 +33,18 @@ typedef union { } token_value_t; typedef struct { + u64 line; + u64 column; token_type_t type; token_value_t value; } token_t; typedef struct lexer lexer_t; +void lexer_init(lexer_t **lexer); +void lexer_free(lexer_t **lexer); +token_t get_next_token(lexer_t *lexer, const char *text); + bool validate_json(char *json); #endif // !LEXER_STATES_H diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index dd930da..f49b196 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -78,8 +78,11 @@ typedef struct { } lexer_string_t; struct lexer { + u64 cursor; u64 line; u64 column; + const char *text; + u64 text_length; lexer_state_t current; state_stack_t stack; lexer_string_t keyword; @@ -124,6 +127,69 @@ lexer_state_t handle_keyword_end(lexer_t *lexer, char input); // TODO (Abdelrahman): The printf functions in the state handlers are the exit // points for the tokenisation function. Replace them once ready. +void lexer_init(lexer_t **lexer) { + if (*lexer) { + lexer_free(lexer); + } + + *lexer = (lexer_t *)malloc(sizeof(lexer_t)); + + if (!(*lexer)) { + return; + } + + (*lexer)->cursor = 0; + (*lexer)->line = 1; + (*lexer)->column = 0; + (*lexer)->current = LEXER_STATE_START; + (*lexer)->keyword.type = LEXER_STRING_KEYWORD; + (*lexer)->codepoint.type = LEXER_STRING_UNICODE; + (*lexer)->current_string = dstr_with_capacity(STRING_BUF_START_CAPACITY); + + if (!((*lexer)->current_string)) { + lexer_free(lexer); + } +} + +void lexer_free(lexer_t **lexer) { + if (!(*lexer)) { + return; + } + + free(*lexer); + *lexer = NULL; +} + +token_t get_next_token(lexer_t *lexer, const char *text) { + if (text != NULL) { + lexer->cursor = 0; + lexer->text = text; + lexer->text_length = strlen(text); + } + + char c; + + for (; lexer->cursor < lexer->text_length; ++(lexer->cursor)) { + c = lexer->text[lexer->cursor]; + + if (c == '\n') { + ++(lexer->line); + lexer->column = 0; + continue; + } + + lexer_state_machine(lexer, c); + + // Track the position in the text + ++(lexer->column); + + if (lexer->current == LEXER_STATE_ERROR) { + } + } + + return (token_t){0}; +} + bool validate_json(char *json) { lexer_t lexer = {0}; lexer.line = 1; diff --git a/src/main.c b/src/main.c index e07b667..a1edb94 100644 --- a/src/main.c +++ b/src/main.c @@ -4,6 +4,8 @@ #include #include +#define BUFFER_LENGTH 128 + int main(int argc, char *argv[]) { if (argc < 2) { printf("NO FILE PROVIDED\n"); @@ -28,7 +30,19 @@ int main(int argc, char *argv[]) { fclose(fp); - printf("\n%35s: %s\n", filename, validate_json(json) ? "VALID" : "INVALID"); + lexer_t *lexer = NULL; + + lexer_init(&lexer); + if (!lexer) { + return EXIT_FAILURE; + } + + get_next_token(lexer, json); + + lexer_free(&lexer); + + // printf("\n%35s: %s\n", filename, validate_json(json) ? "VALID" : + // "INVALID"); return EXIT_SUCCESS; }