get_next_token returns result which includes possible error and token
This commit is contained in:
parent
a6b45834fc
commit
1e510411dc
@ -15,9 +15,9 @@
|
||||
"main",
|
||||
"src/main.c"
|
||||
],
|
||||
"directory": "/Users/abdelrahman/dev/personal/say-it-in-json",
|
||||
"file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/main.c",
|
||||
"output": "/Users/abdelrahman/dev/personal/say-it-in-json/main"
|
||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c",
|
||||
"output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main"
|
||||
},
|
||||
{
|
||||
"arguments": [
|
||||
@ -35,9 +35,9 @@
|
||||
"main",
|
||||
"src/dstring/dstring.c"
|
||||
],
|
||||
"directory": "/Users/abdelrahman/dev/personal/say-it-in-json",
|
||||
"file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/dstring/dstring.c",
|
||||
"output": "/Users/abdelrahman/dev/personal/say-it-in-json/main"
|
||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/dstring/dstring.c",
|
||||
"output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main"
|
||||
},
|
||||
{
|
||||
"arguments": [
|
||||
@ -55,9 +55,9 @@
|
||||
"main",
|
||||
"src/json_entities/json_entities.c"
|
||||
],
|
||||
"directory": "/Users/abdelrahman/dev/personal/say-it-in-json",
|
||||
"file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/json_entities/json_entities.c",
|
||||
"output": "/Users/abdelrahman/dev/personal/say-it-in-json/main"
|
||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/json_entities/json_entities.c",
|
||||
"output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main"
|
||||
},
|
||||
{
|
||||
"arguments": [
|
||||
@ -75,8 +75,292 @@
|
||||
"main",
|
||||
"src/lexer/lexer.c"
|
||||
],
|
||||
"directory": "/Users/abdelrahman/dev/personal/say-it-in-json",
|
||||
"file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/lexer/lexer.c",
|
||||
"output": "/Users/abdelrahman/dev/personal/say-it-in-json/main"
|
||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer.c",
|
||||
"output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main"
|
||||
},
|
||||
{
|
||||
"arguments": [
|
||||
"/usr/bin/clang-16",
|
||||
"-cc1",
|
||||
"-triple",
|
||||
"x86_64-redhat-linux-gnu",
|
||||
"-emit-obj",
|
||||
"-mrelax-all",
|
||||
"-disable-free",
|
||||
"-clear-ast-before-backend",
|
||||
"-disable-llvm-verifier",
|
||||
"-discard-value-names",
|
||||
"-main-file-name",
|
||||
"-mrelocation-model",
|
||||
"static",
|
||||
"-mframe-pointer=all",
|
||||
"-fmath-errno",
|
||||
"-ffp-contract=on",
|
||||
"-fno-rounding-math",
|
||||
"-mconstructor-aliases",
|
||||
"-funwind-tables=2",
|
||||
"-target-cpu",
|
||||
"x86-64",
|
||||
"-tune-cpu",
|
||||
"generic",
|
||||
"-mllvm",
|
||||
"-treat-scalable-fixed-error-as-warning",
|
||||
"-debug-info-kind=constructor",
|
||||
"-dwarf-version=4",
|
||||
"-debugger-tuning=gdb",
|
||||
"-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json",
|
||||
"-resource-dir",
|
||||
"/usr/lib64/clang/16",
|
||||
"-I",
|
||||
"include",
|
||||
"-I",
|
||||
"include/dstring",
|
||||
"-I",
|
||||
"include/json_entities",
|
||||
"-I",
|
||||
"include/lexer",
|
||||
"-internal-isystem",
|
||||
"/usr/lib64/clang/16/include",
|
||||
"-internal-isystem",
|
||||
"/usr/local/include",
|
||||
"-internal-isystem",
|
||||
"/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include",
|
||||
"-internal-externc-isystem",
|
||||
"/include",
|
||||
"-internal-externc-isystem",
|
||||
"/usr/include",
|
||||
"-Wall",
|
||||
"-Werror",
|
||||
"-pedantic",
|
||||
"-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json",
|
||||
"-ferror-limit",
|
||||
"19",
|
||||
"-fgnuc-version=4.2.1",
|
||||
"-fcolor-diagnostics",
|
||||
"-faddrsig",
|
||||
"-D__GCC_HAVE_DWARF2_CFI_ASM=1",
|
||||
"-x",
|
||||
"c",
|
||||
"-o",
|
||||
"/tmp/main-8f77b1.o",
|
||||
"src/main.c"
|
||||
],
|
||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c",
|
||||
"output": "/tmp/main-8f77b1.o"
|
||||
},
|
||||
{
|
||||
"arguments": [
|
||||
"/usr/bin/clang-16",
|
||||
"-cc1",
|
||||
"-triple",
|
||||
"x86_64-redhat-linux-gnu",
|
||||
"-emit-obj",
|
||||
"-mrelax-all",
|
||||
"-disable-free",
|
||||
"-clear-ast-before-backend",
|
||||
"-disable-llvm-verifier",
|
||||
"-discard-value-names",
|
||||
"-main-file-name",
|
||||
"-mrelocation-model",
|
||||
"static",
|
||||
"-mframe-pointer=all",
|
||||
"-fmath-errno",
|
||||
"-ffp-contract=on",
|
||||
"-fno-rounding-math",
|
||||
"-mconstructor-aliases",
|
||||
"-funwind-tables=2",
|
||||
"-target-cpu",
|
||||
"x86-64",
|
||||
"-tune-cpu",
|
||||
"generic",
|
||||
"-mllvm",
|
||||
"-treat-scalable-fixed-error-as-warning",
|
||||
"-debug-info-kind=constructor",
|
||||
"-dwarf-version=4",
|
||||
"-debugger-tuning=gdb",
|
||||
"-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json",
|
||||
"-resource-dir",
|
||||
"/usr/lib64/clang/16",
|
||||
"-I",
|
||||
"include",
|
||||
"-I",
|
||||
"include/dstring",
|
||||
"-I",
|
||||
"include/json_entities",
|
||||
"-I",
|
||||
"include/lexer",
|
||||
"-internal-isystem",
|
||||
"/usr/lib64/clang/16/include",
|
||||
"-internal-isystem",
|
||||
"/usr/local/include",
|
||||
"-internal-isystem",
|
||||
"/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include",
|
||||
"-internal-externc-isystem",
|
||||
"/include",
|
||||
"-internal-externc-isystem",
|
||||
"/usr/include",
|
||||
"-Wall",
|
||||
"-Werror",
|
||||
"-pedantic",
|
||||
"-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json",
|
||||
"-ferror-limit",
|
||||
"19",
|
||||
"-fgnuc-version=4.2.1",
|
||||
"-fcolor-diagnostics",
|
||||
"-faddrsig",
|
||||
"-D__GCC_HAVE_DWARF2_CFI_ASM=1",
|
||||
"-x",
|
||||
"c",
|
||||
"-o",
|
||||
"/tmp/dstring-e11abe.o",
|
||||
"src/dstring/dstring.c"
|
||||
],
|
||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/dstring/dstring.c",
|
||||
"output": "/tmp/dstring-e11abe.o"
|
||||
},
|
||||
{
|
||||
"arguments": [
|
||||
"/usr/bin/clang-16",
|
||||
"-cc1",
|
||||
"-triple",
|
||||
"x86_64-redhat-linux-gnu",
|
||||
"-emit-obj",
|
||||
"-mrelax-all",
|
||||
"-disable-free",
|
||||
"-clear-ast-before-backend",
|
||||
"-disable-llvm-verifier",
|
||||
"-discard-value-names",
|
||||
"-main-file-name",
|
||||
"-mrelocation-model",
|
||||
"static",
|
||||
"-mframe-pointer=all",
|
||||
"-fmath-errno",
|
||||
"-ffp-contract=on",
|
||||
"-fno-rounding-math",
|
||||
"-mconstructor-aliases",
|
||||
"-funwind-tables=2",
|
||||
"-target-cpu",
|
||||
"x86-64",
|
||||
"-tune-cpu",
|
||||
"generic",
|
||||
"-mllvm",
|
||||
"-treat-scalable-fixed-error-as-warning",
|
||||
"-debug-info-kind=constructor",
|
||||
"-dwarf-version=4",
|
||||
"-debugger-tuning=gdb",
|
||||
"-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json",
|
||||
"-resource-dir",
|
||||
"/usr/lib64/clang/16",
|
||||
"-I",
|
||||
"include",
|
||||
"-I",
|
||||
"include/dstring",
|
||||
"-I",
|
||||
"include/json_entities",
|
||||
"-I",
|
||||
"include/lexer",
|
||||
"-internal-isystem",
|
||||
"/usr/lib64/clang/16/include",
|
||||
"-internal-isystem",
|
||||
"/usr/local/include",
|
||||
"-internal-isystem",
|
||||
"/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include",
|
||||
"-internal-externc-isystem",
|
||||
"/include",
|
||||
"-internal-externc-isystem",
|
||||
"/usr/include",
|
||||
"-Wall",
|
||||
"-Werror",
|
||||
"-pedantic",
|
||||
"-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json",
|
||||
"-ferror-limit",
|
||||
"19",
|
||||
"-fgnuc-version=4.2.1",
|
||||
"-fcolor-diagnostics",
|
||||
"-faddrsig",
|
||||
"-D__GCC_HAVE_DWARF2_CFI_ASM=1",
|
||||
"-x",
|
||||
"c",
|
||||
"-o",
|
||||
"/tmp/json_entities-caa676.o",
|
||||
"src/json_entities/json_entities.c"
|
||||
],
|
||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/json_entities/json_entities.c",
|
||||
"output": "/tmp/json_entities-caa676.o"
|
||||
},
|
||||
{
|
||||
"arguments": [
|
||||
"/usr/bin/clang-16",
|
||||
"-cc1",
|
||||
"-triple",
|
||||
"x86_64-redhat-linux-gnu",
|
||||
"-emit-obj",
|
||||
"-mrelax-all",
|
||||
"-disable-free",
|
||||
"-clear-ast-before-backend",
|
||||
"-disable-llvm-verifier",
|
||||
"-discard-value-names",
|
||||
"-main-file-name",
|
||||
"-mrelocation-model",
|
||||
"static",
|
||||
"-mframe-pointer=all",
|
||||
"-fmath-errno",
|
||||
"-ffp-contract=on",
|
||||
"-fno-rounding-math",
|
||||
"-mconstructor-aliases",
|
||||
"-funwind-tables=2",
|
||||
"-target-cpu",
|
||||
"x86-64",
|
||||
"-tune-cpu",
|
||||
"generic",
|
||||
"-mllvm",
|
||||
"-treat-scalable-fixed-error-as-warning",
|
||||
"-debug-info-kind=constructor",
|
||||
"-dwarf-version=4",
|
||||
"-debugger-tuning=gdb",
|
||||
"-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json",
|
||||
"-resource-dir",
|
||||
"/usr/lib64/clang/16",
|
||||
"-I",
|
||||
"include",
|
||||
"-I",
|
||||
"include/dstring",
|
||||
"-I",
|
||||
"include/json_entities",
|
||||
"-I",
|
||||
"include/lexer",
|
||||
"-internal-isystem",
|
||||
"/usr/lib64/clang/16/include",
|
||||
"-internal-isystem",
|
||||
"/usr/local/include",
|
||||
"-internal-isystem",
|
||||
"/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include",
|
||||
"-internal-externc-isystem",
|
||||
"/include",
|
||||
"-internal-externc-isystem",
|
||||
"/usr/include",
|
||||
"-Wall",
|
||||
"-Werror",
|
||||
"-pedantic",
|
||||
"-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json",
|
||||
"-ferror-limit",
|
||||
"19",
|
||||
"-fgnuc-version=4.2.1",
|
||||
"-fcolor-diagnostics",
|
||||
"-faddrsig",
|
||||
"-D__GCC_HAVE_DWARF2_CFI_ASM=1",
|
||||
"-x",
|
||||
"c",
|
||||
"-o",
|
||||
"/tmp/lexer-7fcb6e.o",
|
||||
"src/lexer/lexer.c"
|
||||
],
|
||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer.c",
|
||||
"output": "/tmp/lexer-7fcb6e.o"
|
||||
}
|
||||
]
|
||||
|
@ -10,7 +10,7 @@
|
||||
typedef const char *str_view_t;
|
||||
|
||||
typedef enum {
|
||||
TK_NO_TOKEN,
|
||||
TK_NO_TOKEN,
|
||||
TK_L_BRACE,
|
||||
TK_R_BRACE,
|
||||
TK_L_BRACKET,
|
||||
@ -22,7 +22,7 @@ typedef enum {
|
||||
TK_STR_VAL,
|
||||
TK_INTEGER,
|
||||
TK_DOUBLE,
|
||||
} token_type_t;
|
||||
} token_type;
|
||||
|
||||
typedef union {
|
||||
void *no_val;
|
||||
@ -34,15 +34,30 @@ typedef union {
|
||||
typedef struct {
|
||||
u64 line;
|
||||
u64 column;
|
||||
token_type_t type;
|
||||
token_type type;
|
||||
token_value_t value;
|
||||
} token_t;
|
||||
|
||||
typedef enum {
|
||||
LEX_ERR_NONE,
|
||||
LEX_ERR_INVALID,
|
||||
} lex_err_type;
|
||||
|
||||
typedef struct {
|
||||
lex_err_type errno;
|
||||
str_view_t msg;
|
||||
} lex_err_t;
|
||||
|
||||
typedef struct {
|
||||
lex_err_t error;
|
||||
token_t token;
|
||||
} lex_result_t;
|
||||
|
||||
typedef struct lexer lexer_t;
|
||||
|
||||
void lexer_init(lexer_t **lexer);
|
||||
void lexer_free(lexer_t **lexer);
|
||||
token_t get_next_token(lexer_t *lexer, const char *text);
|
||||
lex_result_t get_next_token(lexer_t *lexer, const char *text);
|
||||
|
||||
bool validate_json(char *json);
|
||||
void print_token(token_t token);
|
||||
|
@ -92,6 +92,7 @@ struct lexer {
|
||||
token_t token;
|
||||
bool has_extra_token;
|
||||
token_t extra_token;
|
||||
dstr_t *error_message;
|
||||
};
|
||||
|
||||
void stack_push(state_stack_t *stack, lexer_state_t value);
|
||||
@ -104,7 +105,7 @@ bool is_valid_hex_char(const char input);
|
||||
bool ishex(const char input);
|
||||
|
||||
token_t dstr_to_numerical_token(const dstr_t *str);
|
||||
void set_token(token_t *token, u64 line, u64 column, token_type_t type,
|
||||
void set_token(token_t *token, u64 line, u64 column, token_type type,
|
||||
token_value_t value);
|
||||
|
||||
void lexer_state_machine(lexer_t *lexer, char input);
|
||||
@ -153,6 +154,7 @@ void lexer_init(lexer_t **lexer) {
|
||||
(*lexer)->keyword.type = LEXER_STRING_KEYWORD;
|
||||
(*lexer)->codepoint.type = LEXER_STRING_UNICODE;
|
||||
(*lexer)->current_string = dstr_with_capacity(STRING_BUF_START_CAPACITY);
|
||||
(*lexer)->error_message = dstr_with_capacity(STRING_BUF_START_CAPACITY);
|
||||
(*lexer)->token_ready = false;
|
||||
(*lexer)->token = (token_t){0};
|
||||
(*lexer)->has_extra_token = false;
|
||||
@ -168,11 +170,14 @@ void lexer_free(lexer_t **lexer) {
|
||||
return;
|
||||
}
|
||||
|
||||
dstr_free(&((*lexer)->current_string));
|
||||
dstr_free(&((*lexer)->error_message));
|
||||
|
||||
free(*lexer);
|
||||
*lexer = NULL;
|
||||
}
|
||||
|
||||
token_t get_next_token(lexer_t *lexer, const char *text) {
|
||||
lex_result_t get_next_token(lexer_t *lexer, const char *text) {
|
||||
if (text != NULL) {
|
||||
lexer->cursor = 0;
|
||||
lexer->text = text;
|
||||
@ -187,7 +192,10 @@ token_t get_next_token(lexer_t *lexer, const char *text) {
|
||||
if (lexer->has_extra_token) {
|
||||
lexer->has_extra_token = false;
|
||||
|
||||
return lexer->extra_token;
|
||||
return (lex_result_t){
|
||||
(lex_err_t){.errno = LEX_ERR_NONE, .msg = ""},
|
||||
lexer->extra_token,
|
||||
};
|
||||
}
|
||||
|
||||
c = lexer->text[(lexer->cursor)++];
|
||||
@ -202,14 +210,40 @@ token_t get_next_token(lexer_t *lexer, const char *text) {
|
||||
}
|
||||
|
||||
if (lexer->current == LEXER_STATE_ERROR) {
|
||||
char msg[STRING_BUF_START_CAPACITY + 1];
|
||||
memset(msg, 0, STRING_BUF_START_CAPACITY + 1);
|
||||
|
||||
u64 slice_length = 20;
|
||||
char slice[slice_length];
|
||||
snprintf(slice, slice_length, "%s", &(lexer->text[lexer->cursor - 1]));
|
||||
|
||||
snprintf(
|
||||
msg, STRING_BUF_START_CAPACITY,
|
||||
"\n(%llu:%llu) Encountered an error while parsing the following:\n%s",
|
||||
(unsigned long long)lexer->line, (unsigned long long)lexer->column,
|
||||
slice);
|
||||
|
||||
dstr_update(&(lexer->error_message), msg);
|
||||
|
||||
return (lex_result_t){
|
||||
(lex_err_t){.errno = LEX_ERR_INVALID,
|
||||
.msg = dstr_to_cstr(lexer->error_message)},
|
||||
(token_t){0},
|
||||
};
|
||||
} else if (lexer->token_ready) {
|
||||
lexer->token_ready = false;
|
||||
|
||||
return lexer->token;
|
||||
return (lex_result_t){
|
||||
(lex_err_t){.errno = LEX_ERR_NONE, .msg = ""},
|
||||
lexer->token,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return (token_t){0};
|
||||
return (lex_result_t){
|
||||
(lex_err_t){.errno = LEX_ERR_NONE, .msg = ""},
|
||||
(token_t){0},
|
||||
};
|
||||
}
|
||||
|
||||
bool validate_json(char *json) {
|
||||
@ -401,7 +435,7 @@ token_t dstr_to_numerical_token(const dstr_t *str) {
|
||||
return token;
|
||||
}
|
||||
|
||||
void set_token(token_t *token, u64 line, u64 column, token_type_t type,
|
||||
void set_token(token_t *token, u64 line, u64 column, token_type type,
|
||||
token_value_t value) {
|
||||
*token = (token_t){
|
||||
.line = line,
|
||||
|
18
src/main.c
18
src/main.c
@ -35,11 +35,21 @@ int main(int argc, char *argv[]) {
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
token_t token = get_next_token(lexer, json);
|
||||
while (token.type != TK_NO_TOKEN) {
|
||||
print_token(token);
|
||||
lex_result_t result = get_next_token(lexer, json);
|
||||
|
||||
token = get_next_token(lexer, NULL);
|
||||
if (result.error.errno) {
|
||||
printf("%s\n", result.error.msg);
|
||||
} else {
|
||||
while (result.token.type != TK_NO_TOKEN) {
|
||||
print_token(result.token);
|
||||
|
||||
result = get_next_token(lexer, NULL);
|
||||
|
||||
if (result.error.errno) {
|
||||
printf("%s\n", result.error.msg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lexer_free(&lexer);
|
||||
|
Loading…
Reference in New Issue
Block a user