Add the get_next_token function
This commit is contained in:
parent
7845ad4b06
commit
efe07a3c60
@ -14,9 +14,9 @@
|
|||||||
"main",
|
"main",
|
||||||
"src/main.c"
|
"src/main.c"
|
||||||
],
|
],
|
||||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
"directory": "/Users/abdelrahman/dev/personal/say-it-in-json",
|
||||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c",
|
"file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/main.c",
|
||||||
"output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main"
|
"output": "/Users/abdelrahman/dev/personal/say-it-in-json/main"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"arguments": [
|
"arguments": [
|
||||||
@ -33,9 +33,9 @@
|
|||||||
"main",
|
"main",
|
||||||
"src/dstring/dstring.c"
|
"src/dstring/dstring.c"
|
||||||
],
|
],
|
||||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
"directory": "/Users/abdelrahman/dev/personal/say-it-in-json",
|
||||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/dstring/dstring.c",
|
"file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/dstring/dstring.c",
|
||||||
"output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main"
|
"output": "/Users/abdelrahman/dev/personal/say-it-in-json/main"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"arguments": [
|
"arguments": [
|
||||||
@ -52,215 +52,8 @@
|
|||||||
"main",
|
"main",
|
||||||
"src/lexer/lexer.c"
|
"src/lexer/lexer.c"
|
||||||
],
|
],
|
||||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
"directory": "/Users/abdelrahman/dev/personal/say-it-in-json",
|
||||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer.c",
|
"file": "/Users/abdelrahman/dev/personal/say-it-in-json/src/lexer/lexer.c",
|
||||||
"output": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/main"
|
"output": "/Users/abdelrahman/dev/personal/say-it-in-json/main"
|
||||||
},
|
|
||||||
{
|
|
||||||
"arguments": [
|
|
||||||
"/usr/bin/clang-16",
|
|
||||||
"-cc1",
|
|
||||||
"-triple",
|
|
||||||
"x86_64-redhat-linux-gnu",
|
|
||||||
"-emit-obj",
|
|
||||||
"-mrelax-all",
|
|
||||||
"-disable-free",
|
|
||||||
"-clear-ast-before-backend",
|
|
||||||
"-disable-llvm-verifier",
|
|
||||||
"-discard-value-names",
|
|
||||||
"-main-file-name",
|
|
||||||
"-mrelocation-model",
|
|
||||||
"static",
|
|
||||||
"-mframe-pointer=all",
|
|
||||||
"-fmath-errno",
|
|
||||||
"-ffp-contract=on",
|
|
||||||
"-fno-rounding-math",
|
|
||||||
"-mconstructor-aliases",
|
|
||||||
"-funwind-tables=2",
|
|
||||||
"-target-cpu",
|
|
||||||
"x86-64",
|
|
||||||
"-tune-cpu",
|
|
||||||
"generic",
|
|
||||||
"-mllvm",
|
|
||||||
"-treat-scalable-fixed-error-as-warning",
|
|
||||||
"-debug-info-kind=constructor",
|
|
||||||
"-dwarf-version=4",
|
|
||||||
"-debugger-tuning=gdb",
|
|
||||||
"-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json",
|
|
||||||
"-resource-dir",
|
|
||||||
"/usr/lib64/clang/16",
|
|
||||||
"-I",
|
|
||||||
"include",
|
|
||||||
"-I",
|
|
||||||
"include/dstring",
|
|
||||||
"-I",
|
|
||||||
"include/lexer",
|
|
||||||
"-internal-isystem",
|
|
||||||
"/usr/lib64/clang/16/include",
|
|
||||||
"-internal-isystem",
|
|
||||||
"/usr/local/include",
|
|
||||||
"-internal-isystem",
|
|
||||||
"/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include",
|
|
||||||
"-internal-externc-isystem",
|
|
||||||
"/include",
|
|
||||||
"-internal-externc-isystem",
|
|
||||||
"/usr/include",
|
|
||||||
"-Wall",
|
|
||||||
"-Werror",
|
|
||||||
"-pedantic",
|
|
||||||
"-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json",
|
|
||||||
"-ferror-limit",
|
|
||||||
"19",
|
|
||||||
"-fgnuc-version=4.2.1",
|
|
||||||
"-fcolor-diagnostics",
|
|
||||||
"-faddrsig",
|
|
||||||
"-D__GCC_HAVE_DWARF2_CFI_ASM=1",
|
|
||||||
"-x",
|
|
||||||
"c",
|
|
||||||
"-o",
|
|
||||||
"/tmp/main-1df523.o",
|
|
||||||
"src/main.c"
|
|
||||||
],
|
|
||||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
|
||||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c",
|
|
||||||
"output": "/tmp/main-1df523.o"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"arguments": [
|
|
||||||
"/usr/bin/clang-16",
|
|
||||||
"-cc1",
|
|
||||||
"-triple",
|
|
||||||
"x86_64-redhat-linux-gnu",
|
|
||||||
"-emit-obj",
|
|
||||||
"-mrelax-all",
|
|
||||||
"-disable-free",
|
|
||||||
"-clear-ast-before-backend",
|
|
||||||
"-disable-llvm-verifier",
|
|
||||||
"-discard-value-names",
|
|
||||||
"-main-file-name",
|
|
||||||
"-mrelocation-model",
|
|
||||||
"static",
|
|
||||||
"-mframe-pointer=all",
|
|
||||||
"-fmath-errno",
|
|
||||||
"-ffp-contract=on",
|
|
||||||
"-fno-rounding-math",
|
|
||||||
"-mconstructor-aliases",
|
|
||||||
"-funwind-tables=2",
|
|
||||||
"-target-cpu",
|
|
||||||
"x86-64",
|
|
||||||
"-tune-cpu",
|
|
||||||
"generic",
|
|
||||||
"-mllvm",
|
|
||||||
"-treat-scalable-fixed-error-as-warning",
|
|
||||||
"-debug-info-kind=constructor",
|
|
||||||
"-dwarf-version=4",
|
|
||||||
"-debugger-tuning=gdb",
|
|
||||||
"-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json",
|
|
||||||
"-resource-dir",
|
|
||||||
"/usr/lib64/clang/16",
|
|
||||||
"-I",
|
|
||||||
"include",
|
|
||||||
"-I",
|
|
||||||
"include/dstring",
|
|
||||||
"-I",
|
|
||||||
"include/lexer",
|
|
||||||
"-internal-isystem",
|
|
||||||
"/usr/lib64/clang/16/include",
|
|
||||||
"-internal-isystem",
|
|
||||||
"/usr/local/include",
|
|
||||||
"-internal-isystem",
|
|
||||||
"/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include",
|
|
||||||
"-internal-externc-isystem",
|
|
||||||
"/include",
|
|
||||||
"-internal-externc-isystem",
|
|
||||||
"/usr/include",
|
|
||||||
"-Wall",
|
|
||||||
"-Werror",
|
|
||||||
"-pedantic",
|
|
||||||
"-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json",
|
|
||||||
"-ferror-limit",
|
|
||||||
"19",
|
|
||||||
"-fgnuc-version=4.2.1",
|
|
||||||
"-fcolor-diagnostics",
|
|
||||||
"-faddrsig",
|
|
||||||
"-D__GCC_HAVE_DWARF2_CFI_ASM=1",
|
|
||||||
"-x",
|
|
||||||
"c",
|
|
||||||
"-o",
|
|
||||||
"/tmp/dstring-3eff44.o",
|
|
||||||
"src/dstring/dstring.c"
|
|
||||||
],
|
|
||||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
|
||||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/dstring/dstring.c",
|
|
||||||
"output": "/tmp/dstring-3eff44.o"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"arguments": [
|
|
||||||
"/usr/bin/clang-16",
|
|
||||||
"-cc1",
|
|
||||||
"-triple",
|
|
||||||
"x86_64-redhat-linux-gnu",
|
|
||||||
"-emit-obj",
|
|
||||||
"-mrelax-all",
|
|
||||||
"-disable-free",
|
|
||||||
"-clear-ast-before-backend",
|
|
||||||
"-disable-llvm-verifier",
|
|
||||||
"-discard-value-names",
|
|
||||||
"-main-file-name",
|
|
||||||
"-mrelocation-model",
|
|
||||||
"static",
|
|
||||||
"-mframe-pointer=all",
|
|
||||||
"-fmath-errno",
|
|
||||||
"-ffp-contract=on",
|
|
||||||
"-fno-rounding-math",
|
|
||||||
"-mconstructor-aliases",
|
|
||||||
"-funwind-tables=2",
|
|
||||||
"-target-cpu",
|
|
||||||
"x86-64",
|
|
||||||
"-tune-cpu",
|
|
||||||
"generic",
|
|
||||||
"-mllvm",
|
|
||||||
"-treat-scalable-fixed-error-as-warning",
|
|
||||||
"-debug-info-kind=constructor",
|
|
||||||
"-dwarf-version=4",
|
|
||||||
"-debugger-tuning=gdb",
|
|
||||||
"-fcoverage-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json",
|
|
||||||
"-resource-dir",
|
|
||||||
"/usr/lib64/clang/16",
|
|
||||||
"-I",
|
|
||||||
"include",
|
|
||||||
"-I",
|
|
||||||
"include/dstring",
|
|
||||||
"-I",
|
|
||||||
"include/lexer",
|
|
||||||
"-internal-isystem",
|
|
||||||
"/usr/lib64/clang/16/include",
|
|
||||||
"-internal-isystem",
|
|
||||||
"/usr/local/include",
|
|
||||||
"-internal-isystem",
|
|
||||||
"/usr/bin/../lib/gcc/x86_64-redhat-linux/13/../../../../x86_64-redhat-linux/include",
|
|
||||||
"-internal-externc-isystem",
|
|
||||||
"/include",
|
|
||||||
"-internal-externc-isystem",
|
|
||||||
"/usr/include",
|
|
||||||
"-Wall",
|
|
||||||
"-Werror",
|
|
||||||
"-pedantic",
|
|
||||||
"-fdebug-compilation-dir=/home/abdelrahman/dev_work/say_it_in_json",
|
|
||||||
"-ferror-limit",
|
|
||||||
"19",
|
|
||||||
"-fgnuc-version=4.2.1",
|
|
||||||
"-fcolor-diagnostics",
|
|
||||||
"-faddrsig",
|
|
||||||
"-D__GCC_HAVE_DWARF2_CFI_ASM=1",
|
|
||||||
"-x",
|
|
||||||
"c",
|
|
||||||
"-o",
|
|
||||||
"/tmp/lexer-b7cbfb.o",
|
|
||||||
"src/lexer/lexer.c"
|
|
||||||
],
|
|
||||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
|
||||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer.c",
|
|
||||||
"output": "/tmp/lexer-b7cbfb.o"
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -15,8 +15,6 @@ typedef enum {
|
|||||||
TK_R_BRACE,
|
TK_R_BRACE,
|
||||||
TK_L_BRACKET,
|
TK_L_BRACKET,
|
||||||
TK_R_BRACKET,
|
TK_R_BRACKET,
|
||||||
TK_COLON,
|
|
||||||
TK_COMMA,
|
|
||||||
TK_NULL,
|
TK_NULL,
|
||||||
TK_TRUE,
|
TK_TRUE,
|
||||||
TK_FALSE,
|
TK_FALSE,
|
||||||
@ -47,5 +45,6 @@ void lexer_free(lexer_t **lexer);
|
|||||||
token_t get_next_token(lexer_t *lexer, const char *text);
|
token_t get_next_token(lexer_t *lexer, const char *text);
|
||||||
|
|
||||||
bool validate_json(char *json);
|
bool validate_json(char *json);
|
||||||
|
void print_token(token_t token);
|
||||||
|
|
||||||
#endif // !LEXER_STATES_H
|
#endif // !LEXER_STATES_H
|
||||||
|
@ -81,13 +81,15 @@ struct lexer {
|
|||||||
u64 cursor;
|
u64 cursor;
|
||||||
u64 line;
|
u64 line;
|
||||||
u64 column;
|
u64 column;
|
||||||
const char *text;
|
|
||||||
u64 text_length;
|
u64 text_length;
|
||||||
|
const char *text;
|
||||||
lexer_state_t current;
|
lexer_state_t current;
|
||||||
state_stack_t stack;
|
state_stack_t stack;
|
||||||
lexer_string_t keyword;
|
lexer_string_t keyword;
|
||||||
lexer_string_t codepoint;
|
lexer_string_t codepoint;
|
||||||
dstr_t *current_string;
|
dstr_t *current_string;
|
||||||
|
bool token_ready;
|
||||||
|
token_t token;
|
||||||
};
|
};
|
||||||
|
|
||||||
void stack_push(state_stack_t *stack, lexer_state_t value);
|
void stack_push(state_stack_t *stack, lexer_state_t value);
|
||||||
@ -99,10 +101,13 @@ bool strequal(const char *first, const char *second);
|
|||||||
bool is_valid_hex_char(const char input);
|
bool is_valid_hex_char(const char input);
|
||||||
bool ishex(const char input);
|
bool ishex(const char input);
|
||||||
|
|
||||||
|
void set_token(lexer_t *lexer, token_type_t type, token_value_t value);
|
||||||
|
|
||||||
void lexer_state_machine(lexer_t *lexer, char input);
|
void lexer_state_machine(lexer_t *lexer, char input);
|
||||||
lexer_state_t handle_lexer_start(char input);
|
lexer_state_t handle_lexer_start(lexer_t *lexer, char input);
|
||||||
lexer_state_t handle_last_collection(char input);
|
lexer_state_t handle_last_collection(char input);
|
||||||
lexer_state_t handle_collection_end(lexer_t *lexer, char input);
|
lexer_state_t handle_collection_end(lexer_t *lexer, char input);
|
||||||
|
void handle_input_after_collection_end(lexer_t *lexer, char input);
|
||||||
lexer_state_t handle_object(lexer_t *lexer, char input);
|
lexer_state_t handle_object(lexer_t *lexer, char input);
|
||||||
lexer_state_t handle_array(lexer_t *lexer, char input);
|
lexer_state_t handle_array(lexer_t *lexer, char input);
|
||||||
lexer_state_t handle_key(lexer_t *lexer, char input);
|
lexer_state_t handle_key(lexer_t *lexer, char input);
|
||||||
@ -124,9 +129,6 @@ lexer_state_t handle_false(lexer_t *lexer, char input);
|
|||||||
lexer_state_t handle_null(lexer_t *lexer, char input);
|
lexer_state_t handle_null(lexer_t *lexer, char input);
|
||||||
lexer_state_t handle_keyword_end(lexer_t *lexer, char input);
|
lexer_state_t handle_keyword_end(lexer_t *lexer, char input);
|
||||||
|
|
||||||
// TODO (Abdelrahman): The printf functions in the state handlers are the exit
|
|
||||||
// points for the tokenisation function. Replace them once ready.
|
|
||||||
|
|
||||||
void lexer_init(lexer_t **lexer) {
|
void lexer_init(lexer_t **lexer) {
|
||||||
if (*lexer) {
|
if (*lexer) {
|
||||||
lexer_free(lexer);
|
lexer_free(lexer);
|
||||||
@ -141,10 +143,14 @@ void lexer_init(lexer_t **lexer) {
|
|||||||
(*lexer)->cursor = 0;
|
(*lexer)->cursor = 0;
|
||||||
(*lexer)->line = 1;
|
(*lexer)->line = 1;
|
||||||
(*lexer)->column = 0;
|
(*lexer)->column = 0;
|
||||||
|
(*lexer)->text_length = 0;
|
||||||
|
(*lexer)->text = "";
|
||||||
(*lexer)->current = LEXER_STATE_START;
|
(*lexer)->current = LEXER_STATE_START;
|
||||||
(*lexer)->keyword.type = LEXER_STRING_KEYWORD;
|
(*lexer)->keyword.type = LEXER_STRING_KEYWORD;
|
||||||
(*lexer)->codepoint.type = LEXER_STRING_UNICODE;
|
(*lexer)->codepoint.type = LEXER_STRING_UNICODE;
|
||||||
(*lexer)->current_string = dstr_with_capacity(STRING_BUF_START_CAPACITY);
|
(*lexer)->current_string = dstr_with_capacity(STRING_BUF_START_CAPACITY);
|
||||||
|
(*lexer)->token_ready = false;
|
||||||
|
(*lexer)->token = (token_t){0};
|
||||||
|
|
||||||
if (!((*lexer)->current_string)) {
|
if (!((*lexer)->current_string)) {
|
||||||
lexer_free(lexer);
|
lexer_free(lexer);
|
||||||
@ -167,10 +173,12 @@ token_t get_next_token(lexer_t *lexer, const char *text) {
|
|||||||
lexer->text_length = strlen(text);
|
lexer->text_length = strlen(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dstr_clear(lexer->current_string);
|
||||||
|
|
||||||
char c;
|
char c;
|
||||||
|
|
||||||
for (; lexer->cursor < lexer->text_length; ++(lexer->cursor)) {
|
while (lexer->cursor < lexer->text_length) {
|
||||||
c = lexer->text[lexer->cursor];
|
c = lexer->text[(lexer->cursor)++];
|
||||||
|
|
||||||
if (c == '\n') {
|
if (c == '\n') {
|
||||||
++(lexer->line);
|
++(lexer->line);
|
||||||
@ -184,6 +192,10 @@ token_t get_next_token(lexer_t *lexer, const char *text) {
|
|||||||
++(lexer->column);
|
++(lexer->column);
|
||||||
|
|
||||||
if (lexer->current == LEXER_STATE_ERROR) {
|
if (lexer->current == LEXER_STATE_ERROR) {
|
||||||
|
} else if (lexer->token_ready) {
|
||||||
|
lexer->token_ready = false;
|
||||||
|
|
||||||
|
return lexer->token;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -223,6 +235,49 @@ bool validate_json(char *json) {
|
|||||||
return lexer.current == LEXER_STATE_LAST_COLLECTION || lexer.stack.size == 0;
|
return lexer.current == LEXER_STATE_LAST_COLLECTION || lexer.stack.size == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void print_token(token_t token) {
|
||||||
|
printf("{LINE: %4llu, COLUMN: %4llu, TYPE: ", token.line, token.column);
|
||||||
|
|
||||||
|
switch (token.type) {
|
||||||
|
case TK_NO_TOKEN:
|
||||||
|
break;
|
||||||
|
case TK_L_BRACE:
|
||||||
|
printf("%15s, VALUE: N/A", "TK_L_BRACE");
|
||||||
|
break;
|
||||||
|
case TK_R_BRACE:
|
||||||
|
printf("%15s, VALUE: N/A", "TK_R_BRACE");
|
||||||
|
break;
|
||||||
|
case TK_L_BRACKET:
|
||||||
|
printf("%15s, VALUE: N/A", "TK_L_BRACKET");
|
||||||
|
break;
|
||||||
|
case TK_R_BRACKET:
|
||||||
|
printf("%15s, VALUE: N/A", "TK_R_BRACKET");
|
||||||
|
break;
|
||||||
|
case TK_NULL:
|
||||||
|
printf("%15s, VALUE: N/A", "TK_NULL");
|
||||||
|
break;
|
||||||
|
case TK_TRUE:
|
||||||
|
printf("%15s, VALUE: N/A", "TK_TRUE");
|
||||||
|
break;
|
||||||
|
case TK_FALSE:
|
||||||
|
printf("%15s, VALUE: N/A", "TK_FALSE");
|
||||||
|
break;
|
||||||
|
case TK_STR_KEY:
|
||||||
|
printf("%15s, VALUE: %s", "TK_STR_KEY", token.value.string);
|
||||||
|
break;
|
||||||
|
case TK_STR_VAL:
|
||||||
|
printf("%15s, VALUE: %s", "TK_STR_VAL", token.value.string);
|
||||||
|
break;
|
||||||
|
case TK_INTEGER:
|
||||||
|
break;
|
||||||
|
case TK_DOUBLE:
|
||||||
|
printf("%15s, VALUE: %s", "TK_DOUBLE", token.value.string);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("}\n");
|
||||||
|
}
|
||||||
|
|
||||||
void stack_push(state_stack_t *stack, lexer_state_t state) {
|
void stack_push(state_stack_t *stack, lexer_state_t state) {
|
||||||
if (stack->size + 1 >= MAX_STACK_CAPACITY) {
|
if (stack->size + 1 >= MAX_STACK_CAPACITY) {
|
||||||
return;
|
return;
|
||||||
@ -318,10 +373,21 @@ bool ishex(const char input) {
|
|||||||
return isdigit(input) || is_valid_hex_char(input);
|
return isdigit(input) || is_valid_hex_char(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void set_token(lexer_t *lexer, token_type_t type, token_value_t value) {
|
||||||
|
lexer->token_ready = true;
|
||||||
|
|
||||||
|
lexer->token = (token_t){
|
||||||
|
.line = lexer->line,
|
||||||
|
.column = lexer->column,
|
||||||
|
.type = type,
|
||||||
|
.value = value,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
void lexer_state_machine(lexer_t *lexer, char input) {
|
void lexer_state_machine(lexer_t *lexer, char input) {
|
||||||
switch (lexer->current) {
|
switch (lexer->current) {
|
||||||
case LEXER_STATE_START:
|
case LEXER_STATE_START:
|
||||||
lexer->current = handle_lexer_start(input);
|
lexer->current = handle_lexer_start(lexer, input);
|
||||||
break;
|
break;
|
||||||
case LEXER_STATE_VALUE:
|
case LEXER_STATE_VALUE:
|
||||||
lexer->current = handle_value(lexer, input);
|
lexer->current = handle_value(lexer, input);
|
||||||
@ -350,6 +416,8 @@ void lexer_state_machine(lexer_t *lexer, char input) {
|
|||||||
lexer->current = LEXER_STATE_LAST_COLLECTION;
|
lexer->current = LEXER_STATE_LAST_COLLECTION;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
handle_input_after_collection_end(lexer, input);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case LEXER_STATE_KEY:
|
case LEXER_STATE_KEY:
|
||||||
lexer->current = handle_key(lexer, input);
|
lexer->current = handle_key(lexer, input);
|
||||||
@ -409,17 +477,19 @@ void lexer_state_machine(lexer_t *lexer, char input) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lexer_state_t handle_lexer_start(char input) {
|
lexer_state_t handle_lexer_start(lexer_t *lexer, char input) {
|
||||||
if (isspace(input)) {
|
if (isspace(input)) {
|
||||||
return LEXER_STATE_START;
|
return LEXER_STATE_START;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (input) {
|
switch (input) {
|
||||||
case '{':
|
case '{':
|
||||||
printf("TK_L_BRACE\n");
|
set_token(lexer, TK_L_BRACE, (token_value_t){0});
|
||||||
|
|
||||||
return LEXER_STATE_OBJECT_START;
|
return LEXER_STATE_OBJECT_START;
|
||||||
case '[':
|
case '[':
|
||||||
printf("TK_L_BRACKET\n");
|
set_token(lexer, TK_L_BRACKET, (token_value_t){0});
|
||||||
|
|
||||||
return LEXER_STATE_ARRAY_START;
|
return LEXER_STATE_ARRAY_START;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -442,20 +512,35 @@ lexer_state_t handle_collection_end(lexer_t *lexer, char input) {
|
|||||||
bool object_end = lexer->current == LEXER_STATE_OBJECT && input == '}';
|
bool object_end = lexer->current == LEXER_STATE_OBJECT && input == '}';
|
||||||
|
|
||||||
if (object_end) {
|
if (object_end) {
|
||||||
printf("TK_R_BRACE\n");
|
set_token(lexer, TK_R_BRACE, (token_value_t){0});
|
||||||
|
|
||||||
return LEXER_STATE_OBJECT_END;
|
return LEXER_STATE_OBJECT_END;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool array_end = lexer->current == LEXER_STATE_ARRAY && input == ']';
|
bool array_end = lexer->current == LEXER_STATE_ARRAY && input == ']';
|
||||||
|
|
||||||
if (array_end) {
|
if (array_end) {
|
||||||
printf("TK_R_BRACKET\n");
|
set_token(lexer, TK_R_BRACKET, (token_value_t){0});
|
||||||
|
|
||||||
return LEXER_STATE_ARRAY_END;
|
return LEXER_STATE_ARRAY_END;
|
||||||
}
|
}
|
||||||
|
|
||||||
return LEXER_STATE_ERROR;
|
return LEXER_STATE_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void handle_input_after_collection_end(lexer_t *lexer, char input) {
|
||||||
|
switch (input) {
|
||||||
|
case '}':
|
||||||
|
set_token(lexer, TK_R_BRACE, (token_value_t){0});
|
||||||
|
|
||||||
|
break;
|
||||||
|
case ']':
|
||||||
|
set_token(lexer, TK_R_BRACKET, (token_value_t){0});
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
lexer_state_t handle_object(lexer_t *lexer, char input) {
|
lexer_state_t handle_object(lexer_t *lexer, char input) {
|
||||||
if (isspace(input)) {
|
if (isspace(input)) {
|
||||||
return LEXER_STATE_OBJECT;
|
return LEXER_STATE_OBJECT;
|
||||||
@ -464,7 +549,6 @@ lexer_state_t handle_object(lexer_t *lexer, char input) {
|
|||||||
|
|
||||||
return LEXER_STATE_KEY;
|
return LEXER_STATE_KEY;
|
||||||
} else if (input == '}') {
|
} else if (input == '}') {
|
||||||
printf("TK_R_BRACE\n");
|
|
||||||
return handle_collection_end(lexer, input);
|
return handle_collection_end(lexer, input);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -475,7 +559,6 @@ lexer_state_t handle_array(lexer_t *lexer, char input) {
|
|||||||
if (isspace(input)) {
|
if (isspace(input)) {
|
||||||
return LEXER_STATE_ARRAY;
|
return LEXER_STATE_ARRAY;
|
||||||
} else if (input == ']') {
|
} else if (input == ']') {
|
||||||
printf("TK_R_BRACKET\n");
|
|
||||||
return handle_collection_end(lexer, input);
|
return handle_collection_end(lexer, input);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -507,10 +590,12 @@ lexer_state_t handle_value(lexer_t *lexer, char input) {
|
|||||||
|
|
||||||
return LEXER_STATE_DECIMAL;
|
return LEXER_STATE_DECIMAL;
|
||||||
case '{':
|
case '{':
|
||||||
printf("TK_L_BRACE\n");
|
set_token(lexer, TK_L_BRACE, (token_value_t){0});
|
||||||
|
|
||||||
return LEXER_STATE_OBJECT_START;
|
return LEXER_STATE_OBJECT_START;
|
||||||
case '[':
|
case '[':
|
||||||
printf("TK_L_BRACKET\n");
|
set_token(lexer, TK_L_BRACKET, (token_value_t){0});
|
||||||
|
|
||||||
return LEXER_STATE_ARRAY_START;
|
return LEXER_STATE_ARRAY_START;
|
||||||
case 't':
|
case 't':
|
||||||
case 'f':
|
case 'f':
|
||||||
@ -529,10 +614,20 @@ lexer_state_t handle_string(lexer_t *lexer, char input) {
|
|||||||
dstr_append(&(lexer->current_string), input);
|
dstr_append(&(lexer->current_string), input);
|
||||||
|
|
||||||
return LEXER_STATE_ESCAPE_SEQUENCE;
|
return LEXER_STATE_ESCAPE_SEQUENCE;
|
||||||
case '"':
|
case '"': {
|
||||||
printf("TK_STRING: %s\n", dstr_to_cstr(lexer->current_string));
|
lexer_state_t string_type = lexer->stack.stack[lexer->stack.size - 1];
|
||||||
|
|
||||||
|
if (string_type == LEXER_STATE_KEY) {
|
||||||
|
set_token(lexer, TK_STR_KEY,
|
||||||
|
(token_value_t){.string = dstr_to_cstr(lexer->current_string)});
|
||||||
|
} else if (string_type == LEXER_STATE_VALUE) {
|
||||||
|
set_token(lexer, TK_STR_VAL,
|
||||||
|
(token_value_t){.string = dstr_to_cstr(lexer->current_string)});
|
||||||
|
}
|
||||||
|
|
||||||
return LEXER_STATE_STRING_END;
|
return LEXER_STATE_STRING_END;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
dstr_append(&(lexer->current_string), input);
|
dstr_append(&(lexer->current_string), input);
|
||||||
|
|
||||||
@ -544,21 +639,17 @@ lexer_state_t handle_string_end(lexer_t *lexer, char input) {
|
|||||||
return LEXER_STATE_STRING_END;
|
return LEXER_STATE_STRING_END;
|
||||||
}
|
}
|
||||||
|
|
||||||
dstr_clear(lexer->current_string);
|
|
||||||
|
|
||||||
lexer->current = stack_pop(&(lexer->stack));
|
lexer->current = stack_pop(&(lexer->stack));
|
||||||
|
|
||||||
bool key_end = lexer->current == LEXER_STATE_KEY && input == ':';
|
bool key_end = lexer->current == LEXER_STATE_KEY && input == ':';
|
||||||
|
|
||||||
if (key_end) {
|
if (key_end) {
|
||||||
printf("TK_COLON\n");
|
|
||||||
return LEXER_STATE_VALUE;
|
return LEXER_STATE_VALUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool value_end = lexer->current == LEXER_STATE_VALUE && input == ',';
|
bool value_end = lexer->current == LEXER_STATE_VALUE && input == ',';
|
||||||
|
|
||||||
if (value_end) {
|
if (value_end) {
|
||||||
printf("TK_COMMA\n");
|
|
||||||
return lexer->stack.stack[lexer->stack.size - 1];
|
return lexer->stack.stack[lexer->stack.size - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -625,16 +716,25 @@ lexer_state_t handle_number(lexer_t *lexer, char input) {
|
|||||||
|
|
||||||
return LEXER_STATE_FRACTION;
|
return LEXER_STATE_FRACTION;
|
||||||
} else if (input == '}' || input == ']') {
|
} else if (input == '}' || input == ']') {
|
||||||
printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string));
|
// TODO (Abdelrahman): Set the token type correctly based on whether the
|
||||||
dstr_clear(lexer->current_string);
|
// number is an integer or a double
|
||||||
|
set_token(lexer, TK_DOUBLE,
|
||||||
|
(token_value_t){.string = dstr_to_cstr(lexer->current_string)});
|
||||||
|
|
||||||
return handle_collection_end(lexer, input);
|
return handle_collection_end(lexer, input);
|
||||||
} else if (input == ',') {
|
} else if (input == ',') {
|
||||||
printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string));
|
// TODO (Abdelrahman): Set the token type correctly based on whether the
|
||||||
dstr_clear(lexer->current_string);
|
// number is an integer or a double
|
||||||
|
set_token(lexer, TK_DOUBLE,
|
||||||
|
(token_value_t){.string = dstr_to_cstr(lexer->current_string)});
|
||||||
|
|
||||||
return lexer->stack.stack[lexer->stack.size - 1];
|
return lexer->stack.stack[lexer->stack.size - 1];
|
||||||
} else if (isspace(input)) {
|
} else if (isspace(input)) {
|
||||||
|
// TODO (Abdelrahman): Set the token type correctly based on whether the
|
||||||
|
// number is an integer or a double
|
||||||
|
set_token(lexer, TK_DOUBLE,
|
||||||
|
(token_value_t){.string = dstr_to_cstr(lexer->current_string)});
|
||||||
|
|
||||||
return LEXER_STATE_NUMBER_END;
|
return LEXER_STATE_NUMBER_END;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -647,8 +747,10 @@ lexer_state_t handle_fraction(lexer_t *lexer, char input) {
|
|||||||
|
|
||||||
return LEXER_STATE_FRACTION;
|
return LEXER_STATE_FRACTION;
|
||||||
} else if (input == '}' || input == ']') {
|
} else if (input == '}' || input == ']') {
|
||||||
printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string));
|
// TODO (Abdelrahman): Set the token type correctly based on whether the
|
||||||
dstr_clear(lexer->current_string);
|
// number is an integer or a double
|
||||||
|
set_token(lexer, TK_DOUBLE,
|
||||||
|
(token_value_t){.string = dstr_to_cstr(lexer->current_string)});
|
||||||
|
|
||||||
return handle_collection_end(lexer, input);
|
return handle_collection_end(lexer, input);
|
||||||
} else if (input == 'e' || input == 'E') {
|
} else if (input == 'e' || input == 'E') {
|
||||||
@ -656,11 +758,18 @@ lexer_state_t handle_fraction(lexer_t *lexer, char input) {
|
|||||||
|
|
||||||
return LEXER_STATE_EXPONENT;
|
return LEXER_STATE_EXPONENT;
|
||||||
} else if (input == ',') {
|
} else if (input == ',') {
|
||||||
printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string));
|
// TODO (Abdelrahman): Set the token type correctly based on whether the
|
||||||
dstr_clear(lexer->current_string);
|
// number is an integer or a double
|
||||||
|
set_token(lexer, TK_DOUBLE,
|
||||||
|
(token_value_t){.string = dstr_to_cstr(lexer->current_string)});
|
||||||
|
|
||||||
return lexer->stack.stack[lexer->stack.size - 1];
|
return lexer->stack.stack[lexer->stack.size - 1];
|
||||||
} else if (isspace(input)) {
|
} else if (isspace(input)) {
|
||||||
|
// TODO (Abdelrahman): Set the token type correctly based on whether the
|
||||||
|
// number is an integer or a double
|
||||||
|
set_token(lexer, TK_DOUBLE,
|
||||||
|
(token_value_t){.string = dstr_to_cstr(lexer->current_string)});
|
||||||
|
|
||||||
return LEXER_STATE_NUMBER_END;
|
return LEXER_STATE_NUMBER_END;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -695,16 +804,25 @@ lexer_state_t handle_power(lexer_t *lexer, char input) {
|
|||||||
|
|
||||||
return LEXER_STATE_POWER;
|
return LEXER_STATE_POWER;
|
||||||
} else if (input == '}' || input == ']') {
|
} else if (input == '}' || input == ']') {
|
||||||
printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string));
|
// TODO (Abdelrahman): Set the token type correctly based on whether the
|
||||||
dstr_clear(lexer->current_string);
|
// number is an integer or a double
|
||||||
|
set_token(lexer, TK_DOUBLE,
|
||||||
|
(token_value_t){.string = dstr_to_cstr(lexer->current_string)});
|
||||||
|
|
||||||
return handle_collection_end(lexer, input);
|
return handle_collection_end(lexer, input);
|
||||||
} else if (input == ',') {
|
} else if (input == ',') {
|
||||||
printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string));
|
// TODO (Abdelrahman): Set the token type correctly based on whether the
|
||||||
dstr_clear(lexer->current_string);
|
// number is an integer or a double
|
||||||
|
set_token(lexer, TK_DOUBLE,
|
||||||
|
(token_value_t){.string = dstr_to_cstr(lexer->current_string)});
|
||||||
|
|
||||||
return lexer->stack.stack[lexer->stack.size - 1];
|
return lexer->stack.stack[lexer->stack.size - 1];
|
||||||
} else if (isspace(input)) {
|
} else if (isspace(input)) {
|
||||||
|
// TODO (Abdelrahman): Set the token type correctly based on whether the
|
||||||
|
// number is an integer or a double
|
||||||
|
set_token(lexer, TK_DOUBLE,
|
||||||
|
(token_value_t){.string = dstr_to_cstr(lexer->current_string)});
|
||||||
|
|
||||||
return LEXER_STATE_NUMBER_END;
|
return LEXER_STATE_NUMBER_END;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -712,12 +830,14 @@ lexer_state_t handle_power(lexer_t *lexer, char input) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
lexer_state_t handle_number_end(lexer_t *lexer, char input) {
|
lexer_state_t handle_number_end(lexer_t *lexer, char input) {
|
||||||
printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string));
|
|
||||||
dstr_clear(lexer->current_string);
|
|
||||||
|
|
||||||
if (isspace(input)) {
|
if (isspace(input)) {
|
||||||
return LEXER_STATE_NUMBER_END;
|
return LEXER_STATE_NUMBER_END;
|
||||||
} else if (input == ',') {
|
} else if (input == ',') {
|
||||||
|
// TODO (Abdelrahman): Set the token type correctly based on whether the
|
||||||
|
// number is an integer or a double
|
||||||
|
set_token(lexer, TK_DOUBLE,
|
||||||
|
(token_value_t){.string = dstr_to_cstr(lexer->current_string)});
|
||||||
|
|
||||||
return lexer->stack.stack[lexer->stack.size - 1];
|
return lexer->stack.stack[lexer->stack.size - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -802,7 +922,16 @@ lexer_state_t handle_null(lexer_t *lexer, char input) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
lexer_state_t handle_keyword_end(lexer_t *lexer, char input) {
|
lexer_state_t handle_keyword_end(lexer_t *lexer, char input) {
|
||||||
printf("TK_KEYWORD: %s\n", lexer->keyword.keyword.str);
|
const char *keyword = lexer->keyword.keyword.str;
|
||||||
|
|
||||||
|
if (strequal(keyword, "null")) {
|
||||||
|
set_token(lexer, TK_NULL, (token_value_t){0});
|
||||||
|
} else if (strequal(keyword, "true")) {
|
||||||
|
set_token(lexer, TK_TRUE, (token_value_t){0});
|
||||||
|
} else if (strequal(keyword, "false")) {
|
||||||
|
set_token(lexer, TK_FALSE, (token_value_t){0});
|
||||||
|
}
|
||||||
|
|
||||||
clear_lex_str(&(lexer->keyword));
|
clear_lex_str(&(lexer->keyword));
|
||||||
|
|
||||||
if (isspace(input)) {
|
if (isspace(input)) {
|
||||||
|
@ -35,7 +35,12 @@ int main(int argc, char *argv[]) {
|
|||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
get_next_token(lexer, json);
|
token_t token = get_next_token(lexer, json);
|
||||||
|
while (token.type != TK_NO_TOKEN) {
|
||||||
|
print_token(token);
|
||||||
|
|
||||||
|
token = get_next_token(lexer, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
lexer_free(&lexer);
|
lexer_free(&lexer);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user