Save string and number values for tokenisation

This commit is contained in:
Abdelrahman Said 2023-06-18 22:57:08 +01:00
parent f0043a691c
commit 31e19a50fc
3 changed files with 81 additions and 27 deletions

2
.vscode/launch.json vendored
View File

@ -10,7 +10,7 @@
"request": "launch",
"program": "${workspaceFolder}/main",
"args": [
"${workspaceFolder}/test_files/webapp.json"
"${workspaceFolder}/test_files/menu.json"
],
"stopAtEntry": false,
"cwd": "${workspaceFolder}",

View File

@ -118,12 +118,12 @@
"-x",
"c",
"-o",
"/tmp/main-c4d09c.o",
"/tmp/main-e1ef59.o",
"src/main.c"
],
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c",
"output": "/tmp/main-c4d09c.o"
"output": "/tmp/main-e1ef59.o"
},
{
"arguments": [
@ -187,12 +187,12 @@
"-x",
"c",
"-o",
"/tmp/dstring-9f956a.o",
"/tmp/dstring-b2eb78.o",
"src/dstring/dstring.c"
],
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/dstring/dstring.c",
"output": "/tmp/dstring-9f956a.o"
"output": "/tmp/dstring-b2eb78.o"
},
{
"arguments": [
@ -256,11 +256,11 @@
"-x",
"c",
"-o",
"/tmp/lexer-7622c3.o",
"/tmp/lexer-b0ee1f.o",
"src/lexer/lexer.c"
],
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer.c",
"output": "/tmp/lexer-7622c3.o"
"output": "/tmp/lexer-b0ee1f.o"
}
]

View File

@ -102,17 +102,17 @@ lexer_state_t handle_last_collection(char input);
lexer_state_t handle_collection_end(lexer_t *lexer, char input);
lexer_state_t handle_object(lexer_t *lexer, char input);
lexer_state_t handle_array(lexer_t *lexer, char input);
lexer_state_t handle_key(lexer_t *lexer);
lexer_state_t handle_key(lexer_t *lexer, char input);
lexer_state_t handle_value(lexer_t *lexer, char input);
lexer_state_t handle_string(char input);
lexer_state_t handle_string(lexer_t *lexer, char input);
lexer_state_t handle_string_end(lexer_t *lexer, char input);
lexer_state_t handle_escape_sequence(char input);
lexer_state_t handle_escape_sequence(lexer_t *lexer, char input);
lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input);
lexer_state_t handle_decimal(char input);
lexer_state_t handle_decimal(lexer_t *lexer, char input);
lexer_state_t handle_number(lexer_t *lexer, char input);
lexer_state_t handle_fraction(lexer_t *lexer, char input);
lexer_state_t handle_exponent(char input);
lexer_state_t handle_exp_sign(char input);
lexer_state_t handle_exponent(lexer_t *lexer, char input);
lexer_state_t handle_exp_sign(lexer_t *lexer, char input);
lexer_state_t handle_power(lexer_t *lexer, char input);
lexer_state_t handle_number_end(lexer_t *lexer, char input);
lexer_state_t handle_keyword(char input);
@ -123,6 +123,8 @@ lexer_state_t handle_keyword_end(lexer_t *lexer, char input);
bool validate_json(char *json) {
lexer_t lexer = {0};
lexer.line = 1;
lexer.column = 0;
lexer.current = LEXER_STATE_START;
lexer.keyword.type = LEXER_STRING_KEYWORD;
lexer.codepoint.type = LEXER_STRING_UNICODE;
@ -135,10 +137,15 @@ bool validate_json(char *json) {
}
for (char *c = json; *c != '\0'; ++c) {
// printf("\nINPUT=>%s\n", c);
// printf("STACK SIZE: %zu\n", lexer.stack.size);
lexer_state_machine(&lexer, *c);
// Track the position in the text
++(lexer.column);
if (*c == '\n') {
++(lexer.line);
lexer.column = 0;
}
if (lexer.current == LEXER_STATE_ERROR) {
return INVALID_JSON;
}
@ -276,10 +283,10 @@ void lexer_state_machine(lexer_t *lexer, char input) {
break;
case LEXER_STATE_KEY:
lexer->current = handle_key(lexer);
lexer->current = handle_key(lexer, input);
break;
case LEXER_STATE_DECIMAL:
lexer->current = handle_decimal(input);
lexer->current = handle_decimal(lexer, input);
break;
case LEXER_STATE_NUMBER:
lexer->current = handle_number(lexer, input);
@ -288,10 +295,10 @@ void lexer_state_machine(lexer_t *lexer, char input) {
lexer->current = handle_fraction(lexer, input);
break;
case LEXER_STATE_EXPONENT:
lexer->current = handle_exponent(input);
lexer->current = handle_exponent(lexer, input);
break;
case LEXER_STATE_EXP_SIGN:
lexer->current = handle_exp_sign(input);
lexer->current = handle_exp_sign(lexer, input);
break;
case LEXER_STATE_POWER:
lexer->current = handle_power(lexer, input);
@ -300,13 +307,13 @@ void lexer_state_machine(lexer_t *lexer, char input) {
lexer->current = handle_number_end(lexer, input);
break;
case LEXER_STATE_STRING:
lexer->current = handle_string(input);
lexer->current = handle_string(lexer, input);
break;
case LEXER_STATE_STRING_END:
lexer->current = handle_string_end(lexer, input);
break;
case LEXER_STATE_ESCAPE_SEQUENCE:
lexer->current = handle_escape_sequence(input);
lexer->current = handle_escape_sequence(lexer, input);
break;
case LEXER_STATE_UNICODE_HEX:
lexer->current = handle_unicode_sequence(lexer, input);
@ -400,12 +407,18 @@ lexer_state_t handle_array(lexer_t *lexer, char input) {
return handle_value(lexer, input);
}
lexer_state_t handle_key(lexer_t *lexer) { return LEXER_STATE_STRING; }
// Handler dispatched by lexer_state_machine for LEXER_STATE_KEY.
//
// Appends `input` to lexer->current_string and always moves the lexer to
// LEXER_STATE_STRING; the character itself is not inspected here.
//
// NOTE(review): `input` is stored unconditionally, so a '"' or '\\' arriving
// in this state would be recorded as ordinary text — confirm whether this
// state can actually be entered with such a character.
lexer_state_t handle_key(lexer_t *lexer, char input) {
append_to_dstr(&(lexer->current_string), input);
return LEXER_STATE_STRING;
}
lexer_state_t handle_value(lexer_t *lexer, char input) {
if (isspace(input)) {
return LEXER_STATE_VALUE;
} else if (isdigit(input) && input != '0') {
append_to_dstr(&(lexer->current_string), input);
return LEXER_STATE_NUMBER;
}
@ -415,6 +428,8 @@ lexer_state_t handle_value(lexer_t *lexer, char input) {
return LEXER_STATE_STRING;
case '0':
append_to_dstr(&(lexer->current_string), input);
return LEXER_STATE_DECIMAL;
case '{':
return LEXER_STATE_OBJECT_START;
@ -431,14 +446,18 @@ lexer_state_t handle_value(lexer_t *lexer, char input) {
return LEXER_STATE_ERROR;
}
lexer_state_t handle_string(char input) {
// Consume one character while the lexer is in LEXER_STATE_STRING.
//
// - '"'  : nothing is stored; the lexer moves to LEXER_STATE_STRING_END.
// - '\\' : the backslash is stored in lexer->current_string and the lexer
//          moves to LEXER_STATE_ESCAPE_SEQUENCE.
// - else : the character is stored and the lexer stays in LEXER_STATE_STRING.
lexer_state_t handle_string(lexer_t *lexer, char input) {
  // A double quote is the only character that is not recorded.
  if (input == '"') {
    return LEXER_STATE_STRING_END;
  }

  // Every other character, including the backslash, is kept verbatim.
  append_to_dstr(&(lexer->current_string), input);

  return (input == '\\') ? LEXER_STATE_ESCAPE_SEQUENCE : LEXER_STATE_STRING;
}
@ -447,6 +466,8 @@ lexer_state_t handle_string_end(lexer_t *lexer, char input) {
return LEXER_STATE_STRING_END;
}
empty_dstr(lexer->current_string);
lexer->current = stack_pop(&(lexer->stack));
bool key_end = lexer->current == LEXER_STATE_KEY && input == ':';
@ -467,7 +488,9 @@ lexer_state_t handle_string_end(lexer_t *lexer, char input) {
: LEXER_STATE_ERROR;
}
lexer_state_t handle_escape_sequence(char input) {
lexer_state_t handle_escape_sequence(lexer_t *lexer, char input) {
append_to_dstr(&(lexer->current_string), input);
switch (input) {
case '"':
case '/':
@ -487,6 +510,7 @@ lexer_state_t handle_escape_sequence(char input) {
lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input) {
append_to_lex_str(&(lexer->codepoint), input);
append_to_dstr(&(lexer->current_string), input);
if (!ishex(input)) {
clear_lex_str(&(lexer->codepoint));
@ -501,7 +525,9 @@ lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input) {
return LEXER_STATE_UNICODE_HEX;
}
lexer_state_t handle_decimal(char input) {
lexer_state_t handle_decimal(lexer_t *lexer, char input) {
append_to_dstr(&(lexer->current_string), input);
if (input == '.') {
return LEXER_STATE_FRACTION;
}
@ -511,12 +537,20 @@ lexer_state_t handle_decimal(char input) {
lexer_state_t handle_number(lexer_t *lexer, char input) {
if (isdigit(input)) {
append_to_dstr(&(lexer->current_string), input);
return LEXER_STATE_NUMBER;
} else if (input == '.') {
append_to_dstr(&(lexer->current_string), input);
return LEXER_STATE_FRACTION;
} else if (input == '}' || input == ']') {
empty_dstr(lexer->current_string);
return handle_collection_end(lexer, input);
} else if (input == ',') {
empty_dstr(lexer->current_string);
return lexer->stack.stack[lexer->stack.size - 1];
} else if (isspace(input)) {
return LEXER_STATE_NUMBER_END;
@ -527,12 +561,20 @@ lexer_state_t handle_number(lexer_t *lexer, char input) {
lexer_state_t handle_fraction(lexer_t *lexer, char input) {
if (isdigit(input)) {
append_to_dstr(&(lexer->current_string), input);
return LEXER_STATE_FRACTION;
} else if (input == '}' || input == ']') {
empty_dstr(lexer->current_string);
return handle_collection_end(lexer, input);
} else if (input == 'e' || input == 'E') {
append_to_dstr(&(lexer->current_string), input);
return LEXER_STATE_EXPONENT;
} else if (input == ',') {
empty_dstr(lexer->current_string);
return lexer->stack.stack[lexer->stack.size - 1];
} else if (isspace(input)) {
return LEXER_STATE_NUMBER_END;
@ -541,7 +583,9 @@ lexer_state_t handle_fraction(lexer_t *lexer, char input) {
return LEXER_STATE_ERROR;
}
lexer_state_t handle_exponent(char input) {
lexer_state_t handle_exponent(lexer_t *lexer, char input) {
append_to_dstr(&(lexer->current_string), input);
if (isdigit(input)) {
return LEXER_STATE_POWER;
} else if (input == '+' || input == '-') {
@ -551,7 +595,9 @@ lexer_state_t handle_exponent(char input) {
return LEXER_STATE_ERROR;
}
lexer_state_t handle_exp_sign(char input) {
lexer_state_t handle_exp_sign(lexer_t *lexer, char input) {
append_to_dstr(&(lexer->current_string), input);
if (isdigit(input)) {
return LEXER_STATE_POWER;
}
@ -561,10 +607,16 @@ lexer_state_t handle_exp_sign(char input) {
lexer_state_t handle_power(lexer_t *lexer, char input) {
if (isdigit(input)) {
append_to_dstr(&(lexer->current_string), input);
return LEXER_STATE_POWER;
} else if (input == '}' || input == ']') {
empty_dstr(lexer->current_string);
return handle_collection_end(lexer, input);
} else if (input == ',') {
empty_dstr(lexer->current_string);
return lexer->stack.stack[lexer->stack.size - 1];
} else if (isspace(input)) {
return LEXER_STATE_NUMBER_END;
@ -574,6 +626,8 @@ lexer_state_t handle_power(lexer_t *lexer, char input) {
}
lexer_state_t handle_number_end(lexer_t *lexer, char input) {
empty_dstr(lexer->current_string);
if (isspace(input)) {
return LEXER_STATE_NUMBER_END;
} else if (input == ',') {