Save string and number values for tokenisation
This commit is contained in:
parent
f0043a691c
commit
31e19a50fc
2
.vscode/launch.json
vendored
2
.vscode/launch.json
vendored
@ -10,7 +10,7 @@
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/main",
|
||||
"args": [
|
||||
"${workspaceFolder}/test_files/webapp.json"
|
||||
"${workspaceFolder}/test_files/menu.json"
|
||||
],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceFolder}",
|
||||
|
@ -118,12 +118,12 @@
|
||||
"-x",
|
||||
"c",
|
||||
"-o",
|
||||
"/tmp/main-c4d09c.o",
|
||||
"/tmp/main-e1ef59.o",
|
||||
"src/main.c"
|
||||
],
|
||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c",
|
||||
"output": "/tmp/main-c4d09c.o"
|
||||
"output": "/tmp/main-e1ef59.o"
|
||||
},
|
||||
{
|
||||
"arguments": [
|
||||
@ -187,12 +187,12 @@
|
||||
"-x",
|
||||
"c",
|
||||
"-o",
|
||||
"/tmp/dstring-9f956a.o",
|
||||
"/tmp/dstring-b2eb78.o",
|
||||
"src/dstring/dstring.c"
|
||||
],
|
||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/dstring/dstring.c",
|
||||
"output": "/tmp/dstring-9f956a.o"
|
||||
"output": "/tmp/dstring-b2eb78.o"
|
||||
},
|
||||
{
|
||||
"arguments": [
|
||||
@ -256,11 +256,11 @@
|
||||
"-x",
|
||||
"c",
|
||||
"-o",
|
||||
"/tmp/lexer-7622c3.o",
|
||||
"/tmp/lexer-b0ee1f.o",
|
||||
"src/lexer/lexer.c"
|
||||
],
|
||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer.c",
|
||||
"output": "/tmp/lexer-7622c3.o"
|
||||
"output": "/tmp/lexer-b0ee1f.o"
|
||||
}
|
||||
]
|
||||
|
@ -102,17 +102,17 @@ lexer_state_t handle_last_collection(char input);
|
||||
lexer_state_t handle_collection_end(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_object(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_array(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_key(lexer_t *lexer);
|
||||
lexer_state_t handle_key(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_value(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_string(char input);
|
||||
lexer_state_t handle_string(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_string_end(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_escape_sequence(char input);
|
||||
lexer_state_t handle_escape_sequence(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_decimal(char input);
|
||||
lexer_state_t handle_decimal(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_number(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_fraction(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_exponent(char input);
|
||||
lexer_state_t handle_exp_sign(char input);
|
||||
lexer_state_t handle_exponent(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_exp_sign(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_power(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_number_end(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_keyword(char input);
|
||||
@ -123,6 +123,8 @@ lexer_state_t handle_keyword_end(lexer_t *lexer, char input);
|
||||
|
||||
bool validate_json(char *json) {
|
||||
lexer_t lexer = {0};
|
||||
lexer.line = 1;
|
||||
lexer.column = 0;
|
||||
lexer.current = LEXER_STATE_START;
|
||||
lexer.keyword.type = LEXER_STRING_KEYWORD;
|
||||
lexer.codepoint.type = LEXER_STRING_UNICODE;
|
||||
@ -135,10 +137,15 @@ bool validate_json(char *json) {
|
||||
}
|
||||
|
||||
for (char *c = json; *c != '\0'; ++c) {
|
||||
// printf("\nINPUT=>%s\n", c);
|
||||
// printf("STACK SIZE: %zu\n", lexer.stack.size);
|
||||
lexer_state_machine(&lexer, *c);
|
||||
|
||||
// Track the position in the text
|
||||
++(lexer.column);
|
||||
if (*c == '\n') {
|
||||
++(lexer.line);
|
||||
lexer.column = 0;
|
||||
}
|
||||
|
||||
if (lexer.current == LEXER_STATE_ERROR) {
|
||||
return INVALID_JSON;
|
||||
}
|
||||
@ -276,10 +283,10 @@ void lexer_state_machine(lexer_t *lexer, char input) {
|
||||
|
||||
break;
|
||||
case LEXER_STATE_KEY:
|
||||
lexer->current = handle_key(lexer);
|
||||
lexer->current = handle_key(lexer, input);
|
||||
break;
|
||||
case LEXER_STATE_DECIMAL:
|
||||
lexer->current = handle_decimal(input);
|
||||
lexer->current = handle_decimal(lexer, input);
|
||||
break;
|
||||
case LEXER_STATE_NUMBER:
|
||||
lexer->current = handle_number(lexer, input);
|
||||
@ -288,10 +295,10 @@ void lexer_state_machine(lexer_t *lexer, char input) {
|
||||
lexer->current = handle_fraction(lexer, input);
|
||||
break;
|
||||
case LEXER_STATE_EXPONENT:
|
||||
lexer->current = handle_exponent(input);
|
||||
lexer->current = handle_exponent(lexer, input);
|
||||
break;
|
||||
case LEXER_STATE_EXP_SIGN:
|
||||
lexer->current = handle_exp_sign(input);
|
||||
lexer->current = handle_exp_sign(lexer, input);
|
||||
break;
|
||||
case LEXER_STATE_POWER:
|
||||
lexer->current = handle_power(lexer, input);
|
||||
@ -300,13 +307,13 @@ void lexer_state_machine(lexer_t *lexer, char input) {
|
||||
lexer->current = handle_number_end(lexer, input);
|
||||
break;
|
||||
case LEXER_STATE_STRING:
|
||||
lexer->current = handle_string(input);
|
||||
lexer->current = handle_string(lexer, input);
|
||||
break;
|
||||
case LEXER_STATE_STRING_END:
|
||||
lexer->current = handle_string_end(lexer, input);
|
||||
break;
|
||||
case LEXER_STATE_ESCAPE_SEQUENCE:
|
||||
lexer->current = handle_escape_sequence(input);
|
||||
lexer->current = handle_escape_sequence(lexer, input);
|
||||
break;
|
||||
case LEXER_STATE_UNICODE_HEX:
|
||||
lexer->current = handle_unicode_sequence(lexer, input);
|
||||
@ -400,12 +407,18 @@ lexer_state_t handle_array(lexer_t *lexer, char input) {
|
||||
return handle_value(lexer, input);
|
||||
}
|
||||
|
||||
lexer_state_t handle_key(lexer_t *lexer) { return LEXER_STATE_STRING; }
|
||||
lexer_state_t handle_key(lexer_t *lexer, char input) {
|
||||
append_to_dstr(&(lexer->current_string), input);
|
||||
|
||||
return LEXER_STATE_STRING;
|
||||
}
|
||||
|
||||
lexer_state_t handle_value(lexer_t *lexer, char input) {
|
||||
if (isspace(input)) {
|
||||
return LEXER_STATE_VALUE;
|
||||
} else if (isdigit(input) && input != '0') {
|
||||
append_to_dstr(&(lexer->current_string), input);
|
||||
|
||||
return LEXER_STATE_NUMBER;
|
||||
}
|
||||
|
||||
@ -415,6 +428,8 @@ lexer_state_t handle_value(lexer_t *lexer, char input) {
|
||||
|
||||
return LEXER_STATE_STRING;
|
||||
case '0':
|
||||
append_to_dstr(&(lexer->current_string), input);
|
||||
|
||||
return LEXER_STATE_DECIMAL;
|
||||
case '{':
|
||||
return LEXER_STATE_OBJECT_START;
|
||||
@ -431,14 +446,18 @@ lexer_state_t handle_value(lexer_t *lexer, char input) {
|
||||
return LEXER_STATE_ERROR;
|
||||
}
|
||||
|
||||
lexer_state_t handle_string(char input) {
|
||||
lexer_state_t handle_string(lexer_t *lexer, char input) {
|
||||
switch (input) {
|
||||
case '\\':
|
||||
append_to_dstr(&(lexer->current_string), input);
|
||||
|
||||
return LEXER_STATE_ESCAPE_SEQUENCE;
|
||||
case '"':
|
||||
return LEXER_STATE_STRING_END;
|
||||
}
|
||||
|
||||
append_to_dstr(&(lexer->current_string), input);
|
||||
|
||||
return LEXER_STATE_STRING;
|
||||
}
|
||||
|
||||
@ -447,6 +466,8 @@ lexer_state_t handle_string_end(lexer_t *lexer, char input) {
|
||||
return LEXER_STATE_STRING_END;
|
||||
}
|
||||
|
||||
empty_dstr(lexer->current_string);
|
||||
|
||||
lexer->current = stack_pop(&(lexer->stack));
|
||||
|
||||
bool key_end = lexer->current == LEXER_STATE_KEY && input == ':';
|
||||
@ -467,7 +488,9 @@ lexer_state_t handle_string_end(lexer_t *lexer, char input) {
|
||||
: LEXER_STATE_ERROR;
|
||||
}
|
||||
|
||||
lexer_state_t handle_escape_sequence(char input) {
|
||||
lexer_state_t handle_escape_sequence(lexer_t *lexer, char input) {
|
||||
append_to_dstr(&(lexer->current_string), input);
|
||||
|
||||
switch (input) {
|
||||
case '"':
|
||||
case '/':
|
||||
@ -487,6 +510,7 @@ lexer_state_t handle_escape_sequence(char input) {
|
||||
|
||||
lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input) {
|
||||
append_to_lex_str(&(lexer->codepoint), input);
|
||||
append_to_dstr(&(lexer->current_string), input);
|
||||
|
||||
if (!ishex(input)) {
|
||||
clear_lex_str(&(lexer->codepoint));
|
||||
@ -501,7 +525,9 @@ lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input) {
|
||||
return LEXER_STATE_UNICODE_HEX;
|
||||
}
|
||||
|
||||
lexer_state_t handle_decimal(char input) {
|
||||
lexer_state_t handle_decimal(lexer_t *lexer, char input) {
|
||||
append_to_dstr(&(lexer->current_string), input);
|
||||
|
||||
if (input == '.') {
|
||||
return LEXER_STATE_FRACTION;
|
||||
}
|
||||
@ -511,12 +537,20 @@ lexer_state_t handle_decimal(char input) {
|
||||
|
||||
lexer_state_t handle_number(lexer_t *lexer, char input) {
|
||||
if (isdigit(input)) {
|
||||
append_to_dstr(&(lexer->current_string), input);
|
||||
|
||||
return LEXER_STATE_NUMBER;
|
||||
} else if (input == '.') {
|
||||
append_to_dstr(&(lexer->current_string), input);
|
||||
|
||||
return LEXER_STATE_FRACTION;
|
||||
} else if (input == '}' || input == ']') {
|
||||
empty_dstr(lexer->current_string);
|
||||
|
||||
return handle_collection_end(lexer, input);
|
||||
} else if (input == ',') {
|
||||
empty_dstr(lexer->current_string);
|
||||
|
||||
return lexer->stack.stack[lexer->stack.size - 1];
|
||||
} else if (isspace(input)) {
|
||||
return LEXER_STATE_NUMBER_END;
|
||||
@ -527,12 +561,20 @@ lexer_state_t handle_number(lexer_t *lexer, char input) {
|
||||
|
||||
lexer_state_t handle_fraction(lexer_t *lexer, char input) {
|
||||
if (isdigit(input)) {
|
||||
append_to_dstr(&(lexer->current_string), input);
|
||||
|
||||
return LEXER_STATE_FRACTION;
|
||||
} else if (input == '}' || input == ']') {
|
||||
empty_dstr(lexer->current_string);
|
||||
|
||||
return handle_collection_end(lexer, input);
|
||||
} else if (input == 'e' || input == 'E') {
|
||||
append_to_dstr(&(lexer->current_string), input);
|
||||
|
||||
return LEXER_STATE_EXPONENT;
|
||||
} else if (input == ',') {
|
||||
empty_dstr(lexer->current_string);
|
||||
|
||||
return lexer->stack.stack[lexer->stack.size - 1];
|
||||
} else if (isspace(input)) {
|
||||
return LEXER_STATE_NUMBER_END;
|
||||
@ -541,7 +583,9 @@ lexer_state_t handle_fraction(lexer_t *lexer, char input) {
|
||||
return LEXER_STATE_ERROR;
|
||||
}
|
||||
|
||||
lexer_state_t handle_exponent(char input) {
|
||||
lexer_state_t handle_exponent(lexer_t *lexer, char input) {
|
||||
append_to_dstr(&(lexer->current_string), input);
|
||||
|
||||
if (isdigit(input)) {
|
||||
return LEXER_STATE_POWER;
|
||||
} else if (input == '+' || input == '-') {
|
||||
@ -551,7 +595,9 @@ lexer_state_t handle_exponent(char input) {
|
||||
return LEXER_STATE_ERROR;
|
||||
}
|
||||
|
||||
lexer_state_t handle_exp_sign(char input) {
|
||||
lexer_state_t handle_exp_sign(lexer_t *lexer, char input) {
|
||||
append_to_dstr(&(lexer->current_string), input);
|
||||
|
||||
if (isdigit(input)) {
|
||||
return LEXER_STATE_POWER;
|
||||
}
|
||||
@ -561,10 +607,16 @@ lexer_state_t handle_exp_sign(char input) {
|
||||
|
||||
lexer_state_t handle_power(lexer_t *lexer, char input) {
|
||||
if (isdigit(input)) {
|
||||
append_to_dstr(&(lexer->current_string), input);
|
||||
|
||||
return LEXER_STATE_POWER;
|
||||
} else if (input == '}' || input == ']') {
|
||||
empty_dstr(lexer->current_string);
|
||||
|
||||
return handle_collection_end(lexer, input);
|
||||
} else if (input == ',') {
|
||||
empty_dstr(lexer->current_string);
|
||||
|
||||
return lexer->stack.stack[lexer->stack.size - 1];
|
||||
} else if (isspace(input)) {
|
||||
return LEXER_STATE_NUMBER_END;
|
||||
@ -574,6 +626,8 @@ lexer_state_t handle_power(lexer_t *lexer, char input) {
|
||||
}
|
||||
|
||||
lexer_state_t handle_number_end(lexer_t *lexer, char input) {
|
||||
empty_dstr(lexer->current_string);
|
||||
|
||||
if (isspace(input)) {
|
||||
return LEXER_STATE_NUMBER_END;
|
||||
} else if (input == ',') {
|
||||
|
Loading…
Reference in New Issue
Block a user