Add test files for unicode sequences

This commit is contained in:
Abdelrahman Said 2023-06-18 00:23:40 +01:00
parent 386dfe72db
commit 909bcf3056
7 changed files with 66 additions and 6 deletions

View File

@ -95,12 +95,12 @@
"-x",
"c",
"-o",
"/tmp/main-977e60.o",
"/tmp/main-dc9945.o",
"src/main.c"
],
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c",
"output": "/tmp/main-977e60.o"
"output": "/tmp/main-dc9945.o"
},
{
"arguments": [
@ -162,11 +162,11 @@
"-x",
"c",
"-o",
"/tmp/lexer_states-04f606.o",
"/tmp/lexer_states-e000bc.o",
"src/lexer/lexer_states.c"
],
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer_states.c",
"output": "/tmp/lexer_states-04f606.o"
"output": "/tmp/lexer_states-e000bc.o"
}
]

View File

@ -90,6 +90,8 @@ lexer_state_t stack_pop(state_stack_t *stack);
void append_to_string(lexer_string_t *str, char input);
void clear_string(lexer_string_t *str);
bool strequal(const char *first, const char *second);
bool is_valid_hex_char(const char input);
bool ishex(const char input);
void lexer_state_machine(lexer_t *lexer, char input);
lexer_state_t handle_lexer_start(char input);
@ -102,6 +104,7 @@ lexer_state_t handle_value(lexer_t *lexer, char input);
lexer_state_t handle_string(char input);
lexer_state_t handle_string_end(lexer_t *lexer, char input);
lexer_state_t handle_escape_sequence(char input);
lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input);
lexer_state_t handle_decimal(char input);
lexer_state_t handle_number(lexer_t *lexer, char input);
lexer_state_t handle_fraction(lexer_t *lexer, char input);
@ -173,6 +176,8 @@ void append_to_string(lexer_string_t *lex_str, char input) {
return;
}
assert(str != NULL);
str[(lex_str->size)++] = input;
}
@ -203,6 +208,30 @@ bool strequal(const char *first, const char *second) {
return strcmp(first, second) == 0;
}
/*
 * Report whether `input` is one of the alphabetic hexadecimal digits,
 * i.e. a letter in 'A'..'F' or 'a'..'f'.
 *
 * Decimal digits are deliberately NOT accepted here; this function only
 * covers the letter part of the hexadecimal alphabet.
 *
 * The range comparisons assume the six letters are contiguous in the
 * execution character set (true for ASCII).
 */
bool is_valid_hex_char(const char input) {
  const bool is_upper_hex = (input >= 'A') && (input <= 'F');
  const bool is_lower_hex = (input >= 'a') && (input <= 'f');

  return is_upper_hex || is_lower_hex;
}
/*
 * Report whether `input` is a hexadecimal digit: '0'..'9', 'a'..'f' or
 * 'A'..'F'.
 *
 * The argument is converted to unsigned char before it reaches <ctype.h>:
 * passing a negative value other than EOF to a character-classification
 * function is undefined behaviour, and plain `char` may be signed, so any
 * input byte >= 0x80 would otherwise trigger it.
 *
 * isxdigit() accepts exactly the digits and the letters a-f / A-F, so the
 * set of accepted characters is unchanged.
 */
bool ishex(const char input) {
  return isxdigit((unsigned char)input) != 0;
}
void lexer_state_machine(lexer_t *lexer, char input) {
switch (lexer->current) {
case LEXER_STATE_START:
@ -270,6 +299,7 @@ void lexer_state_machine(lexer_t *lexer, char input) {
lexer->current = handle_escape_sequence(input);
break;
case LEXER_STATE_UNICODE_HEX:
lexer->current = handle_unicode_sequence(lexer, input);
break;
case LEXER_STATE_TRUE:
lexer->current = handle_true(lexer, input);
@ -445,6 +475,22 @@ lexer_state_t handle_escape_sequence(char input) {
return LEXER_STATE_ERROR;
}
/*
 * Lexer handler for LEXER_STATE_UNICODE_HEX: process one character while
 * the lexer is collecting the hex digits of a unicode sequence.
 *
 * The character is appended to lexer->codepoint first, then validated:
 *   - not a hex digit           -> buffer cleared, LEXER_STATE_ERROR
 *   - buffer holds exactly
 *     UNICODE_LENGTH characters -> buffer cleared, LEXER_STATE_STRING
 *   - otherwise                 -> LEXER_STATE_UNICODE_HEX (keep collecting)
 *
 * Returns the state the lexer should move to next.
 */
lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input) {
  lexer_string_t *codepoint = &(lexer->codepoint);

  append_to_string(codepoint, input);

  if (!ishex(input)) {
    /* Invalid digit: drop whatever was collected so far. */
    clear_string(codepoint);
    return LEXER_STATE_ERROR;
  }

  if (codepoint->size != UNICODE_LENGTH) {
    /* Sequence not complete yet; stay in this state. */
    return LEXER_STATE_UNICODE_HEX;
  }

  /* Full sequence read: reset the buffer and resume the enclosing string. */
  clear_string(codepoint);
  return LEXER_STATE_STRING;
}
lexer_state_t handle_decimal(char input) {
if (input == '.') {
return LEXER_STATE_FRACTION;

View File

@ -11,7 +11,9 @@ int main(int argc, char *argv[]) {
return EXIT_FAILURE;
}
FILE *fp = fopen(argv[1], "r");
const char *filename = argv[1];
FILE *fp = fopen(filename, "r");
fseek(fp, 0, SEEK_END);
@ -26,7 +28,7 @@ int main(int argc, char *argv[]) {
fclose(fp);
printf("\n%s\n", validate_json(json) ? "VALID" : "INVALID");
printf("\n%35s: %s\n", filename, validate_json(json) ? "VALID" : "INVALID");
return EXIT_SUCCESS;
}

View File

@ -0,0 +1,3 @@
{
"hello\u124": "testing"
}

View File

@ -0,0 +1,3 @@
{
"hello\\u124f": "tes\uting"
}

View File

@ -0,0 +1,3 @@
{
"hello\\u124f": "testing"
}

View File

@ -0,0 +1,3 @@
{
"hello\\u124ffdr": "tes\\u7eacting"
}