diff --git a/compile_commands.json b/compile_commands.json index ea60dd1..3b0f741 100644 --- a/compile_commands.json +++ b/compile_commands.json @@ -95,12 +95,12 @@ "-x", "c", "-o", - "/tmp/main-977e60.o", + "/tmp/main-dc9945.o", "src/main.c" ], "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", - "output": "/tmp/main-977e60.o" + "output": "/tmp/main-dc9945.o" }, { "arguments": [ @@ -162,11 +162,11 @@ "-x", "c", "-o", - "/tmp/lexer_states-04f606.o", + "/tmp/lexer_states-e000bc.o", "src/lexer/lexer_states.c" ], "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer_states.c", - "output": "/tmp/lexer_states-04f606.o" + "output": "/tmp/lexer_states-e000bc.o" } ] diff --git a/src/lexer/lexer_states.c b/src/lexer/lexer_states.c index e697097..c04754c 100644 --- a/src/lexer/lexer_states.c +++ b/src/lexer/lexer_states.c @@ -90,6 +90,8 @@ lexer_state_t stack_pop(state_stack_t *stack); void append_to_string(lexer_string_t *str, char input); void clear_string(lexer_string_t *str); bool strequal(const char *first, const char *second); +bool is_valid_hex_char(const char input); +bool ishex(const char input); void lexer_state_machine(lexer_t *lexer, char input); lexer_state_t handle_lexer_start(char input); @@ -102,6 +104,7 @@ lexer_state_t handle_value(lexer_t *lexer, char input); lexer_state_t handle_string(char input); lexer_state_t handle_string_end(lexer_t *lexer, char input); lexer_state_t handle_escape_sequence(char input); +lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input); lexer_state_t handle_decimal(char input); lexer_state_t handle_number(lexer_t *lexer, char input); lexer_state_t handle_fraction(lexer_t *lexer, char input); @@ -173,6 +176,8 @@ void append_to_string(lexer_string_t *lex_str, char input) { return; } + assert(str != NULL); + str[(lex_str->size)++] = input; } @@ -203,6 +208,30 @@ bool strequal(const char *first, const char *second) { return strcmp(first, second) == 0; } +bool is_valid_hex_char(const char input) { + switch (input) { + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + return true; + } + + return false; +} + +bool ishex(const char input) { + return isdigit(input) || is_valid_hex_char(input); +} + void lexer_state_machine(lexer_t *lexer, char input) { switch (lexer->current) { case LEXER_STATE_START: @@ -270,6 +299,7 @@ void lexer_state_machine(lexer_t *lexer, char input) { lexer->current = handle_escape_sequence(input); break; case LEXER_STATE_UNICODE_HEX: + lexer->current = handle_unicode_sequence(lexer, input); break; case LEXER_STATE_TRUE: lexer->current = handle_true(lexer, input); @@ -445,6 +475,22 @@ lexer_state_t handle_escape_sequence(char input) { return LEXER_STATE_ERROR; } +lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input) { + append_to_string(&(lexer->codepoint), input); + + if (!ishex(input)) { + clear_string(&(lexer->codepoint)); + + return LEXER_STATE_ERROR; + } else if (lexer->codepoint.size == UNICODE_LENGTH) { + clear_string(&(lexer->codepoint)); + + return LEXER_STATE_STRING; + } + + return LEXER_STATE_UNICODE_HEX; +} + lexer_state_t handle_decimal(char input) { if (input == '.') { return LEXER_STATE_FRACTION; diff --git a/src/main.c b/src/main.c index 270f513..220c656 100644 --- a/src/main.c +++ b/src/main.c @@ -11,7 +11,9 @@ int main(int argc, char *argv[]) { return EXIT_FAILURE; } - FILE *fp = fopen(argv[1], "r"); + const char *filename = argv[1]; + + FILE *fp = fopen(filename, "r"); fseek(fp, 0, SEEK_END); @@ -26,7 +28,7 @@ int main(int argc, char *argv[]) { fclose(fp); - printf("\n%s\n", validate_json(json) ? "VALID" : "INVALID"); + printf("\n%35s: %s\n", filename, validate_json(json) ? "VALID" : "INVALID"); return EXIT_SUCCESS; } diff --git a/test_files/unicode_invalid_01.json b/test_files/unicode_invalid_01.json new file mode 100644 index 0000000..127294b --- /dev/null +++ b/test_files/unicode_invalid_01.json @@ -0,0 +1,3 @@ +{ + "hello\u124": "testing" +} diff --git a/test_files/unicode_invalid_02.json b/test_files/unicode_invalid_02.json new file mode 100644 index 0000000..fc80471 --- /dev/null +++ b/test_files/unicode_invalid_02.json @@ -0,0 +1,3 @@ +{ + "hello\\u124f": "tes\uting" +} diff --git a/test_files/unicode_valid_01.json b/test_files/unicode_valid_01.json new file mode 100644 index 0000000..b533080 --- /dev/null +++ b/test_files/unicode_valid_01.json @@ -0,0 +1,3 @@ +{ + "hello\\u124f": "testing" +} diff --git a/test_files/unicode_valid_02.json b/test_files/unicode_valid_02.json new file mode 100644 index 0000000..f25c93e --- /dev/null +++ b/test_files/unicode_valid_02.json @@ -0,0 +1,3 @@ +{ + "hello\\u124ffdr": "tes\\u7eacting" +}