Add test files for unicode sequences
This commit is contained in:
		| @@ -95,12 +95,12 @@ | ||||
|       "-x", | ||||
|       "c", | ||||
|       "-o", | ||||
|       "/tmp/main-977e60.o", | ||||
|       "/tmp/main-dc9945.o", | ||||
|       "src/main.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", | ||||
|     "output": "/tmp/main-977e60.o" | ||||
|     "output": "/tmp/main-dc9945.o" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
| @@ -162,11 +162,11 @@ | ||||
|       "-x", | ||||
|       "c", | ||||
|       "-o", | ||||
|       "/tmp/lexer_states-04f606.o", | ||||
|       "/tmp/lexer_states-e000bc.o", | ||||
|       "src/lexer/lexer_states.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer_states.c", | ||||
|     "output": "/tmp/lexer_states-04f606.o" | ||||
|     "output": "/tmp/lexer_states-e000bc.o" | ||||
|   } | ||||
| ] | ||||
|   | ||||
| @@ -90,6 +90,8 @@ lexer_state_t stack_pop(state_stack_t *stack); | ||||
| void append_to_string(lexer_string_t *str, char input); | ||||
| void clear_string(lexer_string_t *str); | ||||
| bool strequal(const char *first, const char *second); | ||||
| bool is_valid_hex_char(const char input); | ||||
| bool ishex(const char input); | ||||
|  | ||||
| void lexer_state_machine(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_lexer_start(char input); | ||||
| @@ -102,6 +104,7 @@ lexer_state_t handle_value(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_string(char input); | ||||
| lexer_state_t handle_string_end(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_escape_sequence(char input); | ||||
| lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_decimal(char input); | ||||
| lexer_state_t handle_number(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_fraction(lexer_t *lexer, char input); | ||||
| @@ -173,6 +176,8 @@ void append_to_string(lexer_string_t *lex_str, char input) { | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   assert(str != NULL); | ||||
|  | ||||
|   str[(lex_str->size)++] = input; | ||||
| } | ||||
|  | ||||
| @@ -203,6 +208,30 @@ bool strequal(const char *first, const char *second) { | ||||
|   return strcmp(first, second) == 0; | ||||
| } | ||||
|  | ||||
/**
 * Report whether `input` is one of the hexadecimal letters.
 *
 * Only the letters 'A'..'F' and 'a'..'f' are accepted. Decimal digits are
 * NOT matched by this function, so '0'..'9' yield false.
 *
 * @param input character to classify
 * @return true for a hexadecimal letter, false for anything else
 */
bool is_valid_hex_char(const char input) {
  const bool upper_hex_letter = input >= 'A' && input <= 'F';
  const bool lower_hex_letter = input >= 'a' && input <= 'f';

  return upper_hex_letter || lower_hex_letter;
}
|  | ||||
/**
 * Report whether `input` is a hexadecimal digit: '0'..'9', 'a'..'f' or
 * 'A'..'F'.
 *
 * The argument is converted to unsigned char before it reaches <ctype.h>.
 * Passing a negative plain char (any byte >= 0x80 where char is signed) to
 * a character-classification function is undefined behaviour.
 *
 * @param input character to classify
 * @return true when `input` is a hexadecimal digit, false otherwise
 */
bool ishex(const char input) {
  return isxdigit((unsigned char)input) != 0;
}
|  | ||||
| void lexer_state_machine(lexer_t *lexer, char input) { | ||||
|   switch (lexer->current) { | ||||
|   case LEXER_STATE_START: | ||||
| @@ -270,6 +299,7 @@ void lexer_state_machine(lexer_t *lexer, char input) { | ||||
|     lexer->current = handle_escape_sequence(input); | ||||
|     break; | ||||
|   case LEXER_STATE_UNICODE_HEX: | ||||
|     lexer->current = handle_unicode_sequence(lexer, input); | ||||
|     break; | ||||
|   case LEXER_STATE_TRUE: | ||||
|     lexer->current = handle_true(lexer, input); | ||||
| @@ -445,6 +475,22 @@ lexer_state_t handle_escape_sequence(char input) { | ||||
|   return LEXER_STATE_ERROR; | ||||
| } | ||||
|  | ||||
| lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input) { | ||||
|   append_to_string(&(lexer->codepoint), input); | ||||
|  | ||||
|   if (!ishex(input)) { | ||||
|     clear_string(&(lexer->codepoint)); | ||||
|  | ||||
|     return LEXER_STATE_ERROR; | ||||
|   } else if (lexer->codepoint.size == UNICODE_LENGTH) { | ||||
|     clear_string(&(lexer->codepoint)); | ||||
|  | ||||
|     return LEXER_STATE_STRING; | ||||
|   } | ||||
|  | ||||
|   return LEXER_STATE_UNICODE_HEX; | ||||
| } | ||||
|  | ||||
| lexer_state_t handle_decimal(char input) { | ||||
|   if (input == '.') { | ||||
|     return LEXER_STATE_FRACTION; | ||||
|   | ||||
| @@ -11,7 +11,9 @@ int main(int argc, char *argv[]) { | ||||
|     return EXIT_FAILURE; | ||||
|   } | ||||
|  | ||||
|   FILE *fp = fopen(argv[1], "r"); | ||||
|   const char *filename = argv[1]; | ||||
|  | ||||
|   FILE *fp = fopen(filename, "r"); | ||||
|  | ||||
|   fseek(fp, 0, SEEK_END); | ||||
|  | ||||
| @@ -26,7 +28,7 @@ int main(int argc, char *argv[]) { | ||||
|  | ||||
|   fclose(fp); | ||||
|  | ||||
|   printf("\n%s\n", validate_json(json) ? "VALID" : "INVALID"); | ||||
|   printf("\n%35s: %s\n", filename, validate_json(json) ? "VALID" : "INVALID"); | ||||
|  | ||||
|   return EXIT_SUCCESS; | ||||
| } | ||||
|   | ||||
							
								
								
									
										3
									
								
								test_files/unicode_invalid_01.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								test_files/unicode_invalid_01.json
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | ||||
| { | ||||
| 	"hello\u124": "testing" | ||||
| } | ||||
							
								
								
									
										3
									
								
								test_files/unicode_invalid_02.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								test_files/unicode_invalid_02.json
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | ||||
| { | ||||
| 	"hello\\u124f": "tes\uting" | ||||
| } | ||||
							
								
								
									
										3
									
								
								test_files/unicode_valid_01.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								test_files/unicode_valid_01.json
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | ||||
| { | ||||
| 	"hello\\u124f": "testing" | ||||
| } | ||||
							
								
								
									
										3
									
								
								test_files/unicode_valid_02.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								test_files/unicode_valid_02.json
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | ||||
| { | ||||
| 	"hello\\u124ffdr": "tes\\u7eacting" | ||||
| } | ||||
		Reference in New Issue
	
	Block a user