Add test files for unicode sequences
This commit is contained in:
		| @@ -95,12 +95,12 @@ | |||||||
|       "-x", |       "-x", | ||||||
|       "c", |       "c", | ||||||
|       "-o", |       "-o", | ||||||
|       "/tmp/main-977e60.o", |       "/tmp/main-dc9945.o", | ||||||
|       "src/main.c" |       "src/main.c" | ||||||
|     ], |     ], | ||||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", |     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", |     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", | ||||||
|     "output": "/tmp/main-977e60.o" |     "output": "/tmp/main-dc9945.o" | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|     "arguments": [ |     "arguments": [ | ||||||
| @@ -162,11 +162,11 @@ | |||||||
|       "-x", |       "-x", | ||||||
|       "c", |       "c", | ||||||
|       "-o", |       "-o", | ||||||
|       "/tmp/lexer_states-04f606.o", |       "/tmp/lexer_states-e000bc.o", | ||||||
|       "src/lexer/lexer_states.c" |       "src/lexer/lexer_states.c" | ||||||
|     ], |     ], | ||||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", |     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer_states.c", |     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer_states.c", | ||||||
|     "output": "/tmp/lexer_states-04f606.o" |     "output": "/tmp/lexer_states-e000bc.o" | ||||||
|   } |   } | ||||||
| ] | ] | ||||||
|   | |||||||
| @@ -90,6 +90,8 @@ lexer_state_t stack_pop(state_stack_t *stack); | |||||||
| void append_to_string(lexer_string_t *str, char input); | void append_to_string(lexer_string_t *str, char input); | ||||||
| void clear_string(lexer_string_t *str); | void clear_string(lexer_string_t *str); | ||||||
| bool strequal(const char *first, const char *second); | bool strequal(const char *first, const char *second); | ||||||
|  | bool is_valid_hex_char(const char input); | ||||||
|  | bool ishex(const char input); | ||||||
|  |  | ||||||
| void lexer_state_machine(lexer_t *lexer, char input); | void lexer_state_machine(lexer_t *lexer, char input); | ||||||
| lexer_state_t handle_lexer_start(char input); | lexer_state_t handle_lexer_start(char input); | ||||||
| @@ -102,6 +104,7 @@ lexer_state_t handle_value(lexer_t *lexer, char input); | |||||||
| lexer_state_t handle_string(char input); | lexer_state_t handle_string(char input); | ||||||
| lexer_state_t handle_string_end(lexer_t *lexer, char input); | lexer_state_t handle_string_end(lexer_t *lexer, char input); | ||||||
| lexer_state_t handle_escape_sequence(char input); | lexer_state_t handle_escape_sequence(char input); | ||||||
|  | lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input); | ||||||
| lexer_state_t handle_decimal(char input); | lexer_state_t handle_decimal(char input); | ||||||
| lexer_state_t handle_number(lexer_t *lexer, char input); | lexer_state_t handle_number(lexer_t *lexer, char input); | ||||||
| lexer_state_t handle_fraction(lexer_t *lexer, char input); | lexer_state_t handle_fraction(lexer_t *lexer, char input); | ||||||
| @@ -173,6 +176,8 @@ void append_to_string(lexer_string_t *lex_str, char input) { | |||||||
|     return; |     return; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   assert(str != NULL); | ||||||
|  |  | ||||||
|   str[(lex_str->size)++] = input; |   str[(lex_str->size)++] = input; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -203,6 +208,30 @@ bool strequal(const char *first, const char *second) { | |||||||
|   return strcmp(first, second) == 0; |   return strcmp(first, second) == 0; | ||||||
| } | } | ||||||
|  |  | ||||||
/*
 * Reports whether `input` is one of the hexadecimal letter digits
 * 'A'-'F' or 'a'-'f'.
 *
 * Decimal digits are deliberately NOT matched here; this function only
 * recognizes the alphabetic hex digits. Any other character, including
 * '\0', yields false.
 */
bool is_valid_hex_char(const char input) {
  /* Walk the accepted letters; the terminating NUL is never compared. */
  for (const char *letter = "ABCDEFabcdef"; *letter != '\0'; ++letter) {
    if (*letter == input) {
      return true;
    }
  }

  return false;
}
|  |  | ||||||
/*
 * Reports whether `input` is a hexadecimal digit: '0'-'9', 'a'-'f' or
 * 'A'-'F'.
 *
 * The argument is converted to unsigned char before it reaches the
 * <ctype.h> classifier: passing a negative plain `char` (any byte >= 0x80
 * on platforms where char is signed) to those functions is undefined
 * behavior. Such bytes are simply reported as non-hex.
 */
bool ishex(const char input) {
  return isxdigit((unsigned char)input) != 0;
}
|  |  | ||||||
| void lexer_state_machine(lexer_t *lexer, char input) { | void lexer_state_machine(lexer_t *lexer, char input) { | ||||||
|   switch (lexer->current) { |   switch (lexer->current) { | ||||||
|   case LEXER_STATE_START: |   case LEXER_STATE_START: | ||||||
| @@ -270,6 +299,7 @@ void lexer_state_machine(lexer_t *lexer, char input) { | |||||||
|     lexer->current = handle_escape_sequence(input); |     lexer->current = handle_escape_sequence(input); | ||||||
|     break; |     break; | ||||||
|   case LEXER_STATE_UNICODE_HEX: |   case LEXER_STATE_UNICODE_HEX: | ||||||
|  |     lexer->current = handle_unicode_sequence(lexer, input); | ||||||
|     break; |     break; | ||||||
|   case LEXER_STATE_TRUE: |   case LEXER_STATE_TRUE: | ||||||
|     lexer->current = handle_true(lexer, input); |     lexer->current = handle_true(lexer, input); | ||||||
| @@ -445,6 +475,22 @@ lexer_state_t handle_escape_sequence(char input) { | |||||||
|   return LEXER_STATE_ERROR; |   return LEXER_STATE_ERROR; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input) { | ||||||
|  |   append_to_string(&(lexer->codepoint), input); | ||||||
|  |  | ||||||
|  |   if (!ishex(input)) { | ||||||
|  |     clear_string(&(lexer->codepoint)); | ||||||
|  |  | ||||||
|  |     return LEXER_STATE_ERROR; | ||||||
|  |   } else if (lexer->codepoint.size == UNICODE_LENGTH) { | ||||||
|  |     clear_string(&(lexer->codepoint)); | ||||||
|  |  | ||||||
|  |     return LEXER_STATE_STRING; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   return LEXER_STATE_UNICODE_HEX; | ||||||
|  | } | ||||||
|  |  | ||||||
| lexer_state_t handle_decimal(char input) { | lexer_state_t handle_decimal(char input) { | ||||||
|   if (input == '.') { |   if (input == '.') { | ||||||
|     return LEXER_STATE_FRACTION; |     return LEXER_STATE_FRACTION; | ||||||
|   | |||||||
| @@ -11,7 +11,9 @@ int main(int argc, char *argv[]) { | |||||||
|     return EXIT_FAILURE; |     return EXIT_FAILURE; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   FILE *fp = fopen(argv[1], "r"); |   const char *filename = argv[1]; | ||||||
|  |  | ||||||
|  |   FILE *fp = fopen(filename, "r"); | ||||||
|  |  | ||||||
|   fseek(fp, 0, SEEK_END); |   fseek(fp, 0, SEEK_END); | ||||||
|  |  | ||||||
| @@ -26,7 +28,7 @@ int main(int argc, char *argv[]) { | |||||||
|  |  | ||||||
|   fclose(fp); |   fclose(fp); | ||||||
|  |  | ||||||
|   printf("\n%s\n", validate_json(json) ? "VALID" : "INVALID"); |   printf("\n%35s: %s\n", filename, validate_json(json) ? "VALID" : "INVALID"); | ||||||
|  |  | ||||||
|   return EXIT_SUCCESS; |   return EXIT_SUCCESS; | ||||||
| } | } | ||||||
|   | |||||||
							
								
								
									
										3
									
								
								test_files/unicode_invalid_01.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								test_files/unicode_invalid_01.json
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | |||||||
|  | { | ||||||
|  | 	"hello\u124": "testing" | ||||||
|  | } | ||||||
							
								
								
									
										3
									
								
								test_files/unicode_invalid_02.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								test_files/unicode_invalid_02.json
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | |||||||
|  | { | ||||||
|  | 	"hello\\u124f": "tes\uting" | ||||||
|  | } | ||||||
							
								
								
									
										3
									
								
								test_files/unicode_valid_01.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								test_files/unicode_valid_01.json
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | |||||||
|  | { | ||||||
|  | 	"hello\\u124f": "testing" | ||||||
|  | } | ||||||
							
								
								
									
										3
									
								
								test_files/unicode_valid_02.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								test_files/unicode_valid_02.json
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | |||||||
|  | { | ||||||
|  | 	"hello\\u124ffdr": "tes\\u7eacting" | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user