Save string and number values for tokenisation
This commit is contained in:
		
							
								
								
									
										2
									
								
								.vscode/launch.json
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.vscode/launch.json
									
									
									
									
										vendored
									
									
								
							| @@ -10,7 +10,7 @@ | ||||
|             "request": "launch", | ||||
|             "program": "${workspaceFolder}/main", | ||||
|             "args": [ | ||||
|                 "${workspaceFolder}/test_files/webapp.json" | ||||
|                 "${workspaceFolder}/test_files/menu.json" | ||||
|             ], | ||||
|             "stopAtEntry": false, | ||||
|             "cwd": "${workspaceFolder}", | ||||
|   | ||||
| @@ -118,12 +118,12 @@ | ||||
|       "-x", | ||||
|       "c", | ||||
|       "-o", | ||||
|       "/tmp/main-c4d09c.o", | ||||
|       "/tmp/main-e1ef59.o", | ||||
|       "src/main.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", | ||||
|     "output": "/tmp/main-c4d09c.o" | ||||
|     "output": "/tmp/main-e1ef59.o" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
| @@ -187,12 +187,12 @@ | ||||
|       "-x", | ||||
|       "c", | ||||
|       "-o", | ||||
|       "/tmp/dstring-9f956a.o", | ||||
|       "/tmp/dstring-b2eb78.o", | ||||
|       "src/dstring/dstring.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/dstring/dstring.c", | ||||
|     "output": "/tmp/dstring-9f956a.o" | ||||
|     "output": "/tmp/dstring-b2eb78.o" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
| @@ -256,11 +256,11 @@ | ||||
|       "-x", | ||||
|       "c", | ||||
|       "-o", | ||||
|       "/tmp/lexer-7622c3.o", | ||||
|       "/tmp/lexer-b0ee1f.o", | ||||
|       "src/lexer/lexer.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer.c", | ||||
|     "output": "/tmp/lexer-7622c3.o" | ||||
|     "output": "/tmp/lexer-b0ee1f.o" | ||||
|   } | ||||
| ] | ||||
|   | ||||
| @@ -102,17 +102,17 @@ lexer_state_t handle_last_collection(char input); | ||||
| lexer_state_t handle_collection_end(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_object(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_array(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_key(lexer_t *lexer); | ||||
| lexer_state_t handle_key(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_value(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_string(char input); | ||||
| lexer_state_t handle_string(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_string_end(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_escape_sequence(char input); | ||||
| lexer_state_t handle_escape_sequence(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_decimal(char input); | ||||
| lexer_state_t handle_decimal(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_number(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_fraction(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_exponent(char input); | ||||
| lexer_state_t handle_exp_sign(char input); | ||||
| lexer_state_t handle_exponent(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_exp_sign(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_power(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_number_end(lexer_t *lexer, char input); | ||||
| lexer_state_t handle_keyword(char input); | ||||
| @@ -123,6 +123,8 @@ lexer_state_t handle_keyword_end(lexer_t *lexer, char input); | ||||
|  | ||||
| bool validate_json(char *json) { | ||||
|   lexer_t lexer = {0}; | ||||
|   lexer.line = 1; | ||||
|   lexer.column = 0; | ||||
|   lexer.current = LEXER_STATE_START; | ||||
|   lexer.keyword.type = LEXER_STRING_KEYWORD; | ||||
|   lexer.codepoint.type = LEXER_STRING_UNICODE; | ||||
| @@ -135,10 +137,15 @@ bool validate_json(char *json) { | ||||
|   } | ||||
|  | ||||
|   for (char *c = json; *c != '\0'; ++c) { | ||||
|     // printf("\nINPUT=>%s\n", c); | ||||
|     // printf("STACK SIZE: %zu\n", lexer.stack.size); | ||||
|     lexer_state_machine(&lexer, *c); | ||||
|  | ||||
|     // Track the position in the text | ||||
|     ++(lexer.column); | ||||
|     if (*c == '\n') { | ||||
|       ++(lexer.line); | ||||
|       lexer.column = 0; | ||||
|     } | ||||
|  | ||||
|     if (lexer.current == LEXER_STATE_ERROR) { | ||||
|       return INVALID_JSON; | ||||
|     } | ||||
| @@ -276,10 +283,10 @@ void lexer_state_machine(lexer_t *lexer, char input) { | ||||
|  | ||||
|     break; | ||||
|   case LEXER_STATE_KEY: | ||||
|     lexer->current = handle_key(lexer); | ||||
|     lexer->current = handle_key(lexer, input); | ||||
|     break; | ||||
|   case LEXER_STATE_DECIMAL: | ||||
|     lexer->current = handle_decimal(input); | ||||
|     lexer->current = handle_decimal(lexer, input); | ||||
|     break; | ||||
|   case LEXER_STATE_NUMBER: | ||||
|     lexer->current = handle_number(lexer, input); | ||||
| @@ -288,10 +295,10 @@ void lexer_state_machine(lexer_t *lexer, char input) { | ||||
|     lexer->current = handle_fraction(lexer, input); | ||||
|     break; | ||||
|   case LEXER_STATE_EXPONENT: | ||||
|     lexer->current = handle_exponent(input); | ||||
|     lexer->current = handle_exponent(lexer, input); | ||||
|     break; | ||||
|   case LEXER_STATE_EXP_SIGN: | ||||
|     lexer->current = handle_exp_sign(input); | ||||
|     lexer->current = handle_exp_sign(lexer, input); | ||||
|     break; | ||||
|   case LEXER_STATE_POWER: | ||||
|     lexer->current = handle_power(lexer, input); | ||||
| @@ -300,13 +307,13 @@ void lexer_state_machine(lexer_t *lexer, char input) { | ||||
|     lexer->current = handle_number_end(lexer, input); | ||||
|     break; | ||||
|   case LEXER_STATE_STRING: | ||||
|     lexer->current = handle_string(input); | ||||
|     lexer->current = handle_string(lexer, input); | ||||
|     break; | ||||
|   case LEXER_STATE_STRING_END: | ||||
|     lexer->current = handle_string_end(lexer, input); | ||||
|     break; | ||||
|   case LEXER_STATE_ESCAPE_SEQUENCE: | ||||
|     lexer->current = handle_escape_sequence(input); | ||||
|     lexer->current = handle_escape_sequence(lexer, input); | ||||
|     break; | ||||
|   case LEXER_STATE_UNICODE_HEX: | ||||
|     lexer->current = handle_unicode_sequence(lexer, input); | ||||
| @@ -400,12 +407,18 @@ lexer_state_t handle_array(lexer_t *lexer, char input) { | ||||
|   return handle_value(lexer, input); | ||||
| } | ||||
|  | ||||
| lexer_state_t handle_key(lexer_t *lexer) { return LEXER_STATE_STRING; } | ||||
| lexer_state_t handle_key(lexer_t *lexer, char input) { | ||||
|   append_to_dstr(&(lexer->current_string), input); | ||||
|  | ||||
|   return LEXER_STATE_STRING; | ||||
| } | ||||
|  | ||||
| lexer_state_t handle_value(lexer_t *lexer, char input) { | ||||
|   if (isspace(input)) { | ||||
|     return LEXER_STATE_VALUE; | ||||
|   } else if (isdigit(input) && input != '0') { | ||||
|     append_to_dstr(&(lexer->current_string), input); | ||||
|  | ||||
|     return LEXER_STATE_NUMBER; | ||||
|   } | ||||
|  | ||||
| @@ -415,6 +428,8 @@ lexer_state_t handle_value(lexer_t *lexer, char input) { | ||||
|  | ||||
|     return LEXER_STATE_STRING; | ||||
|   case '0': | ||||
|     append_to_dstr(&(lexer->current_string), input); | ||||
|  | ||||
|     return LEXER_STATE_DECIMAL; | ||||
|   case '{': | ||||
|     return LEXER_STATE_OBJECT_START; | ||||
| @@ -431,14 +446,18 @@ lexer_state_t handle_value(lexer_t *lexer, char input) { | ||||
|   return LEXER_STATE_ERROR; | ||||
| } | ||||
|  | ||||
| lexer_state_t handle_string(char input) { | ||||
| lexer_state_t handle_string(lexer_t *lexer, char input) { | ||||
|   switch (input) { | ||||
|   case '\\': | ||||
|     append_to_dstr(&(lexer->current_string), input); | ||||
|  | ||||
|     return LEXER_STATE_ESCAPE_SEQUENCE; | ||||
|   case '"': | ||||
|     return LEXER_STATE_STRING_END; | ||||
|   } | ||||
|  | ||||
|   append_to_dstr(&(lexer->current_string), input); | ||||
|  | ||||
|   return LEXER_STATE_STRING; | ||||
| } | ||||
|  | ||||
| @@ -447,6 +466,8 @@ lexer_state_t handle_string_end(lexer_t *lexer, char input) { | ||||
|     return LEXER_STATE_STRING_END; | ||||
|   } | ||||
|  | ||||
|   empty_dstr(lexer->current_string); | ||||
|  | ||||
|   lexer->current = stack_pop(&(lexer->stack)); | ||||
|  | ||||
|   bool key_end = lexer->current == LEXER_STATE_KEY && input == ':'; | ||||
| @@ -467,7 +488,9 @@ lexer_state_t handle_string_end(lexer_t *lexer, char input) { | ||||
|                         : LEXER_STATE_ERROR; | ||||
| } | ||||
|  | ||||
| lexer_state_t handle_escape_sequence(char input) { | ||||
| lexer_state_t handle_escape_sequence(lexer_t *lexer, char input) { | ||||
|   append_to_dstr(&(lexer->current_string), input); | ||||
|  | ||||
|   switch (input) { | ||||
|   case '"': | ||||
|   case '/': | ||||
| @@ -487,6 +510,7 @@ lexer_state_t handle_escape_sequence(char input) { | ||||
|  | ||||
| lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input) { | ||||
|   append_to_lex_str(&(lexer->codepoint), input); | ||||
|   append_to_dstr(&(lexer->current_string), input); | ||||
|  | ||||
|   if (!ishex(input)) { | ||||
|     clear_lex_str(&(lexer->codepoint)); | ||||
| @@ -501,7 +525,9 @@ lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input) { | ||||
|   return LEXER_STATE_UNICODE_HEX; | ||||
| } | ||||
|  | ||||
| lexer_state_t handle_decimal(char input) { | ||||
| lexer_state_t handle_decimal(lexer_t *lexer, char input) { | ||||
|   append_to_dstr(&(lexer->current_string), input); | ||||
|  | ||||
|   if (input == '.') { | ||||
|     return LEXER_STATE_FRACTION; | ||||
|   } | ||||
| @@ -511,12 +537,20 @@ lexer_state_t handle_decimal(char input) { | ||||
|  | ||||
| lexer_state_t handle_number(lexer_t *lexer, char input) { | ||||
|   if (isdigit(input)) { | ||||
|     append_to_dstr(&(lexer->current_string), input); | ||||
|  | ||||
|     return LEXER_STATE_NUMBER; | ||||
|   } else if (input == '.') { | ||||
|     append_to_dstr(&(lexer->current_string), input); | ||||
|  | ||||
|     return LEXER_STATE_FRACTION; | ||||
|   } else if (input == '}' || input == ']') { | ||||
|     empty_dstr(lexer->current_string); | ||||
|  | ||||
|     return handle_collection_end(lexer, input); | ||||
|   } else if (input == ',') { | ||||
|     empty_dstr(lexer->current_string); | ||||
|  | ||||
|     return lexer->stack.stack[lexer->stack.size - 1]; | ||||
|   } else if (isspace(input)) { | ||||
|     return LEXER_STATE_NUMBER_END; | ||||
| @@ -527,12 +561,20 @@ lexer_state_t handle_number(lexer_t *lexer, char input) { | ||||
|  | ||||
| lexer_state_t handle_fraction(lexer_t *lexer, char input) { | ||||
|   if (isdigit(input)) { | ||||
|     append_to_dstr(&(lexer->current_string), input); | ||||
|  | ||||
|     return LEXER_STATE_FRACTION; | ||||
|   } else if (input == '}' || input == ']') { | ||||
|     empty_dstr(lexer->current_string); | ||||
|  | ||||
|     return handle_collection_end(lexer, input); | ||||
|   } else if (input == 'e' || input == 'E') { | ||||
|     append_to_dstr(&(lexer->current_string), input); | ||||
|  | ||||
|     return LEXER_STATE_EXPONENT; | ||||
|   } else if (input == ',') { | ||||
|     empty_dstr(lexer->current_string); | ||||
|  | ||||
|     return lexer->stack.stack[lexer->stack.size - 1]; | ||||
|   } else if (isspace(input)) { | ||||
|     return LEXER_STATE_NUMBER_END; | ||||
| @@ -541,7 +583,9 @@ lexer_state_t handle_fraction(lexer_t *lexer, char input) { | ||||
|   return LEXER_STATE_ERROR; | ||||
| } | ||||
|  | ||||
| lexer_state_t handle_exponent(char input) { | ||||
| lexer_state_t handle_exponent(lexer_t *lexer, char input) { | ||||
|   append_to_dstr(&(lexer->current_string), input); | ||||
|  | ||||
|   if (isdigit(input)) { | ||||
|     return LEXER_STATE_POWER; | ||||
|   } else if (input == '+' || input == '-') { | ||||
| @@ -551,7 +595,9 @@ lexer_state_t handle_exponent(char input) { | ||||
|   return LEXER_STATE_ERROR; | ||||
| } | ||||
|  | ||||
| lexer_state_t handle_exp_sign(char input) { | ||||
| lexer_state_t handle_exp_sign(lexer_t *lexer, char input) { | ||||
|   append_to_dstr(&(lexer->current_string), input); | ||||
|  | ||||
|   if (isdigit(input)) { | ||||
|     return LEXER_STATE_POWER; | ||||
|   } | ||||
| @@ -561,10 +607,16 @@ lexer_state_t handle_exp_sign(char input) { | ||||
|  | ||||
| lexer_state_t handle_power(lexer_t *lexer, char input) { | ||||
|   if (isdigit(input)) { | ||||
|     append_to_dstr(&(lexer->current_string), input); | ||||
|  | ||||
|     return LEXER_STATE_POWER; | ||||
|   } else if (input == '}' || input == ']') { | ||||
|     empty_dstr(lexer->current_string); | ||||
|  | ||||
|     return handle_collection_end(lexer, input); | ||||
|   } else if (input == ',') { | ||||
|     empty_dstr(lexer->current_string); | ||||
|  | ||||
|     return lexer->stack.stack[lexer->stack.size - 1]; | ||||
|   } else if (isspace(input)) { | ||||
|     return LEXER_STATE_NUMBER_END; | ||||
| @@ -574,6 +626,8 @@ lexer_state_t handle_power(lexer_t *lexer, char input) { | ||||
| } | ||||
|  | ||||
| lexer_state_t handle_number_end(lexer_t *lexer, char input) { | ||||
|   empty_dstr(lexer->current_string); | ||||
|  | ||||
|   if (isspace(input)) { | ||||
|     return LEXER_STATE_NUMBER_END; | ||||
|   } else if (input == ',') { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user