Added string type that works for both keywords and unicode sequences
This commit is contained in:
		| @@ -95,12 +95,12 @@ | |||||||
|       "-x", |       "-x", | ||||||
|       "c", |       "c", | ||||||
|       "-o", |       "-o", | ||||||
|       "/tmp/main-69d465.o", |       "/tmp/main-977e60.o", | ||||||
|       "src/main.c" |       "src/main.c" | ||||||
|     ], |     ], | ||||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", |     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", |     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", | ||||||
|     "output": "/tmp/main-69d465.o" |     "output": "/tmp/main-977e60.o" | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
|     "arguments": [ |     "arguments": [ | ||||||
| @@ -162,11 +162,11 @@ | |||||||
|       "-x", |       "-x", | ||||||
|       "c", |       "c", | ||||||
|       "-o", |       "-o", | ||||||
|       "/tmp/lexer_states-ad0df4.o", |       "/tmp/lexer_states-04f606.o", | ||||||
|       "src/lexer/lexer_states.c" |       "src/lexer/lexer_states.c" | ||||||
|     ], |     ], | ||||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", |     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer_states.c", |     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer_states.c", | ||||||
|     "output": "/tmp/lexer_states-ad0df4.o" |     "output": "/tmp/lexer_states-04f606.o" | ||||||
|   } |   } | ||||||
| ] | ] | ||||||
|   | |||||||
| @@ -1,5 +1,6 @@ | |||||||
| #include "lexer_states.h" | #include "lexer_states.h" | ||||||
| #include "aliases.h" | #include "aliases.h" | ||||||
|  | #include <assert.h> | ||||||
| #include <ctype.h> | #include <ctype.h> | ||||||
| #include <stdbool.h> | #include <stdbool.h> | ||||||
| #include <stdio.h> | #include <stdio.h> | ||||||
| @@ -7,6 +8,7 @@ | |||||||
| #include <string.h> | #include <string.h> | ||||||
|  |  | ||||||
| #define MAX_KEYWORD_LENGTH 5 | #define MAX_KEYWORD_LENGTH 5 | ||||||
|  | #define UNICODE_LENGTH 4 | ||||||
| #define MAX_STACK_CAPACITY 1024 | #define MAX_STACK_CAPACITY 1024 | ||||||
|  |  | ||||||
| typedef enum { | typedef enum { | ||||||
| @@ -51,24 +53,42 @@ typedef struct { | |||||||
|   u64 size; |   u64 size; | ||||||
| } state_stack_t; | } state_stack_t; | ||||||
|  |  | ||||||
|  | typedef enum { | ||||||
|  |   LEXER_STRING_KEYWORD, | ||||||
|  |   LEXER_STRING_UNICODE, | ||||||
|  | } lex_str_type; | ||||||
|  |  | ||||||
| typedef struct { | typedef struct { | ||||||
|   u64 size; |  | ||||||
|   char str[MAX_KEYWORD_LENGTH + 1]; |   char str[MAX_KEYWORD_LENGTH + 1]; | ||||||
| } keyword_t; | } keyword_t; | ||||||
|  |  | ||||||
|  | typedef struct { | ||||||
|  |   char codepoint[UNICODE_LENGTH]; | ||||||
|  | } unicode_t; | ||||||
|  |  | ||||||
|  | typedef struct { | ||||||
|  |   lex_str_type type; | ||||||
|  |   u64 size; | ||||||
|  |   union { | ||||||
|  |     keyword_t keyword; | ||||||
|  |     unicode_t unicode; | ||||||
|  |   }; | ||||||
|  | } lexer_string_t; | ||||||
|  |  | ||||||
| struct lexer { | struct lexer { | ||||||
|   lexer_state_t current; |   lexer_state_t current; | ||||||
|   state_stack_t stack; |   state_stack_t stack; | ||||||
|   u64 line; |   u64 line; | ||||||
|   u64 column; |   u64 column; | ||||||
|   keyword_t keyword; |   lexer_string_t keyword; | ||||||
|  |   lexer_string_t codepoint; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| void stack_push(state_stack_t *stack, lexer_state_t value); | void stack_push(state_stack_t *stack, lexer_state_t value); | ||||||
| lexer_state_t stack_pop(state_stack_t *stack); | lexer_state_t stack_pop(state_stack_t *stack); | ||||||
|  |  | ||||||
| void append_to_keyword(keyword_t *kw, char input); | void append_to_string(lexer_string_t *str, char input); | ||||||
| void clear_keyword(keyword_t *kw); | void clear_string(lexer_string_t *str); | ||||||
| bool strequal(const char *first, const char *second); | bool strequal(const char *first, const char *second); | ||||||
|  |  | ||||||
| void lexer_state_machine(lexer_t *lexer, char input); | void lexer_state_machine(lexer_t *lexer, char input); | ||||||
| @@ -98,6 +118,8 @@ lexer_state_t handle_keyword_end(lexer_t *lexer, char input); | |||||||
| bool validate_json(char *json) { | bool validate_json(char *json) { | ||||||
|   lexer_t lexer = {0}; |   lexer_t lexer = {0}; | ||||||
|   lexer.current = LEXER_STATE_START; |   lexer.current = LEXER_STATE_START; | ||||||
|  |   lexer.keyword.type = LEXER_STRING_KEYWORD; | ||||||
|  |   lexer.codepoint.type = LEXER_STRING_UNICODE; | ||||||
|  |  | ||||||
|   for (char *c = json; *c != '\0'; ++c) { |   for (char *c = json; *c != '\0'; ++c) { | ||||||
|     // printf("\nINPUT=>%s\n", c); |     // printf("\nINPUT=>%s\n", c); | ||||||
| @@ -130,17 +152,51 @@ lexer_state_t stack_pop(state_stack_t *stack) { | |||||||
|   return state; |   return state; | ||||||
| } | } | ||||||
|  |  | ||||||
| void append_to_keyword(keyword_t *kw, char input) { | void append_to_string(lexer_string_t *lex_str, char input) { | ||||||
|   if (kw->size + 1 > MAX_KEYWORD_LENGTH) { |   u64 capacity = 0; | ||||||
|  |   char *str = NULL; | ||||||
|  |  | ||||||
|  |   switch (lex_str->type) { | ||||||
|  |   case LEXER_STRING_KEYWORD: | ||||||
|  |     capacity = MAX_KEYWORD_LENGTH; | ||||||
|  |     str = lex_str->keyword.str; | ||||||
|  |  | ||||||
|  |     break; | ||||||
|  |   case LEXER_STRING_UNICODE: | ||||||
|  |     capacity = UNICODE_LENGTH; | ||||||
|  |     str = lex_str->unicode.codepoint; | ||||||
|  |  | ||||||
|  |     break; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   if (lex_str->size + 1 > capacity) { | ||||||
|     return; |     return; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   kw->str[(kw->size)++] = input; |   str[(lex_str->size)++] = input; | ||||||
| } | } | ||||||
|  |  | ||||||
| void clear_keyword(keyword_t *kw) { | void clear_string(lexer_string_t *lex_str) { | ||||||
|   memset(kw->str, 0, MAX_KEYWORD_LENGTH + 1); |   u64 capacity = 1; | ||||||
|   kw->size = 0; |   char *str = NULL; | ||||||
|  |  | ||||||
|  |   switch (lex_str->type) { | ||||||
|  |   case LEXER_STRING_KEYWORD: | ||||||
|  |     capacity += MAX_KEYWORD_LENGTH; | ||||||
|  |     str = lex_str->keyword.str; | ||||||
|  |  | ||||||
|  |     break; | ||||||
|  |   case LEXER_STRING_UNICODE: | ||||||
|  |     capacity += UNICODE_LENGTH; | ||||||
|  |     str = lex_str->unicode.codepoint; | ||||||
|  |  | ||||||
|  |     break; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   assert(str != NULL); | ||||||
|  |  | ||||||
|  |   memset(str, 0, capacity); | ||||||
|  |   lex_str->size = 0; | ||||||
| } | } | ||||||
|  |  | ||||||
| bool strequal(const char *first, const char *second) { | bool strequal(const char *first, const char *second) { | ||||||
| @@ -327,7 +383,7 @@ lexer_state_t handle_value(lexer_t *lexer, char input) { | |||||||
|   case 't': |   case 't': | ||||||
|   case 'f': |   case 'f': | ||||||
|   case 'n': |   case 'n': | ||||||
|     append_to_keyword(&(lexer->keyword), input); |     append_to_string(&(lexer->keyword), input); | ||||||
|  |  | ||||||
|     return handle_keyword(input); |     return handle_keyword(input); | ||||||
|   } |   } | ||||||
| @@ -489,9 +545,9 @@ lexer_state_t handle_keyword(char input) { | |||||||
|  |  | ||||||
| lexer_state_t handle_true(lexer_t *lexer, char input) { | lexer_state_t handle_true(lexer_t *lexer, char input) { | ||||||
|   char current[MAX_KEYWORD_LENGTH + 1]; |   char current[MAX_KEYWORD_LENGTH + 1]; | ||||||
|   strcpy(current, lexer->keyword.str); |   strcpy(current, lexer->keyword.keyword.str); | ||||||
|  |  | ||||||
|   append_to_keyword(&(lexer->keyword), input); |   append_to_string(&(lexer->keyword), input); | ||||||
|  |  | ||||||
|   bool return_state_true = (strequal(current, "t") && input == 'r') || |   bool return_state_true = (strequal(current, "t") && input == 'r') || | ||||||
|                            (strequal(current, "tr") && input == 'u'); |                            (strequal(current, "tr") && input == 'u'); | ||||||
| @@ -509,9 +565,9 @@ lexer_state_t handle_true(lexer_t *lexer, char input) { | |||||||
|  |  | ||||||
| lexer_state_t handle_false(lexer_t *lexer, char input) { | lexer_state_t handle_false(lexer_t *lexer, char input) { | ||||||
|   char current[MAX_KEYWORD_LENGTH + 1]; |   char current[MAX_KEYWORD_LENGTH + 1]; | ||||||
|   strcpy(current, lexer->keyword.str); |   strcpy(current, lexer->keyword.keyword.str); | ||||||
|  |  | ||||||
|   append_to_keyword(&(lexer->keyword), input); |   append_to_string(&(lexer->keyword), input); | ||||||
|  |  | ||||||
|   bool return_state_false = (strequal(current, "f") && input == 'a') || |   bool return_state_false = (strequal(current, "f") && input == 'a') || | ||||||
|                             (strequal(current, "fa") && input == 'l') || |                             (strequal(current, "fa") && input == 'l') || | ||||||
| @@ -530,9 +586,9 @@ lexer_state_t handle_false(lexer_t *lexer, char input) { | |||||||
|  |  | ||||||
| lexer_state_t handle_null(lexer_t *lexer, char input) { | lexer_state_t handle_null(lexer_t *lexer, char input) { | ||||||
|   char current[MAX_KEYWORD_LENGTH + 1]; |   char current[MAX_KEYWORD_LENGTH + 1]; | ||||||
|   strcpy(current, lexer->keyword.str); |   strcpy(current, lexer->keyword.keyword.str); | ||||||
|  |  | ||||||
|   append_to_keyword(&(lexer->keyword), input); |   append_to_string(&(lexer->keyword), input); | ||||||
|  |  | ||||||
|   bool return_state_null = (strequal(current, "n") && input == 'u') || |   bool return_state_null = (strequal(current, "n") && input == 'u') || | ||||||
|                            (strequal(current, "nu") && input == 'l'); |                            (strequal(current, "nu") && input == 'l'); | ||||||
| @@ -549,7 +605,7 @@ lexer_state_t handle_null(lexer_t *lexer, char input) { | |||||||
| } | } | ||||||
|  |  | ||||||
| lexer_state_t handle_keyword_end(lexer_t *lexer, char input) { | lexer_state_t handle_keyword_end(lexer_t *lexer, char input) { | ||||||
|   clear_keyword(&(lexer->keyword)); |   clear_string(&(lexer->keyword)); | ||||||
|  |  | ||||||
|   if (isspace(input)) { |   if (isspace(input)) { | ||||||
|     return LEXER_STATE_KEYWORD_END; |     return LEXER_STATE_KEYWORD_END; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user