Added a string type that works for both keywords and Unicode sequences
This commit is contained in:
		| @@ -95,12 +95,12 @@ | ||||
|       "-x", | ||||
|       "c", | ||||
|       "-o", | ||||
|       "/tmp/main-69d465.o", | ||||
|       "/tmp/main-977e60.o", | ||||
|       "src/main.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", | ||||
|     "output": "/tmp/main-69d465.o" | ||||
|     "output": "/tmp/main-977e60.o" | ||||
|   }, | ||||
|   { | ||||
|     "arguments": [ | ||||
| @@ -162,11 +162,11 @@ | ||||
|       "-x", | ||||
|       "c", | ||||
|       "-o", | ||||
|       "/tmp/lexer_states-ad0df4.o", | ||||
|       "/tmp/lexer_states-04f606.o", | ||||
|       "src/lexer/lexer_states.c" | ||||
|     ], | ||||
|     "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", | ||||
|     "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer_states.c", | ||||
|     "output": "/tmp/lexer_states-ad0df4.o" | ||||
|     "output": "/tmp/lexer_states-04f606.o" | ||||
|   } | ||||
| ] | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| #include "lexer_states.h" | ||||
| #include "aliases.h" | ||||
| #include <assert.h> | ||||
| #include <ctype.h> | ||||
| #include <stdbool.h> | ||||
| #include <stdio.h> | ||||
| @@ -7,6 +8,7 @@ | ||||
| #include <string.h> | ||||
|  | ||||
| #define MAX_KEYWORD_LENGTH 5 | ||||
| #define UNICODE_LENGTH 4 | ||||
| #define MAX_STACK_CAPACITY 1024 | ||||
|  | ||||
| typedef enum { | ||||
| @@ -51,24 +53,42 @@ typedef struct { | ||||
|   u64 size; | ||||
| } state_stack_t; | ||||
|  | ||||
| typedef enum { /* Tag that selects which buffer a lexer_string_t is using. */ | ||||
|   LEXER_STRING_KEYWORD, /* first enumerator (value 0): the keyword_t buffer is active */ | ||||
|   LEXER_STRING_UNICODE, /* the unicode_t buffer is active */ | ||||
| } lex_str_type; | ||||
|  | ||||
| typedef struct { /* Character buffer for a keyword; the handlers compare it against prefixes such as "t", "fa", "nu". */ | ||||
|   u64 size; /* NOTE(review): probably the removed side of this diff row, since lexer_string_t carries its own size - confirm */ | ||||
|   char str[MAX_KEYWORD_LENGTH + 1]; /* one byte more than MAX_KEYWORD_LENGTH; presumably room for a terminating NUL, as callers strcpy from it - confirm */ | ||||
| } keyword_t; | ||||
|  | ||||
| typedef struct { /* Character buffer for a unicode sequence. */ | ||||
|   char codepoint[UNICODE_LENGTH]; /* exactly UNICODE_LENGTH bytes with no spare byte for a NUL, so do not treat it as a C string */ | ||||
| } unicode_t; | ||||
|  | ||||
| typedef struct { /* Tagged, fixed-capacity character buffer shared by keywords and unicode sequences. */ | ||||
|   lex_str_type type; /* selects which union member is in use */ | ||||
|   u64 size; /* number of characters appended so far */ | ||||
|   union { /* anonymous union: members are reached directly, e.g. s.keyword.str */ | ||||
|     keyword_t keyword; /* used when type is LEXER_STRING_KEYWORD */ | ||||
|     unicode_t unicode; /* used when type is LEXER_STRING_UNICODE */ | ||||
|   }; | ||||
| } lexer_string_t; | ||||
|  | ||||
| struct lexer { /* State the lexer carries from one input character to the next. */ | ||||
|   lexer_state_t current; /* current state; validate_json sets it to LEXER_STATE_START */ | ||||
|   state_stack_t stack; /* state stack used through stack_push / stack_pop */ | ||||
|   u64 line; | ||||
|   u64 column; | ||||
|   keyword_t keyword; /* NOTE(review): same member name as the next row; presumably the replaced (pre-commit) declaration - confirm */ | ||||
|   lexer_string_t keyword; /* keyword characters collected so far; validate_json tags it LEXER_STRING_KEYWORD */ | ||||
|   lexer_string_t codepoint; /* validate_json tags it LEXER_STRING_UNICODE */ | ||||
| }; | ||||
|  | ||||
| void stack_push(state_stack_t *stack, lexer_state_t value); | ||||
| lexer_state_t stack_pop(state_stack_t *stack); | ||||
|  | ||||
| void append_to_keyword(keyword_t *kw, char input); | ||||
| void clear_keyword(keyword_t *kw); | ||||
| void append_to_string(lexer_string_t *str, char input); | ||||
| void clear_string(lexer_string_t *str); | ||||
| bool strequal(const char *first, const char *second); | ||||
|  | ||||
| void lexer_state_machine(lexer_t *lexer, char input); | ||||
| @@ -98,6 +118,8 @@ lexer_state_t handle_keyword_end(lexer_t *lexer, char input); | ||||
| bool validate_json(char *json) { | ||||
|   lexer_t lexer = {0}; | ||||
|   lexer.current = LEXER_STATE_START; | ||||
|   lexer.keyword.type = LEXER_STRING_KEYWORD; | ||||
|   lexer.codepoint.type = LEXER_STRING_UNICODE; | ||||
|  | ||||
|   for (char *c = json; *c != '\0'; ++c) { | ||||
|     // printf("\nINPUT=>%s\n", c); | ||||
| @@ -130,17 +152,51 @@ lexer_state_t stack_pop(state_stack_t *stack) { | ||||
|   return state; | ||||
| } | ||||
|  | ||||
| void append_to_keyword(keyword_t *kw, char input) { /* NOTE(review): rows naming append_to_keyword / kw look like the removed side of the diff - confirm */ | ||||
|   if (kw->size + 1 > MAX_KEYWORD_LENGTH) { | ||||
| void append_to_string(lexer_string_t *lex_str, char input) { /* Appends input to the buffer selected by lex_str->type; does nothing once that buffer is full. */ | ||||
|   u64 capacity = 0; /* stays 0 for an unrecognised type, so the bounds check below returns before str is used */ | ||||
|   char *str = NULL; | ||||
|  | ||||
|   switch (lex_str->type) { /* pick the active buffer and how many characters it may hold */ | ||||
|   case LEXER_STRING_KEYWORD: | ||||
|     capacity = MAX_KEYWORD_LENGTH; /* one less than the array size, so the last byte of str is never written here */ | ||||
|     str = lex_str->keyword.str; | ||||
|  | ||||
|     break; | ||||
|   case LEXER_STRING_UNICODE: | ||||
|     capacity = UNICODE_LENGTH; /* the whole codepoint array may be filled */ | ||||
|     str = lex_str->unicode.codepoint; | ||||
|  | ||||
|     break; | ||||
|   } | ||||
|  | ||||
|   if (lex_str->size + 1 > capacity) { /* buffer already holds capacity characters: input is dropped silently */ | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   kw->str[(kw->size)++] = input; | ||||
|   str[(lex_str->size)++] = input; /* store input, then advance the length */ | ||||
| } | ||||
|  | ||||
| void clear_keyword(keyword_t *kw) { | ||||
|   memset(kw->str, 0, MAX_KEYWORD_LENGTH + 1); | ||||
|   kw->size = 0; | ||||
| /* Zeroes the buffer selected by lex_str->type and resets lex_str->size to 0. */ | ||||
| /* Each memset takes its length from the array it clears, so the write always */ | ||||
| /* stays inside that array. A fixed UNICODE_LENGTH + 1 count would write one */ | ||||
| /* byte past the UNICODE_LENGTH-byte codepoint buffer. */ | ||||
| void clear_string(lexer_string_t *lex_str) { | ||||
|   switch (lex_str->type) { | ||||
|   case LEXER_STRING_KEYWORD: | ||||
|     memset(lex_str->keyword.str, 0, sizeof(lex_str->keyword.str)); | ||||
|  | ||||
|     break; | ||||
|   case LEXER_STRING_UNICODE: | ||||
|     memset(lex_str->unicode.codepoint, 0, sizeof(lex_str->unicode.codepoint)); | ||||
|  | ||||
|     break; | ||||
|   default: | ||||
|     /* Unknown tag: there is no buffer to clear. Trap it in debug builds. */ | ||||
|     assert(0 && "clear_string: unknown lexer string type"); | ||||
|  | ||||
|     break; | ||||
|   } | ||||
|  | ||||
|   lex_str->size = 0; | ||||
| } | ||||
|  | ||||
| bool strequal(const char *first, const char *second) { | ||||
| @@ -327,7 +383,7 @@ lexer_state_t handle_value(lexer_t *lexer, char input) { | ||||
|   case 't': | ||||
|   case 'f': | ||||
|   case 'n': | ||||
|     append_to_keyword(&(lexer->keyword), input); | ||||
|     append_to_string(&(lexer->keyword), input); | ||||
|  | ||||
|     return handle_keyword(input); | ||||
|   } | ||||
| @@ -489,9 +545,9 @@ lexer_state_t handle_keyword(char input) { | ||||
|  | ||||
| lexer_state_t handle_true(lexer_t *lexer, char input) { | ||||
|   char current[MAX_KEYWORD_LENGTH + 1]; | ||||
|   strcpy(current, lexer->keyword.str); | ||||
|   strcpy(current, lexer->keyword.keyword.str); | ||||
|  | ||||
|   append_to_keyword(&(lexer->keyword), input); | ||||
|   append_to_string(&(lexer->keyword), input); | ||||
|  | ||||
|   bool return_state_true = (strequal(current, "t") && input == 'r') || | ||||
|                            (strequal(current, "tr") && input == 'u'); | ||||
| @@ -509,9 +565,9 @@ lexer_state_t handle_true(lexer_t *lexer, char input) { | ||||
|  | ||||
| lexer_state_t handle_false(lexer_t *lexer, char input) { | ||||
|   char current[MAX_KEYWORD_LENGTH + 1]; | ||||
|   strcpy(current, lexer->keyword.str); | ||||
|   strcpy(current, lexer->keyword.keyword.str); | ||||
|  | ||||
|   append_to_keyword(&(lexer->keyword), input); | ||||
|   append_to_string(&(lexer->keyword), input); | ||||
|  | ||||
|   bool return_state_false = (strequal(current, "f") && input == 'a') || | ||||
|                             (strequal(current, "fa") && input == 'l') || | ||||
| @@ -530,9 +586,9 @@ lexer_state_t handle_false(lexer_t *lexer, char input) { | ||||
|  | ||||
| lexer_state_t handle_null(lexer_t *lexer, char input) { | ||||
|   char current[MAX_KEYWORD_LENGTH + 1]; | ||||
|   strcpy(current, lexer->keyword.str); | ||||
|   strcpy(current, lexer->keyword.keyword.str); | ||||
|  | ||||
|   append_to_keyword(&(lexer->keyword), input); | ||||
|   append_to_string(&(lexer->keyword), input); | ||||
|  | ||||
|   bool return_state_null = (strequal(current, "n") && input == 'u') || | ||||
|                            (strequal(current, "nu") && input == 'l'); | ||||
| @@ -549,7 +605,7 @@ lexer_state_t handle_null(lexer_t *lexer, char input) { | ||||
| } | ||||
|  | ||||
| lexer_state_t handle_keyword_end(lexer_t *lexer, char input) { | ||||
|   clear_keyword(&(lexer->keyword)); | ||||
|   clear_string(&(lexer->keyword)); | ||||
|  | ||||
|   if (isspace(input)) { | ||||
|     return LEXER_STATE_KEYWORD_END; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user