Added string type that works for both keywords and unicode sequences

This commit is contained in:
Abdelrahman Said 2023-06-18 00:03:31 +01:00
parent 727d41c5ff
commit 386dfe72db
2 changed files with 78 additions and 22 deletions

View File

@ -95,12 +95,12 @@
"-x",
"c",
"-o",
"/tmp/main-69d465.o",
"/tmp/main-977e60.o",
"src/main.c"
],
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c",
"output": "/tmp/main-69d465.o"
"output": "/tmp/main-977e60.o"
},
{
"arguments": [
@ -162,11 +162,11 @@
"-x",
"c",
"-o",
"/tmp/lexer_states-ad0df4.o",
"/tmp/lexer_states-04f606.o",
"src/lexer/lexer_states.c"
],
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer_states.c",
"output": "/tmp/lexer_states-ad0df4.o"
"output": "/tmp/lexer_states-04f606.o"
}
]

View File

@ -1,5 +1,6 @@
#include "lexer_states.h"
#include "aliases.h"
#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
@ -7,6 +8,7 @@
#include <string.h>
#define MAX_KEYWORD_LENGTH 5
#define UNICODE_LENGTH 4
#define MAX_STACK_CAPACITY 1024
typedef enum {
@ -51,24 +53,42 @@ typedef struct {
u64 size;
} state_stack_t;
// Tag stored in lexer_string_t.type; it tells append_to_string and
// clear_string which union member (keyword or unicode) to operate on.
typedef enum {
  LEXER_STRING_KEYWORD = 0, // selects the keyword buffer
  LEXER_STRING_UNICODE = 1, // selects the unicode codepoint buffer
} lex_str_type;
// Fixed-capacity buffer for the keyword text collected so far.
typedef struct {
// Number of characters stored in `str`; maintained by append_to_keyword and
// clear_keyword.
// NOTE(review): lexer_string_t carries its own `size`, and append_to_string /
// clear_string never touch this field - confirm whether it is still needed.
u64 size;
// Up to MAX_KEYWORD_LENGTH characters plus one spare byte, so the text stays
// NUL-terminated for the strcpy calls in the keyword handlers.
char str[MAX_KEYWORD_LENGTH + 1];
} keyword_t;
// Buffer for the characters of one unicode sequence.
//
// One byte beyond UNICODE_LENGTH is reserved, mirroring keyword_t.str: it keeps
// the text NUL-terminated and keeps helpers that zero UNICODE_LENGTH + 1 bytes
// inside the array. append_to_string still stores at most UNICODE_LENGTH
// characters.
typedef struct {
  char codepoint[UNICODE_LENGTH + 1];
} unicode_t;
// Tagged string buffer used for both keywords and unicode sequences.
// append_to_string and clear_string switch on `type` to pick the buffer and
// its capacity.
typedef struct {
// Selects which union member below is in use.
lex_str_type type;
// Number of characters appended so far; reset to 0 by clear_string.
u64 size;
// Storage for the selected variant. The union is anonymous, so the members
// are reached directly as `.keyword` and `.unicode`.
union {
keyword_t keyword;
unicode_t unicode;
};
} lexer_string_t;
// Lexer state passed to the state-machine handlers.
struct lexer {
  // State the machine is currently in (validate_json starts it at
  // LEXER_STATE_START).
  lexer_state_t current;
  // State stack operated on by stack_push / stack_pop.
  state_stack_t stack;
  // NOTE(review): presumably the position in the input being lexed - confirm
  // against the code that updates them.
  u64 line;
  u64 column;
  // Tagged buffers: validate_json marks `keyword` as LEXER_STRING_KEYWORD and
  // `codepoint` as LEXER_STRING_UNICODE before lexing starts. Member names in
  // a struct must be unique, so `keyword` is declared exactly once, with the
  // tagged type that initialisation relies on.
  lexer_string_t keyword;
  lexer_string_t codepoint;
};
// State-stack helpers.
void stack_push(state_stack_t *stack, lexer_state_t value);
lexer_state_t stack_pop(state_stack_t *stack);
// Helpers for a plain keyword_t buffer.
void append_to_keyword(keyword_t *kw, char input);
void clear_keyword(keyword_t *kw);
// Helpers for a tagged lexer_string_t; the buffer that is written or zeroed is
// chosen by the string's `type` field.
void append_to_string(lexer_string_t *str, char input);
void clear_string(lexer_string_t *str);
// NOTE(review): presumably a string-equality test; the definition is not
// visible here - confirm.
bool strequal(const char *first, const char *second);
// NOTE(review): presumably advances the lexer by one input character - confirm
// against the definition.
void lexer_state_machine(lexer_t *lexer, char input);
@ -98,6 +118,8 @@ lexer_state_t handle_keyword_end(lexer_t *lexer, char input);
bool validate_json(char *json) {
lexer_t lexer = {0};
lexer.current = LEXER_STATE_START;
lexer.keyword.type = LEXER_STRING_KEYWORD;
lexer.codepoint.type = LEXER_STRING_UNICODE;
for (char *c = json; *c != '\0'; ++c) {
// printf("\nINPUT=>%s\n", c);
@ -130,17 +152,51 @@ lexer_state_t stack_pop(state_stack_t *stack) {
return state;
}
void append_to_keyword(keyword_t *kw, char input) {
if (kw->size + 1 > MAX_KEYWORD_LENGTH) {
void append_to_string(lexer_string_t *lex_str, char input) {
u64 capacity = 0;
char *str = NULL;
switch (lex_str->type) {
case LEXER_STRING_KEYWORD:
capacity = MAX_KEYWORD_LENGTH;
str = lex_str->keyword.str;
break;
case LEXER_STRING_UNICODE:
capacity = UNICODE_LENGTH;
str = lex_str->unicode.codepoint;
break;
}
if (lex_str->size + 1 > capacity) {
return;
}
kw->str[(kw->size)++] = input;
str[(lex_str->size)++] = input;
}
void clear_keyword(keyword_t *kw) {
memset(kw->str, 0, MAX_KEYWORD_LENGTH + 1);
kw->size = 0;
void clear_string(lexer_string_t *lex_str) {
u64 capacity = 1;
char *str = NULL;
switch (lex_str->type) {
case LEXER_STRING_KEYWORD:
capacity += MAX_KEYWORD_LENGTH;
str = lex_str->keyword.str;
break;
case LEXER_STRING_UNICODE:
capacity += UNICODE_LENGTH;
str = lex_str->unicode.codepoint;
break;
}
assert(str != NULL);
memset(str, 0, capacity);
lex_str->size = 0;
}
bool strequal(const char *first, const char *second) {
@ -327,7 +383,7 @@ lexer_state_t handle_value(lexer_t *lexer, char input) {
case 't':
case 'f':
case 'n':
append_to_keyword(&(lexer->keyword), input);
append_to_string(&(lexer->keyword), input);
return handle_keyword(input);
}
@ -489,9 +545,9 @@ lexer_state_t handle_keyword(char input) {
lexer_state_t handle_true(lexer_t *lexer, char input) {
char current[MAX_KEYWORD_LENGTH + 1];
strcpy(current, lexer->keyword.str);
strcpy(current, lexer->keyword.keyword.str);
append_to_keyword(&(lexer->keyword), input);
append_to_string(&(lexer->keyword), input);
bool return_state_true = (strequal(current, "t") && input == 'r') ||
(strequal(current, "tr") && input == 'u');
@ -509,9 +565,9 @@ lexer_state_t handle_true(lexer_t *lexer, char input) {
lexer_state_t handle_false(lexer_t *lexer, char input) {
char current[MAX_KEYWORD_LENGTH + 1];
strcpy(current, lexer->keyword.str);
strcpy(current, lexer->keyword.keyword.str);
append_to_keyword(&(lexer->keyword), input);
append_to_string(&(lexer->keyword), input);
bool return_state_false = (strequal(current, "f") && input == 'a') ||
(strequal(current, "fa") && input == 'l') ||
@ -530,9 +586,9 @@ lexer_state_t handle_false(lexer_t *lexer, char input) {
lexer_state_t handle_null(lexer_t *lexer, char input) {
char current[MAX_KEYWORD_LENGTH + 1];
strcpy(current, lexer->keyword.str);
strcpy(current, lexer->keyword.keyword.str);
append_to_keyword(&(lexer->keyword), input);
append_to_string(&(lexer->keyword), input);
bool return_state_null = (strequal(current, "n") && input == 'u') ||
(strequal(current, "nu") && input == 'l');
@ -549,7 +605,7 @@ lexer_state_t handle_null(lexer_t *lexer, char input) {
}
lexer_state_t handle_keyword_end(lexer_t *lexer, char input) {
clear_keyword(&(lexer->keyword));
clear_string(&(lexer->keyword));
if (isspace(input)) {
return LEXER_STATE_KEYWORD_END;