Added a string type that works for both keywords and Unicode sequences
This commit is contained in:
parent
727d41c5ff
commit
386dfe72db
@ -95,12 +95,12 @@
|
||||
"-x",
|
||||
"c",
|
||||
"-o",
|
||||
"/tmp/main-69d465.o",
|
||||
"/tmp/main-977e60.o",
|
||||
"src/main.c"
|
||||
],
|
||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c",
|
||||
"output": "/tmp/main-69d465.o"
|
||||
"output": "/tmp/main-977e60.o"
|
||||
},
|
||||
{
|
||||
"arguments": [
|
||||
@ -162,11 +162,11 @@
|
||||
"-x",
|
||||
"c",
|
||||
"-o",
|
||||
"/tmp/lexer_states-ad0df4.o",
|
||||
"/tmp/lexer_states-04f606.o",
|
||||
"src/lexer/lexer_states.c"
|
||||
],
|
||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer_states.c",
|
||||
"output": "/tmp/lexer_states-ad0df4.o"
|
||||
"output": "/tmp/lexer_states-04f606.o"
|
||||
}
|
||||
]
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include "lexer_states.h"
|
||||
#include "aliases.h"
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
@ -7,6 +8,7 @@
|
||||
#include <string.h>
|
||||
|
||||
#define MAX_KEYWORD_LENGTH 5
|
||||
#define UNICODE_LENGTH 4
|
||||
#define MAX_STACK_CAPACITY 1024
|
||||
|
||||
typedef enum {
|
||||
@ -51,24 +53,42 @@ typedef struct {
|
||||
u64 size;
|
||||
} state_stack_t;
|
||||
|
||||
/**
 * Discriminator for lexer_string_t.
 *
 * Selects which buffer of the lexer_string_t union is active and, through
 * that, which capacity applies when appending to or clearing the string.
 */
typedef enum {
  LEXER_STRING_KEYWORD, /* buffer is keyword_t.str */
  LEXER_STRING_UNICODE, /* buffer is unicode_t.codepoint */
} lex_str_type;
|
||||
|
||||
typedef struct {
|
||||
u64 size;
|
||||
char str[MAX_KEYWORD_LENGTH + 1];
|
||||
} keyword_t;
|
||||
|
||||
typedef struct {
|
||||
char codepoint[UNICODE_LENGTH];
|
||||
} unicode_t;
|
||||
|
||||
typedef struct {
|
||||
lex_str_type type;
|
||||
u64 size;
|
||||
union {
|
||||
keyword_t keyword;
|
||||
unicode_t unicode;
|
||||
};
|
||||
} lexer_string_t;
|
||||
|
||||
struct lexer {
|
||||
lexer_state_t current;
|
||||
state_stack_t stack;
|
||||
u64 line;
|
||||
u64 column;
|
||||
keyword_t keyword;
|
||||
lexer_string_t keyword;
|
||||
lexer_string_t codepoint;
|
||||
};
|
||||
|
||||
void stack_push(state_stack_t *stack, lexer_state_t value);
|
||||
lexer_state_t stack_pop(state_stack_t *stack);
|
||||
|
||||
void append_to_keyword(keyword_t *kw, char input);
|
||||
void clear_keyword(keyword_t *kw);
|
||||
void append_to_string(lexer_string_t *str, char input);
|
||||
void clear_string(lexer_string_t *str);
|
||||
bool strequal(const char *first, const char *second);
|
||||
|
||||
void lexer_state_machine(lexer_t *lexer, char input);
|
||||
@ -98,6 +118,8 @@ lexer_state_t handle_keyword_end(lexer_t *lexer, char input);
|
||||
bool validate_json(char *json) {
|
||||
lexer_t lexer = {0};
|
||||
lexer.current = LEXER_STATE_START;
|
||||
lexer.keyword.type = LEXER_STRING_KEYWORD;
|
||||
lexer.codepoint.type = LEXER_STRING_UNICODE;
|
||||
|
||||
for (char *c = json; *c != '\0'; ++c) {
|
||||
// printf("\nINPUT=>%s\n", c);
|
||||
@ -130,17 +152,51 @@ lexer_state_t stack_pop(state_stack_t *stack) {
|
||||
return state;
|
||||
}
|
||||
|
||||
void append_to_keyword(keyword_t *kw, char input) {
|
||||
if (kw->size + 1 > MAX_KEYWORD_LENGTH) {
|
||||
void append_to_string(lexer_string_t *lex_str, char input) {
|
||||
u64 capacity = 0;
|
||||
char *str = NULL;
|
||||
|
||||
switch (lex_str->type) {
|
||||
case LEXER_STRING_KEYWORD:
|
||||
capacity = MAX_KEYWORD_LENGTH;
|
||||
str = lex_str->keyword.str;
|
||||
|
||||
break;
|
||||
case LEXER_STRING_UNICODE:
|
||||
capacity = UNICODE_LENGTH;
|
||||
str = lex_str->unicode.codepoint;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
if (lex_str->size + 1 > capacity) {
|
||||
return;
|
||||
}
|
||||
|
||||
kw->str[(kw->size)++] = input;
|
||||
str[(lex_str->size)++] = input;
|
||||
}
|
||||
|
||||
void clear_keyword(keyword_t *kw) {
|
||||
memset(kw->str, 0, MAX_KEYWORD_LENGTH + 1);
|
||||
kw->size = 0;
|
||||
void clear_string(lexer_string_t *lex_str) {
|
||||
u64 capacity = 1;
|
||||
char *str = NULL;
|
||||
|
||||
switch (lex_str->type) {
|
||||
case LEXER_STRING_KEYWORD:
|
||||
capacity += MAX_KEYWORD_LENGTH;
|
||||
str = lex_str->keyword.str;
|
||||
|
||||
break;
|
||||
case LEXER_STRING_UNICODE:
|
||||
capacity += UNICODE_LENGTH;
|
||||
str = lex_str->unicode.codepoint;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
assert(str != NULL);
|
||||
|
||||
memset(str, 0, capacity);
|
||||
lex_str->size = 0;
|
||||
}
|
||||
|
||||
bool strequal(const char *first, const char *second) {
|
||||
@ -327,7 +383,7 @@ lexer_state_t handle_value(lexer_t *lexer, char input) {
|
||||
case 't':
|
||||
case 'f':
|
||||
case 'n':
|
||||
append_to_keyword(&(lexer->keyword), input);
|
||||
append_to_string(&(lexer->keyword), input);
|
||||
|
||||
return handle_keyword(input);
|
||||
}
|
||||
@ -489,9 +545,9 @@ lexer_state_t handle_keyword(char input) {
|
||||
|
||||
lexer_state_t handle_true(lexer_t *lexer, char input) {
|
||||
char current[MAX_KEYWORD_LENGTH + 1];
|
||||
strcpy(current, lexer->keyword.str);
|
||||
strcpy(current, lexer->keyword.keyword.str);
|
||||
|
||||
append_to_keyword(&(lexer->keyword), input);
|
||||
append_to_string(&(lexer->keyword), input);
|
||||
|
||||
bool return_state_true = (strequal(current, "t") && input == 'r') ||
|
||||
(strequal(current, "tr") && input == 'u');
|
||||
@ -509,9 +565,9 @@ lexer_state_t handle_true(lexer_t *lexer, char input) {
|
||||
|
||||
lexer_state_t handle_false(lexer_t *lexer, char input) {
|
||||
char current[MAX_KEYWORD_LENGTH + 1];
|
||||
strcpy(current, lexer->keyword.str);
|
||||
strcpy(current, lexer->keyword.keyword.str);
|
||||
|
||||
append_to_keyword(&(lexer->keyword), input);
|
||||
append_to_string(&(lexer->keyword), input);
|
||||
|
||||
bool return_state_false = (strequal(current, "f") && input == 'a') ||
|
||||
(strequal(current, "fa") && input == 'l') ||
|
||||
@ -530,9 +586,9 @@ lexer_state_t handle_false(lexer_t *lexer, char input) {
|
||||
|
||||
lexer_state_t handle_null(lexer_t *lexer, char input) {
|
||||
char current[MAX_KEYWORD_LENGTH + 1];
|
||||
strcpy(current, lexer->keyword.str);
|
||||
strcpy(current, lexer->keyword.keyword.str);
|
||||
|
||||
append_to_keyword(&(lexer->keyword), input);
|
||||
append_to_string(&(lexer->keyword), input);
|
||||
|
||||
bool return_state_null = (strequal(current, "n") && input == 'u') ||
|
||||
(strequal(current, "nu") && input == 'l');
|
||||
@ -549,7 +605,7 @@ lexer_state_t handle_null(lexer_t *lexer, char input) {
|
||||
}
|
||||
|
||||
lexer_state_t handle_keyword_end(lexer_t *lexer, char input) {
|
||||
clear_keyword(&(lexer->keyword));
|
||||
clear_string(&(lexer->keyword));
|
||||
|
||||
if (isspace(input)) {
|
||||
return LEXER_STATE_KEYWORD_END;
|
||||
|
Loading…
Reference in New Issue
Block a user