Added string type that works for both keywords and unicode sequences
This commit is contained in:
parent
727d41c5ff
commit
386dfe72db
@ -95,12 +95,12 @@
|
|||||||
"-x",
|
"-x",
|
||||||
"c",
|
"c",
|
||||||
"-o",
|
"-o",
|
||||||
"/tmp/main-69d465.o",
|
"/tmp/main-977e60.o",
|
||||||
"src/main.c"
|
"src/main.c"
|
||||||
],
|
],
|
||||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
||||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c",
|
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c",
|
||||||
"output": "/tmp/main-69d465.o"
|
"output": "/tmp/main-977e60.o"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"arguments": [
|
"arguments": [
|
||||||
@ -162,11 +162,11 @@
|
|||||||
"-x",
|
"-x",
|
||||||
"c",
|
"c",
|
||||||
"-o",
|
"-o",
|
||||||
"/tmp/lexer_states-ad0df4.o",
|
"/tmp/lexer_states-04f606.o",
|
||||||
"src/lexer/lexer_states.c"
|
"src/lexer/lexer_states.c"
|
||||||
],
|
],
|
||||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
||||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer_states.c",
|
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer_states.c",
|
||||||
"output": "/tmp/lexer_states-ad0df4.o"
|
"output": "/tmp/lexer_states-04f606.o"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#include "lexer_states.h"
|
#include "lexer_states.h"
|
||||||
#include "aliases.h"
|
#include "aliases.h"
|
||||||
|
#include <assert.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
@ -7,6 +8,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#define MAX_KEYWORD_LENGTH 5
|
#define MAX_KEYWORD_LENGTH 5
|
||||||
|
#define UNICODE_LENGTH 4
|
||||||
#define MAX_STACK_CAPACITY 1024
|
#define MAX_STACK_CAPACITY 1024
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
@ -51,24 +53,42 @@ typedef struct {
|
|||||||
u64 size;
|
u64 size;
|
||||||
} state_stack_t;
|
} state_stack_t;
|
||||||
|
|
||||||
|
// Tag stored in lexer_string_t.type. The append/clear helpers branch on it to
// choose which buffer of the string (and which capacity) to operate on.
typedef enum {
  LEXER_STRING_KEYWORD = 0, // buffer is the keyword_t member
  LEXER_STRING_UNICODE = 1  // buffer is the unicode_t member
} lex_str_type;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
u64 size;
|
|
||||||
char str[MAX_KEYWORD_LENGTH + 1];
|
char str[MAX_KEYWORD_LENGTH + 1];
|
||||||
} keyword_t;
|
} keyword_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
char codepoint[UNICODE_LENGTH];
|
||||||
|
} unicode_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
lex_str_type type;
|
||||||
|
u64 size;
|
||||||
|
union {
|
||||||
|
keyword_t keyword;
|
||||||
|
unicode_t unicode;
|
||||||
|
};
|
||||||
|
} lexer_string_t;
|
||||||
|
|
||||||
struct lexer {
|
struct lexer {
|
||||||
lexer_state_t current;
|
lexer_state_t current;
|
||||||
state_stack_t stack;
|
state_stack_t stack;
|
||||||
u64 line;
|
u64 line;
|
||||||
u64 column;
|
u64 column;
|
||||||
keyword_t keyword;
|
lexer_string_t keyword;
|
||||||
|
lexer_string_t codepoint;
|
||||||
};
|
};
|
||||||
|
|
||||||
void stack_push(state_stack_t *stack, lexer_state_t value);
|
void stack_push(state_stack_t *stack, lexer_state_t value);
|
||||||
lexer_state_t stack_pop(state_stack_t *stack);
|
lexer_state_t stack_pop(state_stack_t *stack);
|
||||||
|
|
||||||
void append_to_keyword(keyword_t *kw, char input);
|
void append_to_string(lexer_string_t *str, char input);
|
||||||
void clear_keyword(keyword_t *kw);
|
void clear_string(lexer_string_t *str);
|
||||||
bool strequal(const char *first, const char *second);
|
bool strequal(const char *first, const char *second);
|
||||||
|
|
||||||
void lexer_state_machine(lexer_t *lexer, char input);
|
void lexer_state_machine(lexer_t *lexer, char input);
|
||||||
@ -98,6 +118,8 @@ lexer_state_t handle_keyword_end(lexer_t *lexer, char input);
|
|||||||
bool validate_json(char *json) {
|
bool validate_json(char *json) {
|
||||||
lexer_t lexer = {0};
|
lexer_t lexer = {0};
|
||||||
lexer.current = LEXER_STATE_START;
|
lexer.current = LEXER_STATE_START;
|
||||||
|
lexer.keyword.type = LEXER_STRING_KEYWORD;
|
||||||
|
lexer.codepoint.type = LEXER_STRING_UNICODE;
|
||||||
|
|
||||||
for (char *c = json; *c != '\0'; ++c) {
|
for (char *c = json; *c != '\0'; ++c) {
|
||||||
// printf("\nINPUT=>%s\n", c);
|
// printf("\nINPUT=>%s\n", c);
|
||||||
@ -130,17 +152,51 @@ lexer_state_t stack_pop(state_stack_t *stack) {
|
|||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
void append_to_keyword(keyword_t *kw, char input) {
|
void append_to_string(lexer_string_t *lex_str, char input) {
|
||||||
if (kw->size + 1 > MAX_KEYWORD_LENGTH) {
|
u64 capacity = 0;
|
||||||
|
char *str = NULL;
|
||||||
|
|
||||||
|
switch (lex_str->type) {
|
||||||
|
case LEXER_STRING_KEYWORD:
|
||||||
|
capacity = MAX_KEYWORD_LENGTH;
|
||||||
|
str = lex_str->keyword.str;
|
||||||
|
|
||||||
|
break;
|
||||||
|
case LEXER_STRING_UNICODE:
|
||||||
|
capacity = UNICODE_LENGTH;
|
||||||
|
str = lex_str->unicode.codepoint;
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lex_str->size + 1 > capacity) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
kw->str[(kw->size)++] = input;
|
str[(lex_str->size)++] = input;
|
||||||
}
|
}
|
||||||
|
|
||||||
void clear_keyword(keyword_t *kw) {
|
void clear_string(lexer_string_t *lex_str) {
|
||||||
memset(kw->str, 0, MAX_KEYWORD_LENGTH + 1);
|
u64 capacity = 1;
|
||||||
kw->size = 0;
|
char *str = NULL;
|
||||||
|
|
||||||
|
switch (lex_str->type) {
|
||||||
|
case LEXER_STRING_KEYWORD:
|
||||||
|
capacity += MAX_KEYWORD_LENGTH;
|
||||||
|
str = lex_str->keyword.str;
|
||||||
|
|
||||||
|
break;
|
||||||
|
case LEXER_STRING_UNICODE:
|
||||||
|
capacity += UNICODE_LENGTH;
|
||||||
|
str = lex_str->unicode.codepoint;
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(str != NULL);
|
||||||
|
|
||||||
|
memset(str, 0, capacity);
|
||||||
|
lex_str->size = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool strequal(const char *first, const char *second) {
|
bool strequal(const char *first, const char *second) {
|
||||||
@ -327,7 +383,7 @@ lexer_state_t handle_value(lexer_t *lexer, char input) {
|
|||||||
case 't':
|
case 't':
|
||||||
case 'f':
|
case 'f':
|
||||||
case 'n':
|
case 'n':
|
||||||
append_to_keyword(&(lexer->keyword), input);
|
append_to_string(&(lexer->keyword), input);
|
||||||
|
|
||||||
return handle_keyword(input);
|
return handle_keyword(input);
|
||||||
}
|
}
|
||||||
@ -489,9 +545,9 @@ lexer_state_t handle_keyword(char input) {
|
|||||||
|
|
||||||
lexer_state_t handle_true(lexer_t *lexer, char input) {
|
lexer_state_t handle_true(lexer_t *lexer, char input) {
|
||||||
char current[MAX_KEYWORD_LENGTH + 1];
|
char current[MAX_KEYWORD_LENGTH + 1];
|
||||||
strcpy(current, lexer->keyword.str);
|
strcpy(current, lexer->keyword.keyword.str);
|
||||||
|
|
||||||
append_to_keyword(&(lexer->keyword), input);
|
append_to_string(&(lexer->keyword), input);
|
||||||
|
|
||||||
bool return_state_true = (strequal(current, "t") && input == 'r') ||
|
bool return_state_true = (strequal(current, "t") && input == 'r') ||
|
||||||
(strequal(current, "tr") && input == 'u');
|
(strequal(current, "tr") && input == 'u');
|
||||||
@ -509,9 +565,9 @@ lexer_state_t handle_true(lexer_t *lexer, char input) {
|
|||||||
|
|
||||||
lexer_state_t handle_false(lexer_t *lexer, char input) {
|
lexer_state_t handle_false(lexer_t *lexer, char input) {
|
||||||
char current[MAX_KEYWORD_LENGTH + 1];
|
char current[MAX_KEYWORD_LENGTH + 1];
|
||||||
strcpy(current, lexer->keyword.str);
|
strcpy(current, lexer->keyword.keyword.str);
|
||||||
|
|
||||||
append_to_keyword(&(lexer->keyword), input);
|
append_to_string(&(lexer->keyword), input);
|
||||||
|
|
||||||
bool return_state_false = (strequal(current, "f") && input == 'a') ||
|
bool return_state_false = (strequal(current, "f") && input == 'a') ||
|
||||||
(strequal(current, "fa") && input == 'l') ||
|
(strequal(current, "fa") && input == 'l') ||
|
||||||
@ -530,9 +586,9 @@ lexer_state_t handle_false(lexer_t *lexer, char input) {
|
|||||||
|
|
||||||
lexer_state_t handle_null(lexer_t *lexer, char input) {
|
lexer_state_t handle_null(lexer_t *lexer, char input) {
|
||||||
char current[MAX_KEYWORD_LENGTH + 1];
|
char current[MAX_KEYWORD_LENGTH + 1];
|
||||||
strcpy(current, lexer->keyword.str);
|
strcpy(current, lexer->keyword.keyword.str);
|
||||||
|
|
||||||
append_to_keyword(&(lexer->keyword), input);
|
append_to_string(&(lexer->keyword), input);
|
||||||
|
|
||||||
bool return_state_null = (strequal(current, "n") && input == 'u') ||
|
bool return_state_null = (strequal(current, "n") && input == 'u') ||
|
||||||
(strequal(current, "nu") && input == 'l');
|
(strequal(current, "nu") && input == 'l');
|
||||||
@ -549,7 +605,7 @@ lexer_state_t handle_null(lexer_t *lexer, char input) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
lexer_state_t handle_keyword_end(lexer_t *lexer, char input) {
|
lexer_state_t handle_keyword_end(lexer_t *lexer, char input) {
|
||||||
clear_keyword(&(lexer->keyword));
|
clear_string(&(lexer->keyword));
|
||||||
|
|
||||||
if (isspace(input)) {
|
if (isspace(input)) {
|
||||||
return LEXER_STATE_KEYWORD_END;
|
return LEXER_STATE_KEYWORD_END;
|
||||||
|
Loading…
Reference in New Issue
Block a user