Add keyword support
This commit is contained in:
parent
aff2735b9f
commit
ed9b5fb638
@ -95,12 +95,12 @@
|
|||||||
"-x",
|
"-x",
|
||||||
"c",
|
"c",
|
||||||
"-o",
|
"-o",
|
||||||
"/tmp/main-d954cc.o",
|
"/tmp/main-69d465.o",
|
||||||
"src/main.c"
|
"src/main.c"
|
||||||
],
|
],
|
||||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
||||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c",
|
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c",
|
||||||
"output": "/tmp/main-d954cc.o"
|
"output": "/tmp/main-69d465.o"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"arguments": [
|
"arguments": [
|
||||||
@ -162,11 +162,11 @@
|
|||||||
"-x",
|
"-x",
|
||||||
"c",
|
"c",
|
||||||
"-o",
|
"-o",
|
||||||
"/tmp/lexer_states-f7dff6.o",
|
"/tmp/lexer_states-ad0df4.o",
|
||||||
"src/lexer/lexer_states.c"
|
"src/lexer/lexer_states.c"
|
||||||
],
|
],
|
||||||
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
|
||||||
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer_states.c",
|
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer_states.c",
|
||||||
"output": "/tmp/lexer_states-f7dff6.o"
|
"output": "/tmp/lexer_states-ad0df4.o"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -3,17 +3,17 @@
|
|||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
#define MAX_KEYWORD_LENGTH 5
|
#define MAX_KEYWORD_LENGTH 5
|
||||||
#define MAX_STACK_CAPACITY 1024
|
#define MAX_STACK_CAPACITY 1024
|
||||||
|
|
||||||
// clang-format off
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
// GENERAL STATES
|
// GENERAL STATES
|
||||||
LEXER_STATE_START,
|
LEXER_STATE_START,
|
||||||
LEXER_STATE_ERROR,
|
LEXER_STATE_ERROR,
|
||||||
LEXER_STATE_VALUE,
|
LEXER_STATE_VALUE,
|
||||||
LEXER_STATE_KEYWORD,
|
|
||||||
// COLLECTION STATES
|
// COLLECTION STATES
|
||||||
LEXER_STATE_OBJECT_START,
|
LEXER_STATE_OBJECT_START,
|
||||||
LEXER_STATE_OBJECT,
|
LEXER_STATE_OBJECT,
|
||||||
@ -37,27 +37,40 @@ typedef enum {
|
|||||||
LEXER_STATE_STRING_END,
|
LEXER_STATE_STRING_END,
|
||||||
LEXER_STATE_ESCAPE_SEQUENCE,
|
LEXER_STATE_ESCAPE_SEQUENCE,
|
||||||
LEXER_STATE_UNICODE_HEX,
|
LEXER_STATE_UNICODE_HEX,
|
||||||
|
// KEYWORD STATES
|
||||||
|
LEXER_STATE_TRUE,
|
||||||
|
LEXER_STATE_FALSE,
|
||||||
|
LEXER_STATE_NULL,
|
||||||
|
LEXER_STATE_KEYWORD_END,
|
||||||
|
|
||||||
COUNT_LEXER_STATES,
|
COUNT_LEXER_STATES,
|
||||||
} lexer_state_t;
|
} lexer_state_t;
|
||||||
// clang-format on
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
lexer_state_t stack[MAX_STACK_CAPACITY];
|
lexer_state_t stack[MAX_STACK_CAPACITY];
|
||||||
u64 size;
|
u64 size;
|
||||||
} state_stack_t;
|
} state_stack_t;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
u64 size;
|
||||||
|
char str[MAX_KEYWORD_LENGTH + 1];
|
||||||
|
} keyword_t;
|
||||||
|
|
||||||
struct lexer {
|
struct lexer {
|
||||||
lexer_state_t current;
|
lexer_state_t current;
|
||||||
state_stack_t stack;
|
state_stack_t stack;
|
||||||
u64 line;
|
u64 line;
|
||||||
u64 column;
|
u64 column;
|
||||||
char current_keyword[MAX_KEYWORD_LENGTH + 1];
|
keyword_t keyword;
|
||||||
};
|
};
|
||||||
|
|
||||||
void stack_push(state_stack_t *stack, lexer_state_t value);
|
void stack_push(state_stack_t *stack, lexer_state_t value);
|
||||||
lexer_state_t stack_pop(state_stack_t *stack);
|
lexer_state_t stack_pop(state_stack_t *stack);
|
||||||
|
|
||||||
|
void append_to_keyword(keyword_t *kw, char input);
|
||||||
|
void clear_keyword(keyword_t *kw);
|
||||||
|
bool strequal(const char *first, const char *second);
|
||||||
|
|
||||||
void lexer_state_machine(lexer_t *lexer, char input);
|
void lexer_state_machine(lexer_t *lexer, char input);
|
||||||
lexer_state_t handle_lexer_start(char input);
|
lexer_state_t handle_lexer_start(char input);
|
||||||
lexer_state_t handle_last_collection(char input);
|
lexer_state_t handle_last_collection(char input);
|
||||||
@ -76,6 +89,11 @@ lexer_state_t handle_exponent(char input);
|
|||||||
lexer_state_t handle_exp_sign(char input);
|
lexer_state_t handle_exp_sign(char input);
|
||||||
lexer_state_t handle_power(lexer_t *lexer, char input);
|
lexer_state_t handle_power(lexer_t *lexer, char input);
|
||||||
lexer_state_t handle_number_end(lexer_t *lexer, char input);
|
lexer_state_t handle_number_end(lexer_t *lexer, char input);
|
||||||
|
lexer_state_t handle_keyword(char input);
|
||||||
|
lexer_state_t handle_true(lexer_t *lexer, char input);
|
||||||
|
lexer_state_t handle_false(lexer_t *lexer, char input);
|
||||||
|
lexer_state_t handle_null(lexer_t *lexer, char input);
|
||||||
|
lexer_state_t handle_keyword_end(lexer_t *lexer, char input);
|
||||||
|
|
||||||
bool validate_json(char *json) {
|
bool validate_json(char *json) {
|
||||||
lexer_t lexer = {0};
|
lexer_t lexer = {0};
|
||||||
@ -112,6 +130,23 @@ lexer_state_t stack_pop(state_stack_t *stack) {
|
|||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Appends `input` to the keyword buffer.
//
// kw:    keyword accumulator to extend; must not be NULL.
// input: character to store.
//
// When the buffer already holds MAX_KEYWORD_LENGTH characters the call is a
// no-op, so the extra character is silently dropped.
void append_to_keyword(keyword_t *kw, char input) {
  if (kw->size >= MAX_KEYWORD_LENGTH) {
    return;
  }

  kw->str[kw->size++] = input;
  // Keep the buffer a valid C string on its own instead of relying on the
  // storage having been zeroed beforehand; size <= MAX_KEYWORD_LENGTH here,
  // so the terminator always fits in the MAX_KEYWORD_LENGTH + 1 array.
  kw->str[kw->size] = '\0';
}
|
||||||
|
|
||||||
|
// Resets the keyword accumulator: zeroes the whole character buffer and sets
// the stored length back to 0.
//
// kw: keyword accumulator to reset; must not be NULL.
void clear_keyword(keyword_t *kw) {
  // sizeof ties the length to the array itself, so it cannot drift from the
  // declaration of keyword_t.str.
  memset(kw->str, 0, sizeof kw->str);
  kw->size = 0;
}
|
||||||
|
|
||||||
|
// Returns true when both arguments are NUL-terminated strings with identical
// contents.
//
// A NULL argument never compares equal to anything (including another NULL);
// this avoids handing a null pointer to strcmp, which is undefined behaviour.
bool strequal(const char *first, const char *second) {
  if (first == NULL || second == NULL) {
    return false;
  }

  return strcmp(first, second) == 0;
}
|
||||||
|
|
||||||
void lexer_state_machine(lexer_t *lexer, char input) {
|
void lexer_state_machine(lexer_t *lexer, char input) {
|
||||||
switch (lexer->current) {
|
switch (lexer->current) {
|
||||||
case LEXER_STATE_START:
|
case LEXER_STATE_START:
|
||||||
@ -120,19 +155,17 @@ void lexer_state_machine(lexer_t *lexer, char input) {
|
|||||||
case LEXER_STATE_VALUE:
|
case LEXER_STATE_VALUE:
|
||||||
lexer->current = handle_value(lexer, input);
|
lexer->current = handle_value(lexer, input);
|
||||||
break;
|
break;
|
||||||
case LEXER_STATE_KEYWORD:
|
|
||||||
break;
|
|
||||||
case LEXER_STATE_OBJECT_START:
|
case LEXER_STATE_OBJECT_START:
|
||||||
stack_push(&(lexer->stack), LEXER_STATE_OBJECT);
|
stack_push(&(lexer->stack), LEXER_STATE_OBJECT);
|
||||||
// break is left intentionally here to utilise the fallthrough behaviour of
|
// break is left out intentionally here to utilise the fallthrough behaviour
|
||||||
// the switch statement
|
// of the switch statement
|
||||||
case LEXER_STATE_OBJECT:
|
case LEXER_STATE_OBJECT:
|
||||||
lexer->current = handle_object(lexer, input);
|
lexer->current = handle_object(lexer, input);
|
||||||
break;
|
break;
|
||||||
case LEXER_STATE_ARRAY_START:
|
case LEXER_STATE_ARRAY_START:
|
||||||
stack_push(&(lexer->stack), LEXER_STATE_ARRAY);
|
stack_push(&(lexer->stack), LEXER_STATE_ARRAY);
|
||||||
// break is left intentionally here to utilise the fallthrough behaviour of
|
// break is left out intentionally here to utilise the fallthrough behaviour
|
||||||
// the switch statement
|
// of the switch statement
|
||||||
case LEXER_STATE_ARRAY:
|
case LEXER_STATE_ARRAY:
|
||||||
lexer->current = handle_array(lexer, input);
|
lexer->current = handle_array(lexer, input);
|
||||||
break;
|
break;
|
||||||
@ -182,6 +215,18 @@ void lexer_state_machine(lexer_t *lexer, char input) {
|
|||||||
break;
|
break;
|
||||||
case LEXER_STATE_UNICODE_HEX:
|
case LEXER_STATE_UNICODE_HEX:
|
||||||
break;
|
break;
|
||||||
|
case LEXER_STATE_TRUE:
|
||||||
|
lexer->current = handle_true(lexer, input);
|
||||||
|
break;
|
||||||
|
case LEXER_STATE_FALSE:
|
||||||
|
lexer->current = handle_false(lexer, input);
|
||||||
|
break;
|
||||||
|
case LEXER_STATE_NULL:
|
||||||
|
lexer->current = handle_null(lexer, input);
|
||||||
|
break;
|
||||||
|
case LEXER_STATE_KEYWORD_END:
|
||||||
|
lexer->current = handle_keyword_end(lexer, input);
|
||||||
|
break;
|
||||||
case LEXER_STATE_LAST_COLLECTION:
|
case LEXER_STATE_LAST_COLLECTION:
|
||||||
lexer->current = handle_last_collection(input);
|
lexer->current = handle_last_collection(input);
|
||||||
break;
|
break;
|
||||||
@ -282,7 +327,9 @@ lexer_state_t handle_value(lexer_t *lexer, char input) {
|
|||||||
case 't':
|
case 't':
|
||||||
case 'f':
|
case 'f':
|
||||||
case 'n':
|
case 'n':
|
||||||
return LEXER_STATE_KEYWORD;
|
append_to_keyword(&(lexer->keyword), input);
|
||||||
|
|
||||||
|
return handle_keyword(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
return LEXER_STATE_ERROR;
|
return LEXER_STATE_ERROR;
|
||||||
@ -426,3 +473,92 @@ lexer_state_t handle_number_end(lexer_t *lexer, char input) {
|
|||||||
return collection_end ? handle_collection_end(lexer, input)
|
return collection_end ? handle_collection_end(lexer, input)
|
||||||
: LEXER_STATE_ERROR;
|
: LEXER_STATE_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
lexer_state_t handle_keyword(char input) {
|
||||||
|
switch (input) {
|
||||||
|
case 't':
|
||||||
|
return LEXER_STATE_TRUE;
|
||||||
|
case 'f':
|
||||||
|
return LEXER_STATE_FALSE;
|
||||||
|
case 'n':
|
||||||
|
return LEXER_STATE_NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
return LEXER_STATE_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shared step function for the keyword states.
//
// lexer:       lexer whose keyword buffer holds the characters seen so far.
// input:       next character of the input.
// literal:     full keyword being matched ("true", "false" or "null").
// in_progress: state to stay in while the literal is still incomplete.
//
// The decision is taken against the buffer contents *before* `input` is
// appended: the buffer must be a non-empty proper prefix of `literal` and
// `input` must be the character that follows that prefix. `input` is appended
// to the buffer in every case, including on a mismatch.
//
// Returns `in_progress` while more characters are expected,
// LEXER_STATE_KEYWORD_END when `input` completes the literal, and
// LEXER_STATE_ERROR otherwise.
static lexer_state_t advance_keyword(lexer_t *lexer, char input,
                                     const char *literal,
                                     lexer_state_t in_progress) {
  const char *seen = lexer->keyword.str;
  size_t seen_len = strlen(seen);
  size_t literal_len = strlen(literal);

  // Evaluated before the append below, because `seen` aliases the live
  // buffer; this replaces the former snapshot copy into a stack array.
  bool matches = seen_len >= 1 && seen_len < literal_len &&
                 strncmp(seen, literal, seen_len) == 0 &&
                 input == literal[seen_len];

  append_to_keyword(&(lexer->keyword), input);

  if (!matches) {
    return LEXER_STATE_ERROR;
  }

  return (seen_len + 1 == literal_len) ? LEXER_STATE_KEYWORD_END : in_progress;
}

// Consumes one character while lexing the literal `true`.
// Returns LEXER_STATE_TRUE while the literal is incomplete,
// LEXER_STATE_KEYWORD_END on the final 'e', LEXER_STATE_ERROR on a mismatch.
lexer_state_t handle_true(lexer_t *lexer, char input) {
  return advance_keyword(lexer, input, "true", LEXER_STATE_TRUE);
}

// Consumes one character while lexing the literal `false`.
// Returns LEXER_STATE_FALSE while the literal is incomplete,
// LEXER_STATE_KEYWORD_END on the final 'e', LEXER_STATE_ERROR on a mismatch.
lexer_state_t handle_false(lexer_t *lexer, char input) {
  return advance_keyword(lexer, input, "false", LEXER_STATE_FALSE);
}

// Consumes one character while lexing the literal `null`.
// Returns LEXER_STATE_NULL while the literal is incomplete,
// LEXER_STATE_KEYWORD_END on the final 'l', LEXER_STATE_ERROR on a mismatch.
lexer_state_t handle_null(lexer_t *lexer, char input) {
  return advance_keyword(lexer, input, "null", LEXER_STATE_NULL);
}
|
||||||
|
|
||||||
|
// Handles the character that follows a completed keyword literal.
//
// lexer: lexer state; its keyword buffer is cleared on every call.
// input: character following the keyword.
//
// Returns:
//   - LEXER_STATE_KEYWORD_END for whitespace (keeps skipping it);
//   - the state on top of the collection stack for ',';
//   - the result of handle_collection_end() for '}' or ']';
//   - LEXER_STATE_ERROR for anything else, or for ',' when the stack is empty.
lexer_state_t handle_keyword_end(lexer_t *lexer, char input) {
  clear_keyword(&(lexer->keyword));

  // <ctype.h> functions require a value representable as unsigned char (or
  // EOF); a plain char may be negative, which would be undefined behaviour.
  if (isspace((unsigned char)input)) {
    return LEXER_STATE_KEYWORD_END;
  }

  if (input == ',') {
    // Guard against indexing with size - 1 when nothing has been pushed:
    // size is unsigned, so the subtraction would wrap to a huge index.
    if (lexer->stack.size == 0) {
      return LEXER_STATE_ERROR;
    }

    return lexer->stack.stack[lexer->stack.size - 1];
  }

  if (input == '}' || input == ']') {
    return handle_collection_end(lexer, input);
  }

  return LEXER_STATE_ERROR;
}
|
||||||
|
Loading…
Reference in New Issue
Block a user