Add keyword support

This commit is contained in:
Abdelrahman Said 2023-06-17 22:10:48 +01:00
parent aff2735b9f
commit ed9b5fb638
2 changed files with 168 additions and 32 deletions

View File

@ -95,12 +95,12 @@
"-x", "-x",
"c", "c",
"-o", "-o",
"/tmp/main-d954cc.o", "/tmp/main-69d465.o",
"src/main.c" "src/main.c"
], ],
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c", "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/main.c",
"output": "/tmp/main-d954cc.o" "output": "/tmp/main-69d465.o"
}, },
{ {
"arguments": [ "arguments": [
@ -162,11 +162,11 @@
"-x", "-x",
"c", "c",
"-o", "-o",
"/tmp/lexer_states-f7dff6.o", "/tmp/lexer_states-ad0df4.o",
"src/lexer/lexer_states.c" "src/lexer/lexer_states.c"
], ],
"directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json", "directory": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json",
"file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer_states.c", "file": "/mnt/3A5CDF785CDF2CFF/Users/abdoo/dev/say_it_in_json/src/lexer/lexer_states.c",
"output": "/tmp/lexer_states-f7dff6.o" "output": "/tmp/lexer_states-ad0df4.o"
} }
] ]

View File

@ -3,28 +3,28 @@
#include <ctype.h> #include <ctype.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_KEYWORD_LENGTH 5 #define MAX_KEYWORD_LENGTH 5
#define MAX_STACK_CAPACITY 1024 #define MAX_STACK_CAPACITY 1024
// clang-format off
typedef enum { typedef enum {
// GENERAL STATES // GENERAL STATES
LEXER_STATE_START, LEXER_STATE_START,
LEXER_STATE_ERROR, LEXER_STATE_ERROR,
LEXER_STATE_VALUE, LEXER_STATE_VALUE,
LEXER_STATE_KEYWORD, // COLLECTION STATES
// COLLECTION STATES LEXER_STATE_OBJECT_START,
LEXER_STATE_OBJECT_START, LEXER_STATE_OBJECT,
LEXER_STATE_OBJECT, LEXER_STATE_OBJECT_END,
LEXER_STATE_OBJECT_END, LEXER_STATE_ARRAY_START,
LEXER_STATE_ARRAY_START, LEXER_STATE_ARRAY,
LEXER_STATE_ARRAY, LEXER_STATE_ARRAY_END,
LEXER_STATE_ARRAY_END, LEXER_STATE_LAST_COLLECTION,
LEXER_STATE_LAST_COLLECTION, // OBJECT STATES
// OBJECT STATES LEXER_STATE_KEY,
LEXER_STATE_KEY, // NUMBER STATES
// NUMBER STATES
LEXER_STATE_DECIMAL, LEXER_STATE_DECIMAL,
LEXER_STATE_NUMBER, LEXER_STATE_NUMBER,
LEXER_STATE_FRACTION, LEXER_STATE_FRACTION,
@ -32,32 +32,45 @@ typedef enum {
LEXER_STATE_EXP_SIGN, LEXER_STATE_EXP_SIGN,
LEXER_STATE_POWER, LEXER_STATE_POWER,
LEXER_STATE_NUMBER_END, LEXER_STATE_NUMBER_END,
// STRING STATES // STRING STATES
LEXER_STATE_STRING, LEXER_STATE_STRING,
LEXER_STATE_STRING_END, LEXER_STATE_STRING_END,
LEXER_STATE_ESCAPE_SEQUENCE, LEXER_STATE_ESCAPE_SEQUENCE,
LEXER_STATE_UNICODE_HEX, LEXER_STATE_UNICODE_HEX,
// KEYWORD STATES
LEXER_STATE_TRUE,
LEXER_STATE_FALSE,
LEXER_STATE_NULL,
LEXER_STATE_KEYWORD_END,
COUNT_LEXER_STATES, COUNT_LEXER_STATES,
} lexer_state_t; } lexer_state_t;
// clang-format on
typedef struct { typedef struct {
lexer_state_t stack[MAX_STACK_CAPACITY]; lexer_state_t stack[MAX_STACK_CAPACITY];
u64 size; u64 size;
} state_stack_t; } state_stack_t;
// Accumulates the characters of the keyword (true / false / null) that the
// lexer is currently reading.
typedef struct {
// Number of characters currently stored in str
u64 size;
// Keyword characters; one byte larger than MAX_KEYWORD_LENGTH, which leaves
// room for a terminating NUL after the longest keyword
char str[MAX_KEYWORD_LENGTH + 1];
} keyword_t;
struct lexer { struct lexer {
lexer_state_t current; lexer_state_t current;
state_stack_t stack; state_stack_t stack;
u64 line; u64 line;
u64 column; u64 column;
char current_keyword[MAX_KEYWORD_LENGTH + 1]; keyword_t keyword;
}; };
void stack_push(state_stack_t *stack, lexer_state_t value); void stack_push(state_stack_t *stack, lexer_state_t value);
lexer_state_t stack_pop(state_stack_t *stack); lexer_state_t stack_pop(state_stack_t *stack);
// Appends input to kw; the character is silently dropped when kw already holds
// MAX_KEYWORD_LENGTH characters.
void append_to_keyword(keyword_t *kw, char input);
// Zeroes the keyword buffer and resets its size to 0.
void clear_keyword(keyword_t *kw);
// Returns true when strcmp reports the two NUL-terminated strings as equal.
bool strequal(const char *first, const char *second);
void lexer_state_machine(lexer_t *lexer, char input); void lexer_state_machine(lexer_t *lexer, char input);
lexer_state_t handle_lexer_start(char input); lexer_state_t handle_lexer_start(char input);
lexer_state_t handle_last_collection(char input); lexer_state_t handle_last_collection(char input);
@ -76,6 +89,11 @@ lexer_state_t handle_exponent(char input);
lexer_state_t handle_exp_sign(char input); lexer_state_t handle_exp_sign(char input);
lexer_state_t handle_power(lexer_t *lexer, char input); lexer_state_t handle_power(lexer_t *lexer, char input);
lexer_state_t handle_number_end(lexer_t *lexer, char input); lexer_state_t handle_number_end(lexer_t *lexer, char input);
// Maps the first character of a keyword ('t', 'f' or 'n') to the matching
// keyword state; any other character yields LEXER_STATE_ERROR.
lexer_state_t handle_keyword(char input);
// Consume the next character of "true", "false" and "null" respectively. Each
// stays in its own state while the keyword is incomplete, returns
// LEXER_STATE_KEYWORD_END on the final character and LEXER_STATE_ERROR on a
// mismatch.
lexer_state_t handle_true(lexer_t *lexer, char input);
lexer_state_t handle_false(lexer_t *lexer, char input);
lexer_state_t handle_null(lexer_t *lexer, char input);
// Handles the character that follows a completed keyword: whitespace, ',' or a
// closing '}' / ']'; anything else yields LEXER_STATE_ERROR.
lexer_state_t handle_keyword_end(lexer_t *lexer, char input);
bool validate_json(char *json) { bool validate_json(char *json) {
lexer_t lexer = {0}; lexer_t lexer = {0};
@ -112,6 +130,23 @@ lexer_state_t stack_pop(state_stack_t *stack) {
return state; return state;
} }
// Stores input as the next character of the keyword buffer.
//
// kw:    keyword buffer to extend
// input: character to append
//
// Nothing is written once the buffer already holds MAX_KEYWORD_LENGTH
// characters; the extra character is dropped without signalling an error.
void append_to_keyword(keyword_t *kw, char input) {
  if (kw->size + 1 <= MAX_KEYWORD_LENGTH) {
    kw->str[kw->size] = input;
    kw->size += 1;
  }
}
// Resets the keyword buffer to its empty state: the size goes back to 0 and
// every byte of the character array (terminator slot included) is zeroed.
void clear_keyword(keyword_t *kw) {
  kw->size = 0;
  memset(kw->str, 0, sizeof(kw->str));
}
// Reports whether two NUL-terminated strings have identical contents.
//
// first, second: strings to compare; both must be valid, non-NULL pointers
// returns:       true when strcmp finds no difference, false otherwise
bool strequal(const char *first, const char *second) {
  const int ordering = strcmp(first, second);

  return !ordering;
}
void lexer_state_machine(lexer_t *lexer, char input) { void lexer_state_machine(lexer_t *lexer, char input) {
switch (lexer->current) { switch (lexer->current) {
case LEXER_STATE_START: case LEXER_STATE_START:
@ -120,19 +155,17 @@ void lexer_state_machine(lexer_t *lexer, char input) {
case LEXER_STATE_VALUE: case LEXER_STATE_VALUE:
lexer->current = handle_value(lexer, input); lexer->current = handle_value(lexer, input);
break; break;
case LEXER_STATE_KEYWORD:
break;
case LEXER_STATE_OBJECT_START: case LEXER_STATE_OBJECT_START:
stack_push(&(lexer->stack), LEXER_STATE_OBJECT); stack_push(&(lexer->stack), LEXER_STATE_OBJECT);
// break is left intentionally here to utilise the fallthrough behaviour of // break is left out intentionally here to utilise the fallthrough behaviour
// the switch statement // of the switch statement
case LEXER_STATE_OBJECT: case LEXER_STATE_OBJECT:
lexer->current = handle_object(lexer, input); lexer->current = handle_object(lexer, input);
break; break;
case LEXER_STATE_ARRAY_START: case LEXER_STATE_ARRAY_START:
stack_push(&(lexer->stack), LEXER_STATE_ARRAY); stack_push(&(lexer->stack), LEXER_STATE_ARRAY);
// break is left intentionally here to utilise the fallthrough behaviour of // break is left out intentionally here to utilise the fallthrough behaviour
// the switch statement // of the switch statement
case LEXER_STATE_ARRAY: case LEXER_STATE_ARRAY:
lexer->current = handle_array(lexer, input); lexer->current = handle_array(lexer, input);
break; break;
@ -182,6 +215,18 @@ void lexer_state_machine(lexer_t *lexer, char input) {
break; break;
case LEXER_STATE_UNICODE_HEX: case LEXER_STATE_UNICODE_HEX:
break; break;
case LEXER_STATE_TRUE:
lexer->current = handle_true(lexer, input);
break;
case LEXER_STATE_FALSE:
lexer->current = handle_false(lexer, input);
break;
case LEXER_STATE_NULL:
lexer->current = handle_null(lexer, input);
break;
case LEXER_STATE_KEYWORD_END:
lexer->current = handle_keyword_end(lexer, input);
break;
case LEXER_STATE_LAST_COLLECTION: case LEXER_STATE_LAST_COLLECTION:
lexer->current = handle_last_collection(input); lexer->current = handle_last_collection(input);
break; break;
@ -282,7 +327,9 @@ lexer_state_t handle_value(lexer_t *lexer, char input) {
case 't': case 't':
case 'f': case 'f':
case 'n': case 'n':
return LEXER_STATE_KEYWORD; append_to_keyword(&(lexer->keyword), input);
return handle_keyword(input);
} }
return LEXER_STATE_ERROR; return LEXER_STATE_ERROR;
@ -426,3 +473,92 @@ lexer_state_t handle_number_end(lexer_t *lexer, char input) {
return collection_end ? handle_collection_end(lexer, input) return collection_end ? handle_collection_end(lexer, input)
: LEXER_STATE_ERROR; : LEXER_STATE_ERROR;
} }
// Selects the keyword state that corresponds to the first character of a
// keyword.
//
// input:   first character of the candidate keyword
// returns: LEXER_STATE_TRUE for 't', LEXER_STATE_FALSE for 'f',
//          LEXER_STATE_NULL for 'n' and LEXER_STATE_ERROR for anything else
lexer_state_t handle_keyword(char input) {
  if (input == 't') {
    return LEXER_STATE_TRUE;
  }

  if (input == 'f') {
    return LEXER_STATE_FALSE;
  }

  if (input == 'n') {
    return LEXER_STATE_NULL;
  }

  return LEXER_STATE_ERROR;
}
// Consumes one character while the lexer is reading the keyword "true".
//
// lexer:   lexer whose keyword buffer holds the characters read so far
// input:   next character of the input
// returns: LEXER_STATE_TRUE while the keyword is still incomplete,
//          LEXER_STATE_KEYWORD_END once the final 'e' has been read and
//          LEXER_STATE_ERROR when input does not continue "true"
lexer_state_t handle_true(lexer_t *lexer, char input) {
  // Snapshot the prefix before input is appended, so the checks below look at
  // what had been read prior to this character
  char seen[MAX_KEYWORD_LENGTH + 1];
  strcpy(seen, lexer->keyword.str);

  append_to_keyword(&(lexer->keyword), input);

  if (strequal(seen, "tru")) {
    return input == 'e' ? LEXER_STATE_KEYWORD_END : LEXER_STATE_ERROR;
  }

  if (strequal(seen, "t")) {
    return input == 'r' ? LEXER_STATE_TRUE : LEXER_STATE_ERROR;
  }

  if (strequal(seen, "tr")) {
    return input == 'u' ? LEXER_STATE_TRUE : LEXER_STATE_ERROR;
  }

  return LEXER_STATE_ERROR;
}
// Consumes one character while the lexer is reading the keyword "false".
//
// lexer:   lexer whose keyword buffer holds the characters read so far
// input:   next character of the input
// returns: LEXER_STATE_FALSE while the keyword is still incomplete,
//          LEXER_STATE_KEYWORD_END once the final 'e' has been read and
//          LEXER_STATE_ERROR when input does not continue "false"
lexer_state_t handle_false(lexer_t *lexer, char input) {
  // Snapshot the prefix before input is appended, so the checks below look at
  // what had been read prior to this character
  char seen[MAX_KEYWORD_LENGTH + 1];
  strcpy(seen, lexer->keyword.str);

  append_to_keyword(&(lexer->keyword), input);

  if (strequal(seen, "fals")) {
    return input == 'e' ? LEXER_STATE_KEYWORD_END : LEXER_STATE_ERROR;
  }

  if (strequal(seen, "f")) {
    return input == 'a' ? LEXER_STATE_FALSE : LEXER_STATE_ERROR;
  }

  if (strequal(seen, "fa")) {
    return input == 'l' ? LEXER_STATE_FALSE : LEXER_STATE_ERROR;
  }

  if (strequal(seen, "fal")) {
    return input == 's' ? LEXER_STATE_FALSE : LEXER_STATE_ERROR;
  }

  return LEXER_STATE_ERROR;
}
// Consumes one character while the lexer is reading the keyword "null".
//
// lexer:   lexer whose keyword buffer holds the characters read so far
// input:   next character of the input
// returns: LEXER_STATE_NULL while the keyword is still incomplete,
//          LEXER_STATE_KEYWORD_END once the second 'l' has been read and
//          LEXER_STATE_ERROR when input does not continue "null"
lexer_state_t handle_null(lexer_t *lexer, char input) {
  // Snapshot the prefix before input is appended, so the checks below look at
  // what had been read prior to this character
  char seen[MAX_KEYWORD_LENGTH + 1];
  strcpy(seen, lexer->keyword.str);

  append_to_keyword(&(lexer->keyword), input);

  if (strequal(seen, "nul")) {
    return input == 'l' ? LEXER_STATE_KEYWORD_END : LEXER_STATE_ERROR;
  }

  if (strequal(seen, "n")) {
    return input == 'u' ? LEXER_STATE_NULL : LEXER_STATE_ERROR;
  }

  if (strequal(seen, "nu")) {
    return input == 'l' ? LEXER_STATE_NULL : LEXER_STATE_ERROR;
  }

  return LEXER_STATE_ERROR;
}
// Handles the character that follows a fully matched keyword.
//
// lexer:   lexer state; its keyword buffer is cleared on every call
// input:   character read after the keyword
// returns: LEXER_STATE_KEYWORD_END for whitespace (keep waiting for a
//          delimiter), the state on top of the collection stack for ',',
//          the result of handle_collection_end for '}' or ']', and
//          LEXER_STATE_ERROR for any other character or when ',' arrives
//          while the collection stack is empty
lexer_state_t handle_keyword_end(lexer_t *lexer, char input) {
  clear_keyword(&(lexer->keyword));

  // <ctype.h> classifiers are only defined for values representable as
  // unsigned char (or EOF); a plain char holding a byte >= 0x80 may be
  // negative, so convert before classifying
  if (isspace((unsigned char)input)) {
    return LEXER_STATE_KEYWORD_END;
  }

  if (input == ',') {
    // With an empty stack, size - 1 would wrap around and index far outside
    // the array; there is no enclosing collection to return to, so reject
    if (lexer->stack.size == 0) {
      return LEXER_STATE_ERROR;
    }

    return lexer->stack.stack[lexer->stack.size - 1];
  }

  if (input == '}' || input == ']') {
    return handle_collection_end(lexer, input);
  }

  return LEXER_STATE_ERROR;
}