Handle objects, arrays and strings
This commit is contained in:
@@ -1,21 +1,29 @@
|
||||
#include "lexer_states.h"
|
||||
#include "aliases.h"
|
||||
#include <ctype.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#define MAX_KEYWORD_LENGTH 5
|
||||
#define MAX_STACK_CAPACITY 1024
|
||||
|
||||
// clang-format off
|
||||
typedef enum {
|
||||
// GENERAL STATES
|
||||
LEXER_STATE_START,
|
||||
LEXER_STATE_SUCCESS,
|
||||
LEXER_STATE_ERROR,
|
||||
LEXER_STATE_VALUE,
|
||||
LEXER_STATE_KEYWORD,
|
||||
// COLLECTION STATES
|
||||
LEXER_STATE_OBJECT_START,
|
||||
LEXER_STATE_OBJECT,
|
||||
LEXER_STATE_OBJECT_END,
|
||||
LEXER_STATE_ARRAY_START,
|
||||
LEXER_STATE_ARRAY,
|
||||
LEXER_STATE_ARRAY_END,
|
||||
LEXER_STATE_LAST_COLLECTION,
|
||||
// OBJECT STATES
|
||||
LEXER_STATE_KEY_START,
|
||||
LEXER_STATE_KEY_END,
|
||||
LEXER_STATE_KEY,
|
||||
// NUMBER STATES
|
||||
LEXER_STATE_NUMBER_START,
|
||||
LEXER_STATE_DECIMAL,
|
||||
@@ -27,6 +35,7 @@ typedef enum {
|
||||
LEXER_STATE_NUMBER_END,
|
||||
// STRING STATES
|
||||
LEXER_STATE_STRING,
|
||||
LEXER_STATE_STRING_END,
|
||||
LEXER_STATE_ESCAPE_SEQUENCE,
|
||||
LEXER_STATE_UNICODE_HEX,
|
||||
|
||||
@@ -34,41 +43,106 @@ typedef enum {
|
||||
} lexer_state_t;
|
||||
// clang-format on
|
||||
|
||||
typedef struct {
|
||||
lexer_state_t stack[MAX_STACK_CAPACITY];
|
||||
u64 size;
|
||||
} state_stack_t;
|
||||
|
||||
struct lexer {
|
||||
lexer_state_t current;
|
||||
state_stack_t stack;
|
||||
u64 line;
|
||||
u64 column;
|
||||
char current_keyword[MAX_KEYWORD_LENGTH + 1];
|
||||
};
|
||||
|
||||
void stack_push(state_stack_t *stack, lexer_state_t value);
|
||||
lexer_state_t stack_pop(state_stack_t *stack);
|
||||
|
||||
void lexer_state_machine(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_lexer_start(char input);
|
||||
lexer_state_t handle_object(char input);
|
||||
lexer_state_t handle_character(char input);
|
||||
lexer_state_t handle_last_collection(char input);
|
||||
lexer_state_t handle_collection_end(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_object(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_array(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_key(lexer_t *lexer);
|
||||
lexer_state_t handle_value(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_string(char input);
|
||||
lexer_state_t handle_string_end(lexer_t *lexer, char input);
|
||||
lexer_state_t handle_escape_sequence(char input);
|
||||
|
||||
bool validate_json(char *json) {
|
||||
lexer_t lexer = {0};
|
||||
lexer.current = LEXER_STATE_START;
|
||||
|
||||
for (char *c = json; *c != '\0'; ++c) {
|
||||
// printf("\nINPUT=>%s\n", c);
|
||||
// printf("STACK SIZE: %zu\n", lexer.stack.size);
|
||||
lexer_state_machine(&lexer, *c);
|
||||
|
||||
if (lexer.current == LEXER_STATE_ERROR) {
|
||||
return INVALID_JSON;
|
||||
}
|
||||
}
|
||||
|
||||
return lexer.current == LEXER_STATE_LAST_COLLECTION || lexer.stack.size == 0;
|
||||
}
|
||||
|
||||
// Pushes `state` on top of `stack`.
//
// When the stack already holds MAX_STACK_CAPACITY entries the push is dropped
// silently, because the void return type gives no way to report it.
void stack_push(state_stack_t *stack, lexer_state_t state) {
  // Compare against the capacity itself: the previous `size + 1 >= capacity`
  // test rejected the push that would have filled the final slot.
  if (stack->size >= MAX_STACK_CAPACITY) {
    return;
  }

  stack->stack[stack->size++] = state;
}
|
||||
|
||||
// Removes and returns the top entry of `stack`.
// Returns LEXER_STATE_ERROR, leaving the stack untouched, when it is empty.
lexer_state_t stack_pop(state_stack_t *stack) {
  if (stack->size > 0) {
    stack->size -= 1;

    return stack->stack[stack->size];
  }

  return LEXER_STATE_ERROR;
}
|
||||
|
||||
void lexer_state_machine(lexer_t *lexer, char input) {
|
||||
switch (lexer->current) {
|
||||
case LEXER_STATE_START:
|
||||
lexer->current = handle_lexer_start(input);
|
||||
break;
|
||||
case LEXER_STATE_SUCCESS:
|
||||
break;
|
||||
case LEXER_STATE_ERROR:
|
||||
break;
|
||||
case LEXER_STATE_VALUE:
|
||||
lexer->current = handle_value(lexer, input);
|
||||
break;
|
||||
case LEXER_STATE_KEYWORD:
|
||||
break;
|
||||
case LEXER_STATE_OBJECT_START:
|
||||
stack_push(&(lexer->stack), LEXER_STATE_OBJECT);
|
||||
// break is intentionally omitted here to utilise the fallthrough behaviour of
|
||||
// the switch statement
|
||||
case LEXER_STATE_OBJECT:
|
||||
lexer->current = handle_object(input);
|
||||
lexer->current = handle_object(lexer, input);
|
||||
break;
|
||||
case LEXER_STATE_ARRAY_START:
|
||||
stack_push(&(lexer->stack), LEXER_STATE_ARRAY);
|
||||
// break is intentionally omitted here to utilise the fallthrough behaviour of
|
||||
// the switch statement
|
||||
case LEXER_STATE_ARRAY:
|
||||
lexer->current = LEXER_STATE_VALUE;
|
||||
lexer->current = handle_array(lexer, input);
|
||||
break;
|
||||
case LEXER_STATE_KEY_START:
|
||||
lexer->current = LEXER_STATE_STRING;
|
||||
case LEXER_STATE_OBJECT_END:
|
||||
case LEXER_STATE_ARRAY_END:
|
||||
if (lexer->stack.size > 1) {
|
||||
stack_pop(&(lexer->stack));
|
||||
|
||||
lexer->current = lexer->stack.stack[lexer->stack.size - 1];
|
||||
} else {
|
||||
lexer->current = LEXER_STATE_LAST_COLLECTION;
|
||||
}
|
||||
|
||||
break;
|
||||
case LEXER_STATE_KEY_END:
|
||||
case LEXER_STATE_KEY:
|
||||
lexer->current = handle_key(lexer);
|
||||
break;
|
||||
case LEXER_STATE_NUMBER_START:
|
||||
break;
|
||||
@@ -87,15 +161,23 @@ void lexer_state_machine(lexer_t *lexer, char input) {
|
||||
case LEXER_STATE_NUMBER_END:
|
||||
break;
|
||||
case LEXER_STATE_STRING:
|
||||
lexer->current = handle_character(input);
|
||||
lexer->current = handle_string(input);
|
||||
break;
|
||||
case LEXER_STATE_STRING_END:
|
||||
lexer->current = handle_string_end(lexer, input);
|
||||
break;
|
||||
case LEXER_STATE_ESCAPE_SEQUENCE:
|
||||
lexer->current = handle_escape_sequence(input);
|
||||
break;
|
||||
case LEXER_STATE_UNICODE_HEX:
|
||||
break;
|
||||
default:
|
||||
case LEXER_STATE_LAST_COLLECTION:
|
||||
lexer->current = handle_last_collection(input);
|
||||
break;
|
||||
case LEXER_STATE_ERROR:
|
||||
case COUNT_LEXER_STATES:
|
||||
lexer->current = LEXER_STATE_ERROR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -106,35 +188,133 @@ lexer_state_t handle_lexer_start(char input) {
|
||||
|
||||
switch (input) {
|
||||
case '{':
|
||||
return LEXER_STATE_OBJECT;
|
||||
return LEXER_STATE_OBJECT_START;
|
||||
case '[':
|
||||
return LEXER_STATE_ARRAY;
|
||||
return LEXER_STATE_ARRAY_START;
|
||||
}
|
||||
|
||||
return LEXER_STATE_ERROR;
|
||||
}
|
||||
|
||||
lexer_state_t handle_object(char input) {
|
||||
lexer_state_t handle_last_collection(char input) {
|
||||
if (isspace(input)) {
|
||||
return LEXER_STATE_LAST_COLLECTION;
|
||||
}
|
||||
|
||||
return LEXER_STATE_ERROR;
|
||||
}
|
||||
|
||||
// Decides whether a closing bracket matches the collection on top of the
// state stack.
//
// lexer: lexer whose stack top is inspected; lexer->current is overwritten
//        with that top entry.
// input: the closing character; the visible callers only pass '}' or ']', so
//        whitespace is not handled here.
//
// Returns LEXER_STATE_OBJECT_END for '}' on top of LEXER_STATE_OBJECT,
// LEXER_STATE_ARRAY_END for ']' on top of LEXER_STATE_ARRAY, and
// LEXER_STATE_ERROR otherwise, including when the stack is empty.
lexer_state_t handle_collection_end(lexer_t *lexer, char input) {
  // size is unsigned: `size - 1` on an empty stack would wrap around and read
  // far outside the array.
  if (lexer->stack.size == 0) {
    return LEXER_STATE_ERROR;
  }

  lexer->current = lexer->stack.stack[lexer->stack.size - 1];

  if (lexer->current == LEXER_STATE_OBJECT && input == '}') {
    return LEXER_STATE_OBJECT_END;
  }

  if (lexer->current == LEXER_STATE_ARRAY && input == ']') {
    return LEXER_STATE_ARRAY_END;
  }

  return LEXER_STATE_ERROR;
}
|
||||
|
||||
// Handler for a character read while inside an object.
//
// - whitespace: stays in LEXER_STATE_OBJECT
// - '"': pushes LEXER_STATE_KEY on the state stack and returns LEXER_STATE_KEY
// - '}': defers to handle_collection_end
// - anything else: LEXER_STATE_ERROR
lexer_state_t handle_object(lexer_t *lexer, char input) {
  // <ctype.h> functions require a value representable as unsigned char (or
  // EOF); a plain char may be negative for bytes above 0x7F.
  if (isspace((unsigned char)input)) {
    return LEXER_STATE_OBJECT;
  }

  switch (input) {
  case '"':
    // The stale `return LEXER_STATE_KEY_START;` that preceded this push made
    // it unreachable; handle_string_end relies on finding LEXER_STATE_KEY on
    // the stack when the key's closing quote has been read.
    stack_push(&(lexer->stack), LEXER_STATE_KEY);

    return LEXER_STATE_KEY;
  case '}':
    return handle_collection_end(lexer, input);
  default:
    return LEXER_STATE_ERROR;
  }
}
|
||||
|
||||
lexer_state_t handle_character(char input) {
|
||||
lexer_state_t handle_array(lexer_t *lexer, char input) {
|
||||
if (isspace(input)) {
|
||||
return LEXER_STATE_ARRAY;
|
||||
} else if (input == '"') {
|
||||
stack_push(&(lexer->stack), LEXER_STATE_VALUE);
|
||||
|
||||
return LEXER_STATE_STRING;
|
||||
} else if (input == ']') {
|
||||
return handle_collection_end(lexer, input);
|
||||
}
|
||||
|
||||
return LEXER_STATE_VALUE;
|
||||
}
|
||||
|
||||
// Handler for LEXER_STATE_KEY_END and LEXER_STATE_KEY: always moves the
// machine to LEXER_STATE_STRING. The lexer argument is not used.
lexer_state_t handle_key(lexer_t *lexer) {
  (void)lexer;

  return LEXER_STATE_STRING;
}
|
||||
|
||||
// Handler for LEXER_STATE_VALUE: picks the next state from the first
// non-whitespace character of a value.
//
// - whitespace: stays in LEXER_STATE_VALUE
// - '"': pushes LEXER_STATE_VALUE on the state stack, returns
//   LEXER_STATE_STRING
// - '0': LEXER_STATE_DECIMAL
// - '{': LEXER_STATE_OBJECT_START
// - '[': LEXER_STATE_ARRAY_START
// - 't', 'f', 'n': LEXER_STATE_KEYWORD
// - anything else: LEXER_STATE_ERROR
lexer_state_t handle_value(lexer_t *lexer, char input) {
  // <ctype.h> functions require a value representable as unsigned char (or
  // EOF); a plain char may be negative for bytes above 0x7F.
  if (isspace((unsigned char)input)) {
    return LEXER_STATE_VALUE;
  }

  switch (input) {
  case '"':
    stack_push(&(lexer->stack), LEXER_STATE_VALUE);

    return LEXER_STATE_STRING;
  case '0':
    return LEXER_STATE_DECIMAL;
  case '{':
    return LEXER_STATE_OBJECT_START;
  case '[':
    return LEXER_STATE_ARRAY_START;
  case 't':
  case 'f':
  case 'n':
    return LEXER_STATE_KEYWORD;
  default:
    return LEXER_STATE_ERROR;
  }
}
|
||||
|
||||
lexer_state_t handle_string(char input) {
|
||||
switch (input) {
|
||||
case '\\':
|
||||
return LEXER_STATE_ESCAPE_SEQUENCE;
|
||||
case '"':
|
||||
return LEXER_STATE_STRING; // WHAT TO RETURN HERE?
|
||||
return LEXER_STATE_STRING_END;
|
||||
}
|
||||
|
||||
return LEXER_STATE_STRING;
|
||||
}
|
||||
|
||||
// Handler for the first character after a string's closing quote.
//
// Whitespace keeps the machine in LEXER_STATE_STRING_END. Any other character
// pops the state stack into lexer->current and then:
// - popped LEXER_STATE_KEY and input ':'   -> LEXER_STATE_VALUE
// - popped LEXER_STATE_VALUE and input ',' -> the state now on top of the
//   stack
// - input '}' or ']'                       -> result of handle_collection_end
// - anything else, or an empty stack where its top is needed
//                                          -> LEXER_STATE_ERROR
lexer_state_t handle_string_end(lexer_t *lexer, char input) {
  // <ctype.h> functions require a value representable as unsigned char (or
  // EOF); a plain char may be negative for bytes above 0x7F.
  if (isspace((unsigned char)input)) {
    return LEXER_STATE_STRING_END;
  }

  lexer->current = stack_pop(&(lexer->stack));

  if (lexer->current == LEXER_STATE_KEY && input == ':') {
    return LEXER_STATE_VALUE;
  }

  // Every remaining success path reads the entry on top of the stack. size is
  // unsigned, so `size - 1` on an empty stack would wrap around and index far
  // outside the array.
  if (lexer->stack.size == 0) {
    return LEXER_STATE_ERROR;
  }

  if (lexer->current == LEXER_STATE_VALUE && input == ',') {
    return lexer->stack.stack[lexer->stack.size - 1];
  }

  if (input == '}' || input == ']') {
    return handle_collection_end(lexer, input);
  }

  return LEXER_STATE_ERROR;
}
|
||||
|
||||
lexer_state_t handle_escape_sequence(char input) {
|
||||
switch (input) {
|
||||
case '"':
|
||||
|
31
src/main.c
31
src/main.c
@@ -1,3 +1,32 @@
|
||||
#include "aliases.h"
|
||||
#include "lexer_states.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Reads the file named by argv[1] into memory, runs validate_json on its
// contents and prints "VALID" or "INVALID".
//
// Returns EXIT_FAILURE when no file is given or when the file cannot be
// opened, measured, buffered or read. Returns EXIT_SUCCESS once a verdict has
// been printed, whichever verdict it is.
int main(int argc, char *argv[]) {
  if (argc < 2) {
    printf("NO FILE PROVIDED\n");

    return EXIT_FAILURE;
  }

  FILE *fp = fopen(argv[1], "r");
  if (fp == NULL) {
    perror(argv[1]);

    return EXIT_FAILURE;
  }

  int status = EXIT_FAILURE;
  char *json = NULL;
  long length = -1;
  size_t bytes_read = 0;

  // ftell reports failure as -1, which must not be used as a size.
  if (fseek(fp, 0, SEEK_END) == 0) {
    length = ftell(fp);
  }

  if (length < 0 || fseek(fp, 0, SEEK_SET) != 0) {
    perror(argv[1]);
    goto cleanup;
  }

  // Heap allocation instead of a file-sized VLA, which could overflow the
  // stack. calloc zero-fills, so the buffer stays NUL-terminated even when
  // fread delivers fewer bytes than `length`.
  json = calloc((size_t)length + 1, sizeof *json);
  if (json == NULL) {
    perror("calloc");
    goto cleanup;
  }

  bytes_read = fread(json, sizeof *json, (size_t)length, fp);
  if (ferror(fp)) {
    perror(argv[1]);
    goto cleanup;
  }
  json[bytes_read] = '\0';

  printf("\n%s\n", validate_json(json) ? "VALID" : "INVALID");
  status = EXIT_SUCCESS;

cleanup:
  free(json);
  fclose(fp);

  return status;
}
|
||||
|
Reference in New Issue
Block a user