diff --git a/haversine_02/include/json/dstring.h b/haversine_02/include/json/dstring.h new file mode 100644 index 0000000..b966986 --- /dev/null +++ b/haversine_02/include/json/dstring.h @@ -0,0 +1,30 @@ +#ifndef DSTRING_H +#define DSTRING_H + +#include "aliases.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct dstring dstr_t; + +dstr_t *dstr_with_capacity(u64 capacity); +dstr_t *dstr_from_string(const char *str); +void dstr_update(dstr_t **dst, const char *src); +void dstr_free(dstr_t **str); +void dstr_concat(dstr_t **dst, const char *src); +void dstr_append(dstr_t **dst, char c); +void dstr_resize(dstr_t **str); +void dstr_clear(dstr_t *str); +void dstr_print(const dstr_t *str); +i64 dstr_find(const dstr_t *str, const char *substr); +u64 dstr_length(const dstr_t *str); +u64 dstr_capacity(const dstr_t *str); +const char *dstr_to_cstr(const dstr_t *str); + +#ifdef __cplusplus +} +#endif + +#endif // !DSTRING_H diff --git a/haversine_02/include/json/json_entities.h b/haversine_02/include/json/json_entities.h new file mode 100644 index 0000000..d34a8a7 --- /dev/null +++ b/haversine_02/include/json/json_entities.h @@ -0,0 +1,82 @@ +#ifndef JSON_ENTITIES_H +#define JSON_ENTITIES_H + +#include "aliases.h" +#include "dstring.h" +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct json_entity jentity_t; +typedef struct json_collection jcoll_t; +typedef struct json_value jval_t; +typedef struct json_pair jpair_t; + +typedef enum { + JVAL_EMPTY, + JVAL_COLLECTION, + JVAL_STRING, + JVAL_INTEGER, + JVAL_DOUBLE, + JVAL_BOOLEAN, + JVAL_NULL, +} jval_type; + +struct json_value { + jval_type type; + union { + void *null_val; + jcoll_t *collection; + dstr_t *string; + i64 num_int; + f64 num_dbl; + bool boolean; + }; +}; + +struct json_pair { + dstr_t *key; + jval_t value; +}; + +typedef enum { + JENTITY_SINGLE, + JENTITY_PAIR, +} jentity_type; + +struct json_entity { + jentity_type type; + union { + jval_t value; + jpair_t pair; + }; + jentity_t 
*parent; + jentity_t *next; +}; + +typedef enum { + JCOLL_OBJECT, + JCOLL_ARRAY, +} jcoll_type; + +struct json_collection { + u64 size; + jcoll_type type; + jentity_t *begin; + jentity_t *end; +}; + +void print_json(const jentity_t *entity, u32 indent); +void free_json(jentity_t **entity); +jcoll_t *get_collection_from_entity(const jentity_t *entity); +jentity_t *create_new_single_entity(const jval_t value, jentity_t *parent); +jentity_t *create_new_pair_entity(dstr_t *key, const jval_t value, + jentity_t *parent); + +#ifdef __cplusplus +} +#endif + +#endif // !JSON_ENTITIES_H diff --git a/haversine_02/include/json/lexer.h b/haversine_02/include/json/lexer.h new file mode 100644 index 0000000..7b3f384 --- /dev/null +++ b/haversine_02/include/json/lexer.h @@ -0,0 +1,72 @@ +#ifndef LEXER_STATES_H +#define LEXER_STATES_H + +#include "aliases.h" +#include + +#define VALID_JSON true +#define INVALID_JSON false + +#ifdef __cplusplus +extern "C" { +#endif + +typedef const char *str_view_t; + +typedef enum { + TK_NO_TOKEN, + TK_L_BRACE, + TK_R_BRACE, + TK_L_BRACKET, + TK_R_BRACKET, + TK_NULL, + TK_BOOL, + TK_STR_KEY, + TK_STR_VAL, + TK_INTEGER, + TK_DOUBLE, +} token_type; + +typedef union { + void *no_val; + i64 num_int; + f64 num_frac; + str_view_t string; + bool boolean; +} token_value_t; + +typedef struct { + u64 line; + u64 column; + token_type type; + token_value_t value; +} token_t; + +typedef enum { + LEX_ERR_NONE, + LEX_ERR_INVALID, +} lex_err_type; + +typedef struct { + lex_err_type errno; + str_view_t msg; +} lex_err_t; + +typedef struct { + lex_err_t error; + token_t token; +} lex_result_t; + +typedef struct lexer_s lexer_t; + +void lexer_init(lexer_t **lexer); +void lexer_free(lexer_t **lexer); +lex_result_t get_next_token(lexer_t *lexer, const char *text); + +void print_token(token_t token); + +#ifdef __cplusplus +} +#endif + +#endif // !LEXER_STATES_H diff --git a/haversine_02/include/json/parser.h b/haversine_02/include/json/parser.h new file mode 100644 
index 0000000..514ca81 --- /dev/null +++ b/haversine_02/include/json/parser.h @@ -0,0 +1,19 @@ +#ifndef PARSER_H +#define PARSER_H + +#include "json_entities.h" +#include "lexer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct parser_s parser_t; + +jentity_t *load_json(const char *filepath); + +#ifdef __cplusplus +} +#endif + +#endif // !PARSER_H diff --git a/haversine_02/src/json/dstring.c b/haversine_02/src/json/dstring.c new file mode 100644 index 0000000..56d402b --- /dev/null +++ b/haversine_02/src/json/dstring.c @@ -0,0 +1,211 @@ +#include "json/dstring.h" +#include "aliases.h" +#include +#include +#include + +// Use this scalar to allocate extra memory in order to avoid having to +// constantly reallocate +#define CAPACITY_SCALAR 8 + +struct dstring { + u64 capacity; + u64 size; + char buf[]; +}; + +dstr_t *dstr_with_capacity(u64 capacity) { + dstr_t *out = (dstr_t *)malloc(sizeof(dstr_t) + capacity + 1); + + if (!out) { + return NULL; + } + + out->capacity = capacity; + out->size = 0; + memset(out->buf, 0, capacity + 1); + + return out; +} + +dstr_t *dstr_from_string(const char *str) { + u64 length = strlen(str); + + u64 capacity = length * CAPACITY_SCALAR; + + dstr_t *out = dstr_with_capacity(capacity); + + if (!out) { + return NULL; + } + + out->size = length; + strncpy(out->buf, str, length); + + return out; +} + +void dstr_update(dstr_t **dst, const char *src) { + if (!(*dst)) { + return; + } + + u64 length = strlen(src); + + dstr_t *str = *dst; + + if (length <= str->capacity) { + memset(str->buf, 0, str->capacity); + + str->size = length; + + strncpy(str->buf, src, length); + } else { + u64 capacity = length * CAPACITY_SCALAR; + + dstr_t *tmp = (dstr_t *)realloc(*dst, sizeof(dstr_t) + capacity + 1); + + if (!tmp) { + return; + } + + tmp->capacity = capacity; + tmp->size = length; + strncpy(tmp->buf, src, length); + + *dst = tmp; + } +} + +void dstr_free(dstr_t **str) { + if (!(*str)) { + return; + } + + free(*str); + *str = NULL; +} + 
+void dstr_concat(dstr_t **dst, const char *src) { + if (!(*dst)) { + return; + } + + u64 src_length = strlen(src); + + if (src_length == 0) { + return; + } + + u64 new_length = (*dst)->size + src_length; + + char str[new_length + 1]; + memset(str, 0, new_length + 1); + + strncpy(str, (*dst)->buf, (*dst)->size); + strncat(str, src, src_length); + + dstr_update(dst, str); +} + +void dstr_append(dstr_t **dst, char c) { + if (!(*dst)) { + return; + } + + u64 new_length = (*dst)->size + 1; + + char str[new_length + 1]; + memset(str, 0, new_length + 1); + + strncpy(str, (*dst)->buf, (*dst)->size); + str[(*dst)->size] = c; + + dstr_update(dst, str); +} + +void dstr_resize(dstr_t **str) { + u64 capacity = (*str)->size; + + dstr_t *tmp = (dstr_t *)realloc(*str, sizeof(dstr_t) + capacity + 1); + + if (!tmp) { + return; + } + + tmp->capacity = capacity; + + *str = tmp; +} + +void dstr_clear(dstr_t *str) { + if (!str || str->size == 0) { + return; + } + + memset(str->buf, 0, str->capacity); + str->size = 0; +} + +void dstr_print(const dstr_t *str) { + if (!str) { + return; + } + + printf("%s\n", str->buf); +} + +i64 dstr_find(const dstr_t *str, const char *substr) { + if (!str || !substr) { + return -1; + } + + u64 substr_length = strlen(substr); + + if (substr_length == 0 || substr_length > str->size) { + return -1; + } + + char buf[substr_length + 1]; + memset(buf, 0, substr_length + 1); + + for (u64 i = 0; i < str->size; ++i) { + if (i + substr_length >= str->size) { + break; + } + + for (u64 j = 0; j < substr_length; ++j) { + buf[j] = str->buf[i + j]; + } + + if (strcmp(buf, substr) == 0) { + return i; + } + } + + return -1; +} + +u64 dstr_length(const dstr_t *str) { + if (!str) { + return 0; + } + + return str->size; +} + +u64 dstr_capacity(const dstr_t *str) { + if (!str) { + return 0; + } + + return str->capacity; +} + +const char *dstr_to_cstr(const dstr_t *str) { + if (!str) { + return ""; + } + + return str->buf; +} diff --git a/haversine_02/src/json/json_entities.c 
b/haversine_02/src/json/json_entities.c new file mode 100644 index 0000000..4b7f64e --- /dev/null +++ b/haversine_02/src/json/json_entities.c @@ -0,0 +1,194 @@ +#include "json/json_entities.h" +#include "aliases.h" +#include "json/dstring.h" +#include +#include + +void print_json(const jentity_t *entity, u32 indent) { + PERSISTENT i32 indentation = 0; + + dstr_t *key = NULL; + const jval_t *value = NULL; + + if (entity->type == JENTITY_SINGLE) { + value = &(entity->value); + } else { + key = entity->pair.key; + value = &(entity->pair.value); + } + + if (key) { + printf("%*s\"%s\": ", indentation * indent, "", dstr_to_cstr(key)); + } + + switch (value->type) { + case JVAL_COLLECTION: { + const char *open = ""; + const char *close = ""; + + if (value->collection->type == JCOLL_OBJECT) { + open = "{"; + close = "}"; + } else { + open = "["; + close = "]"; + } + + if (key) { + printf("%s\n", open); + } else { + printf("%*s%s\n", indentation * indent, "", open); + } + + ++indentation; + + if (value->collection->begin) { + print_json(value->collection->begin, indent); + } + + --indentation; + + printf("\n%*s%s", indentation * indent, "", close); + + break; + } + case JVAL_STRING: + if (key) { + printf("\"%s\"", dstr_to_cstr(value->string)); + } else { + printf("%*s\"%s\"", indentation * indent, "", + dstr_to_cstr(value->string)); + } + + break; + case JVAL_INTEGER: + if (key) { + printf("%llu", (unsigned long long)value->num_int); + } else { + printf("%*s%llu", indentation * indent, "", + (unsigned long long)value->num_int); + } + + break; + case JVAL_DOUBLE: + if (key) { + printf("%f", value->num_dbl); + } else { + printf("%*s%f", indentation * indent, "", value->num_dbl); + } + + break; + case JVAL_BOOLEAN: + if (key) { + printf("%s", value->boolean ? "true" : "false"); + } else { + printf("%*s%s", indentation * indent, "", + value->boolean ? 
"true" : "false"); + } + + break; + case JVAL_NULL: + if (key) { + printf("%s", "null"); + } else { + printf("%*s%s", indentation * indent, "", "null"); + } + + break; + case JVAL_EMPTY: + break; + } + + if (entity->next) { + printf(",\n"); + print_json(entity->next, indent); + } + + // Add newline after printing the entire json tree + if (indentation == 0 && entity->parent == NULL && entity->next == NULL) { + printf("\n"); + } +} + +void free_json(jentity_t **entity) { + if (!(*entity)) { + return; + } + + jentity_t *entt_ptr = *entity; + + dstr_t *key = NULL; + const jval_t *value = NULL; + + if (entt_ptr->type == JENTITY_SINGLE) { + value = &(entt_ptr->value); + } else { + key = entt_ptr->pair.key; + value = &(entt_ptr->pair.value); + } + + if (key) { + dstr_free(&(entt_ptr->pair.key)); + } + + switch (value->type) { + case JVAL_COLLECTION: + if (value->collection->begin) { + free_json(&(value->collection->begin)); + } + + free(value->collection); + + break; + case JVAL_STRING: + dstr_free(&(entt_ptr->pair.value.string)); + + break; + default: + break; + } + + if (entt_ptr->next) { + free_json(&(entt_ptr->next)); + } + + free(*entity); + *entity = NULL; +} + +jcoll_t *get_collection_from_entity(const jentity_t *entity) { + return entity->type == JENTITY_SINGLE ? 
entity->value.collection + : entity->pair.value.collection; +} + +jentity_t *create_new_single_entity(const jval_t value, jentity_t *parent) { + jentity_t *entity = (jentity_t *)malloc(sizeof(jentity_t)); + + if (!entity) { + return NULL; + } + + entity->type = JENTITY_SINGLE; + entity->value = value; + entity->parent = parent; + entity->next = NULL; + + return entity; +} + +jentity_t *create_new_pair_entity(dstr_t *key, const jval_t value, + jentity_t *parent) { + jentity_t *entity = (jentity_t *)malloc(sizeof(jentity_t)); + + if (!entity) { + return NULL; + } + + entity->type = JENTITY_PAIR; + entity->pair.key = key; + entity->pair.value = value; + entity->parent = parent; + entity->next = NULL; + + return entity; +} diff --git a/haversine_02/src/json/lexer.c b/haversine_02/src/json/lexer.c new file mode 100644 index 0000000..c2ffeb1 --- /dev/null +++ b/haversine_02/src/json/lexer.c @@ -0,0 +1,1067 @@ +#include "json/lexer.h" +#include "aliases.h" +#include "json/dstring.h" +#include +#include +#include +#include +#include +#include + +#define MAX_KEYWORD_LENGTH 5 +#define UNICODE_LENGTH 4 +#define MAX_STACK_CAPACITY 1024 +#define STRING_BUF_START_CAPACITY 1024 + +typedef enum { + // GENERAL STATES + LEXER_STATE_START, + LEXER_STATE_ERROR, + LEXER_STATE_VALUE, + // COLLECTION STATES + LEXER_STATE_OBJECT_START, + LEXER_STATE_OBJECT, + LEXER_STATE_OBJECT_END, + LEXER_STATE_ARRAY_START, + LEXER_STATE_ARRAY, + LEXER_STATE_ARRAY_END, + LEXER_STATE_LAST_COLLECTION, + // OBJECT STATES + LEXER_STATE_KEY, + // NUMBER STATES + LEXER_STATE_DECIMAL, + LEXER_STATE_NUMBER, + LEXER_STATE_FRACTION, + LEXER_STATE_EXPONENT, + LEXER_STATE_EXP_SIGN, + LEXER_STATE_POWER, + LEXER_STATE_NUMBER_END, + // STRING STATES + LEXER_STATE_STRING, + LEXER_STATE_STRING_END, + LEXER_STATE_ESCAPE_SEQUENCE, + LEXER_STATE_UNICODE_HEX, + // KEYWORD STATES + LEXER_STATE_TRUE, + LEXER_STATE_FALSE, + LEXER_STATE_NULL, + LEXER_STATE_KEYWORD_END, + + COUNT_LEXER_STATES, +} lexer_state_t; + +typedef struct 
{ + lexer_state_t stack[MAX_STACK_CAPACITY]; + u64 size; +} state_stack_t; + +typedef enum { + LEXER_STRING_KEYWORD, + LEXER_STRING_UNICODE, +} lex_str_type; + +typedef struct { + char str[MAX_KEYWORD_LENGTH + 1]; +} keyword_t; + +typedef struct { + char codepoint[UNICODE_LENGTH]; +} unicode_t; + +typedef struct { + lex_str_type type; + u64 size; + union { + keyword_t keyword; + unicode_t unicode; + }; +} lexer_string_t; + +struct lexer_s { + u64 cursor; + u64 line; + u64 column; + u64 text_length; + const char *text; + lexer_state_t current; + state_stack_t stack; + lexer_string_t keyword; + lexer_string_t codepoint; + dstr_t *current_string; + bool token_ready; + token_t token; + bool has_extra_token; + token_t extra_token; + dstr_t *error_message; +}; + +INTERNAL void stack_push(state_stack_t *stack, lexer_state_t value); +INTERNAL lexer_state_t stack_pop(state_stack_t *stack); + +INTERNAL void append_to_lex_str(lexer_string_t *str, char input); +INTERNAL void clear_lex_str(lexer_string_t *str); +INTERNAL bool strequal(const char *const first, const char *const second); +INTERNAL bool is_valid_hex_char(const char input); +INTERNAL bool ishex(const char input); + +INTERNAL token_t dstr_to_numerical_token(const dstr_t *str); +INTERNAL void set_token(token_t *token, u64 line, u64 column, token_type type, + token_value_t value); + +INTERNAL void lexer_state_machine(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_lexer_start(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_last_collection(char input); +INTERNAL lexer_state_t handle_collection_end(lexer_t *lexer, char input); +INTERNAL void handle_input_after_collection_end(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_object(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_array(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_key(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_value(lexer_t *lexer, char input); +INTERNAL lexer_state_t 
handle_string(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_string_end(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_escape_sequence(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_decimal(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_number(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_fraction(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_exponent(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_exp_sign(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_power(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_number_end(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_keyword(char input); +INTERNAL lexer_state_t handle_true(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_false(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_null(lexer_t *lexer, char input); +INTERNAL lexer_state_t handle_keyword_end(lexer_t *lexer, char input); + +void lexer_init(lexer_t **lexer) { + if (*lexer) { + lexer_free(lexer); + } + + *lexer = (lexer_t *)malloc(sizeof(lexer_t)); + + if (!(*lexer)) { + return; + } + + (*lexer)->cursor = 0; + (*lexer)->line = 1; + (*lexer)->column = 0; + (*lexer)->text_length = 0; + (*lexer)->text = ""; + (*lexer)->current = LEXER_STATE_START; + (*lexer)->keyword.type = LEXER_STRING_KEYWORD; + (*lexer)->codepoint.type = LEXER_STRING_UNICODE; + (*lexer)->current_string = dstr_with_capacity(STRING_BUF_START_CAPACITY); + (*lexer)->error_message = dstr_with_capacity(STRING_BUF_START_CAPACITY); + (*lexer)->token_ready = false; + (*lexer)->token = (token_t){}; + (*lexer)->has_extra_token = false; + (*lexer)->extra_token = (token_t){}; + + if (!((*lexer)->current_string)) { + lexer_free(lexer); + } +} + +void lexer_free(lexer_t **lexer) { + if (!(*lexer)) { + return; + } + + dstr_free(&((*lexer)->current_string)); + 
dstr_free(&((*lexer)->error_message)); + + free(*lexer); + *lexer = NULL; +} + +lex_result_t get_next_token(lexer_t *lexer, const char *text) { + if (text != NULL) { + lexer->cursor = 0; + lexer->text = text; + lexer->text_length = strlen(text); + } + + dstr_clear(lexer->current_string); + + char c; + + while (lexer->cursor < lexer->text_length) { + if (lexer->has_extra_token) { + lexer->has_extra_token = false; + + return (lex_result_t){ + (lex_err_t){.errno = LEX_ERR_NONE, .msg = ""}, + lexer->extra_token, + }; + } + + c = lexer->text[(lexer->cursor)++]; + + lexer_state_machine(lexer, c); + + if (c == '\n') { + ++(lexer->line); + lexer->column = 0; + } else { + ++(lexer->column); + } + + if (lexer->current == LEXER_STATE_ERROR) { + char msg[STRING_BUF_START_CAPACITY + 1]; + memset(msg, 0, STRING_BUF_START_CAPACITY + 1); + + u64 slice_length = 20; + char slice[slice_length]; + snprintf(slice, slice_length, "%s", &(lexer->text[lexer->cursor - 1])); + + snprintf( + msg, STRING_BUF_START_CAPACITY, + "\n(%llu:%llu) Encountered an error while parsing the following:\n%s", + (unsigned long long)lexer->line, (unsigned long long)lexer->column, + slice); + + dstr_update(&(lexer->error_message), msg); + + return (lex_result_t){ + (lex_err_t){.errno = LEX_ERR_INVALID, + .msg = dstr_to_cstr(lexer->error_message)}, + (token_t){}, + }; + } else if (lexer->token_ready) { + lexer->token_ready = false; + + return (lex_result_t){ + (lex_err_t){.errno = LEX_ERR_NONE, .msg = ""}, + lexer->token, + }; + } + } + + return (lex_result_t){ + (lex_err_t){.errno = LEX_ERR_NONE, .msg = ""}, + (token_t){}, + }; +} + +void print_token(token_t token) { + i32 num_padding = 4; + + printf("{LINE: %*llu, COLUMN: %*llu, TYPE: ", num_padding, + (unsigned long long)token.line, num_padding, + (unsigned long long)token.column); + + i32 token_type_padding = 15; + + switch (token.type) { + case TK_NO_TOKEN: + break; + case TK_L_BRACE: + printf("%*s, VALUE: N/A", token_type_padding, "TK_L_BRACE"); + break; 
+ case TK_R_BRACE: + printf("%*s, VALUE: N/A", token_type_padding, "TK_R_BRACE"); + break; + case TK_L_BRACKET: + printf("%*s, VALUE: N/A", token_type_padding, "TK_L_BRACKET"); + break; + case TK_R_BRACKET: + printf("%*s, VALUE: N/A", token_type_padding, "TK_R_BRACKET"); + break; + case TK_NULL: + printf("%*s, VALUE: N/A", token_type_padding, "TK_NULL"); + break; + case TK_BOOL: + printf("%*s, VALUE: %s", token_type_padding, "TK_BOOL", + token.value.boolean ? "true" : "false"); + break; + case TK_STR_KEY: + printf("%*s, VALUE: %s", token_type_padding, "TK_STR_KEY", + token.value.string); + break; + case TK_STR_VAL: + printf("%*s, VALUE: %s", token_type_padding, "TK_STR_VAL", + token.value.string); + break; + case TK_INTEGER: + printf("%*s, VALUE: %lld", token_type_padding, "TK_INTEGER", + (long long)token.value.num_int); + break; + case TK_DOUBLE: + printf("%*s, VALUE: %f", token_type_padding, "TK_DOUBLE", + token.value.num_frac); + break; + } + + printf("}\n"); +} + +void stack_push(state_stack_t *stack, lexer_state_t state) { + if (stack->size + 1 >= MAX_STACK_CAPACITY) { + return; + } + + stack->stack[(stack->size)++] = state; +} + +lexer_state_t stack_pop(state_stack_t *stack) { + if (stack->size == 0) { + return LEXER_STATE_ERROR; + } + + lexer_state_t state = stack->stack[--(stack->size)]; + + return state; +} + +void append_to_lex_str(lexer_string_t *lex_str, char input) { + u64 capacity = 0; + char *str = NULL; + + switch (lex_str->type) { + case LEXER_STRING_KEYWORD: + capacity = MAX_KEYWORD_LENGTH; + str = lex_str->keyword.str; + + break; + case LEXER_STRING_UNICODE: + capacity = UNICODE_LENGTH; + str = lex_str->unicode.codepoint; + + break; + } + + if (lex_str->size + 1 > capacity) { + return; + } + + assert(str != NULL); + + str[(lex_str->size)++] = input; +} + +void clear_lex_str(lexer_string_t *lex_str) { + u64 capacity = 1; + char *str = NULL; + + switch (lex_str->type) { + case LEXER_STRING_KEYWORD: + capacity += MAX_KEYWORD_LENGTH; + str = 
lex_str->keyword.str; + + break; + case LEXER_STRING_UNICODE: + capacity += UNICODE_LENGTH; + str = lex_str->unicode.codepoint; + + break; + } + + assert(str != NULL); + + memset(str, 0, capacity); + lex_str->size = 0; +} + +bool strequal(const char *const first, const char *const second) { + return strcmp(first, second) == 0; +} + +bool is_valid_hex_char(const char input) { + switch (input) { + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + return true; + } + + return false; +} + +bool ishex(const char input) { + return isdigit(input) || is_valid_hex_char(input); +} + +token_t dstr_to_numerical_token(const dstr_t *str) { + token_t token = {}; + + bool is_double = dstr_find(str, ".") != -1; + + token.type = is_double ? TK_DOUBLE : TK_INTEGER; + + if (is_double) { + token.value.num_frac = strtod(dstr_to_cstr(str), NULL); + } else { + token.value.num_int = atol(dstr_to_cstr(str)); + } + + return token; +} + +void set_token(token_t *token, u64 line, u64 column, token_type type, + token_value_t value) { + *token = (token_t){ + .line = line, + .column = column, + .type = type, + .value = value, + }; +} + +void lexer_state_machine(lexer_t *lexer, char input) { + switch (lexer->current) { + case LEXER_STATE_START: + lexer->current = handle_lexer_start(lexer, input); + break; + case LEXER_STATE_VALUE: + lexer->current = handle_value(lexer, input); + break; + case LEXER_STATE_OBJECT_START: + stack_push(&(lexer->stack), LEXER_STATE_OBJECT); + // break is left out intentionally here to utilise the fallthrough behaviour + // of the switch statement + case LEXER_STATE_OBJECT: + lexer->current = handle_object(lexer, input); + break; + case LEXER_STATE_ARRAY_START: + stack_push(&(lexer->stack), LEXER_STATE_ARRAY); + // break is left out intentionally here to utilise the fallthrough behaviour + // of the switch statement + case LEXER_STATE_ARRAY: + lexer->current = handle_array(lexer, 
input); + break; + case LEXER_STATE_OBJECT_END: + case LEXER_STATE_ARRAY_END: + if (lexer->stack.size > 1) { + stack_pop(&(lexer->stack)); + + lexer->current = lexer->stack.stack[lexer->stack.size - 1]; + } else { + lexer->current = LEXER_STATE_LAST_COLLECTION; + } + + handle_input_after_collection_end(lexer, input); + + break; + case LEXER_STATE_KEY: + lexer->current = handle_key(lexer, input); + break; + case LEXER_STATE_DECIMAL: + lexer->current = handle_decimal(lexer, input); + break; + case LEXER_STATE_NUMBER: + lexer->current = handle_number(lexer, input); + break; + case LEXER_STATE_FRACTION: + lexer->current = handle_fraction(lexer, input); + break; + case LEXER_STATE_EXPONENT: + lexer->current = handle_exponent(lexer, input); + break; + case LEXER_STATE_EXP_SIGN: + lexer->current = handle_exp_sign(lexer, input); + break; + case LEXER_STATE_POWER: + lexer->current = handle_power(lexer, input); + break; + case LEXER_STATE_NUMBER_END: + lexer->current = handle_number_end(lexer, input); + break; + case LEXER_STATE_STRING: + lexer->current = handle_string(lexer, input); + break; + case LEXER_STATE_STRING_END: + lexer->current = handle_string_end(lexer, input); + break; + case LEXER_STATE_ESCAPE_SEQUENCE: + lexer->current = handle_escape_sequence(lexer, input); + break; + case LEXER_STATE_UNICODE_HEX: + lexer->current = handle_unicode_sequence(lexer, input); + break; + case LEXER_STATE_TRUE: + lexer->current = handle_true(lexer, input); + break; + case LEXER_STATE_FALSE: + lexer->current = handle_false(lexer, input); + break; + case LEXER_STATE_NULL: + lexer->current = handle_null(lexer, input); + break; + case LEXER_STATE_KEYWORD_END: + lexer->current = handle_keyword_end(lexer, input); + break; + case LEXER_STATE_LAST_COLLECTION: + lexer->current = handle_last_collection(input); + break; + case LEXER_STATE_ERROR: + case COUNT_LEXER_STATES: + lexer->current = LEXER_STATE_ERROR; + break; + } +} + +lexer_state_t handle_lexer_start(lexer_t *lexer, char input) { + 
if (isspace(input)) { + return LEXER_STATE_START; + } + + switch (input) { + case '{': + lexer->token_ready = true; + set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACE, + (token_value_t){0}); + + return LEXER_STATE_OBJECT_START; + case '[': + lexer->token_ready = true; + set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACKET, + (token_value_t){0}); + + return LEXER_STATE_ARRAY_START; + } + + return LEXER_STATE_ERROR; +} + +lexer_state_t handle_last_collection(char input) { + if (isspace(input)) { + return LEXER_STATE_LAST_COLLECTION; + } + + return LEXER_STATE_ERROR; +} + +lexer_state_t handle_collection_end(lexer_t *lexer, char input) { + // No need to ignore space as this is only called when input is } or ] + + lexer->current = lexer->stack.stack[lexer->stack.size - 1]; + + bool object_end = lexer->current == LEXER_STATE_OBJECT && input == '}'; + + if (object_end) { + token_t *token; + + if (lexer->token_ready) { + lexer->has_extra_token = true; + token = &(lexer->extra_token); + } else { + lexer->token_ready = true; + token = &(lexer->token); + } + + set_token(token, lexer->line, lexer->column, TK_R_BRACE, + (token_value_t){0}); + + return LEXER_STATE_OBJECT_END; + } + + bool array_end = lexer->current == LEXER_STATE_ARRAY && input == ']'; + + if (array_end) { + token_t *token; + + if (lexer->token_ready) { + lexer->has_extra_token = true; + token = &(lexer->extra_token); + } else { + lexer->token_ready = true; + token = &(lexer->token); + } + + set_token(token, lexer->line, lexer->column, TK_R_BRACKET, + (token_value_t){0}); + + return LEXER_STATE_ARRAY_END; + } + + return LEXER_STATE_ERROR; +} + +void handle_input_after_collection_end(lexer_t *lexer, char input) { + switch (input) { + case '}': + lexer->token_ready = true; + set_token(&(lexer->token), lexer->line, lexer->column, TK_R_BRACE, + (token_value_t){0}); + + break; + case ']': + lexer->token_ready = true; + set_token(&(lexer->token), lexer->line, lexer->column, 
TK_R_BRACKET, + (token_value_t){0}); + + break; + } +} + +lexer_state_t handle_object(lexer_t *lexer, char input) { + if (isspace(input)) { + return LEXER_STATE_OBJECT; + } else if (input == '"') { + stack_push(&(lexer->stack), LEXER_STATE_KEY); + + return LEXER_STATE_KEY; + } else if (input == '}') { + return handle_collection_end(lexer, input); + } + + return LEXER_STATE_ERROR; +} + +lexer_state_t handle_array(lexer_t *lexer, char input) { + if (isspace(input)) { + return LEXER_STATE_ARRAY; + } else if (input == ']') { + return handle_collection_end(lexer, input); + } + + return handle_value(lexer, input); +} + +lexer_state_t handle_key(lexer_t *lexer, char input) { + dstr_append(&(lexer->current_string), input); + + return LEXER_STATE_STRING; +} + +lexer_state_t handle_value(lexer_t *lexer, char input) { + if (isspace(input)) { + return LEXER_STATE_VALUE; + } else if ((isdigit(input) && input != '0') || input == '-') { + dstr_append(&(lexer->current_string), input); + + return LEXER_STATE_NUMBER; + } + + switch (input) { + case '"': + stack_push(&(lexer->stack), LEXER_STATE_VALUE); + + return LEXER_STATE_STRING; + case '0': + dstr_append(&(lexer->current_string), input); + + return LEXER_STATE_DECIMAL; + case '{': + lexer->token_ready = true; + set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACE, + (token_value_t){0}); + + return LEXER_STATE_OBJECT_START; + case '[': + lexer->token_ready = true; + set_token(&(lexer->token), lexer->line, lexer->column, TK_L_BRACKET, + (token_value_t){0}); + + return LEXER_STATE_ARRAY_START; + case 't': + case 'f': + case 'n': + append_to_lex_str(&(lexer->keyword), input); + + return handle_keyword(input); + } + + return LEXER_STATE_ERROR; +} + +lexer_state_t handle_string(lexer_t *lexer, char input) { + switch (input) { + case '\\': + dstr_append(&(lexer->current_string), input); + + return LEXER_STATE_ESCAPE_SEQUENCE; + case '"': { + lexer_state_t string_type = lexer->stack.stack[lexer->stack.size - 1]; + + 
lexer->token_ready = true; + token_t *token = &(lexer->token); + u64 column = lexer->column - dstr_length(lexer->current_string); + token_value_t value = {.string = dstr_to_cstr(lexer->current_string)}; + + if (string_type == LEXER_STATE_KEY) { + set_token(token, lexer->line, column, TK_STR_KEY, value); + } else if (string_type == LEXER_STATE_VALUE) { + set_token(token, lexer->line, column, TK_STR_VAL, value); + } + + return LEXER_STATE_STRING_END; + } + } + + dstr_append(&(lexer->current_string), input); + + return LEXER_STATE_STRING; +} + +lexer_state_t handle_string_end(lexer_t *lexer, char input) { + if (isspace(input)) { + return LEXER_STATE_STRING_END; + } + + lexer->current = stack_pop(&(lexer->stack)); + + bool key_end = lexer->current == LEXER_STATE_KEY && input == ':'; + + if (key_end) { + return LEXER_STATE_VALUE; + } + + bool value_end = lexer->current == LEXER_STATE_VALUE && input == ','; + + if (value_end) { + return lexer->stack.stack[lexer->stack.size - 1]; + } + + bool collection_end = input == '}' || input == ']'; + + return collection_end ? 
handle_collection_end(lexer, input) + : LEXER_STATE_ERROR; +} + +lexer_state_t handle_escape_sequence(lexer_t *lexer, char input) { + dstr_append(&(lexer->current_string), input); + + switch (input) { + case '"': + case '/': + case '\\': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + return LEXER_STATE_STRING; + case 'u': + return LEXER_STATE_UNICODE_HEX; + } + + return LEXER_STATE_ERROR; +} + +lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input) { + append_to_lex_str(&(lexer->codepoint), input); + dstr_append(&(lexer->current_string), input); + + if (!ishex(input)) { + clear_lex_str(&(lexer->codepoint)); + + return LEXER_STATE_ERROR; + } else if (lexer->codepoint.size == UNICODE_LENGTH) { + clear_lex_str(&(lexer->codepoint)); + + return LEXER_STATE_STRING; + } + + return LEXER_STATE_UNICODE_HEX; +} + +lexer_state_t handle_decimal(lexer_t *lexer, char input) { + dstr_append(&(lexer->current_string), input); + + if (input == '.') { + return LEXER_STATE_FRACTION; + } + + return LEXER_STATE_ERROR; +} + +lexer_state_t handle_number(lexer_t *lexer, char input) { + if (isdigit(input)) { + dstr_append(&(lexer->current_string), input); + + return LEXER_STATE_NUMBER; + } else if (input == '.') { + dstr_append(&(lexer->current_string), input); + + return LEXER_STATE_FRACTION; + } else if (input == '}' || input == ']') { + // TODO (Abdelrahman): Set the token type correctly based on whether the + // number is an integer or a double + lexer->token_ready = true; + u64 column = lexer->column - dstr_length(lexer->current_string); + + token_t token = dstr_to_numerical_token(lexer->current_string); + + set_token(&(lexer->token), lexer->line, column, token.type, token.value); + + return handle_collection_end(lexer, input); + } else if (input == ',') { + // TODO (Abdelrahman): Set the token type correctly based on whether the + // number is an integer or a double + lexer->token_ready = true; + u64 column = lexer->column - 
dstr_length(lexer->current_string); + + token_t token = dstr_to_numerical_token(lexer->current_string); + + set_token(&(lexer->token), lexer->line, column, token.type, token.value); + + return lexer->stack.stack[lexer->stack.size - 1]; + } else if (isspace(input)) { + // TODO (Abdelrahman): Set the token type correctly based on whether the + // number is an integer or a double + lexer->token_ready = true; + u64 column = lexer->column - dstr_length(lexer->current_string); + + token_t token = dstr_to_numerical_token(lexer->current_string); + + set_token(&(lexer->token), lexer->line, column, token.type, token.value); + + return LEXER_STATE_NUMBER_END; + } + + return LEXER_STATE_ERROR; +} + +/* Digits after the decimal point: 'e'/'E' starts an exponent; the same terminators as handle_number ('}'/']', ',', whitespace) emit the numeric token. */ lexer_state_t handle_fraction(lexer_t *lexer, char input) { + if (isdigit(input)) { + dstr_append(&(lexer->current_string), input); + + return LEXER_STATE_FRACTION; + } else if (input == '}' || input == ']') { + // TODO (Abdelrahman): Set the token type correctly based on whether the + // number is an integer or a double + lexer->token_ready = true; + u64 column = lexer->column - dstr_length(lexer->current_string); + + token_t token = dstr_to_numerical_token(lexer->current_string); + + set_token(&(lexer->token), lexer->line, column, token.type, token.value); + + return handle_collection_end(lexer, input); + } else if (input == 'e' || input == 'E') { + dstr_append(&(lexer->current_string), input); + + return LEXER_STATE_EXPONENT; + } else if (input == ',') { + // TODO (Abdelrahman): Set the token type correctly based on whether the + // number is an integer or a double + lexer->token_ready = true; + u64 column = lexer->column - dstr_length(lexer->current_string); + + token_t token = dstr_to_numerical_token(lexer->current_string); + + set_token(&(lexer->token), lexer->line, column, token.type, token.value); + + return lexer->stack.stack[lexer->stack.size - 1]; + } else if (isspace(input)) { + // TODO (Abdelrahman): Set the token type correctly based on whether the + // number is an
integer or a double + lexer->token_ready = true; + u64 column = lexer->column - dstr_length(lexer->current_string); + + token_t token = dstr_to_numerical_token(lexer->current_string); + + set_token(&(lexer->token), lexer->line, column, token.type, token.value); + + return LEXER_STATE_NUMBER_END; + } + + return LEXER_STATE_ERROR; +} + +/* Character immediately after 'e'/'E': a digit moves to the power state, '+'/'-' to the sign state; the character is appended to the buffer regardless, even on the error path. */ lexer_state_t handle_exponent(lexer_t *lexer, char input) { + dstr_append(&(lexer->current_string), input); + + if (isdigit(input)) { + return LEXER_STATE_POWER; + } else if (input == '+' || input == '-') { + return LEXER_STATE_EXP_SIGN; + } + + return LEXER_STATE_ERROR; +} + +/* Character after the exponent sign: must be a digit. */ lexer_state_t handle_exp_sign(lexer_t *lexer, char input) { + dstr_append(&(lexer->current_string), input); + + if (isdigit(input)) { + return LEXER_STATE_POWER; + } + + return LEXER_STATE_ERROR; +} + +/* Exponent digits: terminators ('}'/']', ',', whitespace) emit the numeric token exactly as in handle_number. */ lexer_state_t handle_power(lexer_t *lexer, char input) { + if (isdigit(input)) { + dstr_append(&(lexer->current_string), input); + + return LEXER_STATE_POWER; + } else if (input == '}' || input == ']') { + // TODO (Abdelrahman): Set the token type correctly based on whether the + // number is an integer or a double + lexer->token_ready = true; + u64 column = lexer->column - dstr_length(lexer->current_string); + + token_t token = dstr_to_numerical_token(lexer->current_string); + + set_token(&(lexer->token), lexer->line, column, token.type, token.value); + + return handle_collection_end(lexer, input); + } else if (input == ',') { + // TODO (Abdelrahman): Set the token type correctly based on whether the + // number is an integer or a double + lexer->token_ready = true; + u64 column = lexer->column - dstr_length(lexer->current_string); + + token_t token = dstr_to_numerical_token(lexer->current_string); + + set_token(&(lexer->token), lexer->line, column, token.type, token.value); + + return lexer->stack.stack[lexer->stack.size - 1]; + } else if (isspace(input)) { + // TODO (Abdelrahman): Set the token type correctly based on whether the + // number is an integer or a
double + lexer->token_ready = true; + u64 column = lexer->column - dstr_length(lexer->current_string); + + token_t token = dstr_to_numerical_token(lexer->current_string); + + set_token(&(lexer->token), lexer->line, column, token.type, token.value); + + return LEXER_STATE_NUMBER_END; + } + + return LEXER_STATE_ERROR; +} + +/* Whitespace already terminated the number: eat further whitespace, ',' resumes the container state, '}'/']' closes it. NOTE(review): the token was already emitted when this state was entered, yet the ',' branch builds and emits a token from the same buffer again — confirm the buffer is still valid here and double emission is intended. */ lexer_state_t handle_number_end(lexer_t *lexer, char input) { + if (isspace(input)) { + return LEXER_STATE_NUMBER_END; + } else if (input == ',') { + // TODO (Abdelrahman): Set the token type correctly based on whether the + // number is an integer or a double + lexer->token_ready = true; + u64 column = lexer->column - dstr_length(lexer->current_string); + + token_t token = dstr_to_numerical_token(lexer->current_string); + + set_token(&(lexer->token), lexer->line, column, token.type, token.value); + + return lexer->stack.stack[lexer->stack.size - 1]; + } + + bool collection_end = input == '}' || input == ']'; + + return collection_end ? handle_collection_end(lexer, input) + : LEXER_STATE_ERROR; +} + +/* Dispatch on the first letter of a JSON literal: true / false / null. */ lexer_state_t handle_keyword(char input) { + switch (input) { + case 't': + return LEXER_STATE_TRUE; + case 'f': + return LEXER_STATE_FALSE; + case 'n': + return LEXER_STATE_NULL; + } + + return LEXER_STATE_ERROR; +} + +/* Matches "true" one character at a time against the keyword accumulated so far. NOTE(review): strcpy assumes keyword.keyword.str is NUL-terminated and no longer than MAX_KEYWORD_LENGTH — verify at the definition of the keyword buffer. */ lexer_state_t handle_true(lexer_t *lexer, char input) { + char current[MAX_KEYWORD_LENGTH + 1]; + strcpy(current, lexer->keyword.keyword.str); + + append_to_lex_str(&(lexer->keyword), input); + + bool return_state_true = (strequal(current, "t") && input == 'r') || + (strequal(current, "tr") && input == 'u'); + + bool return_state_end = strequal(current, "tru") && input == 'e'; + + if (return_state_true) { + return LEXER_STATE_TRUE; + } else if (return_state_end) { + return LEXER_STATE_KEYWORD_END; + } + + return LEXER_STATE_ERROR; +} + +/* Matches "false" incrementally; same buffer assumptions as handle_true. */ lexer_state_t handle_false(lexer_t *lexer, char input) { + char current[MAX_KEYWORD_LENGTH + 1]; + strcpy(current, lexer->keyword.keyword.str); + + append_to_lex_str(&(lexer->keyword), input); + + bool
return_state_false = (strequal(current, "f") && input == 'a') || + (strequal(current, "fa") && input == 'l') || + (strequal(current, "fal") && input == 's'); + + bool return_state_end = strequal(current, "fals") && input == 'e'; + + if (return_state_false) { + return LEXER_STATE_FALSE; + } else if (return_state_end) { + return LEXER_STATE_KEYWORD_END; + } + + return LEXER_STATE_ERROR; +} + +/* Matches "null" incrementally; same buffer assumptions as handle_true. */ lexer_state_t handle_null(lexer_t *lexer, char input) { + char current[MAX_KEYWORD_LENGTH + 1]; + strcpy(current, lexer->keyword.keyword.str); + + append_to_lex_str(&(lexer->keyword), input); + + bool return_state_null = (strequal(current, "n") && input == 'u') || + (strequal(current, "nu") && input == 'l'); + + bool return_state_end = strequal(current, "nul") && input == 'l'; + + if (return_state_null) { + return LEXER_STATE_NULL; + } else if (return_state_end) { + return LEXER_STATE_KEYWORD_END; + } + + return LEXER_STATE_ERROR; +} + +/* Emits TK_NULL or TK_BOOL once the full keyword has been accumulated, clears the keyword buffer, then routes like the other value terminators: whitespace stays here, ',' resumes the container state, '}'/']' closes the container. */ lexer_state_t handle_keyword_end(lexer_t *lexer, char input) { + const char *keyword = lexer->keyword.keyword.str; + + if (lexer->keyword.size > 0) { + lexer->token_ready = true; + token_t *token = &(lexer->token); + u64 column = lexer->column - lexer->keyword.size; + + if (strequal(keyword, "null")) { + set_token(token, lexer->line, column, TK_NULL, (token_value_t){0}); + } else if (strequal(keyword, "true")) { + set_token(token, lexer->line, column, TK_BOOL, + (token_value_t){.boolean = true}); + } else if (strequal(keyword, "false")) { + set_token(token, lexer->line, column, TK_BOOL, + (token_value_t){.boolean = false}); + } + + clear_lex_str(&(lexer->keyword)); + } + + if (isspace(input)) { + return LEXER_STATE_KEYWORD_END; + } else if (input == ',') { + return lexer->stack.stack[lexer->stack.size - 1]; + } + + bool collection_end = input == '}' || input == ']'; + + return collection_end ?
handle_collection_end(lexer, input) + : LEXER_STATE_ERROR; +} diff --git a/haversine_02/src/json/parser.c b/haversine_02/src/json/parser.c new file mode 100644 index 0000000..3ef9d87 --- /dev/null +++ b/haversine_02/src/json/parser.c @@ -0,0 +1,256 @@ +#include "json/parser.h" +#include "aliases.h" +#include "json/dstring.h" +#include "json/json_entities.h" +#include "json/lexer.h" +#include +#include +#include + +/* Parser state: root of the entity tree, the collection currently being filled, and the value staged by the last token. */ struct parser_s { + jentity_t *root; + jentity_t *current; + jval_t value; +}; + +INTERNAL void parser_free(parser_t **parser); +INTERNAL void parser_init(parser_t **parser); +INTERNAL void parse_token(parser_t *parser, token_t token); +INTERNAL void add_key(parser_t *parser, dstr_t *key); +INTERNAL jentity_t *add_value(parser_t *parser); +INTERNAL void add_collection(parser_t *parser); + +/* Reads the whole file into memory, runs the lexer/parser loop, and returns the root of the entity tree (ownership passes to the caller; parser_free deliberately leaves the tree alive). NOTE(review): ftell's long result is stored unchecked in a u64 (wraps on -1 error); the entire file is placed in a VLA — stack-overflow risk for large inputs; fread's return value is ignored; and the error field named 'errno' collides with the <errno.h> macro if that header is ever included. */ jentity_t *load_json(const char *filepath) { + FILE *fp = fopen(filepath, "r"); + + if (!fp) { + return NULL; + } + + fseek(fp, 0, SEEK_END); + + u64 length = ftell(fp); + + fseek(fp, 0, SEEK_SET); + + char json[length + 1]; + memset(json, 0, length + 1); + + fread(json, sizeof(char), length, fp); + + fclose(fp); + + lexer_t *lexer = NULL; + parser_t *parser = NULL; + + lexer_init(&lexer); + if (!lexer) { + return NULL; + } + + parser_init(&parser); + if (!parser) { + lexer_free(&lexer); + + return NULL; + } + + lex_result_t result = get_next_token(lexer, json); + + if (result.error.errno) { + printf("%s\n", result.error.msg); + } else { + while (result.token.type != TK_NO_TOKEN) { + parse_token(parser, result.token); + + result = get_next_token(lexer, NULL); + + if (result.error.errno) { + printf("%s\n", result.error.msg); + break; + } + } + } + + jentity_t *root = parser->root; + + parser_free(&parser); + lexer_free(&lexer); + + return root; +} + +/* (Re)allocates the parser and zeroes its fields; on malloc failure *parser is left NULL for the caller to detect. */ void parser_init(parser_t **parser) { + if (*parser) { + parser_free(parser); + } + + *parser = (parser_t *)malloc(sizeof(parser_t)); + + if (!(*parser)) { + return; + } + + (*parser)->root = NULL; + (*parser)->current =
NULL; + (*parser)->value = (jval_t){}; +} + +/* Frees only the parser shell and nulls the pointer; the entity tree is intentionally left for the caller (load_json returns it). */ void parser_free(parser_t **parser) { + if (!(*parser)) { + return; + } + + (*parser)->root = NULL; + (*parser)->current = NULL; + + free(*parser); + *parser = NULL; +} + +/* Applies one lexer token to the tree under construction: braces/brackets open a new collection, closers pop back to the parent, a key starts an empty pair whose value the next token fills in, and scalar tokens append values. NOTE(review): the jcoll_t malloc is unchecked, and the TK_R_* case dereferences parser->current without a NULL check — presumably token ordering guarantees a current entity exists; verify. */ void parse_token(parser_t *parser, token_t token) { + switch (token.type) { + case TK_L_BRACE: + case TK_L_BRACKET: { + parser->value = (jval_t){ + .type = JVAL_COLLECTION, + .collection = (jcoll_t *)malloc(sizeof(jcoll_t)), + }; + + if (token.type == TK_L_BRACE) { + parser->value.collection->type = JCOLL_OBJECT; + } else { + parser->value.collection->type = JCOLL_ARRAY; + } + + parser->value.collection->size = 0; + parser->value.collection->begin = NULL; + parser->value.collection->end = NULL; + + add_collection(parser); + + break; + } + case TK_R_BRACE: + case TK_R_BRACKET: + if (parser->current->parent) { + parser->current = parser->current->parent; + } + + break; + case TK_STR_KEY: { + parser->value = (jval_t){.type = JVAL_EMPTY, .null_val = NULL}; + + add_key(parser, dstr_from_string(token.value.string)); + + break; + } + case TK_NULL: { + parser->value = (jval_t){.type = JVAL_NULL, .null_val = NULL}; + + add_value(parser); + + break; + } + case TK_BOOL: { + parser->value = + (jval_t){.type = JVAL_BOOLEAN, .boolean = token.value.boolean}; + + add_value(parser); + + break; + } + case TK_STR_VAL: { + parser->value = (jval_t){.type = JVAL_STRING, + .string = dstr_from_string(token.value.string)}; + + add_value(parser); + + break; + } + case TK_INTEGER: { + parser->value = + (jval_t){.type = JVAL_INTEGER, .num_int = token.value.num_int}; + + add_value(parser); + + break; + } + case TK_DOUBLE: { + parser->value = + (jval_t){.type = JVAL_DOUBLE, .num_dbl = token.value.num_frac}; + + add_value(parser); + + break; + } + case TK_NO_TOKEN: + break; + } +} + +/* Appends a key with an empty placeholder value to the current collection; add_value later fills the placeholder (see the JENTITY_PAIR/JVAL_EMPTY branch there). */ void add_key(parser_t *parser, dstr_t *key) { + jcoll_t *collection = get_collection_from_entity(parser->current); + + if (!collection) { + return; + } + + if (!(collection->end)) { + collection->begin
= collection->end = + create_new_pair_entity(key, parser->value, parser->current); + + collection->size = 1; + } else { + jentity_t *new_entity = + create_new_pair_entity(key, parser->value, parser->current); + + collection->end->next = new_entity; + + collection->end = new_entity; + + ++(collection->size); + } +} + +/* Attaches the staged parser->value to the current collection: it either completes the pending empty pair created by add_key or appends a fresh single entity; returns the collection's tail entity (NULL if there is no current collection). */ jentity_t *add_value(parser_t *parser) { + jcoll_t *collection = get_collection_from_entity(parser->current); + + if (!collection) { + return NULL; + } + + if (!(collection->end)) { + collection->begin = collection->end = + create_new_single_entity(parser->value, parser->current); + + collection->size = 1; + } else { + if (collection->end->type == JENTITY_PAIR && + collection->end->pair.value.type == JVAL_EMPTY) { + collection->end->pair.value = parser->value; + } else { + jentity_t *new_entity = + create_new_single_entity(parser->value, parser->current); + + collection->end->next = new_entity; + + collection->end = new_entity; + + ++(collection->size); + } + } + + return collection->end; +} + +/* The first collection becomes the tree root; later ones are appended as values and become the new current entity being filled. */ void add_collection(parser_t *parser) { + if (!(parser->root)) { + parser->root = parser->current = + create_new_single_entity(parser->value, NULL); + + return; + } + + parser->current = add_value(parser); +}