753 lines
19 KiB
C
753 lines
19 KiB
C
#include "lexer.h"
|
|
#include "aliases.h"
|
|
#include "dstring.h"
|
|
#include <assert.h>
|
|
#include <ctype.h>
|
|
#include <stdbool.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
// Longest keyword the lexer matches ("false"), not counting the terminator.
#define MAX_KEYWORD_LENGTH 5
// Number of hex digits collected after a "\u" escape.
#define UNICODE_LENGTH 4
// Number of slots in the lexer's state stack.
#define MAX_STACK_CAPACITY 1024
// Capacity requested for the string accumulation buffer at start-up.
#define STRING_BUF_START_CAPACITY 1024
|
|
|
|
// Every state the lexer's state machine can be in. The enumerators keep their
// declaration order, so COUNT_LEXER_STATES equals the number of real states.
typedef enum {
  // GENERAL STATES
  LEXER_STATE_START, // nothing but whitespace consumed so far
  LEXER_STATE_ERROR, // invalid input encountered
  LEXER_STATE_VALUE, // expecting a value
  // COLLECTION STATES
  LEXER_STATE_OBJECT_START, // '{' consumed, object not yet on the stack
  LEXER_STATE_OBJECT,       // inside an object
  LEXER_STATE_OBJECT_END,   // '}' consumed
  LEXER_STATE_ARRAY_START,  // '[' consumed, array not yet on the stack
  LEXER_STATE_ARRAY,        // inside an array
  LEXER_STATE_ARRAY_END,    // ']' consumed
  LEXER_STATE_LAST_COLLECTION, // outermost collection has been closed
  // OBJECT STATES
  LEXER_STATE_KEY,
  // NUMBER STATES
  LEXER_STATE_DECIMAL, // a leading '0' was consumed
  LEXER_STATE_NUMBER,
  LEXER_STATE_FRACTION,
  LEXER_STATE_EXPONENT,
  LEXER_STATE_EXP_SIGN,
  LEXER_STATE_POWER,
  LEXER_STATE_NUMBER_END,
  // STRING STATES
  LEXER_STATE_STRING,
  LEXER_STATE_STRING_END,
  LEXER_STATE_ESCAPE_SEQUENCE,
  LEXER_STATE_UNICODE_HEX,
  // KEYWORD STATES
  LEXER_STATE_TRUE,
  LEXER_STATE_FALSE,
  LEXER_STATE_NULL,
  LEXER_STATE_KEYWORD_END,

  COUNT_LEXER_STATES,
} lexer_state_t;
|
|
|
|
typedef struct {
|
|
lexer_state_t stack[MAX_STACK_CAPACITY];
|
|
u64 size;
|
|
} state_stack_t;
|
|
|
|
// Selects which member of the lexer_string_t union is in use.
typedef enum {
  LEXER_STRING_KEYWORD, // keyword_t buffer
  LEXER_STRING_UNICODE, // unicode_t buffer
} lex_str_type;
|
|
|
|
typedef struct {
|
|
char str[MAX_KEYWORD_LENGTH + 1];
|
|
} keyword_t;
|
|
|
|
typedef struct {
|
|
char codepoint[UNICODE_LENGTH];
|
|
} unicode_t;
|
|
|
|
typedef struct {
|
|
lex_str_type type;
|
|
u64 size;
|
|
union {
|
|
keyword_t keyword;
|
|
unicode_t unicode;
|
|
};
|
|
} lexer_string_t;
|
|
|
|
struct lexer {
|
|
u64 line;
|
|
u64 column;
|
|
lexer_state_t current;
|
|
state_stack_t stack;
|
|
lexer_string_t keyword;
|
|
lexer_string_t codepoint;
|
|
dstr_t *current_string;
|
|
};
|
|
|
|
void stack_push(state_stack_t *stack, lexer_state_t value);
|
|
lexer_state_t stack_pop(state_stack_t *stack);
|
|
|
|
void append_to_lex_str(lexer_string_t *str, char input);
|
|
void clear_lex_str(lexer_string_t *str);
|
|
bool strequal(const char *first, const char *second);
|
|
bool is_valid_hex_char(const char input);
|
|
bool ishex(const char input);
|
|
|
|
void lexer_state_machine(lexer_t *lexer, char input);
|
|
lexer_state_t handle_lexer_start(char input);
|
|
lexer_state_t handle_last_collection(char input);
|
|
lexer_state_t handle_collection_end(lexer_t *lexer, char input);
|
|
lexer_state_t handle_object(lexer_t *lexer, char input);
|
|
lexer_state_t handle_array(lexer_t *lexer, char input);
|
|
lexer_state_t handle_key(lexer_t *lexer, char input);
|
|
lexer_state_t handle_value(lexer_t *lexer, char input);
|
|
lexer_state_t handle_string(lexer_t *lexer, char input);
|
|
lexer_state_t handle_string_end(lexer_t *lexer, char input);
|
|
lexer_state_t handle_escape_sequence(lexer_t *lexer, char input);
|
|
lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input);
|
|
lexer_state_t handle_decimal(lexer_t *lexer, char input);
|
|
lexer_state_t handle_number(lexer_t *lexer, char input);
|
|
lexer_state_t handle_fraction(lexer_t *lexer, char input);
|
|
lexer_state_t handle_exponent(lexer_t *lexer, char input);
|
|
lexer_state_t handle_exp_sign(lexer_t *lexer, char input);
|
|
lexer_state_t handle_power(lexer_t *lexer, char input);
|
|
lexer_state_t handle_number_end(lexer_t *lexer, char input);
|
|
lexer_state_t handle_keyword(char input);
|
|
lexer_state_t handle_true(lexer_t *lexer, char input);
|
|
lexer_state_t handle_false(lexer_t *lexer, char input);
|
|
lexer_state_t handle_null(lexer_t *lexer, char input);
|
|
lexer_state_t handle_keyword_end(lexer_t *lexer, char input);
|
|
|
|
// TODO (Abdelrahman): The printf functions in the state handlers are the exit
|
|
// points for the tokenisation function. Replace them once ready.
|
|
|
|
bool validate_json(char *json) {
|
|
lexer_t lexer = {0};
|
|
lexer.line = 1;
|
|
lexer.column = 0;
|
|
lexer.current = LEXER_STATE_START;
|
|
lexer.keyword.type = LEXER_STRING_KEYWORD;
|
|
lexer.codepoint.type = LEXER_STRING_UNICODE;
|
|
lexer.current_string = dstr_with_capacity(STRING_BUF_START_CAPACITY);
|
|
|
|
if (!lexer.current_string) {
|
|
// TODO (Abdelrahman): This is fine for now, but it doesn't make sense to
|
|
// return INVALID_JSON if string allocation fails
|
|
return INVALID_JSON;
|
|
}
|
|
|
|
for (char *c = json; *c != '\0'; ++c) {
|
|
lexer_state_machine(&lexer, *c);
|
|
|
|
// Track the position in the text
|
|
++(lexer.column);
|
|
if (*c == '\n') {
|
|
++(lexer.line);
|
|
lexer.column = 0;
|
|
}
|
|
|
|
if (lexer.current == LEXER_STATE_ERROR) {
|
|
return INVALID_JSON;
|
|
}
|
|
}
|
|
|
|
return lexer.current == LEXER_STATE_LAST_COLLECTION || lexer.stack.size == 0;
|
|
}
|
|
|
|
// Push `state` onto the stack.
//
// The push is ignored when all MAX_STACK_CAPACITY slots are in use. The test
// is `size >= capacity` so that the final slot is usable as well.
// NOTE(review): a dropped push is not reported to the caller.
void stack_push(state_stack_t *stack, lexer_state_t state) {
  if (stack->size >= MAX_STACK_CAPACITY) {
    return;
  }

  stack->stack[stack->size] = state;
  ++(stack->size);
}
|
|
|
|
// Remove and return the most recently pushed state, or LEXER_STATE_ERROR when
// the stack holds nothing.
lexer_state_t stack_pop(state_stack_t *stack) {
  if (stack->size > 0) {
    stack->size -= 1;

    return stack->stack[stack->size];
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
// Store `input` at the end of the fixed-size string. The character is dropped
// when the buffer selected by `type` already holds its maximum number of
// characters (MAX_KEYWORD_LENGTH or UNICODE_LENGTH).
void append_to_lex_str(lexer_string_t *lex_str, char input) {
  char *buffer = NULL;
  u64 limit = 0;

  if (lex_str->type == LEXER_STRING_KEYWORD) {
    buffer = lex_str->keyword.str;
    limit = MAX_KEYWORD_LENGTH;
  } else if (lex_str->type == LEXER_STRING_UNICODE) {
    buffer = lex_str->unicode.codepoint;
    limit = UNICODE_LENGTH;
  }

  // An unrecognised type leaves limit at 0, so it exits here as well
  if (lex_str->size + 1 > limit) {
    return;
  }

  assert(buffer != NULL);

  buffer[lex_str->size] = input;
  lex_str->size += 1;
}
|
|
|
|
// Zero the active buffer of the fixed-size string and reset its length.
//
// The number of bytes cleared is taken from the array itself. The keyword
// buffer is MAX_KEYWORD_LENGTH + 1 bytes, but the codepoint buffer is exactly
// UNICODE_LENGTH bytes, so clearing UNICODE_LENGTH + 1 bytes would write one
// byte past the end of that array.
void clear_lex_str(lexer_string_t *lex_str) {
  size_t length = 0;
  char *str = NULL;

  switch (lex_str->type) {
  case LEXER_STRING_KEYWORD:
    str = lex_str->keyword.str;
    length = sizeof lex_str->keyword.str;

    break;
  case LEXER_STRING_UNICODE:
    str = lex_str->unicode.codepoint;
    length = sizeof lex_str->unicode.codepoint;

    break;
  }

  assert(str != NULL);

  memset(str, 0, length);
  lex_str->size = 0;
}
|
|
|
|
// True when both NUL-terminated strings have identical contents.
bool strequal(const char *first, const char *second) {
  const int ordering = strcmp(first, second);

  return ordering == 0;
}
|
|
|
|
// True when `input` is one of the hexadecimal letters A-F or a-f. Decimal
// digits are NOT accepted by this function.
bool is_valid_hex_char(const char input) {
  static const char letters[] = "ABCDEFabcdef";

  for (size_t i = 0; letters[i] != '\0'; ++i) {
    if (letters[i] == input) {
      return true;
    }
  }

  return false;
}
|
|
|
|
// True when `input` is a hexadecimal digit: 0-9, A-F or a-f.
//
// The <ctype.h> classifiers are only defined for EOF and values representable
// as unsigned char, so the argument is converted first; otherwise a byte
// >= 0x80 would be passed as a negative value on platforms where char is
// signed.
bool ishex(const char input) {
  return isxdigit((unsigned char)input) != 0;
}
|
|
|
|
// Advance the lexer by one character: dispatch `input` to the handler for the
// current state and store the state that handler returns.
//
// The *_START states push their collection onto the stack and then behave
// like the matching collection state.
//
// In the OBJECT_END / ARRAY_END states the collection that just closed is
// still on top of the stack. The character after the closing bracket is
// interpreted here instead of being discarded, so that it is validated and
// consecutive closing brackets (e.g. "[[1]]") work:
//   - only the outermost collection is left: only whitespace may follow;
//   - whitespace: remain in the *_END state;
//   - ',': pop the closed collection and continue in the parent;
//   - '}' or ']': pop the closed collection and close the parent;
//   - anything else: error.
void lexer_state_machine(lexer_t *lexer, char input) {
  switch (lexer->current) {
  case LEXER_STATE_START:
    lexer->current = handle_lexer_start(input);
    break;
  case LEXER_STATE_VALUE:
    lexer->current = handle_value(lexer, input);
    break;
  case LEXER_STATE_OBJECT_START:
    stack_push(&(lexer->stack), LEXER_STATE_OBJECT);
    // fallthrough: the first character inside the object is handled below
  case LEXER_STATE_OBJECT:
    lexer->current = handle_object(lexer, input);
    break;
  case LEXER_STATE_ARRAY_START:
    stack_push(&(lexer->stack), LEXER_STATE_ARRAY);
    // fallthrough: the first character inside the array is handled below
  case LEXER_STATE_ARRAY:
    lexer->current = handle_array(lexer, input);
    break;
  case LEXER_STATE_OBJECT_END:
  case LEXER_STATE_ARRAY_END:
    if (lexer->stack.size <= 1) {
      // The outermost collection was closed
      lexer->current = handle_last_collection(input);
    } else if (isspace((unsigned char)input)) {
      // Keep waiting for a significant character; state is left unchanged
    } else {
      // Drop the closed collection so that its parent is on top
      stack_pop(&(lexer->stack));

      if (input == ',') {
        lexer->current = lexer->stack.stack[lexer->stack.size - 1];
      } else if (input == '}' || input == ']') {
        lexer->current = handle_collection_end(lexer, input);
      } else {
        lexer->current = LEXER_STATE_ERROR;
      }
    }

    break;
  case LEXER_STATE_KEY:
    lexer->current = handle_key(lexer, input);
    break;
  case LEXER_STATE_DECIMAL:
    lexer->current = handle_decimal(lexer, input);
    break;
  case LEXER_STATE_NUMBER:
    lexer->current = handle_number(lexer, input);
    break;
  case LEXER_STATE_FRACTION:
    lexer->current = handle_fraction(lexer, input);
    break;
  case LEXER_STATE_EXPONENT:
    lexer->current = handle_exponent(lexer, input);
    break;
  case LEXER_STATE_EXP_SIGN:
    lexer->current = handle_exp_sign(lexer, input);
    break;
  case LEXER_STATE_POWER:
    lexer->current = handle_power(lexer, input);
    break;
  case LEXER_STATE_NUMBER_END:
    lexer->current = handle_number_end(lexer, input);
    break;
  case LEXER_STATE_STRING:
    lexer->current = handle_string(lexer, input);
    break;
  case LEXER_STATE_STRING_END:
    lexer->current = handle_string_end(lexer, input);
    break;
  case LEXER_STATE_ESCAPE_SEQUENCE:
    lexer->current = handle_escape_sequence(lexer, input);
    break;
  case LEXER_STATE_UNICODE_HEX:
    lexer->current = handle_unicode_sequence(lexer, input);
    break;
  case LEXER_STATE_TRUE:
    lexer->current = handle_true(lexer, input);
    break;
  case LEXER_STATE_FALSE:
    lexer->current = handle_false(lexer, input);
    break;
  case LEXER_STATE_NULL:
    lexer->current = handle_null(lexer, input);
    break;
  case LEXER_STATE_KEYWORD_END:
    lexer->current = handle_keyword_end(lexer, input);
    break;
  case LEXER_STATE_LAST_COLLECTION:
    lexer->current = handle_last_collection(input);
    break;
  case LEXER_STATE_ERROR:
  case COUNT_LEXER_STATES:
    lexer->current = LEXER_STATE_ERROR;
    break;
  }
}
|
|
|
|
lexer_state_t handle_lexer_start(char input) {
|
|
if (isspace(input)) {
|
|
return LEXER_STATE_START;
|
|
}
|
|
|
|
switch (input) {
|
|
case '{':
|
|
printf("TK_L_BRACE\n");
|
|
return LEXER_STATE_OBJECT_START;
|
|
case '[':
|
|
printf("TK_L_BRACKET\n");
|
|
return LEXER_STATE_ARRAY_START;
|
|
}
|
|
|
|
return LEXER_STATE_ERROR;
|
|
}
|
|
|
|
lexer_state_t handle_last_collection(char input) {
|
|
if (isspace(input)) {
|
|
return LEXER_STATE_LAST_COLLECTION;
|
|
}
|
|
|
|
return LEXER_STATE_ERROR;
|
|
}
|
|
|
|
// Match a closing bracket against the collection on top of the stack.
//
// Only called when `input` is '}' or ']', so whitespace is not considered.
// As a side effect lexer->current is set to the state on top of the stack.
// Returns the matching *_END state, or LEXER_STATE_ERROR when the bracket does
// not match the open collection.
lexer_state_t handle_collection_end(lexer_t *lexer, char input) {
  lexer->current = lexer->stack.stack[lexer->stack.size - 1];

  switch (input) {
  case '}':
    if (lexer->current == LEXER_STATE_OBJECT) {
      printf("TK_R_BRACE\n");
      return LEXER_STATE_OBJECT_END;
    }
    break;
  case ']':
    if (lexer->current == LEXER_STATE_ARRAY) {
      printf("TK_R_BRACKET\n");
      return LEXER_STATE_ARRAY_END;
    }
    break;
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
// Handler for LEXER_STATE_OBJECT: expects whitespace, the opening quote of a
// key, or the closing brace.
//
// The closing-brace token is printed by handle_collection_end, so it is not
// printed here as well (doing both emitted TK_R_BRACE twice).
lexer_state_t handle_object(lexer_t *lexer, char input) {
  // <ctype.h> functions require a value representable as unsigned char
  if (isspace((unsigned char)input)) {
    return LEXER_STATE_OBJECT;
  }

  if (input == '"') {
    // Remember that the upcoming string is a key, not a value
    stack_push(&(lexer->stack), LEXER_STATE_KEY);

    return LEXER_STATE_KEY;
  }

  if (input == '}') {
    return handle_collection_end(lexer, input);
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
// Handler for LEXER_STATE_ARRAY: skips whitespace, closes the array on ']'
// and otherwise treats the character as the start of a value.
//
// The closing-bracket token is printed by handle_collection_end, so it is not
// printed here as well (doing both emitted TK_R_BRACKET twice).
lexer_state_t handle_array(lexer_t *lexer, char input) {
  // <ctype.h> functions require a value representable as unsigned char
  if (isspace((unsigned char)input)) {
    return LEXER_STATE_ARRAY;
  }

  if (input == ']') {
    return handle_collection_end(lexer, input);
  }

  return handle_value(lexer, input);
}
|
|
|
|
// Handler for LEXER_STATE_KEY: processes the first character after a key's
// opening quote.
//
// That character is ordinary string content and must follow the same rules as
// every later one: a '"' ends the (empty) key and a backslash starts an escape
// sequence. Appending it blindly meant an empty key "" never terminated and an
// escaped first character was not recognised, so it is delegated to
// handle_string.
lexer_state_t handle_key(lexer_t *lexer, char input) {
  return handle_string(lexer, input);
}
|
|
|
|
// Handler for LEXER_STATE_VALUE (also used by handle_array): classifies the
// first character of a value and returns the state that lexes it.
//
// NOTE(review): there is no case for '-', so negative numbers end up in
// LEXER_STATE_ERROR.
lexer_state_t handle_value(lexer_t *lexer, char input) {
  // <ctype.h> functions require a value representable as unsigned char
  const unsigned char ch = (unsigned char)input;

  if (isspace(ch)) {
    return LEXER_STATE_VALUE;
  }

  if (isdigit(ch) && input != '0') {
    dstr_append(&(lexer->current_string), input);

    return LEXER_STATE_NUMBER;
  }

  switch (input) {
  case '"':
    // Remember that the upcoming string is a value, not a key
    stack_push(&(lexer->stack), LEXER_STATE_VALUE);

    return LEXER_STATE_STRING;
  case '0':
    // A leading zero gets its own state
    dstr_append(&(lexer->current_string), input);

    return LEXER_STATE_DECIMAL;
  case '{':
    printf("TK_L_BRACE\n");
    return LEXER_STATE_OBJECT_START;
  case '[':
    printf("TK_L_BRACKET\n");
    return LEXER_STATE_ARRAY_START;
  case 't':
  case 'f':
  case 'n':
    // First letter of true / false / null
    append_to_lex_str(&(lexer->keyword), input);

    return handle_keyword(input);
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
// Handler for LEXER_STATE_STRING: consumes one character of string content.
// A closing quote prints the accumulated string and ends it; a backslash is
// stored and starts an escape sequence; every other character is stored.
lexer_state_t handle_string(lexer_t *lexer, char input) {
  if (input == '"') {
    printf("TK_STRING: %s\n", dstr_to_cstr(lexer->current_string));
    return LEXER_STATE_STRING_END;
  }

  dstr_append(&(lexer->current_string), input);

  if (input == '\\') {
    return LEXER_STATE_ESCAPE_SEQUENCE;
  }

  return LEXER_STATE_STRING;
}
|
|
|
|
// Handler for LEXER_STATE_STRING_END: decides what may follow a closed string.
//
// Whitespace is skipped. Otherwise the string buffer is cleared and the marker
// pushed when the string started (KEY or VALUE) is popped into lexer->current:
//   - after a key only ':' is allowed;
//   - after a value ',' continues the enclosing collection and '}' / ']'
//     closes it.
// A key followed directly by a closing bracket (e.g. {"a"}) is rejected.
lexer_state_t handle_string_end(lexer_t *lexer, char input) {
  // <ctype.h> functions require a value representable as unsigned char
  if (isspace((unsigned char)input)) {
    return LEXER_STATE_STRING_END;
  }

  dstr_clear(lexer->current_string);

  lexer->current = stack_pop(&(lexer->stack));

  if (lexer->current == LEXER_STATE_KEY) {
    if (input == ':') {
      printf("TK_COLON\n");
      return LEXER_STATE_VALUE;
    }

    return LEXER_STATE_ERROR;
  }

  if (lexer->current != LEXER_STATE_VALUE) {
    return LEXER_STATE_ERROR;
  }

  if (input == ',') {
    printf("TK_COMMA\n");
    return lexer->stack.stack[lexer->stack.size - 1];
  }

  if (input == '}' || input == ']') {
    return handle_collection_end(lexer, input);
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
// Handler for LEXER_STATE_ESCAPE_SEQUENCE: processes the character right after
// a backslash. The character is always stored; 'u' starts a unicode escape,
// one of the single-character escapes returns to the string, and anything else
// is an error.
lexer_state_t handle_escape_sequence(lexer_t *lexer, char input) {
  static const char simple_escapes[] = "\"/\\bfnrt";

  dstr_append(&(lexer->current_string), input);

  if (input == 'u') {
    return LEXER_STATE_UNICODE_HEX;
  }

  // strchr also matches the terminator, so NUL is excluded explicitly
  if (input != '\0' && strchr(simple_escapes, input) != NULL) {
    return LEXER_STATE_STRING;
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
// Handler for LEXER_STATE_UNICODE_HEX: collects the hex digits that follow
// "\u". The character is stored in both the codepoint buffer and the string
// buffer. The codepoint buffer is reset when a non-hex character arrives
// (error) or when UNICODE_LENGTH digits have been collected (back to string).
lexer_state_t handle_unicode_sequence(lexer_t *lexer, char input) {
  append_to_lex_str(&(lexer->codepoint), input);
  dstr_append(&(lexer->current_string), input);

  const bool is_hex_digit = ishex(input);
  const bool complete = lexer->codepoint.size == UNICODE_LENGTH;

  if (is_hex_digit && !complete) {
    return LEXER_STATE_UNICODE_HEX;
  }

  clear_lex_str(&(lexer->codepoint));

  return is_hex_digit ? LEXER_STATE_STRING : LEXER_STATE_ERROR;
}
|
|
|
|
// Handler for LEXER_STATE_DECIMAL: processes the character after a leading
// '0'.
//
// A lone zero is a complete number, so besides '.' it may be followed by an
// exponent marker, whitespace, ',' or a closing bracket. Another digit is
// rejected, which disallows leading zeros.
lexer_state_t handle_decimal(lexer_t *lexer, char input) {
  if (input == '.') {
    dstr_append(&(lexer->current_string), input);

    return LEXER_STATE_FRACTION;
  }

  if (input == 'e' || input == 'E') {
    dstr_append(&(lexer->current_string), input);

    return LEXER_STATE_EXPONENT;
  }

  // <ctype.h> functions require a value representable as unsigned char
  if (isspace((unsigned char)input)) {
    return LEXER_STATE_NUMBER_END;
  }

  if (input == ',' || input == '}' || input == ']') {
    printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string));
    dstr_clear(lexer->current_string);

    return input == ',' ? lexer->stack.stack[lexer->stack.size - 1]
                        : handle_collection_end(lexer, input);
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
// Handler for LEXER_STATE_NUMBER: consumes the integer part of a number.
//
// Digits extend the number, '.' starts the fraction and 'e' / 'E' starts the
// exponent (an integer may carry an exponent without a fraction, e.g. 1e5).
// ',' and closing brackets print and clear the number; whitespace moves to
// LEXER_STATE_NUMBER_END.
lexer_state_t handle_number(lexer_t *lexer, char input) {
  // <ctype.h> functions require a value representable as unsigned char
  const unsigned char ch = (unsigned char)input;

  if (isdigit(ch)) {
    dstr_append(&(lexer->current_string), input);

    return LEXER_STATE_NUMBER;
  }

  if (input == '.') {
    dstr_append(&(lexer->current_string), input);

    return LEXER_STATE_FRACTION;
  }

  if (input == 'e' || input == 'E') {
    dstr_append(&(lexer->current_string), input);

    return LEXER_STATE_EXPONENT;
  }

  if (input == '}' || input == ']') {
    printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string));
    dstr_clear(lexer->current_string);

    return handle_collection_end(lexer, input);
  }

  if (input == ',') {
    printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string));
    dstr_clear(lexer->current_string);

    return lexer->stack.stack[lexer->stack.size - 1];
  }

  if (isspace(ch)) {
    return LEXER_STATE_NUMBER_END;
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
// Handler for LEXER_STATE_FRACTION: consumes the digits after the decimal
// point. 'e' / 'E' starts the exponent; ',' and closing brackets print and
// clear the number; whitespace moves to LEXER_STATE_NUMBER_END.
//
// NOTE(review): this state is entered right after '.', and a terminator is
// accepted even when no digit has followed, so "1." is not rejected.
lexer_state_t handle_fraction(lexer_t *lexer, char input) {
  // <ctype.h> functions require a value representable as unsigned char
  const unsigned char ch = (unsigned char)input;

  if (isdigit(ch)) {
    dstr_append(&(lexer->current_string), input);

    return LEXER_STATE_FRACTION;
  }

  if (input == '}' || input == ']') {
    printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string));
    dstr_clear(lexer->current_string);

    return handle_collection_end(lexer, input);
  }

  if (input == 'e' || input == 'E') {
    dstr_append(&(lexer->current_string), input);

    return LEXER_STATE_EXPONENT;
  }

  if (input == ',') {
    printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string));
    dstr_clear(lexer->current_string);

    return lexer->stack.stack[lexer->stack.size - 1];
  }

  if (isspace(ch)) {
    return LEXER_STATE_NUMBER_END;
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
// Handler for LEXER_STATE_EXPONENT: processes the character after 'e' / 'E'.
// The character is always stored; a digit starts the power, a sign must be
// followed by digits, anything else is an error.
lexer_state_t handle_exponent(lexer_t *lexer, char input) {
  dstr_append(&(lexer->current_string), input);

  // <ctype.h> functions require a value representable as unsigned char
  if (isdigit((unsigned char)input)) {
    return LEXER_STATE_POWER;
  }

  if (input == '+' || input == '-') {
    return LEXER_STATE_EXP_SIGN;
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
// Handler for LEXER_STATE_EXP_SIGN: the character after the exponent's sign
// must be a digit. The character is stored in either case.
lexer_state_t handle_exp_sign(lexer_t *lexer, char input) {
  dstr_append(&(lexer->current_string), input);

  // <ctype.h> functions require a value representable as unsigned char
  if (isdigit((unsigned char)input)) {
    return LEXER_STATE_POWER;
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
// Handler for LEXER_STATE_POWER: consumes the digits of the exponent. ',' and
// closing brackets print and clear the number; whitespace moves to
// LEXER_STATE_NUMBER_END.
lexer_state_t handle_power(lexer_t *lexer, char input) {
  // <ctype.h> functions require a value representable as unsigned char
  const unsigned char ch = (unsigned char)input;

  if (isdigit(ch)) {
    dstr_append(&(lexer->current_string), input);

    return LEXER_STATE_POWER;
  }

  if (input == '}' || input == ']') {
    printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string));
    dstr_clear(lexer->current_string);

    return handle_collection_end(lexer, input);
  }

  if (input == ',') {
    printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string));
    dstr_clear(lexer->current_string);

    return lexer->stack.stack[lexer->stack.size - 1];
  }

  if (isspace(ch)) {
    return LEXER_STATE_NUMBER_END;
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
// Handler for LEXER_STATE_NUMBER_END: a number was followed by whitespace and
// its terminator is still pending.
//
// Further whitespace is skipped without touching the buffer, so the number is
// printed and cleared exactly once, when the first non-whitespace character
// arrives. ',' continues the enclosing collection, a closing bracket closes
// it, anything else is an error.
lexer_state_t handle_number_end(lexer_t *lexer, char input) {
  // <ctype.h> functions require a value representable as unsigned char
  if (isspace((unsigned char)input)) {
    return LEXER_STATE_NUMBER_END;
  }

  printf("TK_NUMBER: %s\n", dstr_to_cstr(lexer->current_string));
  dstr_clear(lexer->current_string);

  if (input == ',') {
    return lexer->stack.stack[lexer->stack.size - 1];
  }

  if (input == '}' || input == ']') {
    return handle_collection_end(lexer, input);
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
lexer_state_t handle_keyword(char input) {
|
|
switch (input) {
|
|
case 't':
|
|
return LEXER_STATE_TRUE;
|
|
case 'f':
|
|
return LEXER_STATE_FALSE;
|
|
case 'n':
|
|
return LEXER_STATE_NULL;
|
|
}
|
|
|
|
return LEXER_STATE_ERROR;
|
|
}
|
|
|
|
// Handler for LEXER_STATE_TRUE: matches the remaining letters of "true".
// The character is recorded in the keyword buffer whether or not it matches.
lexer_state_t handle_true(lexer_t *lexer, char input) {
  // What has been matched so far, captured before `input` is recorded
  char matched[MAX_KEYWORD_LENGTH + 1];
  strcpy(matched, lexer->keyword.keyword.str);

  append_to_lex_str(&(lexer->keyword), input);

  if (input == 'r' && strequal(matched, "t")) {
    return LEXER_STATE_TRUE;
  }

  if (input == 'u' && strequal(matched, "tr")) {
    return LEXER_STATE_TRUE;
  }

  if (input == 'e' && strequal(matched, "tru")) {
    return LEXER_STATE_KEYWORD_END;
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
// Handler for LEXER_STATE_FALSE: matches the remaining letters of "false".
// The character is recorded in the keyword buffer whether or not it matches.
lexer_state_t handle_false(lexer_t *lexer, char input) {
  // What has been matched so far, captured before `input` is recorded
  char matched[MAX_KEYWORD_LENGTH + 1];
  strcpy(matched, lexer->keyword.keyword.str);

  append_to_lex_str(&(lexer->keyword), input);

  switch (input) {
  case 'a':
    if (strequal(matched, "f")) {
      return LEXER_STATE_FALSE;
    }
    break;
  case 'l':
    if (strequal(matched, "fa")) {
      return LEXER_STATE_FALSE;
    }
    break;
  case 's':
    if (strequal(matched, "fal")) {
      return LEXER_STATE_FALSE;
    }
    break;
  case 'e':
    if (strequal(matched, "fals")) {
      return LEXER_STATE_KEYWORD_END;
    }
    break;
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
// Handler for LEXER_STATE_NULL: matches the remaining letters of "null".
// The character is recorded in the keyword buffer whether or not it matches.
lexer_state_t handle_null(lexer_t *lexer, char input) {
  // What has been matched so far, captured before `input` is recorded
  char matched[MAX_KEYWORD_LENGTH + 1];
  strcpy(matched, lexer->keyword.keyword.str);

  append_to_lex_str(&(lexer->keyword), input);

  if (input == 'u') {
    return strequal(matched, "n") ? LEXER_STATE_NULL : LEXER_STATE_ERROR;
  }

  if (input == 'l') {
    // The first 'l' continues the keyword, the second one completes it
    if (strequal(matched, "nu")) {
      return LEXER_STATE_NULL;
    }

    if (strequal(matched, "nul")) {
      return LEXER_STATE_KEYWORD_END;
    }
  }

  return LEXER_STATE_ERROR;
}
|
|
|
|
// Handler for LEXER_STATE_KEYWORD_END: a keyword has been fully matched and
// its terminator is still pending.
//
// Whitespace is skipped without touching the buffer, so the keyword is printed
// and cleared exactly once, when the first non-whitespace character arrives.
// ',' continues the enclosing collection, a closing bracket closes it,
// anything else is an error.
lexer_state_t handle_keyword_end(lexer_t *lexer, char input) {
  // <ctype.h> functions require a value representable as unsigned char
  if (isspace((unsigned char)input)) {
    return LEXER_STATE_KEYWORD_END;
  }

  printf("TK_KEYWORD: %s\n", lexer->keyword.keyword.str);
  clear_lex_str(&(lexer->keyword));

  if (input == ',') {
    return lexer->stack.stack[lexer->stack.size - 1];
  }

  if (input == '}' || input == ']') {
    return handle_collection_end(lexer, input);
  }

  return LEXER_STATE_ERROR;
}
|