Compare commits
2 Commits
fd8231b02d
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 4d02e90aac | |||
| da69ad6d43 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -3,4 +3,3 @@
|
||||
src/ignore/**/*
|
||||
main
|
||||
compile_commands.json
|
||||
gentable
|
||||
|
||||
@@ -1,675 +0,0 @@
|
||||
#include "aliases.h"
|
||||
#include "lexer_data.h"
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define STRING_BUF_LENGTH 100
|
||||
|
||||
INTERNAL void write_table(void);
|
||||
INTERNAL void clear_file(void);
|
||||
INTERNAL lexer_input_t char_type(char input);
|
||||
INTERNAL void lexer_state_machine(lexer_input_t input);
|
||||
INTERNAL lexer_state_t handle_lexer_start(lexer_input_t input);
|
||||
INTERNAL lexer_state_t handle_last_collection(char input);
|
||||
INTERNAL lexer_state_t handle_collection_end(lexer_input_t input);
|
||||
INTERNAL void handle_input_after_collection_end(lexer_input_t input);
|
||||
INTERNAL lexer_state_t handle_object(lexer_input_t input);
|
||||
INTERNAL lexer_state_t handle_array(lexer_input_t input);
|
||||
INTERNAL lexer_state_t handle_key(lexer_input_t input);
|
||||
INTERNAL lexer_state_t handle_value(lexer_input_t input);
|
||||
INTERNAL lexer_state_t handle_string(lexer_input_t input);
|
||||
INTERNAL lexer_state_t handle_string_end(lexer_input_t input);
|
||||
INTERNAL lexer_state_t handle_escape_sequence(lexer_input_t input);
|
||||
INTERNAL lexer_state_t handle_unicode_hex(lexer_input_t input,
|
||||
lexer_state_t return_state);
|
||||
INTERNAL lexer_state_t handle_decimal(lexer_input_t input);
|
||||
INTERNAL lexer_state_t handle_number(lexer_input_t input);
|
||||
INTERNAL lexer_state_t handle_fraction(lexer_input_t input);
|
||||
INTERNAL lexer_state_t handle_exponent(lexer_input_t input);
|
||||
INTERNAL lexer_state_t handle_exp_sign(lexer_input_t input);
|
||||
INTERNAL lexer_state_t handle_power(lexer_input_t input);
|
||||
INTERNAL lexer_state_t handle_number_end(lexer_input_t input);
|
||||
INTERNAL lexer_state_t handle_true(lexer_input_t input,
|
||||
lexer_state_t start_state);
|
||||
INTERNAL lexer_state_t handle_false(lexer_input_t input,
|
||||
lexer_state_t start_state);
|
||||
INTERNAL lexer_state_t handle_null(lexer_input_t input,
|
||||
lexer_state_t start_state);
|
||||
INTERNAL lexer_state_t handle_keyword_end(lexer_input_t input);
|
||||
|
||||
INTERNAL lexer_state_t current_state = LEXER_STATE_START;
|
||||
INTERNAL lexer_state_t state_table[COUNT_LEXER_STATES][COUNT_LEXER_INPUTS] = {
|
||||
0};
|
||||
|
||||
INTERNAL const char *filename = "./include/lexer/lexer_state_transitions.table";
|
||||
|
||||
// Entry point of the table generator: empties the output file, then writes
// the contents of state_table into it.
int main(void) {
  clear_file();
  write_table();

  return EXIT_SUCCESS;
}
|
||||
|
||||
void clear_file(void) {
|
||||
FILE *fp = fopen(filename, "w");
|
||||
if (!fp) {
|
||||
printf("Failed to open file\n");
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
void write_table(void) {
|
||||
FILE *fp = fopen(filename, "a");
|
||||
|
||||
if (!fp) {
|
||||
printf("Failed to open file\n");
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
char output[STRING_BUF_LENGTH] = {0};
|
||||
u64 length = 0;
|
||||
|
||||
for (u64 i = 0; i < COUNT_LEXER_STATES; ++i) {
|
||||
sprintf(output, "{ ");
|
||||
length = strlen(output);
|
||||
fwrite(output, 1, length, fp);
|
||||
|
||||
for (u64 j = 0; j < COUNT_LEXER_INPUTS; ++j) {
|
||||
sprintf(output, "%lld, ", (unsigned long long)state_table[i][j]);
|
||||
length = strlen(output);
|
||||
fwrite(output, 1, length, fp);
|
||||
}
|
||||
|
||||
sprintf(output, "},\n");
|
||||
length = strlen(output);
|
||||
fwrite(output, 1, length, fp);
|
||||
|
||||
memset(output, 0, STRING_BUF_LENGTH);
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
// Classifies a single input character into the lexer_input_t category used to
// index the state table.
//
// Whitespace (as reported by isspace) maps to LEXER_INPUT_WHITE_SPACE, the
// digits '1'-'9' map to LEXER_INPUT_NON_ZERO, the characters listed in the
// switch map to their dedicated category, and everything else maps to
// LEXER_INPUT_OTHER.
INTERNAL lexer_input_t char_type(char input) {
  // isspace() requires a value representable as unsigned char (or EOF);
  // passing a negative plain char is undefined behaviour, so convert first.
  if (isspace((unsigned char)input)) {
    return LEXER_INPUT_WHITE_SPACE;
  } else if (input >= '1' && input <= '9') {
    return LEXER_INPUT_NON_ZERO;
  }

  switch (input) {
  case '{':
    return LEXER_INPUT_OPEN_BRACE;
  case '}':
    return LEXER_INPUT_CLOSE_BRACE;
  case '[':
    return LEXER_INPUT_OPEN_BRACKET;
  case ']':
    return LEXER_INPUT_CLOSE_BRACKET;
  case ',':
    return LEXER_INPUT_COMMA;
  case ':':
    return LEXER_INPUT_COLON;
  case '"':
    return LEXER_INPUT_DOUBLE_QUOTE;
  case '\\':
    return LEXER_INPUT_BACK_SLASH;
  case '/':
    return LEXER_INPUT_FORWARD_SLASH;
  case 'a':
    return LEXER_INPUT_LOWER_A;
  case 'b':
    return LEXER_INPUT_LOWER_B;
  case 'c':
    return LEXER_INPUT_LOWER_C;
  case 'd':
    return LEXER_INPUT_LOWER_D;
  case 'e':
    return LEXER_INPUT_LOWER_E;
  case 'f':
    return LEXER_INPUT_LOWER_F;
  case 'l':
    return LEXER_INPUT_LOWER_L;
  case 'n':
    return LEXER_INPUT_LOWER_N;
  case 'r':
    return LEXER_INPUT_LOWER_R;
  case 's':
    return LEXER_INPUT_LOWER_S;
  case 't':
    return LEXER_INPUT_LOWER_T;
  case 'u':
    return LEXER_INPUT_LOWER_U;
  case 'A':
    return LEXER_INPUT_UPPER_A;
  case 'B':
    return LEXER_INPUT_UPPER_B;
  case 'C':
    return LEXER_INPUT_UPPER_C;
  case 'D':
    return LEXER_INPUT_UPPER_D;
  case 'E':
    return LEXER_INPUT_UPPER_E;
  case 'F':
    return LEXER_INPUT_UPPER_F;
  case '-':
    return LEXER_INPUT_MINUS;
  case '+':
    return LEXER_INPUT_PLUS;
  case '.':
    return LEXER_INPUT_DECIMAL;
  case '0':
    return LEXER_INPUT_ZERO;
  default:
    return LEXER_INPUT_OTHER;
  }
}
|
||||
|
||||
// Advances the global current_state by one input category.
//
// Each state dispatches to its handler, whose return value becomes the new
// state. The two collection-end states only call
// handle_input_after_collection_end and leave current_state unchanged.
void lexer_state_machine(lexer_input_t input) {
  // Defaults to "stay in the same state" for the cases that do not assign.
  lexer_state_t next = current_state;

  switch (current_state) {
  case LEXER_STATE_START:
    next = handle_lexer_start(input);
    break;
  case LEXER_STATE_VALUE:
    next = handle_value(input);
    break;
  case LEXER_STATE_OBJECT_START:
  case LEXER_STATE_OBJECT:
    next = handle_object(input);
    break;
  case LEXER_STATE_ARRAY_START:
  case LEXER_STATE_ARRAY:
    next = handle_array(input);
    break;
  case LEXER_STATE_OBJECT_END:
  case LEXER_STATE_ARRAY_END:
    handle_input_after_collection_end(input);
    break;
  case LEXER_STATE_KEY:
    next = handle_key(input);
    break;
  case LEXER_STATE_DECIMAL:
    next = handle_decimal(input);
    break;
  case LEXER_STATE_NUMBER:
    next = handle_number(input);
    break;
  case LEXER_STATE_FRACTION:
    next = handle_fraction(input);
    break;
  case LEXER_STATE_EXPONENT:
    next = handle_exponent(input);
    break;
  case LEXER_STATE_EXP_SIGN:
    next = handle_exp_sign(input);
    break;
  case LEXER_STATE_POWER:
    next = handle_power(input);
    break;
  case LEXER_STATE_NUMBER_END:
    next = handle_number_end(input);
    break;
  case LEXER_STATE_STRING:
    next = handle_string(input);
    break;
  case LEXER_STATE_STRING_END:
    next = handle_string_end(input);
    break;
  case LEXER_STATE_ESCAPE_SEQUENCE:
    next = handle_escape_sequence(input);
    break;
  // Each hex-digit state names the state to enter once a valid digit is seen.
  case LEXER_STATE_UNICODE_HEX1:
    next = handle_unicode_hex(input, LEXER_STATE_UNICODE_HEX2);
    break;
  case LEXER_STATE_UNICODE_HEX2:
    next = handle_unicode_hex(input, LEXER_STATE_UNICODE_HEX3);
    break;
  case LEXER_STATE_UNICODE_HEX3:
    next = handle_unicode_hex(input, LEXER_STATE_UNICODE_HEX4);
    break;
  case LEXER_STATE_UNICODE_HEX4:
    next = handle_unicode_hex(input, LEXER_STATE_STRING);
    break;
  // The keyword handlers receive the state they are invoked from, which is
  // exactly current_state for every case in each group.
  case LEXER_STATE_T:
  case LEXER_STATE_TR:
  case LEXER_STATE_TRU:
  case LEXER_STATE_TRUE:
    next = handle_true(input, current_state);
    break;
  case LEXER_STATE_F:
  case LEXER_STATE_FA:
  case LEXER_STATE_FAL:
  case LEXER_STATE_FALS:
  case LEXER_STATE_FALSE:
    next = handle_false(input, current_state);
    break;
  case LEXER_STATE_N:
  case LEXER_STATE_NU:
  case LEXER_STATE_NUL:
  case LEXER_STATE_NULL:
    next = handle_null(input, current_state);
    break;
  case LEXER_STATE_KEYWORD_END:
    next = handle_keyword_end(input);
    break;
  case LEXER_STATE_LAST_COLLECTION:
    next = handle_last_collection(input);
    break;
  case LEXER_STATE_ERROR:
  case COUNT_LEXER_STATES:
    next = LEXER_STATE_ERROR;
    break;
  }

  current_state = next;
}
|
||||
|
||||
// Transition out of LEXER_STATE_START: whitespace stays in START, '{' and '['
// inputs open an object or an array, anything else is an error.
lexer_state_t handle_lexer_start(lexer_input_t input) {
  if (input == LEXER_INPUT_WHITE_SPACE) {
    return LEXER_STATE_START;
  }

  if (input == LEXER_INPUT_OPEN_BRACE) {
    return LEXER_STATE_OBJECT_START;
  }

  if (input == LEXER_INPUT_OPEN_BRACKET) {
    return LEXER_STATE_ARRAY_START;
  }

  return LEXER_STATE_ERROR;
}
|
||||
|
||||
lexer_state_t handle_last_collection(char input) {
|
||||
if (input == LEXER_INPUT_WHITE_SPACE) {
|
||||
return LEXER_STATE_LAST_COLLECTION;
|
||||
}
|
||||
|
||||
return LEXER_STATE_ERROR;
|
||||
}
|
||||
|
||||
// TODO (Abdelrahman): Figure out how to handle this
|
||||
// Handles a closing brace/bracket input.
//
// Reads the state on top of lexer->stack into lexer->current. If it is an
// object closed by a close-brace input, or an array closed by a close-bracket
// input, the matching token (TK_R_BRACE / TK_R_BRACKET) is stored and the
// corresponding *_END state is returned; any other combination is an error.
//
// The token goes into lexer->token, or into lexer->extra_token when a token
// is already pending (lexer->token_ready).
//
// NOTE(review): `lexer` is not declared in this file (see the TODO above).
lexer_state_t handle_collection_end(lexer_input_t input) {
  // No need to ignore space as this is only called when input is } or ]

  lexer->current = lexer->stack.stack[lexer->stack.size - 1];

  // `input` is a lexer_input_t category, not a raw character, so it must be
  // compared against the enumerators; comparing with '}' / ']' never matched.
  bool object_end = lexer->current == LEXER_STATE_OBJECT &&
                    input == LEXER_INPUT_CLOSE_BRACE;
  bool array_end = lexer->current == LEXER_STATE_ARRAY &&
                   input == LEXER_INPUT_CLOSE_BRACKET;

  if (!object_end && !array_end) {
    return LEXER_STATE_ERROR;
  }

  token_t *token;

  if (lexer->token_ready) {
    lexer->has_extra_token = true;
    token = &(lexer->extra_token);
  } else {
    lexer->token_ready = true;
    token = &(lexer->token);
  }

  set_token(token, lexer->line, lexer->column,
            object_end ? TK_R_BRACE : TK_R_BRACKET, (token_value_t){0});

  return object_end ? LEXER_STATE_OBJECT_END : LEXER_STATE_ARRAY_END;
}
|
||||
|
||||
// TODO (Abdelrahman): Figure out how to handle this
|
||||
// Called for inputs that arrive while in an OBJECT_END / ARRAY_END state.
// A close-brace or close-bracket input marks a token as ready and stores the
// matching TK_R_BRACE / TK_R_BRACKET token in lexer->token; every other input
// is ignored.
//
// NOTE(review): `lexer` is not declared in this file (see the TODO above).
void handle_input_after_collection_end(lexer_input_t input) {
  // `input` is a lexer_input_t category, so the labels must be enumerators;
  // the previous '}' / ']' character labels could never be selected.
  switch (input) {
  case LEXER_INPUT_CLOSE_BRACE:
    lexer->token_ready = true;
    set_token(&(lexer->token), lexer->line, lexer->column, TK_R_BRACE,
              (token_value_t){0});

    break;
  case LEXER_INPUT_CLOSE_BRACKET:
    lexer->token_ready = true;
    set_token(&(lexer->token), lexer->line, lexer->column, TK_R_BRACKET,
              (token_value_t){0});

    break;
  default:
    break;
  }
}
|
||||
|
||||
// Transition while inside an object: whitespace stays in OBJECT, a double
// quote starts a key, a close brace ends the collection, anything else is an
// error.
lexer_state_t handle_object(lexer_input_t input) {
  if (input == LEXER_INPUT_WHITE_SPACE) {
    return LEXER_STATE_OBJECT;
  }

  if (input == LEXER_INPUT_DOUBLE_QUOTE) {
    return LEXER_STATE_KEY;
  }

  if (input == LEXER_INPUT_CLOSE_BRACE) {
    return handle_collection_end(input);
  }

  return LEXER_STATE_ERROR;
}
|
||||
|
||||
// Transition while inside an array: whitespace stays in ARRAY, a close
// bracket ends the collection, and every other input is treated as the start
// of a value.
lexer_state_t handle_array(lexer_input_t input) {
  if (input == LEXER_INPUT_WHITE_SPACE) {
    return LEXER_STATE_ARRAY;
  }

  if (input == LEXER_INPUT_CLOSE_BRACKET) {
    return handle_collection_end(input);
  }

  return handle_value(input);
}
|
||||
|
||||
// Transition out of LEXER_STATE_KEY: always moves to LEXER_STATE_STRING,
// regardless of the input.
lexer_state_t handle_key(lexer_input_t input) {
  (void)input; // intentionally unused

  return LEXER_STATE_STRING;
}
|
||||
|
||||
// Transition at the start of a value: selects the first state of a number,
// string, nested collection or keyword from the input category. Whitespace
// stays in VALUE; unlisted inputs are errors.
lexer_state_t handle_value(lexer_input_t input) {
  lexer_state_t next = LEXER_STATE_ERROR;

  switch (input) {
  case LEXER_INPUT_WHITE_SPACE:
    next = LEXER_STATE_VALUE;
    break;
  case LEXER_INPUT_MINUS:
  case LEXER_INPUT_NON_ZERO:
    next = LEXER_STATE_NUMBER;
    break;
  case LEXER_INPUT_ZERO:
    next = LEXER_STATE_DECIMAL;
    break;
  case LEXER_INPUT_DOUBLE_QUOTE:
    next = LEXER_STATE_STRING;
    break;
  case LEXER_INPUT_OPEN_BRACE:
    next = LEXER_STATE_OBJECT_START;
    break;
  case LEXER_INPUT_OPEN_BRACKET:
    next = LEXER_STATE_ARRAY_START;
    break;
  case LEXER_INPUT_LOWER_T:
    next = LEXER_STATE_T;
    break;
  case LEXER_INPUT_LOWER_F:
    next = LEXER_STATE_F;
    break;
  case LEXER_INPUT_LOWER_N:
    next = LEXER_STATE_N;
    break;
  default:
    break;
  }

  return next;
}
|
||||
|
||||
// Transition while inside a string: a backslash starts an escape sequence, a
// double quote ends the string, and every other input stays in STRING.
lexer_state_t handle_string(lexer_input_t input) {
  if (input == LEXER_INPUT_BACK_SLASH) {
    return LEXER_STATE_ESCAPE_SEQUENCE;
  }

  if (input == LEXER_INPUT_DOUBLE_QUOTE) {
    return LEXER_STATE_STRING_END;
  }

  return LEXER_STATE_STRING;
}
|
||||
|
||||
// TODO (Abdelrahman): Figure out how to handle this
|
||||
// Transition after a closing double quote.
//
// Whitespace stays in STRING_END. Otherwise the top of lexer->stack is popped
// into lexer->current and the input is interpreted against it:
//   - popped KEY followed by a colon     -> VALUE
//   - popped VALUE followed by a comma   -> the state now on top of the stack
//   - a close brace / bracket            -> handle_collection_end
//   - anything else                      -> ERROR
//
// NOTE(review): `lexer` and `stack_pop` are not declared in this file (see
// the TODO above).
lexer_state_t handle_string_end(lexer_input_t input) {
  if (input == LEXER_INPUT_WHITE_SPACE) {
    return LEXER_STATE_STRING_END;
  }

  lexer->current = stack_pop(&(lexer->stack));

  // `input` is a lexer_input_t category, so it is compared against the
  // enumerators; the previous ':' / ',' / '}' / ']' comparisons never matched.
  bool key_end =
      lexer->current == LEXER_STATE_KEY && input == LEXER_INPUT_COLON;

  if (key_end) {
    return LEXER_STATE_VALUE;
  }

  bool value_end =
      lexer->current == LEXER_STATE_VALUE && input == LEXER_INPUT_COMMA;

  if (value_end) {
    return lexer->stack.stack[lexer->stack.size - 1];
  }

  bool collection_end = input == LEXER_INPUT_CLOSE_BRACE ||
                        input == LEXER_INPUT_CLOSE_BRACKET;

  // handle_collection_end is declared with a single lexer_input_t parameter.
  return collection_end ? handle_collection_end(input) : LEXER_STATE_ERROR;
}
|
||||
|
||||
// Transition after a backslash inside a string: the single-character escapes
// (", /, \, b, f, n, r, t) return to STRING, 'u' starts a four-digit hex
// escape, anything else is an error.
lexer_state_t handle_escape_sequence(lexer_input_t input) {
  if (input == LEXER_INPUT_LOWER_U) {
    return LEXER_STATE_UNICODE_HEX1;
  }

  bool simple_escape = false;

  switch (input) {
  case LEXER_INPUT_DOUBLE_QUOTE:
  case LEXER_INPUT_FORWARD_SLASH:
  case LEXER_INPUT_BACK_SLASH:
  case LEXER_INPUT_LOWER_B:
  case LEXER_INPUT_LOWER_F:
  case LEXER_INPUT_LOWER_N:
  case LEXER_INPUT_LOWER_R:
  case LEXER_INPUT_LOWER_T:
    simple_escape = true;
    break;
  default:
    break;
  }

  return simple_escape ? LEXER_STATE_STRING : LEXER_STATE_ERROR;
}
|
||||
|
||||
// Transition for one digit of a \u escape: returns return_state when the
// input is a hexadecimal digit category (a-f, A-F, zero or non-zero digit)
// and LEXER_STATE_ERROR otherwise.
INTERNAL lexer_state_t handle_unicode_hex(lexer_input_t input,
                                          lexer_state_t return_state) {
  bool is_hex_digit = false;

  switch (input) {
  case LEXER_INPUT_ZERO:
  case LEXER_INPUT_NON_ZERO:
  case LEXER_INPUT_LOWER_A:
  case LEXER_INPUT_LOWER_B:
  case LEXER_INPUT_LOWER_C:
  case LEXER_INPUT_LOWER_D:
  case LEXER_INPUT_LOWER_E:
  case LEXER_INPUT_LOWER_F:
  case LEXER_INPUT_UPPER_A:
  case LEXER_INPUT_UPPER_B:
  case LEXER_INPUT_UPPER_C:
  case LEXER_INPUT_UPPER_D:
  case LEXER_INPUT_UPPER_E:
  case LEXER_INPUT_UPPER_F:
    is_hex_digit = true;
    break;
  default:
    break;
  }

  return is_hex_digit ? return_state : LEXER_STATE_ERROR;
}
|
||||
|
||||
// Transition after a leading zero (LEXER_STATE_DECIMAL).
//
// A decimal point starts the fraction. Another digit is an error because JSON
// does not allow leading zeros. Every other input is handled exactly like the
// end of an integer by delegating to handle_number, so a lone `0` may be
// followed by whitespace, a comma or a closing brace/bracket instead of being
// rejected.
lexer_state_t handle_decimal(lexer_input_t input) {
  switch (input) {
  case LEXER_INPUT_DECIMAL:
    return LEXER_STATE_FRACTION;
  case LEXER_INPUT_ZERO:
  case LEXER_INPUT_NON_ZERO:
    // e.g. "01" or "00"
    return LEXER_STATE_ERROR;
  default:
    return handle_number(input);
  }
}
|
||||
|
||||
// Transition while reading the integer part of a number.
//
// Digits stay in NUMBER, a decimal point starts the fraction, 'e'/'E' starts
// the exponent (JSON allows an exponent directly after the integer part, e.g.
// 1e5), whitespace ends the number, a comma returns to the state on top of
// lexer->stack, and a closing brace/bracket ends the enclosing collection.
// Anything else is an error.
lexer_state_t handle_number(lexer_input_t input) {
  switch (input) {
  case LEXER_INPUT_ZERO:
  case LEXER_INPUT_NON_ZERO:
    return LEXER_STATE_NUMBER;
  case LEXER_INPUT_DECIMAL:
    return LEXER_STATE_FRACTION;
  case LEXER_INPUT_LOWER_E:
  case LEXER_INPUT_UPPER_E:
    return LEXER_STATE_EXPONENT;
  case LEXER_INPUT_CLOSE_BRACE:
  case LEXER_INPUT_CLOSE_BRACKET:
    return handle_collection_end(input);
  case LEXER_INPUT_COMMA:
    // TODO (Abdelrahman): Figure out how to handle this
    return lexer->stack.stack[lexer->stack.size - 1];
  case LEXER_INPUT_WHITE_SPACE:
    return LEXER_STATE_NUMBER_END;
  default:
    return LEXER_STATE_ERROR;
  }
}
|
||||
|
||||
// Transition while reading the fractional part of a number: digits stay in
// FRACTION, 'e'/'E' starts the exponent, whitespace ends the number, a comma
// returns to the state on top of lexer->stack, and a closing brace/bracket
// ends the enclosing collection. Anything else is an error.
lexer_state_t handle_fraction(lexer_input_t input) {
  lexer_state_t next = LEXER_STATE_ERROR;

  switch (input) {
  case LEXER_INPUT_ZERO:
  case LEXER_INPUT_NON_ZERO:
    next = LEXER_STATE_FRACTION;
    break;
  case LEXER_INPUT_LOWER_E:
  case LEXER_INPUT_UPPER_E:
    next = LEXER_STATE_EXPONENT;
    break;
  case LEXER_INPUT_WHITE_SPACE:
    next = LEXER_STATE_NUMBER_END;
    break;
  case LEXER_INPUT_COMMA:
    // TODO (Abdelrahman): Figure out how to handle this
    next = lexer->stack.stack[lexer->stack.size - 1];
    break;
  case LEXER_INPUT_CLOSE_BRACE:
  case LEXER_INPUT_CLOSE_BRACKET:
    next = handle_collection_end(input);
    break;
  default:
    break;
  }

  return next;
}
|
||||
|
||||
// Transition right after the exponent marker: a digit starts the power, a
// plus or minus sign moves to EXP_SIGN, anything else is an error.
lexer_state_t handle_exponent(lexer_input_t input) {
  if (input == LEXER_INPUT_ZERO || input == LEXER_INPUT_NON_ZERO) {
    return LEXER_STATE_POWER;
  }

  if (input == LEXER_INPUT_PLUS || input == LEXER_INPUT_MINUS) {
    return LEXER_STATE_EXP_SIGN;
  }

  return LEXER_STATE_ERROR;
}
|
||||
|
||||
// Transition right after the exponent sign: only a digit is accepted, which
// starts the power; anything else is an error.
lexer_state_t handle_exp_sign(lexer_input_t input) {
  bool is_digit = input == LEXER_INPUT_ZERO || input == LEXER_INPUT_NON_ZERO;

  return is_digit ? LEXER_STATE_POWER : LEXER_STATE_ERROR;
}
|
||||
|
||||
// Transition while reading the exponent digits: digits stay in POWER,
// whitespace ends the number, a comma returns to the state on top of
// lexer->stack, and a closing brace/bracket ends the enclosing collection.
// Anything else is an error.
lexer_state_t handle_power(lexer_input_t input) {
  lexer_state_t next = LEXER_STATE_ERROR;

  switch (input) {
  case LEXER_INPUT_ZERO:
  case LEXER_INPUT_NON_ZERO:
    next = LEXER_STATE_POWER;
    break;
  case LEXER_INPUT_WHITE_SPACE:
    next = LEXER_STATE_NUMBER_END;
    break;
  case LEXER_INPUT_COMMA:
    // TODO (Abdelrahman): Figure out how to handle this
    next = lexer->stack.stack[lexer->stack.size - 1];
    break;
  case LEXER_INPUT_CLOSE_BRACE:
  case LEXER_INPUT_CLOSE_BRACKET:
    next = handle_collection_end(input);
    break;
  default:
    break;
  }

  return next;
}
|
||||
|
||||
// Transition after a number has been terminated by whitespace: further
// whitespace stays in NUMBER_END, a comma returns to the state on top of
// lexer->stack, and a closing brace/bracket ends the enclosing collection.
// Anything else is an error.
lexer_state_t handle_number_end(lexer_input_t input) {
  if (input == LEXER_INPUT_WHITE_SPACE) {
    return LEXER_STATE_NUMBER_END;
  }

  if (input == LEXER_INPUT_CLOSE_BRACE || input == LEXER_INPUT_CLOSE_BRACKET) {
    return handle_collection_end(input);
  }

  if (input == LEXER_INPUT_COMMA) {
    // TODO (Abdelrahman): Figure out how to handle this
    return lexer->stack.stack[lexer->stack.size - 1];
  }

  return LEXER_STATE_ERROR;
}
|
||||
|
||||
// Transition while matching the keyword `true`.
//
// start_state says how much of the keyword has been matched; the input must
// be the next expected letter to advance (T -r-> TR -u-> TRU -e-> TRUE).
// From TRUE every input leads to KEYWORD_END. Any other start_state or an
// unexpected letter is an error.
lexer_state_t handle_true(lexer_input_t input, lexer_state_t start_state) {
  lexer_input_t expected;
  lexer_state_t next;

  switch (start_state) {
  case LEXER_STATE_T:
    expected = LEXER_INPUT_LOWER_R;
    next = LEXER_STATE_TR;
    break;
  case LEXER_STATE_TR:
    expected = LEXER_INPUT_LOWER_U;
    next = LEXER_STATE_TRU;
    break;
  case LEXER_STATE_TRU:
    expected = LEXER_INPUT_LOWER_E;
    next = LEXER_STATE_TRUE;
    break;
  case LEXER_STATE_TRUE:
    // The input is not inspected once the whole keyword has been matched.
    return LEXER_STATE_KEYWORD_END;
  default:
    return LEXER_STATE_ERROR;
  }

  return input == expected ? next : LEXER_STATE_ERROR;
}
|
||||
|
||||
// Transition while matching the keyword `false`.
//
// start_state says how much of the keyword has been matched; the input must
// be the next expected letter to advance
// (F -a-> FA -l-> FAL -s-> FALS -e-> FALSE). From FALSE every input leads to
// KEYWORD_END. Any other start_state or an unexpected letter is an error.
lexer_state_t handle_false(lexer_input_t input, lexer_state_t start_state) {
  lexer_input_t expected;
  lexer_state_t next;

  switch (start_state) {
  case LEXER_STATE_F:
    expected = LEXER_INPUT_LOWER_A;
    next = LEXER_STATE_FA;
    break;
  case LEXER_STATE_FA:
    expected = LEXER_INPUT_LOWER_L;
    next = LEXER_STATE_FAL;
    break;
  case LEXER_STATE_FAL:
    expected = LEXER_INPUT_LOWER_S;
    next = LEXER_STATE_FALS;
    break;
  case LEXER_STATE_FALS:
    expected = LEXER_INPUT_LOWER_E;
    next = LEXER_STATE_FALSE;
    break;
  case LEXER_STATE_FALSE:
    // The input is not inspected once the whole keyword has been matched.
    return LEXER_STATE_KEYWORD_END;
  default:
    return LEXER_STATE_ERROR;
  }

  return input == expected ? next : LEXER_STATE_ERROR;
}
|
||||
|
||||
// Transition while matching the keyword `null`.
//
// start_state says how much of the keyword has been matched; the input must
// be the next expected letter to advance (N -u-> NU -l-> NUL -l-> NULL).
// From NULL every input leads to KEYWORD_END. Any other start_state or an
// unexpected letter is an error.
lexer_state_t handle_null(lexer_input_t input, lexer_state_t start_state) {
  lexer_input_t expected;
  lexer_state_t next;

  switch (start_state) {
  case LEXER_STATE_N:
    expected = LEXER_INPUT_LOWER_U;
    next = LEXER_STATE_NU;
    break;
  case LEXER_STATE_NU:
    expected = LEXER_INPUT_LOWER_L;
    next = LEXER_STATE_NUL;
    break;
  case LEXER_STATE_NUL:
    expected = LEXER_INPUT_LOWER_L;
    next = LEXER_STATE_NULL;
    break;
  case LEXER_STATE_NULL:
    // The input is not inspected once the whole keyword has been matched.
    return LEXER_STATE_KEYWORD_END;
  default:
    return LEXER_STATE_ERROR;
  }

  return input == expected ? next : LEXER_STATE_ERROR;
}
|
||||
|
||||
// Transition after a keyword has been completed: whitespace stays in
// KEYWORD_END, a comma returns to the state on top of lexer->stack, and a
// closing brace/bracket ends the enclosing collection. Anything else is an
// error.
lexer_state_t handle_keyword_end(lexer_input_t input) {
  if (input == LEXER_INPUT_WHITE_SPACE) {
    return LEXER_STATE_KEYWORD_END;
  }

  if (input == LEXER_INPUT_CLOSE_BRACE || input == LEXER_INPUT_CLOSE_BRACKET) {
    return handle_collection_end(input);
  }

  if (input == LEXER_INPUT_COMMA) {
    // TODO (Abdelrahman): Figure out how to handle this
    return lexer->stack.stack[lexer->stack.size - 1];
  }

  return LEXER_STATE_ERROR;
}
|
||||
@@ -1,93 +0,0 @@
|
||||
#ifndef LEXER_DATA_H
|
||||
#define LEXER_DATA_H
|
||||
|
||||
// States of the lexer state machine. The enumerators are consecutive, so
// COUNT_LEXER_STATES (the last one) equals the number of real states.
typedef enum {
  // GENERAL STATES
  LEXER_STATE_ERROR = 0, // first enumerator, so its value is 0
  LEXER_STATE_START,
  LEXER_STATE_VALUE,
  // COLLECTION STATES
  LEXER_STATE_OBJECT_START,
  LEXER_STATE_OBJECT,
  LEXER_STATE_OBJECT_END,
  LEXER_STATE_ARRAY_START,
  LEXER_STATE_ARRAY,
  LEXER_STATE_ARRAY_END,
  LEXER_STATE_LAST_COLLECTION,
  // OBJECT STATES
  LEXER_STATE_KEY,
  // NUMBER STATES
  LEXER_STATE_DECIMAL,
  LEXER_STATE_NUMBER,
  LEXER_STATE_FRACTION,
  LEXER_STATE_EXPONENT,
  LEXER_STATE_EXP_SIGN,
  LEXER_STATE_POWER,
  LEXER_STATE_NUMBER_END,
  // STRING STATES
  LEXER_STATE_STRING,
  LEXER_STATE_STRING_END,
  LEXER_STATE_ESCAPE_SEQUENCE,
  LEXER_STATE_UNICODE_HEX1,
  LEXER_STATE_UNICODE_HEX2,
  LEXER_STATE_UNICODE_HEX3,
  LEXER_STATE_UNICODE_HEX4,
  // KEYWORD STATES
  LEXER_STATE_T,
  LEXER_STATE_TR,
  LEXER_STATE_TRU,
  LEXER_STATE_TRUE,
  LEXER_STATE_F,
  LEXER_STATE_FA,
  LEXER_STATE_FAL,
  LEXER_STATE_FALS,
  LEXER_STATE_FALSE,
  LEXER_STATE_N,
  LEXER_STATE_NU,
  LEXER_STATE_NUL,
  LEXER_STATE_NULL,
  LEXER_STATE_KEYWORD_END,

  // Number of states above; not a state itself.
  COUNT_LEXER_STATES,
} lexer_state_t;
|
||||
|
||||
// Input categories fed to the lexer state machine. The enumerators are
// consecutive, so COUNT_LEXER_INPUTS (the last one) equals the number of real
// categories.
typedef enum {
  LEXER_INPUT_WHITE_SPACE = 0,
  // Structural characters
  LEXER_INPUT_OPEN_BRACE,
  LEXER_INPUT_CLOSE_BRACE,
  LEXER_INPUT_OPEN_BRACKET,
  LEXER_INPUT_CLOSE_BRACKET,
  LEXER_INPUT_COMMA,
  LEXER_INPUT_COLON,
  LEXER_INPUT_DOUBLE_QUOTE,
  LEXER_INPUT_BACK_SLASH,
  LEXER_INPUT_FORWARD_SLASH,
  // Lower-case letters that have a dedicated category
  LEXER_INPUT_LOWER_A,
  LEXER_INPUT_LOWER_B,
  LEXER_INPUT_LOWER_C,
  LEXER_INPUT_LOWER_D,
  LEXER_INPUT_LOWER_E,
  LEXER_INPUT_LOWER_F,
  LEXER_INPUT_LOWER_L,
  LEXER_INPUT_LOWER_N,
  LEXER_INPUT_LOWER_R,
  LEXER_INPUT_LOWER_S,
  LEXER_INPUT_LOWER_T,
  LEXER_INPUT_LOWER_U,
  // Upper-case letters that have a dedicated category
  LEXER_INPUT_UPPER_A,
  LEXER_INPUT_UPPER_B,
  LEXER_INPUT_UPPER_C,
  LEXER_INPUT_UPPER_D,
  LEXER_INPUT_UPPER_E,
  LEXER_INPUT_UPPER_F,
  // Number characters
  LEXER_INPUT_MINUS,
  LEXER_INPUT_PLUS,
  LEXER_INPUT_DECIMAL,
  LEXER_INPUT_ZERO,
  LEXER_INPUT_NON_ZERO,
  // Everything without a dedicated category
  LEXER_INPUT_OTHER,

  // Number of categories above; not a category itself.
  COUNT_LEXER_INPUTS,
} lexer_input_t;
|
||||
|
||||
#endif // !LEXER_DATA_H
|
||||
@@ -1,38 +0,0 @@
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },
|
||||
@@ -1,3 +0,0 @@
|
||||
#!/bin/bash

# Build the state-table generator into ./gentable.
clang \
  -Iinclude \
  -Iinclude/lexer \
  generate_state_table.c \
  -o gentable
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/bash

# Run ./main once for every file under test_files whose path matches the
# case-insensitive regex .*json.
#
# The regex is quoted so the shell cannot glob-expand it before find sees it,
# and find invokes ./main itself so file names containing whitespace are
# passed through as a single argument.
find test_files -iregex '.*json' -exec ./main {} \;
|
||||
71
run_tests.py
Executable file
71
run_tests.py
Executable file
@@ -0,0 +1,71 @@
|
||||
#!/bin/env python3
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class TermFormat:
    """ANSI escape sequences used to format terminal output."""

    # Colours
    OKGREEN = "\033[92m"
    FAIL = "\033[91m"

    # Text attributes
    BOLD = "\033[1m"
    ENDC = "\033[0m"  # resets all formatting
|
||||
|
||||
|
||||
def print_header(header, width, padding):
    """Print *header* in bold between two bars of ``=`` characters.

    Each bar is ``int(width / 3) + padding`` characters long.
    """
    bar = "=" * (int(width / 3) + padding)
    line = f"{TermFormat.BOLD}{bar}{header}{bar}{TermFormat.ENDC}"
    print(line)
|
||||
|
||||
|
||||
test_dir = Path(__file__).parent / "test_files"
hj_exec = Path(__file__).parent / "main"

# Regular files in test_dir whose suffix contains "json" and whose stem does
# not contain "invalid".
valid_files = [
    json_file
    for json_file in test_dir.iterdir()
    if json_file.is_file()
    and "json" in json_file.suffix
    and "invalid" not in json_file.stem
]

# Length of the longest path; used to size the headers and align the results.
name_width = max((len(str(vf)) for vf in valid_files), default=0)

print_header("helloJSON TEST SUITE", name_width, 9)
print()
print_header("VALID FILES", name_width, 13)

for vf in valid_files:
    # Reference result: the file as parsed by Python's json module.
    try:
        with open(vf, "r") as infile:
            original = json.load(infile)
    except json.JSONDecodeError as e:
        print(
            f"\n{TermFormat.BOLD}{TermFormat.FAIL}ERROR:{TermFormat.ENDC} Failed to decode {str(vf)}\n{e.msg}"
        )
        continue

    cmd = [hj_exec, vf]

    # subprocess.run reads the pipe while waiting for the process. Calling
    # Popen.wait() before communicate() can deadlock once the child fills the
    # OS pipe buffer.
    proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

    # Output that is not valid JSON (or not decodable text) counts as a
    # failed comparison; both cases raise a ValueError subclass.
    try:
        parsed = json.loads(proc.stdout)
    except ValueError:
        parsed = None

    match = original == parsed

    print(
        f"\n{str(vf):{name_width + 3}} {TermFormat.BOLD}{TermFormat.OKGREEN if match else TermFormat.FAIL}{'PASSED' if match else 'FAILED'}{TermFormat.ENDC}"
    )
|
||||
@@ -5,7 +5,7 @@
|
||||
"position": [
|
||||
25.1212,
|
||||
55.1535
|
||||
],
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Shangri-La Hotel",
|
||||
|
||||
Reference in New Issue
Block a user