Skip to content

Commit f86e812

Browse files
committed
Dump WIP changes
This commit contains all the uncommitted code. There probably won't be any more work done on this front, as I am planning to rewrite most of the parser with the help of Lex & Yacc.
1 parent 0de259b commit f86e812

File tree

6 files changed

+227
-11
lines changed

6 files changed

+227
-11
lines changed

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[submodule "dynarr"]
2+
path = dynarr
3+
url = https://github.com/TheDcoder/dynarr

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,7 @@ if(CMAKE_COMPILER_IS_GNUCC)
1212
add_compile_options(-fsanitize=undefined,address)
1313
endif()
1414
endif()
15+
16+
# Link dynarr (dynamic array)
17+
add_subdirectory(dynarr)
18+
target_link_libraries(eci PRIVATE dynarr)

dynarr

Submodule dynarr added at 2eaf15a

eci.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,11 @@ int main(int argc, char *argv[]) {
3535
if (!code) die("Failed to read from source file!");
3636

3737
// Parse the code
38-
parse(code);
38+
char *parse_error = parse(code);
39+
if (parse_error) {
40+
fputs("An error occured while parsing the code!\n", stderr);
41+
die(parse_error);
42+
}
3943

4044
// Free the resources
4145
free(code);

parse.c

Lines changed: 198 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@
2929
#include <string.h>
3030
#include "parse.h"
3131
#include "utils.h"
32+
#include "dynarr/dynarr.h"
33+
34+
// Internal functions
35+
void *p_malloc(size_t size, char *context);
3236

3337
const char CHR_COMMENT = ';';
3438
const char CHR_DIRECTIVE = '#';
@@ -177,19 +181,32 @@ static void print_token(struct Token *token) {
177181
putchar('\n');
178182
}
179183

180-
bool parse(char *code) {
181-
if (setjmp(parse_error.jump)) return false;
184+
char *parse(char *code) {
185+
if (setjmp(parse_error.jump)) return parse_error.msg;
182186

183187
struct TokenList token_list = token_get_list(code);
184188
if (!token_list.length) raise_mem("generating token list");
185189
struct TokenListNode *token_list_node = token_list.head;
186190

191+
puts("> Printing tokens");
187192
if (token_list.dirty) fputs("!!! WARNING: Unknown token(s) encountered !!!\n", stderr);
188193
do {
189194
struct Token *token = token_list_node->token;
190195
if (token->type != TOK_WHITESPACE) print_token(token);
191196
token_list_node = token_list_node->next;
192197
} while (token_list_node);
198+
//return true;
199+
puts("> Printing units");
200+
struct Token *tokens = token_list_to_array(&token_list, true, true);
201+
if (!tokens) raise_mem("flattening token list");
202+
struct Token *curr_token = tokens + 1;
203+
do {
204+
unit_get(curr_token, &curr_token);
205+
} while (curr_token->type != TOK_EOF);
206+
207+
//expression_get(tokens + 1, token_list.length);
208+
209+
return NULL;
193210
}
194211

195212
struct Token token_get(char *code, char **next) {
@@ -208,6 +225,7 @@ struct Token token_get(char *code, char **next) {
208225
token.type = TOK_WHITESPACE;
209226
token.data = code;
210227
token.data_len = length;
228+
token.newline = *code == '\n' || *code == '\r';
211229
} else if (*code == CHR_COMMENT || *code == CHR_DIRECTIVE) {
212230
// Comment or Directive
213231
token.type = *code == CHR_COMMENT ? TOK_COMMENT : TOK_DIRECTIVE;
@@ -402,21 +420,33 @@ struct TokenList token_get_list(char *code) {
402420
end: return list;
403421
};
404422

405-
struct Token *token_list_to_array(struct TokenList *list, bool pad) {
406-
struct Token *tokens = malloc(sizeof(struct Token) * (list->length + (pad ? 2 : 0)));
423+
struct Token *token_list_to_array(struct TokenList *list, bool pad, bool strip_ws) {
424+
size_t token_count = list->length;
425+
if (strip_ws) {
426+
struct TokenListNode *node = list->head;
427+
do {
428+
if (node->token->type == TOK_WHITESPACE && !node->token->newline) --token_count;
429+
} while (node = node->next);
430+
}
431+
432+
struct Token *tokens = malloc(sizeof(struct Token) * (token_count + (pad ? 2 : 0)));
407433
if (!tokens) return NULL;
408434
if (pad) /* Reserve first element for padding */ ++tokens;
409435

410436
struct TokenListNode *node = list->head;
411-
for (size_t i = 0; i < list->length; ++i) {
437+
438+
for (size_t i = 0; i < token_count; ++i) {
439+
if (node->token->type == TOK_WHITESPACE && !node->token->newline) {
440+
--i; // No increment in the next iteration
441+
goto next_node;
442+
}
412443
tokens[i] = *node->token;
413-
node = node->next;
444+
next_node: node = node->next;
414445
}
415446

416447
if (pad) {
417448
// Apply padding
418-
//struct Token padding = {.type = TOK_EOF};
419-
tokens[list->length] = (struct Token){
449+
tokens[token_count] = (struct Token){
420450
.type = TOK_EOF,
421451
.data = list->tail->token->data + list->tail->token->data_len,
422452
.data_len = 0,
@@ -671,6 +701,11 @@ bool kwd_is_declarator(enum Keyword kwd) {
671701
struct Expression expression_get(struct Token *tokens, size_t count) {
672702
struct Expression expression = {.op = OP_NOP};
673703

704+
if (count == 0) {
705+
// Assume the expression ends at line end
706+
for (;;++count) if (tokens[count].type == TOK_EOF || tokens[count].type == TOK_WHITESPACE && tokens[count].newline) break;
707+
}
708+
674709
// Calculate the number of actual tokens (anything not a whitespace)
675710
size_t actual_count = 0;
676711
struct Token *actual_tokens = tokens;
@@ -904,6 +939,161 @@ struct Token *find_token_by_opr(struct Token *tokens, size_t count, enum Operato
904939
return NULL;
905940
}
906941

942+
struct Statement statement_get(struct Token *token, struct Token **next) {
943+
struct Statement statement;
944+
struct Token *next_token = NULL;
945+
946+
bool function, declaration = false;
947+
if (token->type == TOK_WORD && kwd_is_declarator(token->keyword)) {
948+
function = token->keyword == KWD_FUNC;
949+
declaration = true;
950+
}
951+
952+
if (declaration) {
953+
statement.type = SMT_DECLARATION;
954+
statement.declaration = malloc(sizeof *statement.declaration);
955+
if (statement.declaration == NULL) raise_mem("parsing declaration statement");
956+
957+
statement.declaration->is_function = function;
958+
if (function) {
959+
// Function Declaration
960+
statement.declaration->scope = SCO_GLOBAL;
961+
statement.declaration->is_function = true;
962+
statement.declaration->name = NULL;
963+
statement.declaration->code.block = NULL;
964+
statement.declaration->code.size = 0;
965+
966+
// Name
967+
++token;
968+
if (token->type != TOK_WORD) raise_unexpected_token("a function name", token);
969+
statement.declaration->name = p_malloc(token->data_len + 1, "storing function name");
970+
strncpy(statement.declaration->name, token->data, token->data_len);
971+
972+
// Parameters
973+
// TODO: Implement a dynamic array library
974+
// TODO: make an "expect function"
975+
expect_token(++token, &(struct Token){.type = TOK_BRACKET, .data = "("}, "opening bracket for function parameters");
976+
for (;;) {
977+
// ...
978+
}
979+
980+
// Code block
981+
dynarr code_block = dynarr_init(sizeof *statement.declaration->code.block);
982+
do {
983+
struct Statement func_stmt = statement_get(token, &next_token);
984+
dynarr_push(&code_block, &func_stmt);
985+
} while (next_token->type != TOK_WORD || next_token->kwd != KWD_END_FUNC)
986+
statement.declaration->code.block = dynarr_get(&code_block, &statement.declaration->code.size);
987+
} else {
988+
// Variable Declaration
989+
statement.declaration->scope = SCO_AUTO;
990+
statement.declaration->is_static = false;
991+
statement.declaration->is_constant = false;
992+
statement.declaration->name = NULL;
993+
statement.declaration->initializer = NULL;
994+
995+
// Metadata
996+
do {
997+
if (token->keyword == KWD_NONE) /* Not a keyword*/ break;
998+
if (!kwd_is_declarator(token->keyword)) break;
999+
switch (token->keyword) {
1000+
case KWD_GLOBAL:
1001+
statement.declaration->scope = SCO_GLOBAL;
1002+
break;
1003+
case KWD_LOCAL:
1004+
statement.declaration->scope = SCO_LOCAL;
1005+
break;
1006+
case KWD_STATIC:
1007+
statement.declaration->is_static = true;
1008+
break;
1009+
case KWD_CONST:
1010+
statement.declaration->is_constant = true;
1011+
break;
1012+
}
1013+
} while (TOK_WORD == (++token)->type);
1014+
1015+
// Name
1016+
if (token->type != TOK_VARIABLE) raise_unexpected_token("a variable", token);
1017+
1018+
statement.declaration->name = malloc(token->data_len + 1);
1019+
if (!statement.declaration->name) raise_mem("storing variable name");
1020+
strncpy(statement.declaration->name, token->data, token->data_len);
1021+
1022+
// Initializer
1023+
if (token[1].type != TOK_OPERATOR) goto next;
1024+
if (token[1].op_info.sym != OPR_EQU) raise_unexpected_token("simple assignment operator (=)", token);
1025+
statement.declaration->initializer = malloc(sizeof *statement.declaration->initializer);
1026+
if (!statement.declaration->initializer) raise_mem("parsing initializer");
1027+
*statement.declaration->initializer = expression_get(token + 2, 0);
1028+
}
1029+
} else {
1030+
statement.type = SMT_EXPRESSION;
1031+
statement.expression = malloc(sizeof *statement.expression);
1032+
if (!statement.expression) raise_mem("parsing expression statement");
1033+
size_t token_count = 0;
1034+
while (true) {
1035+
if (token[token_count].type == TOK_WHITESPACE && token[token_count].newline || token[token_count].type == TOK_EOF) break;
1036+
++token_count;
1037+
}
1038+
*statement.expression = expression_get(token, token_count);
1039+
next_token = token + token_count + 1;
1040+
}
1041+
1042+
// Set the next token
1043+
next: *next = next_token ? next_token : token + 1;
1044+
return statement;
1045+
}
1046+
1047+
struct Unit unit_get(struct Token *token, struct Token **next) {
1048+
struct Unit unit;
1049+
struct Token *next_token = NULL;
1050+
1051+
switch (token->type) {
1052+
case TOK_WHITESPACE:
1053+
break;
1054+
case TOK_COMMENT:
1055+
case TOK_DIRECTIVE:
1056+
unit.type = token->type == TOK_COMMENT ? UNT_COMMENT : UNT_DIRECTIVE;
1057+
unit.token = token;
1058+
puts("It's a comment/directive");
1059+
break;
1060+
default:
1061+
// Statement
1062+
unit.type = UNT_STATEMENT;
1063+
unit.statement = malloc(sizeof *unit.statement);
1064+
if (!unit.statement) raise_mem("parsing statement");
1065+
*unit.statement = statement_get(token, &next_token);
1066+
puts("It's a statement");
1067+
break;
1068+
}
1069+
1070+
// Set the next token
1071+
*next = next_token ? next_token : token + 1;
1072+
1073+
return unit;
1074+
}
1075+
1076+
//struct Token *token_peek() {}
1077+
1078+
void expect_token(struct Token *token, struct Token *expected, char *description) {
1079+
bool match = false;
1080+
if (token->type != expected->type) goto unexpected;
1081+
switch (token->type) {
1082+
case TOK_BRACKET:
1083+
match = *token->data == *expected->data;
1084+
break;
1085+
}
1086+
if (match) return;
1087+
unexpected: raise_unexpected_token(description, token);
1088+
};
1089+
1090+
/*
 * Allocation helper for the parser (hence the 'p' prefix).
 *
 * size    - number of bytes to allocate
 * context - description of the ongoing work, handed to raise_mem() when the
 *           allocation fails
 *
 * Returns the block obtained from malloc().
 */
void *p_malloc(size_t size, char *context) {
	void *block = malloc(size);
	if (block != NULL) return block;

	raise_mem(context);
	return block;
}
1096+
9071097
noreturn void raise_error(char *msg, bool free_msg) {
9081098
if (parse_error.free_msg && parse_error.msg) free(parse_error.msg);
9091099

parse.h

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,9 @@ struct Token {
154154
char *data;
155155
size_t data_len;
156156
union {
157+
// Whitespace
158+
bool newline;
159+
157160
// Number
158161
double number;
159162

@@ -220,6 +223,13 @@ struct Expression {
220223
struct Operand *operands;
221224
};
222225

226+
// Declaration of a single function parameter
struct DeclarationFuncParam {
	bool by_ref : 1;      // flag: parameter is taken by reference
	bool is_constant : 1; // flag: parameter is constant
	char *name;           // parameter name
};
232+
223233
struct Declaration {
224234
enum {SCO_AUTO, SCO_LOCAL, SCO_GLOBAL} scope;
225235
bool is_constant : 1;
@@ -232,6 +242,8 @@ struct Declaration {
232242
// Function
233243
struct {
234244
struct Statement *block;
245+
struct DeclarationFuncParam *parameters;
246+
//struct Declaration *parameters;
235247
size_t size;
236248
} code;
237249
};
@@ -260,10 +272,10 @@ struct Unit {
260272
};
261273
};
262274

263-
bool parse(char *code);
275+
char *parse(char *code);
264276
struct Token token_get(char *code, char **next);
265277
struct TokenList token_get_list(char *code);
266-
struct Token *token_list_to_array(struct TokenList *list, bool pad);
278+
struct Token *token_list_to_array(struct TokenList *list, bool pad, bool strip_ws);
267279

268280
enum Operator opsym_to_opr(char sym);
269281
enum Operation opr_to_op(enum Operator opr);
@@ -288,6 +300,8 @@ struct Token *expression_parse_comp(struct Token *tokens, size_t count, struct E
288300
struct Token *expression_parse_assign(struct Token *tokens, size_t count, struct Expression *expression);
289301
struct Operand *expression_alloc_operands(size_t count);
290302
struct Token *find_token_by_opr(struct Token *tokens, size_t count, enum Operator opr_list[], size_t opr_count, bool left);
303+
struct Statement statement_get(struct Token *token, struct Token **next);
304+
struct Unit unit_get(struct Token *token, struct Token **next);
291305

292306
noreturn void raise_error(char *msg, bool free_msg);
293307
noreturn void raise_error_fmt(char *def, char *fmt, ...);

0 commit comments

Comments
 (0)