Dump WIP changes

TheDcoder · TheDcoder · commit f86e8121b41d · 2021-05-27T10:39:06.000+05:30
This commit contains all the uncommited code, there probably won't be
any more work done on this front as I am planning to re-write most of
the parser with the help of Lex &amp; Yacc.
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "dynarr"]
+	path = dynarr
+	url = https://github.com/TheDcoder/dynarr
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -12,3 +12,7 @@ if(CMAKE_COMPILER_IS_GNUCC)
 		add_compile_options(-fsanitize=undefined,address)
 	endif()
 endif()
+
+# Link dynarr (dynamic array)
+add_subdirectory(dynarr)
+target_link_libraries(eci PRIVATE dynarr)
diff --git a/dynarr b/dynarr
@@ -0,0 +1 @@
+Subproject commit 2eaf15a0f93cac510778673c0d8ed62a520ed85c
diff --git a/eci.c b/eci.c
@@ -35,7 +35,11 @@ int main(int argc, char *argv[]) {
 	if (!code) die("Failed to read from source file!");
 	
 	// Parse the code
-	parse(code);
+	char *parse_error = parse(code);
+	if (parse_error) {
+		fputs("An error occured while parsing the code!\n", stderr);
+		die(parse_error);
+	}
 	
 	// Free the resources
 	free(code);
diff --git a/parse.c b/parse.c
@@ -29,6 +29,10 @@
 #include <string.h>
 #include "parse.h"
 #include "utils.h"
+#include "dynarr/dynarr.h"
+
+// Internal functions
+void *p_malloc(size_t size, char *context);
 
 const char CHR_COMMENT = ';';
 const char CHR_DIRECTIVE = '#';
@@ -177,19 +181,32 @@ static void print_token(struct Token *token) {
 	putchar('\n');
 }
 
-bool parse(char *code) {
-	if (setjmp(parse_error.jump)) return false;
+char *parse(char *code) {
+	if (setjmp(parse_error.jump)) return parse_error.msg;
 	
 	struct TokenList token_list = token_get_list(code);
 	if (!token_list.length) raise_mem("generating token list");
 	struct TokenListNode *token_list_node = token_list.head;
 	
+	puts("> Printing tokens");
 	if (token_list.dirty) fputs("!!! WARNING: Unknown token(s) encountered !!!\n", stderr);
 	do {
 		struct Token *token = token_list_node->token;
 		if (token->type != TOK_WHITESPACE) print_token(token);
 		token_list_node = token_list_node->next;
 	} while (token_list_node);
+	//return true;
+	puts("> Printing units");
+	struct Token *tokens = token_list_to_array(&token_list, true, true);
+	if (!tokens) raise_mem("flattening token list");
+	struct Token *curr_token = tokens + 1;
+	do {
+		unit_get(curr_token, &curr_token);
+	} while (curr_token->type != TOK_EOF);
+	
+	//expression_get(tokens + 1, token_list.length);
+	
+	return NULL;
 }
 
 struct Token token_get(char *code, char **next) {
@@ -208,6 +225,7 @@ struct Token token_get(char *code, char **next) {
 		token.type = TOK_WHITESPACE;
 		token.data = code;
 		token.data_len = length;
+		token.newline = *code == '\n' || *code == '\r';
 	} else if (*code == CHR_COMMENT || *code == CHR_DIRECTIVE) {
 		// Comment or Directive
 		token.type = *code == CHR_COMMENT ? TOK_COMMENT : TOK_DIRECTIVE;
@@ -402,21 +420,33 @@ struct TokenList token_get_list(char *code) {
 	end: return list;
 };
 
-struct Token *token_list_to_array(struct TokenList *list, bool pad) {
-	struct Token *tokens = malloc(sizeof(struct Token) * (list->length + (pad ? 2 : 0)));
+struct Token *token_list_to_array(struct TokenList *list, bool pad, bool strip_ws) {
+	size_t token_count = list->length;
+	if (strip_ws) {
+		struct TokenListNode *node = list->head;
+		do {
+			if (node->token->type == TOK_WHITESPACE && !node->token->newline) --token_count;
+		} while (node = node->next);
+	}
+	
+	struct Token *tokens = malloc(sizeof(struct Token) * (token_count + (pad ? 2 : 0)));
 	if (!tokens) return NULL;
 	if (pad) /* Reserve first element for padding */ ++tokens;
 	
 	struct TokenListNode *node = list->head;
-	for (size_t i = 0; i < list->length; ++i) {
+	
+	for (size_t i = 0; i < token_count; ++i) {
+		if (node->token->type == TOK_WHITESPACE && !node->token->newline) {
+			--i; // No increment in the next iteration
+			goto next_node;
+		}
 		tokens[i] = *node->token;
-		node = node->next;
+		next_node: node = node->next;
 	}
 	
 	if (pad) {
 		// Apply padding
-		//struct Token padding = {.type = TOK_EOF};
-		tokens[list->length] = (struct Token){
+		tokens[token_count] = (struct Token){
 			.type = TOK_EOF,
 			.data = list->tail->token->data + list->tail->token->data_len,
 			.data_len = 0,
@@ -671,6 +701,11 @@ bool kwd_is_declarator(enum Keyword kwd) {
 struct Expression expression_get(struct Token *tokens, size_t count) {
 	struct Expression expression = {.op = OP_NOP};
 	
+	if (count == 0) {
+		// Assume the expression ends at line end
+		for (;;++count) if (tokens[count].type == TOK_EOF || tokens[count].type == TOK_WHITESPACE && tokens[count].newline) break;
+	}
+	
 	// Calculate the number of actual tokens (anything not a whitespace)
 	size_t actual_count = 0;
 	struct Token *actual_tokens = tokens;
@@ -904,6 +939,161 @@ struct Token *find_token_by_opr(struct Token *tokens, size_t count, enum Operato
 	return NULL;
 }
 
+struct Statement statement_get(struct Token *token, struct Token **next) {
+	struct Statement statement;
+	struct Token *next_token = NULL;
+	
+	bool function, declaration = false;
+	if (token->type == TOK_WORD && kwd_is_declarator(token->keyword)) {
+		function = token->keyword == KWD_FUNC;
+		declaration = true;
+	}
+	
+	if (declaration) {
+		statement.type = SMT_DECLARATION;
+		statement.declaration = malloc(sizeof *statement.declaration);
+		if (statement.declaration == NULL) raise_mem("parsing declaration statement");
+		
+		statement.declaration->is_function = function;
+		if (function) {
+			// Function Declaration
+			statement.declaration->scope = SCO_GLOBAL;
+			statement.declaration->is_function = true;
+			statement.declaration->name = NULL;
+			statement.declaration->code.block = NULL;
+			statement.declaration->code.size = 0;
+			
+			// Name
+			++token;
+			if (token->type != TOK_WORD) raise_unexpected_token("a function name", token);
+			statement.declaration->name = p_malloc(token->data_len + 1, "storing function name");
+			strncpy(statement.declaration->name, token->data, token->data_len);
+			
+			// Parameters
+			// TODO: Implement a dynamic array library
+			// TODO: make an "expect function"
+			expect_token(++token, &(struct Token){.type = TOK_BRACKET, .data = "("}, "opening bracket for function parameters");
+			for (;;) {
+				// ...
+			}
+			
+			// Code block
+			dynarr code_block = dynarr_init(sizeof *statement.declaration->code.block);
+			do {
+				struct Statement func_stmt = statement_get(token, &next_token);
+				dynarr_push(&code_block, &func_stmt);
+			} while (next_token->type != TOK_WORD || next_token->kwd != KWD_END_FUNC)
+			statement.declaration->code.block = dynarr_get(&code_block, &statement.declaration->code.size);
+		} else {
+			// Variable Declaration
+			statement.declaration->scope = SCO_AUTO;
+			statement.declaration->is_static = false;
+			statement.declaration->is_constant = false;
+			statement.declaration->name = NULL;
+			statement.declaration->initializer = NULL;
+			
+			// Metadata
+			do {
+				if (token->keyword == KWD_NONE) /* Not a keyword*/ break;
+				if (!kwd_is_declarator(token->keyword)) break;
+				switch (token->keyword) {
+					case KWD_GLOBAL:
+						statement.declaration->scope = SCO_GLOBAL;
+						break;
+					case KWD_LOCAL:
+						statement.declaration->scope = SCO_LOCAL;
+						break;
+					case KWD_STATIC:
+						statement.declaration->is_static = true;
+						break;
+					case KWD_CONST:
+						statement.declaration->is_constant = true;
+						break;
+				}
+			} while (TOK_WORD == (++token)->type);
+			
+			// Name
+			if (token->type != TOK_VARIABLE) raise_unexpected_token("a variable", token);
+			
+			statement.declaration->name = malloc(token->data_len + 1);
+			if (!statement.declaration->name) raise_mem("storing variable name");
+			strncpy(statement.declaration->name, token->data, token->data_len);
+			
+			// Initializer
+			if (token[1].type != TOK_OPERATOR) goto next;
+			if (token[1].op_info.sym != OPR_EQU) raise_unexpected_token("simple assignment operator (=)", token);
+			statement.declaration->initializer = malloc(sizeof *statement.declaration->initializer);
+			if (!statement.declaration->initializer) raise_mem("parsing initializer");
+			*statement.declaration->initializer = expression_get(token + 2, 0);
+		}
+	} else {
+		statement.type = SMT_EXPRESSION;
+		statement.expression = malloc(sizeof *statement.expression);
+		if (!statement.expression) raise_mem("parsing expression statement");
+		size_t token_count = 0;
+		while (true) {
+			if (token[token_count].type == TOK_WHITESPACE && token[token_count].newline || token[token_count].type == TOK_EOF) break;
+			++token_count;
+		}
+		*statement.expression = expression_get(token, token_count);
+		next_token = token + token_count + 1;
+	}
+	
+	// Set the next token
+	next: *next = next_token ? next_token : token + 1;
+	return statement;
+}
+
+struct Unit unit_get(struct Token *token, struct Token **next) {
+	struct Unit unit;
+	struct Token *next_token = NULL;
+	
+	switch (token->type) {
+		case TOK_WHITESPACE:
+			break;
+		case TOK_COMMENT:
+		case TOK_DIRECTIVE:
+			unit.type = token->type == TOK_COMMENT ? UNT_COMMENT : UNT_DIRECTIVE;
+			unit.token = token;
+			puts("It's a comment/directive");
+			break;
+		default:
+			// Statement
+			unit.type = UNT_STATEMENT;
+			unit.statement = malloc(sizeof *unit.statement);
+			if (!unit.statement) raise_mem("parsing statement");
+			*unit.statement = statement_get(token, &next_token);
+			puts("It's a statement");
+			break;
+	}
+	
+	// Set the next token
+	*next = next_token ? next_token : token + 1;
+	
+	return unit;
+}
+
+//struct Token *token_peek() {}
+
+void expect_token(struct Token *token, struct Token *expected, char *description) {
+	bool match = false;
+	if (token->type != expected->type) goto unexpected;
+	switch (token->type) {
+		case TOK_BRACKET:
+			match = *token->data == *expected->data;
+			break;
+	}
+	if (match) return;
+	unexpected: raise_unexpected_token(description, token);
+};
+
+void *p_malloc(size_t size, char *context) {
+	// 'p' as in parser
+	void *mem = malloc(size);
+	if (!mem) raise_mem(context);
+	return mem;
+}
+
 noreturn void raise_error(char *msg, bool free_msg) {
 	if (parse_error.free_msg && parse_error.msg) free(parse_error.msg);
 	
diff --git a/parse.h b/parse.h
@@ -154,6 +154,9 @@ struct Token {
 	char *data;
 	size_t data_len;
 	union {
+		// Whitespace
+		bool newline;
+		
 		// Number
 		double number;
 		
@@ -220,6 +223,13 @@ struct Expression {
 	struct Operand *operands;
 };
 
+struct DeclarationFuncParam {
+	// Function parameter declaration
+	bool by_ref : 1;
+	bool is_constant : 1;
+	char *name;
+}
+
 struct Declaration {
 	enum {SCO_AUTO, SCO_LOCAL, SCO_GLOBAL} scope;
 	bool is_constant : 1;
@@ -232,6 +242,8 @@ struct Declaration {
 		// Function
 		struct {
 			struct Statement *block;
+			struct DeclarationFuncParam *parameters;
+			//struct Declaration *parameters;
 			size_t size;
 		} code;
 	};
@@ -260,10 +272,10 @@ struct Unit {
 	};
 };
 
-bool parse(char *code);
+char *parse(char *code);
 struct Token token_get(char *code, char **next);
 struct TokenList token_get_list(char *code);
-struct Token *token_list_to_array(struct TokenList *list, bool pad);
+struct Token *token_list_to_array(struct TokenList *list, bool pad, bool strip_ws);
 
 enum Operator opsym_to_opr(char sym);
 enum Operation opr_to_op(enum Operator opr);
@@ -288,6 +300,8 @@ struct Token *expression_parse_comp(struct Token *tokens, size_t count, struct E
 struct Token *expression_parse_assign(struct Token *tokens, size_t count, struct Expression *expression);
 struct Operand *expression_alloc_operands(size_t count);
 struct Token *find_token_by_opr(struct Token *tokens, size_t count, enum Operator opr_list[], size_t opr_count, bool left);
+struct Statement statement_get(struct Token *token, struct Token **next);
+struct Unit unit_get(struct Token *token, struct Token **next);
 
 noreturn void raise_error(char *msg, bool free_msg);
 noreturn void raise_error_fmt(char *def, char *fmt, ...);

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+[submodule "dynarr"]`
	`2`	`+ path = dynarr`
	`3`	`+ url = https://github.com/TheDcoder/dynarr`