29
29
#include <string.h>
30
30
#include "parse.h"
31
31
#include "utils.h"
32
+ #include "dynarr/dynarr.h"
33
+
34
+ // Internal functions
35
+ void * p_malloc (size_t size , char * context );
32
36
33
37
const char CHR_COMMENT = ';' ;
34
38
const char CHR_DIRECTIVE = '#' ;
@@ -177,19 +181,32 @@ static void print_token(struct Token *token) {
177
181
putchar ('\n' );
178
182
}
179
183
180
- bool parse (char * code ) {
181
- if (setjmp (parse_error .jump )) return false ;
184
+ char * parse (char * code ) {
185
+ if (setjmp (parse_error .jump )) return parse_error . msg ;
182
186
183
187
struct TokenList token_list = token_get_list (code );
184
188
if (!token_list .length ) raise_mem ("generating token list" );
185
189
struct TokenListNode * token_list_node = token_list .head ;
186
190
191
+ puts ("> Printing tokens" );
187
192
if (token_list .dirty ) fputs ("!!! WARNING: Unknown token(s) encountered !!!\n" , stderr );
188
193
do {
189
194
struct Token * token = token_list_node -> token ;
190
195
if (token -> type != TOK_WHITESPACE ) print_token (token );
191
196
token_list_node = token_list_node -> next ;
192
197
} while (token_list_node );
198
+ //return true;
199
+ puts ("> Printing units" );
200
+ struct Token * tokens = token_list_to_array (& token_list , true, true);
201
+ if (!tokens ) raise_mem ("flattening token list" );
202
+ struct Token * curr_token = tokens + 1 ;
203
+ do {
204
+ unit_get (curr_token , & curr_token );
205
+ } while (curr_token -> type != TOK_EOF );
206
+
207
+ //expression_get(tokens + 1, token_list.length);
208
+
209
+ return NULL ;
193
210
}
194
211
195
212
struct Token token_get (char * code , char * * next ) {
@@ -208,6 +225,7 @@ struct Token token_get(char *code, char **next) {
208
225
token .type = TOK_WHITESPACE ;
209
226
token .data = code ;
210
227
token .data_len = length ;
228
+ token .newline = * code == '\n' || * code == '\r' ;
211
229
} else if (* code == CHR_COMMENT || * code == CHR_DIRECTIVE ) {
212
230
// Comment or Directive
213
231
token .type = * code == CHR_COMMENT ? TOK_COMMENT : TOK_DIRECTIVE ;
@@ -402,21 +420,33 @@ struct TokenList token_get_list(char *code) {
402
420
end : return list ;
403
421
};
404
422
405
- struct Token * token_list_to_array (struct TokenList * list , bool pad ) {
406
- struct Token * tokens = malloc (sizeof (struct Token ) * (list -> length + (pad ? 2 : 0 )));
423
+ struct Token * token_list_to_array (struct TokenList * list , bool pad , bool strip_ws ) {
424
+ size_t token_count = list -> length ;
425
+ if (strip_ws ) {
426
+ struct TokenListNode * node = list -> head ;
427
+ do {
428
+ if (node -> token -> type == TOK_WHITESPACE && !node -> token -> newline ) -- token_count ;
429
+ } while (node = node -> next );
430
+ }
431
+
432
+ struct Token * tokens = malloc (sizeof (struct Token ) * (token_count + (pad ? 2 : 0 )));
407
433
if (!tokens ) return NULL ;
408
434
if (pad ) /* Reserve first element for padding */ ++ tokens ;
409
435
410
436
struct TokenListNode * node = list -> head ;
411
- for (size_t i = 0 ; i < list -> length ; ++ i ) {
437
+
438
+ for (size_t i = 0 ; i < token_count ; ++ i ) {
439
+ if (node -> token -> type == TOK_WHITESPACE && !node -> token -> newline ) {
440
+ -- i ; // No increment in the next iteration
441
+ goto next_node ;
442
+ }
412
443
tokens [i ] = * node -> token ;
413
- node = node -> next ;
444
+ next_node : node = node -> next ;
414
445
}
415
446
416
447
if (pad ) {
417
448
// Apply padding
418
- //struct Token padding = {.type = TOK_EOF};
419
- tokens [list -> length ] = (struct Token ){
449
+ tokens [token_count ] = (struct Token ){
420
450
.type = TOK_EOF ,
421
451
.data = list -> tail -> token -> data + list -> tail -> token -> data_len ,
422
452
.data_len = 0 ,
@@ -671,6 +701,11 @@ bool kwd_is_declarator(enum Keyword kwd) {
671
701
struct Expression expression_get (struct Token * tokens , size_t count ) {
672
702
struct Expression expression = {.op = OP_NOP };
673
703
704
+ if (count == 0 ) {
705
+ // Assume the expression ends at line end
706
+ for (;;++ count ) if (tokens [count ].type == TOK_EOF || tokens [count ].type == TOK_WHITESPACE && tokens [count ].newline ) break ;
707
+ }
708
+
674
709
// Calculate the number of actual tokens (anything not a whitespace)
675
710
size_t actual_count = 0 ;
676
711
struct Token * actual_tokens = tokens ;
@@ -904,6 +939,161 @@ struct Token *find_token_by_opr(struct Token *tokens, size_t count, enum Operato
904
939
return NULL ;
905
940
}
906
941
942
+ struct Statement statement_get (struct Token * token , struct Token * * next ) {
943
+ struct Statement statement ;
944
+ struct Token * next_token = NULL ;
945
+
946
+ bool function , declaration = false;
947
+ if (token -> type == TOK_WORD && kwd_is_declarator (token -> keyword )) {
948
+ function = token -> keyword == KWD_FUNC ;
949
+ declaration = true;
950
+ }
951
+
952
+ if (declaration ) {
953
+ statement .type = SMT_DECLARATION ;
954
+ statement .declaration = malloc (sizeof * statement .declaration );
955
+ if (statement .declaration == NULL ) raise_mem ("parsing declaration statement" );
956
+
957
+ statement .declaration -> is_function = function ;
958
+ if (function ) {
959
+ // Function Declaration
960
+ statement .declaration -> scope = SCO_GLOBAL ;
961
+ statement .declaration -> is_function = true;
962
+ statement .declaration -> name = NULL ;
963
+ statement .declaration -> code .block = NULL ;
964
+ statement .declaration -> code .size = 0 ;
965
+
966
+ // Name
967
+ ++ token ;
968
+ if (token -> type != TOK_WORD ) raise_unexpected_token ("a function name" , token );
969
+ statement .declaration -> name = p_malloc (token -> data_len + 1 , "storing function name" );
970
+ strncpy (statement .declaration -> name , token -> data , token -> data_len );
971
+
972
+ // Parameters
973
+ // TODO: Implement a dynamic array library
974
+ // TODO: make an "expect function"
975
+ expect_token (++ token , & (struct Token ){.type = TOK_BRACKET , .data = "(" }, "opening bracket for function parameters" );
976
+ for (;;) {
977
+ // ...
978
+ }
979
+
980
+ // Code block
981
+ dynarr code_block = dynarr_init (sizeof * statement .declaration -> code .block );
982
+ do {
983
+ struct Statement func_stmt = statement_get (token , & next_token );
984
+ dynarr_push (& code_block , & func_stmt );
985
+ } while (next_token -> type != TOK_WORD || next_token -> kwd != KWD_END_FUNC )
986
+ statement .declaration -> code .block = dynarr_get (& code_block , & statement .declaration -> code .size );
987
+ } else {
988
+ // Variable Declaration
989
+ statement .declaration -> scope = SCO_AUTO ;
990
+ statement .declaration -> is_static = false;
991
+ statement .declaration -> is_constant = false;
992
+ statement .declaration -> name = NULL ;
993
+ statement .declaration -> initializer = NULL ;
994
+
995
+ // Metadata
996
+ do {
997
+ if (token -> keyword == KWD_NONE ) /* Not a keyword*/ break ;
998
+ if (!kwd_is_declarator (token -> keyword )) break ;
999
+ switch (token -> keyword ) {
1000
+ case KWD_GLOBAL :
1001
+ statement .declaration -> scope = SCO_GLOBAL ;
1002
+ break ;
1003
+ case KWD_LOCAL :
1004
+ statement .declaration -> scope = SCO_LOCAL ;
1005
+ break ;
1006
+ case KWD_STATIC :
1007
+ statement .declaration -> is_static = true;
1008
+ break ;
1009
+ case KWD_CONST :
1010
+ statement .declaration -> is_constant = true;
1011
+ break ;
1012
+ }
1013
+ } while (TOK_WORD == (++ token )-> type );
1014
+
1015
+ // Name
1016
+ if (token -> type != TOK_VARIABLE ) raise_unexpected_token ("a variable" , token );
1017
+
1018
+ statement .declaration -> name = malloc (token -> data_len + 1 );
1019
+ if (!statement .declaration -> name ) raise_mem ("storing variable name" );
1020
+ strncpy (statement .declaration -> name , token -> data , token -> data_len );
1021
+
1022
+ // Initializer
1023
+ if (token [1 ].type != TOK_OPERATOR ) goto next ;
1024
+ if (token [1 ].op_info .sym != OPR_EQU ) raise_unexpected_token ("simple assignment operator (=)" , token );
1025
+ statement .declaration -> initializer = malloc (sizeof * statement .declaration -> initializer );
1026
+ if (!statement .declaration -> initializer ) raise_mem ("parsing initializer" );
1027
+ * statement .declaration -> initializer = expression_get (token + 2 , 0 );
1028
+ }
1029
+ } else {
1030
+ statement .type = SMT_EXPRESSION ;
1031
+ statement .expression = malloc (sizeof * statement .expression );
1032
+ if (!statement .expression ) raise_mem ("parsing expression statement" );
1033
+ size_t token_count = 0 ;
1034
+ while (true) {
1035
+ if (token [token_count ].type == TOK_WHITESPACE && token [token_count ].newline || token [token_count ].type == TOK_EOF ) break ;
1036
+ ++ token_count ;
1037
+ }
1038
+ * statement .expression = expression_get (token , token_count );
1039
+ next_token = token + token_count + 1 ;
1040
+ }
1041
+
1042
+ // Set the next token
1043
+ next : * next = next_token ? next_token : token + 1 ;
1044
+ return statement ;
1045
+ }
1046
+
1047
+ struct Unit unit_get (struct Token * token , struct Token * * next ) {
1048
+ struct Unit unit ;
1049
+ struct Token * next_token = NULL ;
1050
+
1051
+ switch (token -> type ) {
1052
+ case TOK_WHITESPACE :
1053
+ break ;
1054
+ case TOK_COMMENT :
1055
+ case TOK_DIRECTIVE :
1056
+ unit .type = token -> type == TOK_COMMENT ? UNT_COMMENT : UNT_DIRECTIVE ;
1057
+ unit .token = token ;
1058
+ puts ("It's a comment/directive" );
1059
+ break ;
1060
+ default :
1061
+ // Statement
1062
+ unit .type = UNT_STATEMENT ;
1063
+ unit .statement = malloc (sizeof * unit .statement );
1064
+ if (!unit .statement ) raise_mem ("parsing statement" );
1065
+ * unit .statement = statement_get (token , & next_token );
1066
+ puts ("It's a statement" );
1067
+ break ;
1068
+ }
1069
+
1070
+ // Set the next token
1071
+ * next = next_token ? next_token : token + 1 ;
1072
+
1073
+ return unit ;
1074
+ }
1075
+
1076
+ //struct Token *token_peek() {}
1077
+
1078
+ void expect_token (struct Token * token , struct Token * expected , char * description ) {
1079
+ bool match = false;
1080
+ if (token -> type != expected -> type ) goto unexpected ;
1081
+ switch (token -> type ) {
1082
+ case TOK_BRACKET :
1083
+ match = * token -> data == * expected -> data ;
1084
+ break ;
1085
+ }
1086
+ if (match ) return ;
1087
+ unexpected : raise_unexpected_token (description , token );
1088
+ };
1089
+
1090
/*
 * Parser allocation helper ('p' as in parser).
 *
 * Requests `size` bytes from malloc(). When malloc() yields NULL the failure
 * is reported through raise_mem() with `context` as the description.
 *
 * Returns the pointer obtained from malloc().
 */
void *p_malloc(size_t size, char *context) {
    void *block = malloc(size);

    if (block == NULL) {
        raise_mem(context);
    }

    return block;
}
1096
+
907
1097
noreturn void raise_error (char * msg , bool free_msg ) {
908
1098
if (parse_error .free_msg && parse_error .msg ) free (parse_error .msg );
909
1099
0 commit comments