GCC Code Coverage Report


Directory: ./
File: src/parser/parser.h
Date: 2023-04-27 00:55:30
Exec Total Coverage
Lines: 36 38 94.7%
Functions: 17 17 100.0%
Branches: 18 24 75.0%

Line Branch Exec Source
1 #ifndef LYTHON_PARSER_H
2 #define LYTHON_PARSER_H
3
4 #include "ast/magic.h"
5 #include "ast/nodes.h"
6 #include "dependencies/coz_wrap.h"
7 #include "lexer/lexer.h"
8 #include "logging/logging.h"
9 #include "parser/parsing_error.h"
10 #include "utilities/metadata.h"
11
12 #include <iostream>
13 #include <numeric>
14
15 namespace lython {
16
17 enum class ParsingContext
18 {
19 None,
20 Comprehension,
21 Slice,
22 };
23
24 /* Saves all the token that makes up for the current line
25 * this is used for printing better error message
26 */
27 struct TokenBuffer {
28
29 void add(Token const& tok) {
30
2/2
✓ Branch 1 taken 2067 times.
✓ Branch 2 taken 17629 times.
19696 if (tok.type() == tok_newline) {
31 2067 tokens.clear();
32 } else {
33 17629 tokens.push_back(tok);
34 }
35 19696 }
36
37 Array<Token> tokens;
38 };
39
40 /**
41 * The parser is responsible for transforming the source code into the AST.
42 * notifying the users of any syntax error that could prevent it from completing
43 * the transformation.
44 *
45 * SyntaxError will exclusively come from this object.
46 *
47 * The parser is an handwritten recursive decent parser. It uses the next token
48 * to decide which AST-node it should parse next.
49 * In case of an error (i.e unexpected token) the parser continues as if was token
50 * was defined. This can cause cascading SyntaxError, users should focus on the first one.
51 *
52 */
53 class Parser {
54 public:
55 Parser(AbstractLexer& lexer): _lex(lexer) { metadata_init_names(); }
56
57 ParsingError&
58 parser_kwerror(lython::CodeLocation const& loc, String const& exception, String const& msg) {
59 565 current_error += 1;
60 assert(current_error == errors.size(), "Only one error at a time can happen");
61
62
2/2
✓ Branch 1 taken 565 times.
✓ Branch 4 taken 565 times.
565 auto& details = errors.emplace_back(ParsingError());
63 565 details.error_kind = exception;
64 565 details.message = msg;
65 565 details.loc = loc;
66 565 details.received_token = token();
67
68
1/1
✓ Branch 1 taken 565 times.
565 lython::log(lython::LogLevel::Error, loc, "{}: {}", exception, msg);
69 565 return details;
70 }
71
72 bool has_errors() const { return errors.size() > 0; }
73
74 void parse_to_module(Module* module) {
75 // lookup the module
76
77 try {
78
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 parse_body(module, module->body, 0);
79 } catch (ParsingException const&) {
80 // this is SyntaxError: Expected a body
81 // it was inserted by parse_body
82 // and raised to reach the parent block
83 // this is the top level block no need to go further up
84 }
85 1 }
86
87 Module* parse_module() {
88 // lookup the module
89
1/4
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
1 Module* module = new Module();
90 1 module->class_id = meta::type_id<Module>();
91
92 1 parse_to_module(module);
93 1 return module;
94 }
95
96 Token parse_body(Node* parent, Array<StmtNode*>& out, int depth);
97 Token parse_except_handler(Try* parent, Array<ExceptHandler>& out, int depth);
98 void parse_alias(Node* parent, Array<Alias>& out, int depth);
99 Token parse_match_case(Node* parent, Array<MatchCase>& out, int depth);
100 String parse_module_path(Node* parent, int& level, int depth);
101
102 // Patterns
103 Pattern* parse_match_sequence(Node* parent, int depth);
104 Pattern* parse_match_star(Node* parent, int depth);
105 Pattern* parse_match_mapping(Node* parent, int depth);
106
107 Pattern* parse_match_or(Node* parent, Pattern* primary, int depth);
108 Pattern* parse_match_as(Node* parent, Pattern* primary, int depth);
109 Pattern* parse_match_class(Node* parent, ExprNode* cls, int depth);
110
111 // Pattern Dispatch
112 Pattern* parse_pattern(Node* parent, int depth);
113 Pattern* parse_pattern_1(Node* parent, int depth);
114
115 // Statement_1
116 StmtNode* parse_function_def(Node* parent, bool async, int depth);
117 StmtNode* parse_class_def(Node* parent, int depth);
118 StmtNode* parse_for(Node* parent, int depth);
119 StmtNode* parse_while(Node* parent, int depth);
120 StmtNode* parse_if(Node* parent, int depth);
121 StmtNode* parse_if_alt(Node* parent, int depth);
122 StmtNode* parse_match(Node* parent, int depth);
123 StmtNode* parse_with(Node* parent, int depth);
124 StmtNode* parse_raise(Node* parent, int depth);
125 StmtNode* parse_try(Node* parent, int depth);
126 StmtNode* parse_assert(Node* parent, int depth);
127 StmtNode* parse_import(Node* parent, int depth);
128 StmtNode* parse_import_from(Node* parent, int depth);
129 StmtNode* parse_global(Node* parent, int depth);
130 StmtNode* parse_nonlocal(Node* parent, int depth);
131 StmtNode* parse_return(Node* parent, int depth);
132 StmtNode* parse_del(Node* parent, int depth);
133 StmtNode* parse_pass(Node* parent, int depth);
134 StmtNode* parse_break(Node* parent, int depth);
135 StmtNode* parse_continue(Node* parent, int depth);
136 StmtNode* parse_comment_stmt(Node* parent, int depth);
137
138 // Statement_2
139 StmtNode* parse_assign(Node* parent, ExprNode* epxr, int depth);
140 StmtNode* parse_augassign(Node* parent, ExprNode* epxr, int depth);
141 StmtNode* parse_annassign(Node* parent, ExprNode* epxr, int depth);
142
143 // Statement Dispatch
144 // ------------------
145 StmtNode* parse_statement(Node* parent, int depth);
146 StmtNode* parse_statement_primary(Node* parent, int depth);
147
148 // Primary expression
149 // parse_expression_1
150 Comment* parse_comment(Node* parent, int depth);
151 ExprNode* parse_await(Node* parent, int depth);
152 StmtNode* parse_yield_stmt(Node* parent, int depth);
153 ExprNode* parse_yield(Node* parent, int depth);
154 ExprNode* parse_yield_from(Node* parent, int depth);
155 ExprNode* parse_name(Node* parent, int depth);
156 ExprNode* parse_lambda(Node* parent, int depth);
157 ExprNode* parse_constant(Node* parent, int depth);
158 ExprNode* parse_ifexp_ext(Node* parent, int depth);
159
160 ExprNode* parse_special_string(Node* parent, int depth);
161 ExprNode* parse_joined_string(Node* parent, int depth);
162 ExprNode* parse_formatted_value_string(Node* parent, int depth);
163 JoinedStr* parse_format_spec(Node* parent, int depth);
164
165 ExprNode* parse_starred(Node* parent, int depth);
166 ExprNode* parse_list(Node* parent, int depth);
167 ExprNode* parse_tuple_generator(Node* parent, int depth);
168 ExprNode* parse_set_dict(Node* parent, int depth);
169 ExprNode* parse_prefix_unary(Node* parent, int depth);
170
171 void parse_comprehension(Node* parent, Array<Comprehension>& out, char kind, int depth);
172 Arguments parse_arguments(Node* parent, char kind, int depth);
173 Token
174 parse_call_args(Node* parent, Array<ExprNode*>& args, Array<Keyword>& keywords, int depth);
175 void parse_withitem(Node* parent, Array<WithItem>& out, int depth);
176 ExprNode* parse_star_targets(Node* parent, int depth);
177
178 // parse_expression_2
179 // TODO: primary has the parent has GC not the expression it belongs to
180 ExprNode* parse_named_expr(Node* parent, ExprNode* primary, int depth);
181 ExprNode* parse_bool_operator(Node* parent, ExprNode* primary, int depth);
182 ExprNode* parse_binary_operator(Node* parent, ExprNode* primary, int depth);
183 ExprNode* parse_compare_operator(Node* parent, ExprNode* primary, int depth);
184 ExprNode* parse_suffix_unary(Node* parent, ExprNode* primary, int depth);
185 ExprNode* parse_call(Node* parent, ExprNode* primary, int depth);
186 ExprNode* parse_attribute(Node* parent, ExprNode* primary, int depth);
187 ExprNode* parse_subscript(Node* parent, ExprNode* primary, int depth);
188 ExprNode* parse_slice(Node* parent, ExprNode* primary, int depth);
189 ExprNode* parse_star_expression(Node* parent, int depth);
190 ExprNode* parse_ifexp(Node* parent, ExprNode* primary, int depth);
191
192 // Expression Dispatcher
193 // ---------------------
194 // using the current token dispatch to the correct parsing routine
195 ExprNode* parse_expression(Node* parent, int depth, bool comma = false);
196
197 ExprNode* parse_expression_primary(Node* parent, int depth);
198
199 ExprNode* parse_expression_1(
200 Node* parent, ExprNode* primary, int min_precedence, int depth, bool comma = false);
201
202 ExprNode* parse_operators(Node* parent, ExprNode* lhs, int min_precedence, int depth);
203
204 // Helpers
205 // -------
206 void start_code_loc(CommonAttributes* target, Token tok);
207
208 void end_code_loc(CommonAttributes* target, Token tok);
209
210 bool is_valid_value();
211 ConstantValue get_value(Node* parent);
212
213 OpConfig const& get_operator_config(Token const& tok) const;
214
215 bool is_binary_operator_family(OpConfig const& conf);
216
217 void add_inline_comment(StmtNode* stmt, int depth) {
218 //
219 578 add_inline_comment(stmt, stmt, depth);
220 578 }
221
222 template <typename T>
223 void add_inline_comment(Node* parent, T* stmt, int depth) {
224
2/2
✓ Branch 2 taken 20 times.
✓ Branch 3 taken 582 times.
1204 if (token().type() == tok_comment) {
225 40 stmt->comment = parse_comment(parent, depth + 1);
226 }
227 1204 }
228
229 // Error Handling
230 // --------------
231 void expect_token(int expected, bool eat, Node* wip_expression, CodeLocation const& loc);
232
233 void expect_operator(String const& op, bool eat, Node* wip_expression, CodeLocation const& loc);
234
235 void expect_tokens(Array<int> const& expected,
236 bool eat,
237 Node* wip_expression,
238 CodeLocation const& loc);
239
240 // check for a new lines and eats all the subsequent newlines
241 // NB: maybe we should each repeating newlines inside the lexer
242 // NB: making the lexer not eat new line enable the lexer to be closer
243 // to the original code when it is used for formatting only
244 void expect_newline(Node* wip_expression, CodeLocation const& loc);
245
246 void expect_comment_or_newline(StmtNode* stmt, int depth, CodeLocation const& loc);
247
248 void error_recovery(ParsingError* error);
249
250 void ensure_valid();
251
252 void show_diagnostics(std::ostream& out);
253
254 // Shortcuts
255 // ---------
256 Token const& next_token();
257 Token const& token() const { return _lex.token(); }
258 Token const& peek_token() const { return _lex.peek_token(); }
259
260 String get_identifier() const {
261
2/2
✓ Branch 2 taken 4729 times.
✓ Branch 3 taken 49 times.
4778 if (token().type() == tok_identifier) {
262 4729 return token().identifier();
263 }
264
1/1
✓ Branch 2 taken 49 times.
49 return String("<identifier>");
265 }
266
267 bool async() const {
268
2/2
✓ Branch 1 taken 127 times.
✓ Branch 2 taken 9 times.
136 if (async_mode.size() <= 0) {
269 127 return false;
270 }
271
272 9 return async_mode[int(async_mode.size()) - 1];
273 }
274
275 // not 100% sure this is the right way
276 ExprContext context() const {
277
2/2
✓ Branch 1 taken 3487 times.
✓ Branch 2 taken 93 times.
3580 if (_context.size() <= 0) {
278 3487 return ExprContext::Load;
279 }
280
281 93 return _context[int(_context.size()) - 1];
282 }
283
284 ParsingContext parsing_ctx() const {
285 1087 int n = int(parsing_context.size());
286
2/2
✓ Branch 0 taken 1051 times.
✓ Branch 1 taken 36 times.
1087 if (n == 0)
287 1051 return ParsingContext::None;
288
289 36 return parsing_context[n - 1];
290 }
291
292 bool is_tok_statement_ender() const;
293
294 bool allow_slice() const { return parsing_ctx() == ParsingContext::Slice; }
295
296 Array<ParsingError> const& get_errors() const { return errors; }
297
298 enum class Mode
299 {
300 Stmt,
301 Expr,
302 Pattern
303 };
304
305 private:
306 Token previous = dummy();
307 TokenBuffer currentline;
308
309 public:
310 int expression_depth = 0;
311
312 private:
313 Array<StmtNode*> _pending_comments;
314 bool with_extension = true;
315 Array<ExprContext> _context;
316 Array<bool> async_mode;
317 Array<ParsingContext> parsing_context;
318 AbstractLexer& _lex;
319
320 bool is_empty_line = true;
321 int current_error = -1;
322 Array<ParsingError> errors;
323 };
324
325 } // namespace lython
326 #endif
327