Line | Branch | Exec | Source |
---|---|---|---|
1 | #ifndef LYTHON_PARSER_H | ||
2 | #define LYTHON_PARSER_H | ||
3 | |||
4 | #include "ast/magic.h" | ||
5 | #include "ast/nodes.h" | ||
6 | #include "dependencies/coz_wrap.h" | ||
7 | #include "lexer/lexer.h" | ||
8 | #include "logging/logging.h" | ||
9 | #include "parser/parsing_error.h" | ||
10 | #include "utilities/metadata.h" | ||
11 | |||
12 | #include <iostream> | ||
13 | #include <numeric> | ||
14 | |||
15 | namespace lython { | ||
16 | |||
17 | enum class ParsingContext | ||
18 | { | ||
19 | None, | ||
20 | Comprehension, | ||
21 | Slice, | ||
22 | }; | ||
23 | |||
24 | /* Saves all the token that makes up for the current line | ||
25 | * this is used for printing better error message | ||
26 | */ | ||
27 | struct TokenBuffer { | ||
28 | |||
29 | void add(Token const& tok) { | ||
30 |
2/2✓ Branch 1 taken 2067 times.
✓ Branch 2 taken 17629 times.
|
19696 | if (tok.type() == tok_newline) { |
31 | 2067 | tokens.clear(); | |
32 | } else { | ||
33 | 17629 | tokens.push_back(tok); | |
34 | } | ||
35 | 19696 | } | |
36 | |||
37 | Array<Token> tokens; | ||
38 | }; | ||
39 | |||
40 | /** | ||
41 | * The parser is responsible for transforming the source code into the AST. | ||
42 | * notifying the users of any syntax error that could prevent it from completing | ||
43 | * the transformation. | ||
44 | * | ||
45 | * SyntaxError will exclusively come from this object. | ||
46 | * | ||
47 | * The parser is an handwritten recursive decent parser. It uses the next token | ||
48 | * to decide which AST-node it should parse next. | ||
49 | * In case of an error (i.e unexpected token) the parser continues as if was token | ||
50 | * was defined. This can cause cascading SyntaxError, users should focus on the first one. | ||
51 | * | ||
52 | */ | ||
53 | class Parser { | ||
54 | public: | ||
55 | Parser(AbstractLexer& lexer): _lex(lexer) { metadata_init_names(); } | ||
56 | |||
57 | ParsingError& | ||
58 | parser_kwerror(lython::CodeLocation const& loc, String const& exception, String const& msg) { | ||
59 | 565 | current_error += 1; | |
60 | assert(current_error == errors.size(), "Only one error at a time can happen"); | ||
61 | |||
62 |
2/2✓ Branch 1 taken 565 times.
✓ Branch 4 taken 565 times.
|
565 | auto& details = errors.emplace_back(ParsingError()); |
63 | 565 | details.error_kind = exception; | |
64 | 565 | details.message = msg; | |
65 | 565 | details.loc = loc; | |
66 | 565 | details.received_token = token(); | |
67 | |||
68 |
1/1✓ Branch 1 taken 565 times.
|
565 | lython::log(lython::LogLevel::Error, loc, "{}: {}", exception, msg); |
69 | 565 | return details; | |
70 | } | ||
71 | |||
72 | bool has_errors() const { return errors.size() > 0; } | ||
73 | |||
74 | void parse_to_module(Module* module) { | ||
75 | // lookup the module | ||
76 | |||
77 | try { | ||
78 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | parse_body(module, module->body, 0); |
79 | ✗ | } catch (ParsingException const&) { | |
80 | // this is SyntaxError: Expected a body | ||
81 | // it was inserted by parse_body | ||
82 | // and raised to reach the parent block | ||
83 | // this is the top level block no need to go further up | ||
84 | ✗ | } | |
85 | 1 | } | |
86 | |||
87 | Module* parse_module() { | ||
88 | // lookup the module | ||
89 |
1/4✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
1 | Module* module = new Module(); |
90 | 1 | module->class_id = meta::type_id<Module>(); | |
91 | |||
92 | 1 | parse_to_module(module); | |
93 | 1 | return module; | |
94 | } | ||
95 | |||
96 | Token parse_body(Node* parent, Array<StmtNode*>& out, int depth); | ||
97 | Token parse_except_handler(Try* parent, Array<ExceptHandler>& out, int depth); | ||
98 | void parse_alias(Node* parent, Array<Alias>& out, int depth); | ||
99 | Token parse_match_case(Node* parent, Array<MatchCase>& out, int depth); | ||
100 | String parse_module_path(Node* parent, int& level, int depth); | ||
101 | |||
102 | // Patterns | ||
103 | Pattern* parse_match_sequence(Node* parent, int depth); | ||
104 | Pattern* parse_match_star(Node* parent, int depth); | ||
105 | Pattern* parse_match_mapping(Node* parent, int depth); | ||
106 | |||
107 | Pattern* parse_match_or(Node* parent, Pattern* primary, int depth); | ||
108 | Pattern* parse_match_as(Node* parent, Pattern* primary, int depth); | ||
109 | Pattern* parse_match_class(Node* parent, ExprNode* cls, int depth); | ||
110 | |||
111 | // Pattern Dispatch | ||
112 | Pattern* parse_pattern(Node* parent, int depth); | ||
113 | Pattern* parse_pattern_1(Node* parent, int depth); | ||
114 | |||
115 | // Statement_1 | ||
116 | StmtNode* parse_function_def(Node* parent, bool async, int depth); | ||
117 | StmtNode* parse_class_def(Node* parent, int depth); | ||
118 | StmtNode* parse_for(Node* parent, int depth); | ||
119 | StmtNode* parse_while(Node* parent, int depth); | ||
120 | StmtNode* parse_if(Node* parent, int depth); | ||
121 | StmtNode* parse_if_alt(Node* parent, int depth); | ||
122 | StmtNode* parse_match(Node* parent, int depth); | ||
123 | StmtNode* parse_with(Node* parent, int depth); | ||
124 | StmtNode* parse_raise(Node* parent, int depth); | ||
125 | StmtNode* parse_try(Node* parent, int depth); | ||
126 | StmtNode* parse_assert(Node* parent, int depth); | ||
127 | StmtNode* parse_import(Node* parent, int depth); | ||
128 | StmtNode* parse_import_from(Node* parent, int depth); | ||
129 | StmtNode* parse_global(Node* parent, int depth); | ||
130 | StmtNode* parse_nonlocal(Node* parent, int depth); | ||
131 | StmtNode* parse_return(Node* parent, int depth); | ||
132 | StmtNode* parse_del(Node* parent, int depth); | ||
133 | StmtNode* parse_pass(Node* parent, int depth); | ||
134 | StmtNode* parse_break(Node* parent, int depth); | ||
135 | StmtNode* parse_continue(Node* parent, int depth); | ||
136 | StmtNode* parse_comment_stmt(Node* parent, int depth); | ||
137 | |||
138 | // Statement_2 | ||
139 | StmtNode* parse_assign(Node* parent, ExprNode* epxr, int depth); | ||
140 | StmtNode* parse_augassign(Node* parent, ExprNode* epxr, int depth); | ||
141 | StmtNode* parse_annassign(Node* parent, ExprNode* epxr, int depth); | ||
142 | |||
143 | // Statement Dispatch | ||
144 | // ------------------ | ||
145 | StmtNode* parse_statement(Node* parent, int depth); | ||
146 | StmtNode* parse_statement_primary(Node* parent, int depth); | ||
147 | |||
148 | // Primary expression | ||
149 | // parse_expression_1 | ||
150 | Comment* parse_comment(Node* parent, int depth); | ||
151 | ExprNode* parse_await(Node* parent, int depth); | ||
152 | StmtNode* parse_yield_stmt(Node* parent, int depth); | ||
153 | ExprNode* parse_yield(Node* parent, int depth); | ||
154 | ExprNode* parse_yield_from(Node* parent, int depth); | ||
155 | ExprNode* parse_name(Node* parent, int depth); | ||
156 | ExprNode* parse_lambda(Node* parent, int depth); | ||
157 | ExprNode* parse_constant(Node* parent, int depth); | ||
158 | ExprNode* parse_ifexp_ext(Node* parent, int depth); | ||
159 | |||
160 | ExprNode* parse_special_string(Node* parent, int depth); | ||
161 | ExprNode* parse_joined_string(Node* parent, int depth); | ||
162 | ExprNode* parse_formatted_value_string(Node* parent, int depth); | ||
163 | JoinedStr* parse_format_spec(Node* parent, int depth); | ||
164 | |||
165 | ExprNode* parse_starred(Node* parent, int depth); | ||
166 | ExprNode* parse_list(Node* parent, int depth); | ||
167 | ExprNode* parse_tuple_generator(Node* parent, int depth); | ||
168 | ExprNode* parse_set_dict(Node* parent, int depth); | ||
169 | ExprNode* parse_prefix_unary(Node* parent, int depth); | ||
170 | |||
171 | void parse_comprehension(Node* parent, Array<Comprehension>& out, char kind, int depth); | ||
172 | Arguments parse_arguments(Node* parent, char kind, int depth); | ||
173 | Token | ||
174 | parse_call_args(Node* parent, Array<ExprNode*>& args, Array<Keyword>& keywords, int depth); | ||
175 | void parse_withitem(Node* parent, Array<WithItem>& out, int depth); | ||
176 | ExprNode* parse_star_targets(Node* parent, int depth); | ||
177 | |||
178 | // parse_expression_2 | ||
179 | // TODO: primary has the parent has GC not the expression it belongs to | ||
180 | ExprNode* parse_named_expr(Node* parent, ExprNode* primary, int depth); | ||
181 | ExprNode* parse_bool_operator(Node* parent, ExprNode* primary, int depth); | ||
182 | ExprNode* parse_binary_operator(Node* parent, ExprNode* primary, int depth); | ||
183 | ExprNode* parse_compare_operator(Node* parent, ExprNode* primary, int depth); | ||
184 | ExprNode* parse_suffix_unary(Node* parent, ExprNode* primary, int depth); | ||
185 | ExprNode* parse_call(Node* parent, ExprNode* primary, int depth); | ||
186 | ExprNode* parse_attribute(Node* parent, ExprNode* primary, int depth); | ||
187 | ExprNode* parse_subscript(Node* parent, ExprNode* primary, int depth); | ||
188 | ExprNode* parse_slice(Node* parent, ExprNode* primary, int depth); | ||
189 | ExprNode* parse_star_expression(Node* parent, int depth); | ||
190 | ExprNode* parse_ifexp(Node* parent, ExprNode* primary, int depth); | ||
191 | |||
192 | // Expression Dispatcher | ||
193 | // --------------------- | ||
194 | // using the current token dispatch to the correct parsing routine | ||
195 | ExprNode* parse_expression(Node* parent, int depth, bool comma = false); | ||
196 | |||
197 | ExprNode* parse_expression_primary(Node* parent, int depth); | ||
198 | |||
199 | ExprNode* parse_expression_1( | ||
200 | Node* parent, ExprNode* primary, int min_precedence, int depth, bool comma = false); | ||
201 | |||
202 | ExprNode* parse_operators(Node* parent, ExprNode* lhs, int min_precedence, int depth); | ||
203 | |||
204 | // Helpers | ||
205 | // ------- | ||
206 | void start_code_loc(CommonAttributes* target, Token tok); | ||
207 | |||
208 | void end_code_loc(CommonAttributes* target, Token tok); | ||
209 | |||
210 | bool is_valid_value(); | ||
211 | ConstantValue get_value(Node* parent); | ||
212 | |||
213 | OpConfig const& get_operator_config(Token const& tok) const; | ||
214 | |||
215 | bool is_binary_operator_family(OpConfig const& conf); | ||
216 | |||
217 | void add_inline_comment(StmtNode* stmt, int depth) { | ||
218 | // | ||
219 | 578 | add_inline_comment(stmt, stmt, depth); | |
220 | 578 | } | |
221 | |||
222 | template <typename T> | ||
223 | void add_inline_comment(Node* parent, T* stmt, int depth) { | ||
224 |
2/2✓ Branch 2 taken 20 times.
✓ Branch 3 taken 582 times.
|
1204 | if (token().type() == tok_comment) { |
225 | 40 | stmt->comment = parse_comment(parent, depth + 1); | |
226 | } | ||
227 | 1204 | } | |
228 | |||
229 | // Error Handling | ||
230 | // -------------- | ||
231 | void expect_token(int expected, bool eat, Node* wip_expression, CodeLocation const& loc); | ||
232 | |||
233 | void expect_operator(String const& op, bool eat, Node* wip_expression, CodeLocation const& loc); | ||
234 | |||
235 | void expect_tokens(Array<int> const& expected, | ||
236 | bool eat, | ||
237 | Node* wip_expression, | ||
238 | CodeLocation const& loc); | ||
239 | |||
240 | // check for a new lines and eats all the subsequent newlines | ||
241 | // NB: maybe we should each repeating newlines inside the lexer | ||
242 | // NB: making the lexer not eat new line enable the lexer to be closer | ||
243 | // to the original code when it is used for formatting only | ||
244 | void expect_newline(Node* wip_expression, CodeLocation const& loc); | ||
245 | |||
246 | void expect_comment_or_newline(StmtNode* stmt, int depth, CodeLocation const& loc); | ||
247 | |||
248 | void error_recovery(ParsingError* error); | ||
249 | |||
250 | void ensure_valid(); | ||
251 | |||
252 | void show_diagnostics(std::ostream& out); | ||
253 | |||
254 | // Shortcuts | ||
255 | // --------- | ||
256 | Token const& next_token(); | ||
257 | Token const& token() const { return _lex.token(); } | ||
258 | Token const& peek_token() const { return _lex.peek_token(); } | ||
259 | |||
260 | String get_identifier() const { | ||
261 |
2/2✓ Branch 2 taken 4729 times.
✓ Branch 3 taken 49 times.
|
4778 | if (token().type() == tok_identifier) { |
262 | 4729 | return token().identifier(); | |
263 | } | ||
264 |
1/1✓ Branch 2 taken 49 times.
|
49 | return String("<identifier>"); |
265 | } | ||
266 | |||
267 | bool async() const { | ||
268 |
2/2✓ Branch 1 taken 127 times.
✓ Branch 2 taken 9 times.
|
136 | if (async_mode.size() <= 0) { |
269 | 127 | return false; | |
270 | } | ||
271 | |||
272 | 9 | return async_mode[int(async_mode.size()) - 1]; | |
273 | } | ||
274 | |||
275 | // not 100% sure this is the right way | ||
276 | ExprContext context() const { | ||
277 |
2/2✓ Branch 1 taken 3487 times.
✓ Branch 2 taken 93 times.
|
3580 | if (_context.size() <= 0) { |
278 | 3487 | return ExprContext::Load; | |
279 | } | ||
280 | |||
281 | 93 | return _context[int(_context.size()) - 1]; | |
282 | } | ||
283 | |||
284 | ParsingContext parsing_ctx() const { | ||
285 | 1087 | int n = int(parsing_context.size()); | |
286 |
2/2✓ Branch 0 taken 1051 times.
✓ Branch 1 taken 36 times.
|
1087 | if (n == 0) |
287 | 1051 | return ParsingContext::None; | |
288 | |||
289 | 36 | return parsing_context[n - 1]; | |
290 | } | ||
291 | |||
292 | bool is_tok_statement_ender() const; | ||
293 | |||
294 | bool allow_slice() const { return parsing_ctx() == ParsingContext::Slice; } | ||
295 | |||
296 | Array<ParsingError> const& get_errors() const { return errors; } | ||
297 | |||
298 | enum class Mode | ||
299 | { | ||
300 | Stmt, | ||
301 | Expr, | ||
302 | Pattern | ||
303 | }; | ||
304 | |||
305 | private: | ||
306 | Token previous = dummy(); | ||
307 | TokenBuffer currentline; | ||
308 | |||
309 | public: | ||
310 | int expression_depth = 0; | ||
311 | |||
312 | private: | ||
313 | Array<StmtNode*> _pending_comments; | ||
314 | bool with_extension = true; | ||
315 | Array<ExprContext> _context; | ||
316 | Array<bool> async_mode; | ||
317 | Array<ParsingContext> parsing_context; | ||
318 | AbstractLexer& _lex; | ||
319 | |||
320 | bool is_empty_line = true; | ||
321 | int current_error = -1; | ||
322 | Array<ParsingError> errors; | ||
323 | }; | ||
324 | |||
325 | } // namespace lython | ||
326 | #endif | ||
327 |