| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #ifndef LYTHON_PARSER_H | ||
| 2 | #define LYTHON_PARSER_H | ||
| 3 | |||
| 4 | #include "ast/magic.h" | ||
| 5 | #include "ast/nodes.h" | ||
| 6 | #include "dependencies/coz_wrap.h" | ||
| 7 | #include "lexer/lexer.h" | ||
| 8 | #include "logging/logging.h" | ||
| 9 | #include "parser/parsing_error.h" | ||
| 10 | #include "utilities/metadata.h" | ||
| 11 | |||
| 12 | #include <iostream> | ||
| 13 | #include <numeric> | ||
| 14 | |||
| 15 | namespace lython { | ||
| 16 | |||
| 17 | enum class ParsingContext | ||
| 18 | { | ||
| 19 | None, | ||
| 20 | Comprehension, | ||
| 21 | Slice, | ||
| 22 | }; | ||
| 23 | |||
| 24 | /* Saves all the token that makes up for the current line | ||
| 25 | * this is used for printing better error message | ||
| 26 | */ | ||
| 27 | struct TokenBuffer { | ||
| 28 | |||
| 29 | void add(Token const& tok) { | ||
| 30 |
2/2✓ Branch 1 taken 2067 times.
✓ Branch 2 taken 17629 times.
|
19696 | if (tok.type() == tok_newline) { |
| 31 | 2067 | tokens.clear(); | |
| 32 | } else { | ||
| 33 | 17629 | tokens.push_back(tok); | |
| 34 | } | ||
| 35 | 19696 | } | |
| 36 | |||
| 37 | Array<Token> tokens; | ||
| 38 | }; | ||
| 39 | |||
| 40 | /** | ||
| 41 | * The parser is responsible for transforming the source code into the AST. | ||
| 42 | * notifying the users of any syntax error that could prevent it from completing | ||
| 43 | * the transformation. | ||
| 44 | * | ||
| 45 | * SyntaxError will exclusively come from this object. | ||
| 46 | * | ||
| 47 | * The parser is an handwritten recursive decent parser. It uses the next token | ||
| 48 | * to decide which AST-node it should parse next. | ||
| 49 | * In case of an error (i.e unexpected token) the parser continues as if was token | ||
| 50 | * was defined. This can cause cascading SyntaxError, users should focus on the first one. | ||
| 51 | * | ||
| 52 | */ | ||
| 53 | class Parser { | ||
| 54 | public: | ||
| 55 | Parser(AbstractLexer& lexer): _lex(lexer) { metadata_init_names(); } | ||
| 56 | |||
| 57 | ParsingError& | ||
| 58 | parser_kwerror(lython::CodeLocation const& loc, String const& exception, String const& msg) { | ||
| 59 | 565 | current_error += 1; | |
| 60 | assert(current_error == errors.size(), "Only one error at a time can happen"); | ||
| 61 | |||
| 62 |
2/2✓ Branch 1 taken 565 times.
✓ Branch 4 taken 565 times.
|
565 | auto& details = errors.emplace_back(ParsingError()); |
| 63 | 565 | details.error_kind = exception; | |
| 64 | 565 | details.message = msg; | |
| 65 | 565 | details.loc = loc; | |
| 66 | 565 | details.received_token = token(); | |
| 67 | |||
| 68 |
1/1✓ Branch 1 taken 565 times.
|
565 | lython::log(lython::LogLevel::Error, loc, "{}: {}", exception, msg); |
| 69 | 565 | return details; | |
| 70 | } | ||
| 71 | |||
| 72 | bool has_errors() const { return errors.size() > 0; } | ||
| 73 | |||
| 74 | void parse_to_module(Module* module) { | ||
| 75 | // lookup the module | ||
| 76 | |||
| 77 | try { | ||
| 78 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | parse_body(module, module->body, 0); |
| 79 | ✗ | } catch (ParsingException const&) { | |
| 80 | // this is SyntaxError: Expected a body | ||
| 81 | // it was inserted by parse_body | ||
| 82 | // and raised to reach the parent block | ||
| 83 | // this is the top level block no need to go further up | ||
| 84 | ✗ | } | |
| 85 | 1 | } | |
| 86 | |||
| 87 | Module* parse_module() { | ||
| 88 | // lookup the module | ||
| 89 |
1/4✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
1 | Module* module = new Module(); |
| 90 | 1 | module->class_id = meta::type_id<Module>(); | |
| 91 | |||
| 92 | 1 | parse_to_module(module); | |
| 93 | 1 | return module; | |
| 94 | } | ||
| 95 | |||
| 96 | Token parse_body(Node* parent, Array<StmtNode*>& out, int depth); | ||
| 97 | Token parse_except_handler(Try* parent, Array<ExceptHandler>& out, int depth); | ||
| 98 | void parse_alias(Node* parent, Array<Alias>& out, int depth); | ||
| 99 | Token parse_match_case(Node* parent, Array<MatchCase>& out, int depth); | ||
| 100 | String parse_module_path(Node* parent, int& level, int depth); | ||
| 101 | |||
| 102 | // Patterns | ||
| 103 | Pattern* parse_match_sequence(Node* parent, int depth); | ||
| 104 | Pattern* parse_match_star(Node* parent, int depth); | ||
| 105 | Pattern* parse_match_mapping(Node* parent, int depth); | ||
| 106 | |||
| 107 | Pattern* parse_match_or(Node* parent, Pattern* primary, int depth); | ||
| 108 | Pattern* parse_match_as(Node* parent, Pattern* primary, int depth); | ||
| 109 | Pattern* parse_match_class(Node* parent, ExprNode* cls, int depth); | ||
| 110 | |||
| 111 | // Pattern Dispatch | ||
| 112 | Pattern* parse_pattern(Node* parent, int depth); | ||
| 113 | Pattern* parse_pattern_1(Node* parent, int depth); | ||
| 114 | |||
| 115 | // Statement_1 | ||
| 116 | StmtNode* parse_function_def(Node* parent, bool async, int depth); | ||
| 117 | StmtNode* parse_class_def(Node* parent, int depth); | ||
| 118 | StmtNode* parse_for(Node* parent, int depth); | ||
| 119 | StmtNode* parse_while(Node* parent, int depth); | ||
| 120 | StmtNode* parse_if(Node* parent, int depth); | ||
| 121 | StmtNode* parse_if_alt(Node* parent, int depth); | ||
| 122 | StmtNode* parse_match(Node* parent, int depth); | ||
| 123 | StmtNode* parse_with(Node* parent, int depth); | ||
| 124 | StmtNode* parse_raise(Node* parent, int depth); | ||
| 125 | StmtNode* parse_try(Node* parent, int depth); | ||
| 126 | StmtNode* parse_assert(Node* parent, int depth); | ||
| 127 | StmtNode* parse_import(Node* parent, int depth); | ||
| 128 | StmtNode* parse_import_from(Node* parent, int depth); | ||
| 129 | StmtNode* parse_global(Node* parent, int depth); | ||
| 130 | StmtNode* parse_nonlocal(Node* parent, int depth); | ||
| 131 | StmtNode* parse_return(Node* parent, int depth); | ||
| 132 | StmtNode* parse_del(Node* parent, int depth); | ||
| 133 | StmtNode* parse_pass(Node* parent, int depth); | ||
| 134 | StmtNode* parse_break(Node* parent, int depth); | ||
| 135 | StmtNode* parse_continue(Node* parent, int depth); | ||
| 136 | StmtNode* parse_comment_stmt(Node* parent, int depth); | ||
| 137 | |||
| 138 | // Statement_2 | ||
| 139 | StmtNode* parse_assign(Node* parent, ExprNode* epxr, int depth); | ||
| 140 | StmtNode* parse_augassign(Node* parent, ExprNode* epxr, int depth); | ||
| 141 | StmtNode* parse_annassign(Node* parent, ExprNode* epxr, int depth); | ||
| 142 | |||
| 143 | // Statement Dispatch | ||
| 144 | // ------------------ | ||
| 145 | StmtNode* parse_statement(Node* parent, int depth); | ||
| 146 | StmtNode* parse_statement_primary(Node* parent, int depth); | ||
| 147 | |||
| 148 | // Primary expression | ||
| 149 | // parse_expression_1 | ||
| 150 | Comment* parse_comment(Node* parent, int depth); | ||
| 151 | ExprNode* parse_await(Node* parent, int depth); | ||
| 152 | StmtNode* parse_yield_stmt(Node* parent, int depth); | ||
| 153 | ExprNode* parse_yield(Node* parent, int depth); | ||
| 154 | ExprNode* parse_yield_from(Node* parent, int depth); | ||
| 155 | ExprNode* parse_name(Node* parent, int depth); | ||
| 156 | ExprNode* parse_lambda(Node* parent, int depth); | ||
| 157 | ExprNode* parse_constant(Node* parent, int depth); | ||
| 158 | ExprNode* parse_ifexp_ext(Node* parent, int depth); | ||
| 159 | |||
| 160 | ExprNode* parse_special_string(Node* parent, int depth); | ||
| 161 | ExprNode* parse_joined_string(Node* parent, int depth); | ||
| 162 | ExprNode* parse_formatted_value_string(Node* parent, int depth); | ||
| 163 | JoinedStr* parse_format_spec(Node* parent, int depth); | ||
| 164 | |||
| 165 | ExprNode* parse_starred(Node* parent, int depth); | ||
| 166 | ExprNode* parse_list(Node* parent, int depth); | ||
| 167 | ExprNode* parse_tuple_generator(Node* parent, int depth); | ||
| 168 | ExprNode* parse_set_dict(Node* parent, int depth); | ||
| 169 | ExprNode* parse_prefix_unary(Node* parent, int depth); | ||
| 170 | |||
| 171 | void parse_comprehension(Node* parent, Array<Comprehension>& out, char kind, int depth); | ||
| 172 | Arguments parse_arguments(Node* parent, char kind, int depth); | ||
| 173 | Token | ||
| 174 | parse_call_args(Node* parent, Array<ExprNode*>& args, Array<Keyword>& keywords, int depth); | ||
| 175 | void parse_withitem(Node* parent, Array<WithItem>& out, int depth); | ||
| 176 | ExprNode* parse_star_targets(Node* parent, int depth); | ||
| 177 | |||
| 178 | // parse_expression_2 | ||
| 179 | // TODO: primary has the parent has GC not the expression it belongs to | ||
| 180 | ExprNode* parse_named_expr(Node* parent, ExprNode* primary, int depth); | ||
| 181 | ExprNode* parse_bool_operator(Node* parent, ExprNode* primary, int depth); | ||
| 182 | ExprNode* parse_binary_operator(Node* parent, ExprNode* primary, int depth); | ||
| 183 | ExprNode* parse_compare_operator(Node* parent, ExprNode* primary, int depth); | ||
| 184 | ExprNode* parse_suffix_unary(Node* parent, ExprNode* primary, int depth); | ||
| 185 | ExprNode* parse_call(Node* parent, ExprNode* primary, int depth); | ||
| 186 | ExprNode* parse_attribute(Node* parent, ExprNode* primary, int depth); | ||
| 187 | ExprNode* parse_subscript(Node* parent, ExprNode* primary, int depth); | ||
| 188 | ExprNode* parse_slice(Node* parent, ExprNode* primary, int depth); | ||
| 189 | ExprNode* parse_star_expression(Node* parent, int depth); | ||
| 190 | ExprNode* parse_ifexp(Node* parent, ExprNode* primary, int depth); | ||
| 191 | |||
| 192 | // Expression Dispatcher | ||
| 193 | // --------------------- | ||
| 194 | // using the current token dispatch to the correct parsing routine | ||
| 195 | ExprNode* parse_expression(Node* parent, int depth, bool comma = false); | ||
| 196 | |||
| 197 | ExprNode* parse_expression_primary(Node* parent, int depth); | ||
| 198 | |||
| 199 | ExprNode* parse_expression_1( | ||
| 200 | Node* parent, ExprNode* primary, int min_precedence, int depth, bool comma = false); | ||
| 201 | |||
| 202 | ExprNode* parse_operators(Node* parent, ExprNode* lhs, int min_precedence, int depth); | ||
| 203 | |||
| 204 | // Helpers | ||
| 205 | // ------- | ||
| 206 | void start_code_loc(CommonAttributes* target, Token tok); | ||
| 207 | |||
| 208 | void end_code_loc(CommonAttributes* target, Token tok); | ||
| 209 | |||
| 210 | bool is_valid_value(); | ||
| 211 | ConstantValue get_value(Node* parent); | ||
| 212 | |||
| 213 | OpConfig const& get_operator_config(Token const& tok) const; | ||
| 214 | |||
| 215 | bool is_binary_operator_family(OpConfig const& conf); | ||
| 216 | |||
| 217 | void add_inline_comment(StmtNode* stmt, int depth) { | ||
| 218 | // | ||
| 219 | 578 | add_inline_comment(stmt, stmt, depth); | |
| 220 | 578 | } | |
| 221 | |||
| 222 | template <typename T> | ||
| 223 | void add_inline_comment(Node* parent, T* stmt, int depth) { | ||
| 224 |
2/2✓ Branch 2 taken 20 times.
✓ Branch 3 taken 582 times.
|
1204 | if (token().type() == tok_comment) { |
| 225 | 40 | stmt->comment = parse_comment(parent, depth + 1); | |
| 226 | } | ||
| 227 | 1204 | } | |
| 228 | |||
| 229 | // Error Handling | ||
| 230 | // -------------- | ||
| 231 | void expect_token(int expected, bool eat, Node* wip_expression, CodeLocation const& loc); | ||
| 232 | |||
| 233 | void expect_operator(String const& op, bool eat, Node* wip_expression, CodeLocation const& loc); | ||
| 234 | |||
| 235 | void expect_tokens(Array<int> const& expected, | ||
| 236 | bool eat, | ||
| 237 | Node* wip_expression, | ||
| 238 | CodeLocation const& loc); | ||
| 239 | |||
| 240 | // check for a new lines and eats all the subsequent newlines | ||
| 241 | // NB: maybe we should each repeating newlines inside the lexer | ||
| 242 | // NB: making the lexer not eat new line enable the lexer to be closer | ||
| 243 | // to the original code when it is used for formatting only | ||
| 244 | void expect_newline(Node* wip_expression, CodeLocation const& loc); | ||
| 245 | |||
| 246 | void expect_comment_or_newline(StmtNode* stmt, int depth, CodeLocation const& loc); | ||
| 247 | |||
| 248 | void error_recovery(ParsingError* error); | ||
| 249 | |||
| 250 | void ensure_valid(); | ||
| 251 | |||
| 252 | void show_diagnostics(std::ostream& out); | ||
| 253 | |||
| 254 | // Shortcuts | ||
| 255 | // --------- | ||
| 256 | Token const& next_token(); | ||
| 257 | Token const& token() const { return _lex.token(); } | ||
| 258 | Token const& peek_token() const { return _lex.peek_token(); } | ||
| 259 | |||
| 260 | String get_identifier() const { | ||
| 261 |
2/2✓ Branch 2 taken 4729 times.
✓ Branch 3 taken 49 times.
|
4778 | if (token().type() == tok_identifier) { |
| 262 | 4729 | return token().identifier(); | |
| 263 | } | ||
| 264 |
1/1✓ Branch 2 taken 49 times.
|
49 | return String("<identifier>"); |
| 265 | } | ||
| 266 | |||
| 267 | bool async() const { | ||
| 268 |
2/2✓ Branch 1 taken 127 times.
✓ Branch 2 taken 9 times.
|
136 | if (async_mode.size() <= 0) { |
| 269 | 127 | return false; | |
| 270 | } | ||
| 271 | |||
| 272 | 9 | return async_mode[int(async_mode.size()) - 1]; | |
| 273 | } | ||
| 274 | |||
| 275 | // not 100% sure this is the right way | ||
| 276 | ExprContext context() const { | ||
| 277 |
2/2✓ Branch 1 taken 3487 times.
✓ Branch 2 taken 93 times.
|
3580 | if (_context.size() <= 0) { |
| 278 | 3487 | return ExprContext::Load; | |
| 279 | } | ||
| 280 | |||
| 281 | 93 | return _context[int(_context.size()) - 1]; | |
| 282 | } | ||
| 283 | |||
| 284 | ParsingContext parsing_ctx() const { | ||
| 285 | 1087 | int n = int(parsing_context.size()); | |
| 286 |
2/2✓ Branch 0 taken 1051 times.
✓ Branch 1 taken 36 times.
|
1087 | if (n == 0) |
| 287 | 1051 | return ParsingContext::None; | |
| 288 | |||
| 289 | 36 | return parsing_context[n - 1]; | |
| 290 | } | ||
| 291 | |||
| 292 | bool is_tok_statement_ender() const; | ||
| 293 | |||
| 294 | bool allow_slice() const { return parsing_ctx() == ParsingContext::Slice; } | ||
| 295 | |||
| 296 | Array<ParsingError> const& get_errors() const { return errors; } | ||
| 297 | |||
| 298 | enum class Mode | ||
| 299 | { | ||
| 300 | Stmt, | ||
| 301 | Expr, | ||
| 302 | Pattern | ||
| 303 | }; | ||
| 304 | |||
| 305 | private: | ||
| 306 | Token previous = dummy(); | ||
| 307 | TokenBuffer currentline; | ||
| 308 | |||
| 309 | public: | ||
| 310 | int expression_depth = 0; | ||
| 311 | |||
| 312 | private: | ||
| 313 | Array<StmtNode*> _pending_comments; | ||
| 314 | bool with_extension = true; | ||
| 315 | Array<ExprContext> _context; | ||
| 316 | Array<bool> async_mode; | ||
| 317 | Array<ParsingContext> parsing_context; | ||
| 318 | AbstractLexer& _lex; | ||
| 319 | |||
| 320 | bool is_empty_line = true; | ||
| 321 | int current_error = -1; | ||
| 322 | Array<ParsingError> errors; | ||
| 323 | }; | ||
| 324 | |||
| 325 | } // namespace lython | ||
| 326 | #endif | ||
| 327 |