| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #ifndef LYTHON_SEMA_HEADER | ||
| 2 | #define LYTHON_SEMA_HEADER | ||
| 3 | |||
| 4 | #include "ast/magic.h" | ||
| 5 | #include "ast/ops.h" | ||
| 6 | #include "ast/visitor.h" | ||
| 7 | #include "sema/bindings.h" | ||
| 8 | #include "sema/builtin.h" | ||
| 9 | #include "sema/errors.h" | ||
| 10 | #include "utilities/strings.h" | ||
| 11 | |||
| 12 | // #define SEMA_ERROR(exception) \ | ||
| 13 | // kwerror("{}", exception.what()); \ | ||
| 14 | // errors.push_back(std::unique_ptr<SemaException>(new exception)); | ||
| 15 | |||
| 16 | namespace lython { | ||
| 17 | |||
| 18 | Array<String> python_paths(); | ||
| 19 | |||
| 20 | struct SemaVisitorTrait { | ||
| 21 | using StmtRet = TypeExpr*; | ||
| 22 | using ExprRet = TypeExpr*; | ||
| 23 | using ModRet = TypeExpr*; | ||
| 24 | using PatRet = TypeExpr*; | ||
| 25 | using IsConst = std::false_type; | ||
| 26 | using Trace = std::true_type; | ||
| 27 | |||
| 28 | enum | ||
| 29 | { MaxRecursionDepth = LY_MAX_VISITOR_RECURSION_DEPTH }; | ||
| 30 | }; | ||
| 31 | |||
| 32 | struct SemaContext { | ||
| 33 | bool yield = false; | ||
| 34 | bool arrow = false; | ||
| 35 | }; | ||
| 36 | |||
| 37 | /* The semantic analysis (SEM-A) happens after the parsing, the AST can be assumed to be | ||
| 38 | * syntactically correct; its job is to detect issues that could prevent a successful compilation. | ||
| 39 | * | ||
| 40 | * Errors caught in that process are undeclared variables and mistypings, | ||
| 41 | * this includes missing attributes, missing methods | ||
| 42 | * | ||
| 43 | * In addition, our SEM-A will deduce types (i.e variables inherit the type of the expressions, | ||
| 44 | * this is NOT type inference) and allocate a register to each variable. | ||
| 45 | * | ||
| 46 | * To support type deduction SEM-A returns the type of the analysed expression, | ||
| 47 | * the deduction can then be used for typechecking. | ||
| 48 | * | ||
| 49 | * Type deduction is a weaker form of type inference where the type of the parent | ||
| 50 | * expression is deduced from the children. In the future we might add full type inference. Type | ||
| 51 | * deduction will still be useful then as it will reduce the cost of type inference for the | ||
| 52 | * trivial cases. | ||
| 53 | * | ||
| 54 | * Type deduction alone should provide a satisfactory development experience, as the user should | ||
| 55 | * only have to specify the type of the arguments which is good practice anyway as it serves as | ||
| 56 | * documentation. | ||
| 57 | * | ||
| 58 | * Notes | ||
| 59 | * ----- | ||
| 60 | * | ||
| 61 | * SEM-A does a quick first pass through the module to insert definitions | ||
| 62 | * that are used before their definitions. This allows us to get away | ||
| 63 | * with not forward-declaring everything, but it means some analytics | ||
| 64 | * will get delayed until the end. In the case of mutually recursive definitions | ||
| 65 | * forward declaration is required so typing can be checked. | ||
| 66 | * | ||
| 67 | * SEM-A will add type annotation & reorder arguments wherever it can. | ||
| 68 | * This has the goal of standardizing the code & simplifying its execution | ||
| 69 | * later on. | ||
| 70 | * | ||
| 71 | * You can inspect the change by saving the resulting AST. | ||
| 72 | * You could implement an automatic formatter that executes semantic analysis | ||
| 73 | * to format & complete the code. The completed code will then take less time | ||
| 74 | * to compile as the representation will be easier to analyse. | ||
| 75 | * | ||
| 76 | * Raises | ||
| 77 | * ------ | ||
| 78 | * | ||
| 79 | * TypeError | ||
| 80 | * Raised when types between expressions mismatch | ||
| 81 | * | ||
| 82 | * UnsupportedOperand | ||
| 83 | * Raised when using an operand on an object that does not support it | ||
| 84 | * | ||
| 85 | * AttributeError | ||
| 86 | * Raised when using an object attribute that does not exist | ||
| 87 | * | ||
| 88 | * NameError | ||
| 89 | * Raised when using an undefined variable | ||
| 90 | * | ||
| 91 | * ModuleNotFoundError | ||
| 92 | * Raised when importing a module that was not found | ||
| 93 | * | ||
| 94 | * ImportError | ||
| 95 | * Raised when importing a statement that was not found from a module | ||
| 96 | * | ||
| 97 | */ | ||
| 98 | struct SemanticAnalyser: BaseVisitor<SemanticAnalyser, false, SemaVisitorTrait> { | ||
| 99 | Bindings bindings; | ||
| 100 | bool forwardpass = false; | ||
| 101 | Array<std::unique_ptr<SemaException>> errors; | ||
| 102 | Array<StmtNode*> nested; | ||
| 103 | Array<String> namespaces; | ||
| 104 | Dict<StringRef, bool> flags; | ||
| 105 | Array<String> paths = python_paths(); | ||
| 106 | |||
| 107 | // maybe combine the semacontext with namespace | ||
| 108 | Array<SemaContext> semactx; | ||
| 109 | |||
| 110 | bool has_errors() const; | ||
| 111 | |||
| 112 | SemaContext& get_context() { | ||
| 113 | static SemaContext global_ctx; | ||
| 114 |
2/2✓ Branch 1 taken 3 times.
✓ Branch 2 taken 130 times.
|
133 | if (semactx.size() == 0) { |
| 115 | 3 | return global_ctx; | |
| 116 | } | ||
| 117 | 130 | return semactx[semactx.size() - 1]; | |
| 118 | } | ||
| 119 | |||
| 120 | void show_diagnostic(std::ostream& out, class AbstractLexer* lexer = nullptr); | ||
| 121 | |||
| 122 | bool is_type(TypeExpr* node, int depth, lython::CodeLocation const& loc); | ||
| 123 | |||
| 124 | template <typename T, typename... Args> | ||
| 125 | void sema_error(Node* node, lython::CodeLocation const& loc, Args... args) { | ||
| 126 |
5/11✓ Branch 1 taken 319 times.
✓ Branch 4 taken 319 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 243 times.
✓ Branch 8 taken 8 times.
✓ Branch 11 taken 243 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
638 | errors.push_back(std::unique_ptr<SemaException>(new T(args...))); |
| 127 | 638 | SemaException* exception = errors[errors.size() - 1].get(); | |
| 128 | |||
| 129 | // Populate location info | ||
| 130 | 638 | exception->set_node(node); | |
| 131 | |||
| 132 | // use the LOC from parent function | ||
| 133 |
2/3✓ Branch 1 taken 319 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 319 times.
|
1276 | lython::log(lython::LogLevel::Error, loc, "{}", exception->what()); |
| 134 | 638 | } | |
| 135 | |||
| 136 | #define SEMA_ERROR(expr, exception, ...) sema_error<exception>(expr, LOC, __VA_ARGS__) | ||
| 137 | |||
| 138 | public: | ||
| 139 | virtual ~SemanticAnalyser() {} | ||
| 140 | |||
| 141 | StmtNode* current_namespace() { | ||
| 142 | if (nested.size() > 0) { | ||
| 143 | return nested[nested.size() - 1]; | ||
| 144 | } | ||
| 145 | return nullptr; | ||
| 146 | } | ||
| 147 | |||
| 148 | Tuple<ClassDef*, FunctionDef*> | ||
| 149 | find_method(TypeExpr* class_type, String const& methodname, int depth); | ||
| 150 | |||
| 151 | bool typecheck( | ||
| 152 | ExprNode* lhs, TypeExpr* lhs_t, ExprNode* rhs, TypeExpr* rhs_t, CodeLocation const& loc); | ||
| 153 | |||
| 154 | bool add_name(ExprNode* expr, ExprNode* value, ExprNode* type); | ||
| 155 | |||
| 156 | String operator_function(TypeExpr* expr_t, StringRef op); | ||
| 157 | |||
| 158 | Arrow* functiondef_arrow(FunctionDef* n, StmtNode* class_t, int depth); | ||
| 159 | void record_ctor_attributes(ClassDef* n, FunctionDef* ctor, int depth); | ||
| 160 | |||
| 161 | String generate_function_name(FunctionDef* n); | ||
| 162 | |||
| 163 | Arrow* get_arrow(ExprNode* fun, ExprNode* type, int depth, int& offset, ClassDef*& cls); | ||
| 164 | |||
| 165 | TypeExpr* oneof(Array<TypeExpr*> types) { | ||
| 166 |
2/2✓ Branch 1 taken 83 times.
✓ Branch 2 taken 5 times.
|
88 | if (types.size() > 0) { |
| 167 | 83 | return types[0]; | |
| 168 | } | ||
| 169 | 5 | return nullptr; | |
| 170 | } | ||
| 171 | |||
| 172 | Node* load_name(Name_t* variable); | ||
| 173 | |||
| 174 | Array<TypeExpr*> exec_body(Array<StmtNode*>& body, int depth); | ||
| 175 | |||
| 176 | Name* make_ref(Node* parent, String const& name, int varid = -1) { | ||
| 177 |
2/2✓ Branch 1 taken 323 times.
✓ Branch 4 taken 323 times.
|
323 | return make_ref(parent, StringRef(name), varid); |
| 178 | } | ||
| 179 | |||
| 180 | void record_attributes(ClassDef* n, | ||
| 181 | Array<StmtNode*> const& body, | ||
| 182 | Array<StmtNode*>& methods, | ||
| 183 | FunctionDef** ctor, | ||
| 184 | int depth); | ||
| 185 | |||
| 186 | Name* make_ref(Node* parent, StringRef const& name, int varid = -1) { | ||
| 187 | 327 | auto ref = parent->new_object<Name>(); | |
| 188 |
1/1✓ Branch 1 taken 327 times.
|
327 | ref->id = name; |
| 189 | |||
| 190 |
2/2✓ Branch 0 taken 310 times.
✓ Branch 1 taken 17 times.
|
327 | if (varid == -1) { |
| 191 |
2/2✓ Branch 1 taken 310 times.
✓ Branch 4 taken 310 times.
|
310 | ref->varid = bindings.get_varid(ref->id); |
| 192 | assert(ref->varid != -1, "Should be able to find the name we are refering to"); | ||
| 193 | } else { | ||
| 194 | 17 | ref->varid = varid; | |
| 195 | } | ||
| 196 | |||
| 197 | 327 | ref->ctx = ExprContext::Load; | |
| 198 | 327 | ref->size = int(bindings.bindings.size()); | |
| 199 | 327 | ref->dynamic = bindings.is_dynamic(ref->varid); | |
| 200 | 327 | return ref; | |
| 201 | } | ||
| 202 | |||
| 203 | ClassDef* get_class(ExprNode* classref, int depth); | ||
| 204 | TypeExpr* resolve_variable(ExprNode* node); | ||
| 205 | |||
| 206 | TypeExpr* attribute_assign(Attribute* n, int depth, TypeExpr* expected); | ||
| 207 | |||
| 208 | void add_arguments(Arguments& args, Arrow*, ClassDef* def, int); | ||
| 209 | |||
| 210 | #define FUNCTION_GEN(name, fun) virtual TypeExpr* fun(name* n, int depth); | ||
| 211 | |||
| 212 | #define X(name, _) | ||
| 213 | #define SSECTION(name) | ||
| 214 | #define MOD(name, fun) FUNCTION_GEN(name, fun) | ||
| 215 | #define EXPR(name, fun) FUNCTION_GEN(name, fun) | ||
| 216 | #define STMT(name, fun) FUNCTION_GEN(name, fun) | ||
| 217 | #define MATCH(name, fun) FUNCTION_GEN(name, fun) | ||
| 218 | |||
| 219 | NODEKIND_ENUM(X, SSECTION, EXPR, STMT, MOD, MATCH) | ||
| 220 | |||
| 221 | #undef X | ||
| 222 | #undef SSECTION | ||
| 223 | #undef EXPR | ||
| 224 | #undef STMT | ||
| 225 | #undef MOD | ||
| 226 | #undef MATCH | ||
| 227 | |||
| 228 | #undef FUNCTION_GEN | ||
| 229 | }; | ||
| 230 | |||
| 231 | } // namespace lython | ||
| 232 | |||
| 233 | #endif | ||
| 234 |