| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #pragma once | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <ostream> | ||
| 5 | #include <string> | ||
| 6 | #include <unordered_map> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "dtypes.h" | ||
| 10 | #include "logging/logging.h" | ||
| 11 | |||
| 12 | /* | ||
| 13 | * incorrect is used when the input is known to be wrong | ||
| 14 | * but we want to parse as much as we can anyway | ||
| 15 | * | ||
| 16 | * incorrect is also the first token and eof the last | ||
| 17 | * | ||
| 18 | * I could add an error policy arg | ||
| 19 | * | ||
| 20 | * I think those would be nice to have | ||
| 21 | * Hex: 0x000A9F | ||
| 22 | * bin: bx010101 | ||
| 23 | */ | ||
// NOTE(review): presumably the number of spaces that make up one
// indentation level -- confirm against the lexer that consumes it.
#define LYTHON_INDENT 4
// X-macro table listing every token type.
//
// Each entry has the form X(enumerator, value). To use the table, define a
// two-argument macro and pass it to LYTHON_TOKEN; the macro is expanded once
// per entry, in the order listed here.
//
// Value convention visible in the table:
//   - tokens that are a single character ('(', '[', '.', '=', '{', '*',
//     ',', '@', '#') use that character as their value;
//   - every other token gets a unique negative id, from -1 down to -60.
#define LYTHON_TOKEN(X) \
X(tok_identifier, -1) \
X(tok_float, -2) \
X(tok_string, -3) \
X(tok_int, -4) \
X(tok_newline, -5) \
X(tok_indent, -6) \
X(tok_desindent, -7) \
X(tok_incorrect, -8) \
X(tok_eof, -9) \
X(tok_def, -10) \
X(tok_docstring, -11) \
X(tok_arrow, -12) \
X(tok_struct, -13) \
X(tok_return, -14) \
X(tok_yield, -15) \
X(tok_async, -16) \
X(tok_operator, -17) \
X(tok_import, -18) \
X(tok_match, -19) \
X(tok_as, -20) \
X(tok_from, -21) \
X(tok_if, -22) \
X(tok_elif, -23) \
X(tok_else, -24) \
X(tok_try, -25) \
X(tok_except, -26) \
X(tok_raise, -27) \
X(tok_case, -28) \
X(tok_for, -29) \
X(tok_while, -30) \
X(tok_with, -31) \
X(tok_class, -32) \
X(tok_assert, -33) \
X(tok_global, -34) \
X(tok_del, -35) \
X(tok_pass, -36) \
X(tok_break, -37) \
X(tok_continue, -38) \
X(tok_parens, '(') \
X(tok_square, '[') \
X(tok_dot, '.') \
X(tok_assign, '=') \
X(tok_curly, '{') \
X(tok_star, '*') \
X(tok_augassign, -39) \
X(tok_annassign, -40) \
X(tok_walrus, -41) \
X(tok_boolop, -42) \
X(tok_binaryop, -43) \
X(tok_compareop, -44) \
X(tok_unaryop, -45) \
X(tok_await, -46) \
X(tok_lambda, -47) \
X(tok_fstring, -48) \
X(tok_yield_from, -49) \
X(tok_in, -50) \
X(tok_finally, -51) \
X(tok_nonlocal, -52) \
X(tok_comma, ',') \
X(tok_none, -53) \
X(tok_true, -54) \
X(tok_false, -55) \
X(tok_is, -56) \
X(tok_not, -57) \
X(tok_and, -58) \
X(tok_or, -59) \
X(tok_decorator, '@') \
X(tok_comment, '#') \
X(tok_formatstr, -60)
| 95 | |||
// X-macro table pairing a source spelling with its token enumerator.
//
// Each entry has the form X("spelling", enumerator), where the enumerator is
// one of the names listed in LYTHON_TOKEN. Note that "->" is listed here
// alongside the alphabetic keywords.
//
// NOTE(review): presumably used to populate the tables returned by
// keywords() / keyword_as_string() -- their definitions are not in this file.
#define LYTHON_KEYWORDS(X) \
X("def", tok_def) \
X("->", tok_arrow) \
X("struct", tok_struct) \
X("return", tok_return) \
X("yield", tok_yield) \
X("async", tok_async) \
X("import", tok_import) \
X("from", tok_from) \
X("as", tok_as) \
X("if", tok_if) \
X("elif", tok_elif) \
X("else", tok_else) \
X("try", tok_try) \
X("except", tok_except) \
X("match", tok_match) \
X("raise", tok_raise) \
X("case", tok_case) \
X("while", tok_while) \
X("for", tok_for) \
X("with", tok_with) \
X("class", tok_class) \
X("assert", tok_assert) \
X("global", tok_global) \
X("del", tok_del) \
X("pass", tok_pass) \
X("break", tok_break) \
X("continue", tok_continue) \
X("await", tok_await) \
X("lambda", tok_lambda) \
X("in", tok_in) \
X("finally", tok_finally) \
X("nonlocal", tok_nonlocal) \
X("None", tok_none) \
X("True", tok_true) \
X("False", tok_false) \
X("not", tok_not) \
X("is", tok_is) \
X("or", tok_or) \
X("and", tok_and)
| 136 | |||
| 137 | namespace lython { | ||
| 138 | |||
| 139 | enum TokenType { | ||
| 140 | #define X(name, nb) name = nb, | ||
| 141 | LYTHON_TOKEN(X) | ||
| 142 | #undef X | ||
| 143 | }; | ||
| 144 | |||
// String conversions for a token type value (defined elsewhere).
// NOTE(review): presumably to_human_name gives a user-facing description and
// to_string the token's name/spelling -- confirm against the definitions.
String to_human_name(int8 t);
String to_string(int8 t);
| 147 | |||
| 148 | inline void print(TokenType const& t, std::ostream& out) { out << to_string(t); } | ||
| 149 | |||
// Dictionary from a spelling (String) to its token type.
using ReservedKeyword = Dict<String, TokenType>;
// Dictionary from an integer key to a String.
// NOTE(review): the key is presumably a TokenType value -- confirm.
using KeywordToString = Dict<int, String>;

// Accessors returning mutable references to the two tables (defined elsewhere).
// NOTE(review): presumably built from LYTHON_KEYWORDS -- confirm.
ReservedKeyword& keywords();
KeywordToString& keyword_as_string();

// Defined elsewhere.
// NOTE(review): presumably the length of the longest token name, used for
// aligned printing -- confirm against the definition.
int8 tok_name_size();
| 157 | |||
| 158 | class Token { | ||
| 159 | public: | ||
| 160 | Token(TokenType t, int32 l, int32 c): _type(t), _line(l), _col(c) {} | ||
| 161 | |||
| 162 | Token(int8 t, int32 l, int32 c): _type(t), _line(l), _col(c) {} | ||
| 163 | |||
| 164 | Token(): _type(tok_incorrect), _line(-1), _col(-1) {} | ||
| 165 | |||
| 166 | int8 type() const { return _type; } | ||
| 167 | int32 line() const { return _line; } | ||
| 168 | |||
| 169 | int32 begin_col() const { return _col - int32(identifier().size()); } | ||
| 170 | int32 end_col() const { return _col; } | ||
| 171 | int32 col() const { return _col; } | ||
| 172 | |||
| 173 | int32 end_line() const { return col(); } | ||
| 174 | int32 begin_line() const { return col() - int32(identifier().size()); } | ||
| 175 | |||
| 176 | String& operator_name() { return _identifier; } | ||
| 177 | String const& operator_name() const { return _identifier; } | ||
| 178 | String& identifier() { return _identifier; } | ||
| 179 | String const& identifier() const { return _identifier; } | ||
| 180 | |||
| 181 | float64 as_float() const { return std::stod(_identifier.c_str()); } | ||
| 182 | |||
| 183 | int64 as_integer() const { return std::strtoll(_identifier.c_str(), nullptr, 10); } | ||
| 184 | uint64 as_uint64() const { return std::strtoull(_identifier.c_str(), nullptr, 10); } | ||
| 185 | |||
| 186 | operator bool() const { return _type != tok_eof; } | ||
| 187 | |||
| 188 | int compare(Token const& tok) { | ||
| 189 |
2/2✓ Branch 0 taken 515 times.
✓ Branch 1 taken 20 times.
|
535 | if (_line != tok._line) |
| 190 | 515 | return _line - tok._line; | |
| 191 | 20 | return _col - tok._col; | |
| 192 | } | ||
| 193 | |||
| 194 | bool isbefore(Token const& tok) { return compare(tok) < 0; } | ||
| 195 | bool isafter(Token const& tok) { return compare(tok) > 0; } | ||
| 196 | bool isbetween(Token const& begin, Token const& end) { return isafter(begin) && isbefore(end); } | ||
| 197 | |||
| 198 | bool operator==(Token const& tok) const { | ||
| 199 |
5/6✓ Branch 0 taken 318 times.
✓ Branch 1 taken 2075 times.
✓ Branch 2 taken 18 times.
✓ Branch 3 taken 300 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 18 times.
|
2393 | return (_type == tok._type) && (_line == tok._line) && (_col == tok._col); |
| 200 | } | ||
| 201 | |||
| 202 | private: | ||
| 203 | int8 _type = tok_incorrect; | ||
| 204 | int32 _line = -1; | ||
| 205 | int32 _col = -1; | ||
| 206 | |||
| 207 | // Data | ||
| 208 | String _identifier; | ||
| 209 | |||
| 210 | public: | ||
| 211 | // print all tokens and their info | ||
| 212 | std::ostream& debug_print(std::ostream& out) const; | ||
| 213 | |||
| 214 | std::ostream& print(std::ostream& out) const; | ||
| 215 | }; | ||
| 216 | |||
| 217 | inline Token& dummy() { | ||
| 218 |
3/4✓ Branch 0 taken 1 times.
✓ Branch 1 taken 3 times.
✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
|
4 | static Token dy = Token(tok_incorrect, 0, 0); |
| 219 | 4 | return dy; | |
| 220 | } | ||
| 221 | |||
| 222 | // Make something that look like clang's error underlining. | ||
| 223 | // offset is used if you need to print multiple underline on a same line | ||
| 224 | inline std::ostream& underline(std::ostream& out, Token& t, int32 offset = 0) { | ||
| 225 | int32 start = t.begin_line() - offset; | ||
| 226 | if (start > 0) { | ||
| 227 | out << std::string(uint32(start), ' '); | ||
| 228 | |||
| 229 | if (t.identifier().size() > 0) | ||
| 230 | out << std::string(t.identifier().size(), '~'); | ||
| 231 | else | ||
| 232 | out << "~"; | ||
| 233 | } | ||
| 234 | |||
| 235 | return out; | ||
| 236 | } | ||
| 237 | |||
| 238 | } // namespace lython | ||
| 239 |