Line | Branch | Exec | Source |
---|---|---|---|
1 | #pragma once | ||
2 | |||
3 | #include <algorithm> | ||
4 | #include <ostream> | ||
5 | #include <string> | ||
6 | #include <unordered_map> | ||
7 | #include <vector> | ||
8 | |||
9 | #include "dtypes.h" | ||
10 | #include "logging/logging.h" | ||
11 | |||
/*
 * Token table used by the lexer.
 *
 * tok_incorrect is used when the input is known to be wrong
 * but we still want to parse as much of it as we can.
 *
 * tok_incorrect is also the first token and tok_eof the last one.
 *
 * Possible improvement: add an error policy argument.
 *
 * Literal forms that would be nice to have:
 *      Hex: 0x000A9F
 *      Bin: bx010101
 */

// NOTE(review): presumably the number of spaces per indentation level --
// confirm against the lexer.
#define LYTHON_INDENT 4

// X-macro listing every token as X(name, value).
// Tokens made of a single character use that character as their value,
// every other token gets a unique negative value (-1 .. -60).
#define LYTHON_TOKEN(X) \
    X(tok_identifier, -1) \
    X(tok_float,      -2) \
    X(tok_string,     -3) \
    X(tok_int,        -4) \
    X(tok_newline,    -5) \
    X(tok_indent,     -6) \
    X(tok_desindent,  -7) \
    X(tok_incorrect,  -8) \
    X(tok_eof,        -9) \
    X(tok_def,        -10) \
    X(tok_docstring,  -11) \
    X(tok_arrow,      -12) \
    X(tok_struct,     -13) \
    X(tok_return,     -14) \
    X(tok_yield,      -15) \
    X(tok_async,      -16) \
    X(tok_operator,   -17) \
    X(tok_import,     -18) \
    X(tok_match,      -19) \
    X(tok_as,         -20) \
    X(tok_from,       -21) \
    X(tok_if,         -22) \
    X(tok_elif,       -23) \
    X(tok_else,       -24) \
    X(tok_try,        -25) \
    X(tok_except,     -26) \
    X(tok_raise,      -27) \
    X(tok_case,       -28) \
    X(tok_for,        -29) \
    X(tok_while,      -30) \
    X(tok_with,       -31) \
    X(tok_class,      -32) \
    X(tok_assert,     -33) \
    X(tok_global,     -34) \
    X(tok_del,        -35) \
    X(tok_pass,       -36) \
    X(tok_break,      -37) \
    X(tok_continue,   -38) \
    X(tok_parens,     '(') \
    X(tok_square,     '[') \
    X(tok_dot,        '.') \
    X(tok_assign,     '=') \
    X(tok_curly,      '{') \
    X(tok_star,       '*') \
    X(tok_augassign,  -39) \
    X(tok_annassign,  -40) \
    X(tok_walrus,     -41) \
    X(tok_boolop,     -42) \
    X(tok_binaryop,   -43) \
    X(tok_compareop,  -44) \
    X(tok_unaryop,    -45) \
    X(tok_await,      -46) \
    X(tok_lambda,     -47) \
    X(tok_fstring,    -48) \
    X(tok_yield_from, -49) \
    X(tok_in,         -50) \
    X(tok_finally,    -51) \
    X(tok_nonlocal,   -52) \
    X(tok_comma,      ',') \
    X(tok_none,       -53) \
    X(tok_true,       -54) \
    X(tok_false,      -55) \
    X(tok_is,         -56) \
    X(tok_not,        -57) \
    X(tok_and,        -58) \
    X(tok_or,         -59) \
    X(tok_decorator,  '@') \
    X(tok_comment,    '#') \
    X(tok_formatstr,  -60)
95 | |||
// X-macro listing the reserved spellings as X(spelling, token).
// "->" is part of the list even though it is not a word.
#define LYTHON_KEYWORDS(X) \
    X("def",      tok_def) \
    X("->",       tok_arrow) \
    X("struct",   tok_struct) \
    X("return",   tok_return) \
    X("yield",    tok_yield) \
    X("async",    tok_async) \
    X("import",   tok_import) \
    X("from",     tok_from) \
    X("as",       tok_as) \
    X("if",       tok_if) \
    X("elif",     tok_elif) \
    X("else",     tok_else) \
    X("try",      tok_try) \
    X("except",   tok_except) \
    X("match",    tok_match) \
    X("raise",    tok_raise) \
    X("case",     tok_case) \
    X("while",    tok_while) \
    X("for",      tok_for) \
    X("with",     tok_with) \
    X("class",    tok_class) \
    X("assert",   tok_assert) \
    X("global",   tok_global) \
    X("del",      tok_del) \
    X("pass",     tok_pass) \
    X("break",    tok_break) \
    X("continue", tok_continue) \
    X("await",    tok_await) \
    X("lambda",   tok_lambda) \
    X("in",       tok_in) \
    X("finally",  tok_finally) \
    X("nonlocal", tok_nonlocal) \
    X("None",     tok_none) \
    X("True",     tok_true) \
    X("False",    tok_false) \
    X("not",      tok_not) \
    X("is",       tok_is) \
    X("or",       tok_or) \
    X("and",      tok_and)
136 | |||
137 | namespace lython { | ||
138 | |||
139 | enum TokenType { | ||
140 | #define X(name, nb) name = nb, | ||
141 | LYTHON_TOKEN(X) | ||
142 | #undef X | ||
143 | }; | ||
144 | |||
145 | String to_human_name(int8 t); | ||
146 | String to_string(int8 t); | ||
147 | |||
148 | inline void print(TokenType const& t, std::ostream& out) { out << to_string(t); } | ||
149 | |||
150 | using ReservedKeyword = Dict<String, TokenType>; | ||
151 | using KeywordToString = Dict<int, String>; | ||
152 | |||
153 | ReservedKeyword& keywords(); | ||
154 | KeywordToString& keyword_as_string(); | ||
155 | |||
156 | int8 tok_name_size(); | ||
157 | |||
158 | class Token { | ||
159 | public: | ||
160 | Token(TokenType t, int32 l, int32 c): _type(t), _line(l), _col(c) {} | ||
161 | |||
162 | Token(int8 t, int32 l, int32 c): _type(t), _line(l), _col(c) {} | ||
163 | |||
164 | Token(): _type(tok_incorrect), _line(-1), _col(-1) {} | ||
165 | |||
166 | int8 type() const { return _type; } | ||
167 | int32 line() const { return _line; } | ||
168 | |||
169 | int32 begin_col() const { return _col - int32(identifier().size()); } | ||
170 | int32 end_col() const { return _col; } | ||
171 | int32 col() const { return _col; } | ||
172 | |||
173 | int32 end_line() const { return col(); } | ||
174 | int32 begin_line() const { return col() - int32(identifier().size()); } | ||
175 | |||
176 | String& operator_name() { return _identifier; } | ||
177 | String const& operator_name() const { return _identifier; } | ||
178 | String& identifier() { return _identifier; } | ||
179 | String const& identifier() const { return _identifier; } | ||
180 | |||
181 | float64 as_float() const { return std::stod(_identifier.c_str()); } | ||
182 | |||
183 | int64 as_integer() const { return std::strtoll(_identifier.c_str(), nullptr, 10); } | ||
184 | uint64 as_uint64() const { return std::strtoull(_identifier.c_str(), nullptr, 10); } | ||
185 | |||
186 | operator bool() const { return _type != tok_eof; } | ||
187 | |||
188 | int compare(Token const& tok) { | ||
189 |
2/2✓ Branch 0 taken 515 times.
✓ Branch 1 taken 20 times.
|
535 | if (_line != tok._line) |
190 | 515 | return _line - tok._line; | |
191 | 20 | return _col - tok._col; | |
192 | } | ||
193 | |||
194 | bool isbefore(Token const& tok) { return compare(tok) < 0; } | ||
195 | bool isafter(Token const& tok) { return compare(tok) > 0; } | ||
196 | bool isbetween(Token const& begin, Token const& end) { return isafter(begin) && isbefore(end); } | ||
197 | |||
198 | bool operator==(Token const& tok) const { | ||
199 |
5/6✓ Branch 0 taken 318 times.
✓ Branch 1 taken 2075 times.
✓ Branch 2 taken 18 times.
✓ Branch 3 taken 300 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 18 times.
|
2393 | return (_type == tok._type) && (_line == tok._line) && (_col == tok._col); |
200 | } | ||
201 | |||
202 | private: | ||
203 | int8 _type = tok_incorrect; | ||
204 | int32 _line = -1; | ||
205 | int32 _col = -1; | ||
206 | |||
207 | // Data | ||
208 | String _identifier; | ||
209 | |||
210 | public: | ||
211 | // print all tokens and their info | ||
212 | std::ostream& debug_print(std::ostream& out) const; | ||
213 | |||
214 | std::ostream& print(std::ostream& out) const; | ||
215 | }; | ||
216 | |||
217 | inline Token& dummy() { | ||
218 |
3/4✓ Branch 0 taken 1 times.
✓ Branch 1 taken 3 times.
✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
|
4 | static Token dy = Token(tok_incorrect, 0, 0); |
219 | 4 | return dy; | |
220 | } | ||
221 | |||
222 | // Make something that look like clang's error underlining. | ||
223 | // offset is used if you need to print multiple underline on a same line | ||
224 | inline std::ostream& underline(std::ostream& out, Token& t, int32 offset = 0) { | ||
225 | int32 start = t.begin_line() - offset; | ||
226 | if (start > 0) { | ||
227 | out << std::string(uint32(start), ' '); | ||
228 | |||
229 | if (t.identifier().size() > 0) | ||
230 | out << std::string(t.identifier().size(), '~'); | ||
231 | else | ||
232 | out << "~"; | ||
233 | } | ||
234 | |||
235 | return out; | ||
236 | } | ||
237 | |||
238 | } // namespace lython | ||
239 |