GCC Code Coverage Report


Directory: ./
File: src/lexer/token.h
Date: 2023-04-27 00:55:30
Exec Total Coverage
Lines: 6 6 100.0%
Functions: 20 20 100.0%
Branches: 10 12 83.3%

Line Branch Exec Source
1 #pragma once
2
3 #include <algorithm>
4 #include <ostream>
5 #include <string>
6 #include <unordered_map>
7 #include <vector>
8
9 #include "dtypes.h"
10 #include "logging/logging.h"
11
12 /*
13 * tok_incorrect is used when the input is known to be wrong
14 * but we want to parse as much of it as we can anyway.
15 *
16 * tok_incorrect is also the first token, and tok_eof is the last.
17 *
18 * TODO: an error-policy argument could be added.
19 *
20 * I think these literal forms would be nice to have:
21 * Hex: 0x000A9F
22 * Bin: bx010101
23 */
// LYTHON_INDENT: presumably the number of spaces per indentation level -- TODO confirm
// against the lexer; it is not used anywhere in this header.
24 #define LYTHON_INDENT 4
// LYTHON_TOKEN is an X-macro: it applies the caller-supplied macro X to every
// (token name, numeric value) pair. In this header it is expanded once, inside
// `enum TokenType`, with X(name, nb) defined as `name = nb,`.
//
// Value scheme visible in the list:
//  * tokens that are not a single character get a unique negative value (-1 .. -60);
//  * single-character tokens use their character literal as the value
//    ('(', '[', '.', '=', '{', '*', ',', '@', '#').
// The negative numbers follow the order in which entries were added, not the order of
// the list (e.g. tok_augassign = -39 comes after the character tokens), so pick the
// next unused negative value when adding an entry.
// NOTE(review): Token stores the value in an `int8`; all values here lie in [-60, 123],
// which fits if int8 is an 8-bit signed integer -- confirm in dtypes.h.
25 #define LYTHON_TOKEN(X) \
26 X(tok_identifier, -1) \
27 X(tok_float, -2) \
28 X(tok_string, -3) \
29 X(tok_int, -4) \
30 X(tok_newline, -5) \
31 X(tok_indent, -6) \
32 X(tok_desindent, -7) \
33 X(tok_incorrect, -8) \
34 X(tok_eof, -9) \
35 X(tok_def, -10) \
36 X(tok_docstring, -11) \
37 X(tok_arrow, -12) \
38 X(tok_struct, -13) \
39 X(tok_return, -14) \
40 X(tok_yield, -15) \
41 X(tok_async, -16) \
42 X(tok_operator, -17) \
43 X(tok_import, -18) \
44 X(tok_match, -19) \
45 X(tok_as, -20) \
46 X(tok_from, -21) \
47 X(tok_if, -22) \
48 X(tok_elif, -23) \
49 X(tok_else, -24) \
50 X(tok_try, -25) \
51 X(tok_except, -26) \
52 X(tok_raise, -27) \
53 X(tok_case, -28) \
54 X(tok_for, -29) \
55 X(tok_while, -30) \
56 X(tok_with, -31) \
57 X(tok_class, -32) \
58 X(tok_assert, -33) \
59 X(tok_global, -34) \
60 X(tok_del, -35) \
61 X(tok_pass, -36) \
62 X(tok_break, -37) \
63 X(tok_continue, -38) \
64 X(tok_parens, '(') \
65 X(tok_square, '[') \
66 X(tok_dot, '.') \
67 X(tok_assign, '=') \
68 X(tok_curly, '{') \
69 X(tok_star, '*') \
70 X(tok_augassign, -39) \
71 X(tok_annassign, -40) \
72 X(tok_walrus, -41) \
73 X(tok_boolop, -42) \
74 X(tok_binaryop, -43) \
75 X(tok_compareop, -44) \
76 X(tok_unaryop, -45) \
77 X(tok_await, -46) \
78 X(tok_lambda, -47) \
79 X(tok_fstring, -48) \
80 X(tok_yield_from, -49) \
81 X(tok_in, -50) \
82 X(tok_finally, -51) \
83 X(tok_nonlocal, -52) \
84 X(tok_comma, ',') \
85 X(tok_none, -53) \
86 X(tok_true, -54) \
87 X(tok_false, -55) \
88 X(tok_is, -56) \
89 X(tok_not, -57) \
90 X(tok_and, -58) \
91 X(tok_or, -59) \
92 X(tok_decorator, '@') \
93 X(tok_comment, '#') \
94 X(tok_formatstr, -60)
95
// LYTHON_KEYWORDS is an X-macro that applies X to every (source spelling, token) pair.
// It is only defined here; this header never expands it.
// NOTE(review): presumably expanded in the implementation file to fill the tables
// returned by keywords() / keyword_as_string() -- confirm there.
// Besides alphabetic keywords the list also contains the "->" spelling (tok_arrow).
// Not every token of LYTHON_TOKEN has a spelling here (for example tok_yield_from,
// tok_operator and the single-character tokens have none).
96 #define LYTHON_KEYWORDS(X) \
97 X("def", tok_def) \
98 X("->", tok_arrow) \
99 X("struct", tok_struct) \
100 X("return", tok_return) \
101 X("yield", tok_yield) \
102 X("async", tok_async) \
103 X("import", tok_import) \
104 X("from", tok_from) \
105 X("as", tok_as) \
106 X("if", tok_if) \
107 X("elif", tok_elif) \
108 X("else", tok_else) \
109 X("try", tok_try) \
110 X("except", tok_except) \
111 X("match", tok_match) \
112 X("raise", tok_raise) \
113 X("case", tok_case) \
114 X("while", tok_while) \
115 X("for", tok_for) \
116 X("with", tok_with) \
117 X("class", tok_class) \
118 X("assert", tok_assert) \
119 X("global", tok_global) \
120 X("del", tok_del) \
121 X("pass", tok_pass) \
122 X("break", tok_break) \
123 X("continue", tok_continue) \
124 X("await", tok_await) \
125 X("lambda", tok_lambda) \
126 X("in", tok_in) \
127 X("finally", tok_finally) \
128 X("nonlocal", tok_nonlocal) \
129 X("None", tok_none) \
130 X("True", tok_true) \
131 X("False", tok_false) \
132 X("not", tok_not) \
133 X("is", tok_is) \
134 X("or", tok_or) \
135 X("and", tok_and)
136
137 namespace lython {
138
// Token kinds. The enumerators are generated from LYTHON_TOKEN: each X(name, nb)
// entry expands to `name = nb,`, and X is undefined again right after the expansion.
// This is an unscoped enum, so enumerators convert implicitly to integer types; the
// inline print() helper in this header relies on that when it calls to_string(int8).
139 enum TokenType {
140 #define X(name, nb) name = nb,
141 LYTHON_TOKEN(X)
142 #undef X
143 };
144
// String conversions for a token type value; both are only declared here and are
// defined elsewhere. NOTE(review): the difference between the "human name" and the
// plain string form is not visible from this header -- see the implementation.
145 String to_human_name(int8 t);
146 String to_string(int8 t);
147
// Writes to_string(t) to `out`. The TokenType argument is implicitly converted to the
// int8 parameter of to_string().
148 inline void print(TokenType const& t, std::ostream& out) { out << to_string(t); }
149
// Lookup-table aliases: String -> TokenType and int -> String.
// NOTE(review): Dict is a project type declared outside this header (presumably a
// hash map, given the <unordered_map> include) -- confirm in dtypes.h.
150 using ReservedKeyword = Dict<String, TokenType>;
151 using KeywordToString = Dict<int, String>;
152
// Accessors returning mutable references to the two tables; defined elsewhere.
// NOTE(review): presumably populated from LYTHON_KEYWORDS -- confirm in the
// implementation file.
153 ReservedKeyword& keywords();
154 KeywordToString& keyword_as_string();
155
// Declared only; what "size" measures is not visible from this header.
156 int8 tok_name_size();
157
// A lexed token: a type value (a TokenType or a raw int8), a line, a column and an
// optional text payload (_identifier).
// The column is treated as the END of the token: begin_col() subtracts the payload
// length from it.
158 class Token {
159 public:
// Builds a token of type `t` at line `l`, column `c`; the payload starts empty.
160 Token(TokenType t, int32 l, int32 c): _type(t), _line(l), _col(c) {}
161
// Same as above, for a type value that is already a raw int8.
162 Token(int8 t, int32 l, int32 c): _type(t), _line(l), _col(c) {}
163
// Default token: tok_incorrect at line -1, column -1.
164 Token(): _type(tok_incorrect), _line(-1), _col(-1) {}
165
166 int8 type() const { return _type; }
167 int32 line() const { return _line; }
168
// begin_col() = stored column minus payload length; end_col() and col() both return
// the stored column unchanged.
169 int32 begin_col() const { return _col - int32(identifier().size()); }
170 int32 end_col() const { return _col; }
171 int32 col() const { return _col; }
172
// NOTE(review): despite their names these two return COLUMN values -- end_line() is
// identical to end_col() and begin_line() to begin_col(). underline() in this header
// uses begin_line() as a horizontal offset, so renaming or "fixing" them must be
// coordinated with callers.
173 int32 end_line() const { return col(); }
174 int32 begin_line() const { return col() - int32(identifier().size()); }
175
// operator_name() and identifier() are two names for the same payload string.
176 String& operator_name() { return _identifier; }
177 String const& operator_name() const { return _identifier; }
178 String& identifier() { return _identifier; }
179 String const& identifier() const { return _identifier; }
180
// Parses the payload as a double with std::stod, which throws std::invalid_argument
// or std::out_of_range when the text cannot be converted.
181 float64 as_float() const { return std::stod(_identifier.c_str()); }
182
// Base-10 parse of the payload. The end pointer argument is nullptr, so a payload that
// is not a number cannot be told apart from "0" (strtoll/strtoull return 0 when no
// conversion is performed).
183 int64 as_integer() const { return std::strtoll(_identifier.c_str(), nullptr, 10); }
184 uint64 as_uint64() const { return std::strtoull(_identifier.c_str(), nullptr, 10); }
185
// True for every token except tok_eof.
// NOTE(review): the conversion is not `explicit`, so a Token also converts implicitly
// in arithmetic/integer contexts.
186 operator bool() const { return _type != tok_eof; }
187
// Orders two tokens by position: returns the line difference when the lines differ,
// otherwise the column difference. Negative means *this comes before `tok`, zero means
// the same position, positive means after. The token type is not considered.
// NOTE(review): not const-qualified, so compare()/isbefore()/isafter()/isbetween()
// cannot be called on a const Token.
188 int compare(Token const& tok) {
189
2/2
✓ Branch 0 taken 515 times.
✓ Branch 1 taken 20 times.
535 if (_line != tok._line)
190 515 return _line - tok._line;
191 20 return _col - tok._col;
192 }
193
// Strict position tests built on compare(); isbetween() excludes both end points.
194 bool isbefore(Token const& tok) { return compare(tok) < 0; }
195 bool isafter(Token const& tok) { return compare(tok) > 0; }
196 bool isbetween(Token const& begin, Token const& end) { return isafter(begin) && isbefore(end); }
197
// Equality compares type, line and column only; the payload string is NOT compared.
198 bool operator==(Token const& tok) const {
199
5/6
✓ Branch 0 taken 318 times.
✓ Branch 1 taken 2075 times.
✓ Branch 2 taken 18 times.
✓ Branch 3 taken 300 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 18 times.
2393 return (_type == tok._type) && (_line == tok._line) && (_col == tok._col);
200 }
201
202 private:
// In-class defaults match the default constructor (tok_incorrect, -1, -1).
203 int8 _type = tok_incorrect;
204 int32 _line = -1;
205 int32 _col = -1;
206
207 // Data
// Text payload, exposed through identifier() / operator_name().
208 String _identifier;
209
210 public:
211 // print all tokens and their info
212 std::ostream& debug_print(std::ostream& out) const;
213
// Declared only; the output format is defined in the implementation file.
214 std::ostream& print(std::ostream& out) const;
215 };
216
// Returns a reference to a single function-local static Token constructed as
// Token(tok_incorrect, 0, 0). Every caller receives the same object, and the reference
// is mutable, so a modification made through it is visible to all later callers.
217 inline Token& dummy() {
218
3/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 3 times.
✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
4 static Token dy = Token(tok_incorrect, 0, 0);
219 4 return dy;
220 }
221
222 // Make something that looks like clang's error underlining.
223 // offset is used if you need to print multiple underlines on the same line
// Writes `start` spaces followed by one '~' per payload character (a single '~' when
// the payload is empty) to `out`, where start = t.begin_line() - offset, and returns
// `out`. Token::begin_line() returns a column value (stored column minus payload
// length), so `start` is a horizontal position.
// NOTE(review): nothing at all is written when start <= 0, so a token whose computed
// start is column 0 gets no underline -- confirm whether `start >= 0` was intended.
// NOTE(review): `t` is taken by non-const reference although only read here.
224 inline std::ostream& underline(std::ostream& out, Token& t, int32 offset = 0) {
225 int32 start = t.begin_line() - offset;
226 if (start > 0) {
// Left padding up to the first character of the token.
227 out << std::string(uint32(start), ' ');
228
229 if (t.identifier().size() > 0)
230 out << std::string(t.identifier().size(), '~');
231 else
232 out << "~";
233 }
234
235 return out;
236 }
237
238 } // namespace lython
239