Line | Branch | Exec | Source |
---|---|---|---|
1 | #pragma once | ||
2 | |||
3 | #include <cctype> | ||
4 | #include <ostream> | ||
5 | |||
6 | #include "ast/nodes.h" | ||
7 | #include "lexer/buffer.h" | ||
8 | #include "lexer/token.h" | ||
9 | #include "utilities/trie.h" | ||
10 | |||
11 | #include "dtypes.h" | ||
12 | |||
13 | #include <iostream> | ||
14 | |||
15 | /* | ||
16 | * Lexer is a stream of tokens | ||
17 | * | ||
18 | * TODO: DocString support | ||
19 | */ | ||
20 | |||
21 | namespace lython { | ||
22 | |||
/*
 * Membership test: in(e, a, b, c) is true when `e` compares equal (operator==)
 * to at least one of the candidates that follow it.
 */

// Base case: exactly one candidate.
template <typename T, typename N>
bool in(T const& e, N const& v) {
    return e == v;
}

// Recursive case: compare against the first candidate, then against the rest.
// `||` short-circuits, so candidates after the first match are never compared.
template <typename T, typename N, typename... Args>
bool in(T const& e, N const& v, Args... args) {
    return e == v || in(e, args...);
}

// Fallback overload. The two overloads above are more specialized for any call
// with at least one candidate, so this one is only selected for `in(e)`, i.e.
// an empty candidate list. Nothing can be a member of an empty list, so the
// answer is false; the `sizeof...` guard stops the call below from recursing
// into this same overload forever.
template <typename T, typename... Args>
bool in(T const& e, Args... args) {
    return sizeof...(Args) != 0 && in(e, args...);
}
37 | |||
38 | struct OpConfig { | ||
39 | int precedence = -1; | ||
40 | bool left_associative = true; | ||
41 | TokenType type = TokenType::tok_eof; | ||
42 | BinaryOperator binarykind = BinaryOperator::None; | ||
43 | UnaryOperator unarykind = UnaryOperator::None; | ||
44 | BoolOperator boolkind = BoolOperator::None; | ||
45 | CmpOperator cmpkind = CmpOperator::None; | ||
46 | |||
47 | void print(std::ostream& out) const { | ||
48 | ✗ | out << to_string(type) << "(pred: " << precedence << ") " | |
49 | ✗ | << "(binary: " << int(binarykind) << ") " | |
50 | ✗ | << "(unary: " << int(unarykind) << ") " | |
51 | ✗ | << "(bool: " << int(boolkind) << ") " | |
52 | ✗ | << "(cmp: " << int(cmpkind) << ") "; | |
53 | ✗ | } | |
54 | }; | ||
55 | |||
56 | Dict<String, OpConfig> const& default_precedence(); | ||
57 | |||
58 | class LexerOperators { | ||
59 | public: | ||
60 | LexerOperators() { | ||
61 |
2/2✓ Branch 4 taken 41 times.
✓ Branch 5 taken 1 times.
|
42 | for (auto& c: _precedence_table) { |
62 |
1/2✓ Branch 2 taken 41 times.
✗ Branch 3 not taken.
|
41 | _operators.insert(c.first); |
63 | } | ||
64 | 1 | } | |
65 | |||
66 | Trie<128> const* match(int c) const { return _operators.trie().matching(c); } | ||
67 | |||
68 | Dict<String, OpConfig> const& precedence_table() const { return _precedence_table; } | ||
69 | |||
70 | TokenType token_type(String const& str) const { return _precedence_table.at(str).type; } | ||
71 | |||
72 | private: | ||
73 | CoWTrie<128> _operators; | ||
74 | Dict<String, OpConfig> _precedence_table = default_precedence(); | ||
75 | }; | ||
76 | |||
77 | class AbstractLexer { | ||
78 | public: | ||
79 | virtual ~AbstractLexer() {} | ||
80 | |||
81 | virtual Token const& next_token() = 0; | ||
82 | |||
83 | virtual Token const& peek_token() = 0; | ||
84 | |||
85 | virtual Token const& token() = 0; | ||
86 | |||
87 | virtual char peekc() const { return '\0'; } | ||
88 | |||
89 | virtual const String& file_name() = 0; | ||
90 | |||
91 | virtual int get_mode() const { return 0; } | ||
92 | virtual void set_mode(int mode) {} | ||
93 | |||
94 | // print tokens with their info | ||
95 | ::std::ostream& debug_print(::std::ostream& out); | ||
96 | |||
97 | // print out tokens as they were inputed | ||
98 | ::std::ostream& print(::std::ostream& out); | ||
99 | |||
100 | // extract a token stream into a token vector | ||
101 | Array<Token> extract_token() { | ||
102 | Array<Token> v; | ||
103 | |||
104 | Token t = next_token(); | ||
105 | do { | ||
106 | v.push_back(t); | ||
107 | } while ((t = next_token())); | ||
108 | |||
109 | v.push_back(t); // push eof token | ||
110 | return v; | ||
111 | } | ||
112 | }; | ||
113 | |||
114 | class ReplayLexer: public AbstractLexer { | ||
115 | public: | ||
116 | ReplayLexer(Array<Token>& tokens): tokens(tokens) { | ||
117 | Token& last = tokens[tokens.size() - 1]; | ||
118 | if (last.type() != tok_eof) { | ||
119 | tokens.emplace_back(tok_eof, 0, 0); | ||
120 | } | ||
121 | } | ||
122 | |||
123 | Token const& next_token() override final { | ||
124 | if (i + 1 < tokens.size()) | ||
125 | i += 1; | ||
126 | |||
127 | return tokens[i]; | ||
128 | } | ||
129 | |||
130 | Token const& peek_token() override final { | ||
131 | auto n = i + 1; | ||
132 | |||
133 | if (n >= tokens.size()) | ||
134 | n = i; | ||
135 | |||
136 | return tokens[n]; | ||
137 | } | ||
138 | |||
139 | Token const& token() override final { return tokens[i]; } | ||
140 | |||
141 | const String& file_name() override { | ||
142 | static String fakefile = "<replay buffer>"; | ||
143 | return fakefile; | ||
144 | } | ||
145 | |||
146 | ~ReplayLexer() {} | ||
147 | |||
148 | private: | ||
149 | ::std::size_t i = 0; | ||
150 | Array<Token>& tokens; | ||
151 | }; | ||
152 | |||
// Modes the lexer can operate in.
// NOTE(review): presumably these are the values exchanged through
// get_mode()/set_mode(int) - confirm against the implementation.
enum class LexerMode
{
    Default   = 0,
    Character = 1,
};
157 | |||
158 | class Lexer: public AbstractLexer { | ||
159 | public: | ||
160 | Lexer(AbstractBuffer& reader): | ||
161 |
2/4✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
|
1 | AbstractLexer(), _reader(reader), _cindent(indent()), _oindent(indent()) {} |
162 | |||
163 | ~Lexer() {} | ||
164 | |||
165 | Token const& token() override final { | ||
166 |
2/2✓ Branch 0 taken 744 times.
✓ Branch 1 taken 107709 times.
|
108453 | if (_count == 0) { |
167 | 744 | return next_token(); | |
168 | } | ||
169 | 107709 | return _token; | |
170 | } | ||
171 | |||
172 | int get_mode() const override final; | ||
173 | void set_mode(int mode) override final; | ||
174 | Token const& format_tokenizer() ; | ||
175 | Token const& next_token() override final; | ||
176 | Token const& peek_token() override final { | ||
177 | // we can only peek ahead once | ||
178 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 154 times.
|
154 | if (_buffer.size() > 0) |
179 | ✗ | return _buffer[_buffer.size() - 1]; | |
180 | |||
181 | // Save current token a get next | ||
182 |
1/1✓ Branch 1 taken 154 times.
|
154 | Token current_token = _token; |
183 |
2/2✓ Branch 1 taken 154 times.
✓ Branch 4 taken 154 times.
|
154 | _buffer.push_back(next_token()); |
184 |
1/1✓ Branch 1 taken 154 times.
|
154 | _token = current_token; |
185 | 154 | return _buffer[_buffer.size() - 1]; | |
186 | 154 | } | |
187 | |||
188 | Token const& make_token(int8 t) { | ||
189 | 7174 | _token = Token(t, line(), col()); | |
190 | 7174 | return _token; | |
191 | } | ||
192 | |||
193 | Token const& make_token(int8 t, const String& identifier) { | ||
194 | 5166 | _token = Token(t, line(), col()); | |
195 | 5166 | _token.identifier() = identifier; | |
196 | 5166 | return _token; | |
197 | } | ||
198 | |||
199 | const String& file_name() override { return _reader.file_name(); } | ||
200 | char peekc() const { return _reader.peek(); } | ||
201 | |||
202 | private: | ||
203 | int _count = 0; | ||
204 | AbstractBuffer& _reader; | ||
205 | Token _token{dummy()}; | ||
206 | int32 _cindent; | ||
207 | int32 _oindent; | ||
208 | LexerOperators _operators; | ||
209 | Array<Token> _buffer; | ||
210 | bool _fmtstr = false; | ||
211 | char _quote; | ||
212 | int _quotes = 0; | ||
213 | |||
214 | // shortcuts | ||
215 | |||
216 | int32 line() { return _reader.line(); } | ||
217 | int32 col() { return _reader.col(); } | ||
218 | int32 indent() { return _reader.indent(); } | ||
219 | void consume() { return _reader.consume(); } | ||
220 | char peek() { return _reader.peek(); } | ||
221 | bool empty_line() { return _reader.empty_line(); } | ||
222 | |||
223 | // state | ||
224 | bool desindent_for_comment = false; | ||
225 | |||
226 | char nextc() { | ||
227 | 23828 | _reader.consume(); | |
228 | 23828 | return _reader.peek(); | |
229 | } | ||
230 | |||
231 | // what characters are allowed in identifiers | ||
232 | bool is_identifier(char c) { | ||
233 |
7/10✓ Branch 0 taken 4190 times.
✓ Branch 1 taken 6235 times.
✓ Branch 2 taken 4076 times.
✓ Branch 3 taken 114 times.
✓ Branch 4 taken 4076 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 4076 times.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 4076 times.
|
10425 | if (::std::isalnum(c) || c == '_' || c == '?' || c == '!' || c == '-') |
234 | 6349 | return true; | |
235 | 4076 | return false; | |
236 | } | ||
237 | }; | ||
238 | |||
239 | } // namespace lython | ||
240 |