GCC Code Coverage Report


Directory: ./
File: src/lexer/lexer.h
Date: 2023-04-27 00:55:30
Exec Total Coverage
Lines: 25 32 78.1%
Functions: 23 27 85.2%
Branches: 23 64 35.9%

Line Branch Exec Source
1 #pragma once
2
3 #include <cctype>
4 #include <ostream>
5
6 #include "ast/nodes.h"
7 #include "lexer/buffer.h"
8 #include "lexer/token.h"
9 #include "utilities/trie.h"
10
11 #include "dtypes.h"
12
13 #include <iostream>
14
15 /*
16 * Lexer is a stream of tokens
17 *
18 * TODO: DocString support
19 */
20
21 namespace lython {
22
23 template <typename T, typename N>
24 bool in(T const& e, N const& v) {
25 22732 return e == v;
26 }
27
28 template <typename T, typename N, typename... Args>
29 bool in(T const& e, N const& v, Args... args) {
30
4/4
✓ Branch 0 taken 41104 times.
✓ Branch 1 taken 3000 times.
✓ Branch 3 taken 4644 times.
✓ Branch 4 taken 36460 times.
88208 return e == v || in(e, args...);
31 }
32
33 template <typename T, typename... Args>
34 bool in(T const& e, Args... args) {
35 return in(e, args...);
36 }
37
38 struct OpConfig {
39 int precedence = -1;
40 bool left_associative = true;
41 TokenType type = TokenType::tok_eof;
42 BinaryOperator binarykind = BinaryOperator::None;
43 UnaryOperator unarykind = UnaryOperator::None;
44 BoolOperator boolkind = BoolOperator::None;
45 CmpOperator cmpkind = CmpOperator::None;
46
47 void print(std::ostream& out) const {
48 out << to_string(type) << "(pred: " << precedence << ") "
49 << "(binary: " << int(binarykind) << ") "
50 << "(unary: " << int(unarykind) << ") "
51 << "(bool: " << int(boolkind) << ") "
52 << "(cmp: " << int(cmpkind) << ") ";
53 }
54 };
55
56 Dict<String, OpConfig> const& default_precedence();
57
58 class LexerOperators {
59 public:
60 LexerOperators() {
61
2/2
✓ Branch 4 taken 41 times.
✓ Branch 5 taken 1 times.
42 for (auto& c: _precedence_table) {
62
1/2
✓ Branch 2 taken 41 times.
✗ Branch 3 not taken.
41 _operators.insert(c.first);
63 }
64 1 }
65
66 Trie<128> const* match(int c) const { return _operators.trie().matching(c); }
67
68 Dict<String, OpConfig> const& precedence_table() const { return _precedence_table; }
69
70 TokenType token_type(String const& str) const { return _precedence_table.at(str).type; }
71
72 private:
73 CoWTrie<128> _operators;
74 Dict<String, OpConfig> _precedence_table = default_precedence();
75 };
76
77 class AbstractLexer {
78 public:
79 virtual ~AbstractLexer() {}
80
81 virtual Token const& next_token() = 0;
82
83 virtual Token const& peek_token() = 0;
84
85 virtual Token const& token() = 0;
86
87 virtual char peekc() const { return '\0'; }
88
89 virtual const String& file_name() = 0;
90
91 virtual int get_mode() const { return 0; }
92 virtual void set_mode(int mode) {}
93
94 // print tokens with their info
95 ::std::ostream& debug_print(::std::ostream& out);
96
97 // print out tokens as they were inputed
98 ::std::ostream& print(::std::ostream& out);
99
100 // extract a token stream into a token vector
101 Array<Token> extract_token() {
102 Array<Token> v;
103
104 Token t = next_token();
105 do {
106 v.push_back(t);
107 } while ((t = next_token()));
108
109 v.push_back(t); // push eof token
110 return v;
111 }
112 };
113
114 class ReplayLexer: public AbstractLexer {
115 public:
116 ReplayLexer(Array<Token>& tokens): tokens(tokens) {
117 Token& last = tokens[tokens.size() - 1];
118 if (last.type() != tok_eof) {
119 tokens.emplace_back(tok_eof, 0, 0);
120 }
121 }
122
123 Token const& next_token() override final {
124 if (i + 1 < tokens.size())
125 i += 1;
126
127 return tokens[i];
128 }
129
130 Token const& peek_token() override final {
131 auto n = i + 1;
132
133 if (n >= tokens.size())
134 n = i;
135
136 return tokens[n];
137 }
138
139 Token const& token() override final { return tokens[i]; }
140
141 const String& file_name() override {
142 static String fakefile = "<replay buffer>";
143 return fakefile;
144 }
145
146 ~ReplayLexer() {}
147
148 private:
149 ::std::size_t i = 0;
150 Array<Token>& tokens;
151 };
152
153 enum class LexerMode {
154 Default = 0,
155 Character = 1
156 };
157
158 class Lexer: public AbstractLexer {
159 public:
160 Lexer(AbstractBuffer& reader):
161
2/4
✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
1 AbstractLexer(), _reader(reader), _cindent(indent()), _oindent(indent()) {}
162
163 ~Lexer() {}
164
165 Token const& token() override final {
166
2/2
✓ Branch 0 taken 744 times.
✓ Branch 1 taken 107709 times.
108453 if (_count == 0) {
167 744 return next_token();
168 }
169 107709 return _token;
170 }
171
172 int get_mode() const override final;
173 void set_mode(int mode) override final;
174 Token const& format_tokenizer() ;
175 Token const& next_token() override final;
176 Token const& peek_token() override final {
177 // we can only peek ahead once
178
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 154 times.
154 if (_buffer.size() > 0)
179 return _buffer[_buffer.size() - 1];
180
181 // Save current token a get next
182
1/1
✓ Branch 1 taken 154 times.
154 Token current_token = _token;
183
2/2
✓ Branch 1 taken 154 times.
✓ Branch 4 taken 154 times.
154 _buffer.push_back(next_token());
184
1/1
✓ Branch 1 taken 154 times.
154 _token = current_token;
185 154 return _buffer[_buffer.size() - 1];
186 154 }
187
188 Token const& make_token(int8 t) {
189 7174 _token = Token(t, line(), col());
190 7174 return _token;
191 }
192
193 Token const& make_token(int8 t, const String& identifier) {
194 5166 _token = Token(t, line(), col());
195 5166 _token.identifier() = identifier;
196 5166 return _token;
197 }
198
199 const String& file_name() override { return _reader.file_name(); }
200 char peekc() const { return _reader.peek(); }
201
202 private:
203 int _count = 0;
204 AbstractBuffer& _reader;
205 Token _token{dummy()};
206 int32 _cindent;
207 int32 _oindent;
208 LexerOperators _operators;
209 Array<Token> _buffer;
210 bool _fmtstr = false;
211 char _quote;
212 int _quotes = 0;
213
214 // shortcuts
215
216 int32 line() { return _reader.line(); }
217 int32 col() { return _reader.col(); }
218 int32 indent() { return _reader.indent(); }
219 void consume() { return _reader.consume(); }
220 char peek() { return _reader.peek(); }
221 bool empty_line() { return _reader.empty_line(); }
222
223 // state
224 bool desindent_for_comment = false;
225
226 char nextc() {
227 23828 _reader.consume();
228 23828 return _reader.peek();
229 }
230
231 // what characters are allowed in identifiers
232 bool is_identifier(char c) {
233
7/10
✓ Branch 0 taken 4190 times.
✓ Branch 1 taken 6235 times.
✓ Branch 2 taken 4076 times.
✓ Branch 3 taken 114 times.
✓ Branch 4 taken 4076 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 4076 times.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 4076 times.
10425 if (::std::isalnum(c) || c == '_' || c == '?' || c == '!' || c == '-')
234 6349 return true;
235 4076 return false;
236 }
237 };
238
239 } // namespace lython
240