GCC Code Coverage Report

Directory:	./
File:	src/lexer/lexer.h
Date:	2023-04-27 00:55:30

	Exec	Total	Coverage
Lines:	25	32	78.1%
Functions:	23	27	85.2%
Branches:	23	64	35.9%

  
      Line
      Branch
      Exec
      Source
    
      #pragma once
    
      #include <cctype>
    
      #include <ostream>
    
      #include "ast/nodes.h"
    
      #include "lexer/buffer.h"
    
      #include "lexer/token.h"
    
      #include "utilities/trie.h"
    
      #include "dtypes.h"
    
      #include <iostream>
    
      /*
    
       *  Lexer is a stream of tokens
    
       *
    
       *      TODO:   DocString support
    
       */
    
      namespace lython {
    
      template <typename T, typename N>
    
      bool in(T const& e, N const& v) {
    
      22732
          return e == v;
    
      }
    
      template <typename T, typename N, typename... Args>
    
      bool in(T const& e, N const& v, Args... args) {
    
        4/4✓ Branch 0 taken 41104 times.
✓ Branch 1 taken 3000 times.
✓ Branch 3 taken 4644 times.
✓ Branch 4 taken 36460 times.

      88208
          return e == v || in(e, args...);
    
      }
    
      template <typename T, typename... Args>
    
      bool in(T const& e, Args... args) {
    
          return in(e, args...);
    
      }
    
      struct OpConfig {
    
          int            precedence       = -1;
    
          bool           left_associative = true;
    
          TokenType      type             = TokenType::tok_eof;
    
          BinaryOperator binarykind       = BinaryOperator::None;
    
          UnaryOperator  unarykind        = UnaryOperator::None;
    
          BoolOperator   boolkind         = BoolOperator::None;
    
          CmpOperator    cmpkind          = CmpOperator::None;
    
          void print(std::ostream& out) const {
    
      ✗
              out << to_string(type) << "(pred: " << precedence << ") "
    
      ✗
                  << "(binary: " << int(binarykind) << ") "
    
      ✗
                  << "(unary: " << int(unarykind) << ") "
    
      ✗
                  << "(bool: " << int(boolkind) << ") "
    
      ✗
                  << "(cmp: " << int(cmpkind) << ") ";
    
      ✗
          }
    
      };
    
      Dict<String, OpConfig> const& default_precedence();
    
      class LexerOperators {
    
          public:
    
          LexerOperators() {
    
        2/2✓ Branch 4 taken 41 times.
✓ Branch 5 taken 1 times.

      42
              for (auto& c: _precedence_table) {
    
        1/2✓ Branch 2 taken 41 times.
✗ Branch 3 not taken.

      41
                  _operators.insert(c.first);
    
              }
    
      1
          }
    
          Trie<128> const* match(int c) const { return _operators.trie().matching(c); }
    
          Dict<String, OpConfig> const& precedence_table() const { return _precedence_table; }
    
          TokenType token_type(String const& str) const { return _precedence_table.at(str).type; }
    
          private:
    
          CoWTrie<128>           _operators;
    
          Dict<String, OpConfig> _precedence_table = default_precedence();
    
      };
    
      class AbstractLexer {
    
          public:
    
          virtual ~AbstractLexer() {}
    
          virtual Token const& next_token() = 0;
    
          virtual Token const& peek_token() = 0;
    
          virtual Token const& token() = 0;
    
          virtual char peekc() const { return '\0'; }
    
          virtual const String& file_name() = 0;
    
          virtual int get_mode() const  { return 0; }
    
          virtual void set_mode(int mode) {}
    
          // print tokens with their info
    
          ::std::ostream& debug_print(::std::ostream& out);
    
          // print out tokens as they were inputed
    
          ::std::ostream& print(::std::ostream& out);
    
          // extract a token stream into a token vector
    
          Array<Token> extract_token() {
    
              Array<Token> v;
    
              Token t = next_token();
    
              do {
    
                  v.push_back(t);
    
              } while ((t = next_token()));
    
              v.push_back(t);  // push eof token
    
              return v;
    
          }
    
      };
    
      class ReplayLexer: public AbstractLexer {
    
          public:
    
          ReplayLexer(Array<Token>& tokens): tokens(tokens) {
    
              Token& last = tokens[tokens.size() - 1];
    
              if (last.type() != tok_eof) {
    
                  tokens.emplace_back(tok_eof, 0, 0);
    
              }
    
          }
    
          Token const& next_token() override final {
    
              if (i + 1 < tokens.size())
    
                  i += 1;
    
              return tokens[i];
    
          }
    
          Token const& peek_token() override final {
    
              auto n = i + 1;
    
              if (n >= tokens.size())
    
                  n = i;
    
              return tokens[n];
    
          }
    
          Token const& token() override final { return tokens[i]; }
    
          const String& file_name() override {
    
              static String fakefile = "<replay buffer>";
    
              return fakefile;
    
          }
    
          ~ReplayLexer() {}
    
          private:
    
          ::std::size_t i = 0;
    
          Array<Token>& tokens;
    
      };
    
      enum class LexerMode {
    
          Default = 0,
    
          Character = 1
    
      };
    
      class Lexer: public AbstractLexer {
    
          public:
    
          Lexer(AbstractBuffer& reader):
    
        2/4✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.

      1
              AbstractLexer(), _reader(reader), _cindent(indent()), _oindent(indent()) {}
    
          ~Lexer() {}
    
          Token const& token() override final {
    
        2/2✓ Branch 0 taken 744 times.
✓ Branch 1 taken 107709 times.

      108453
              if (_count == 0) {
    
      744
                  return next_token();
    
              }
    
      107709
              return _token;
    
          }
    
          int get_mode() const override final;
    
          void set_mode(int mode) override final;
    
          Token const& format_tokenizer() ;
    
          Token const& next_token() override final;
    
          Token const& peek_token() override final {
    
              // we can only peek ahead once
    
        1/2✗ Branch 1 not taken.
✓ Branch 2 taken 154 times.

      154
              if (_buffer.size() > 0)
    
      ✗
                  return _buffer[_buffer.size() - 1];
    
              // Save current token a get next
    
        1/1✓ Branch 1 taken 154 times.

      154
              Token current_token = _token;
    
        2/2✓ Branch 1 taken 154 times.
✓ Branch 4 taken 154 times.

      154
              _buffer.push_back(next_token());
    
        1/1✓ Branch 1 taken 154 times.

      154
              _token = current_token;
    
      154
              return _buffer[_buffer.size() - 1];
    
      154
          }
    
          Token const& make_token(int8 t) {
    
      7174
              _token = Token(t, line(), col());
    
      7174
              return _token;
    
          }
    
          Token const& make_token(int8 t, const String& identifier) {
    
      5166
              _token              = Token(t, line(), col());
    
      5166
              _token.identifier() = identifier;
    
      5166
              return _token;
    
          }
    
          const String& file_name() override { return _reader.file_name(); }
    
          char peekc() const { return _reader.peek(); }
    
          private:
    
          int             _count = 0;
    
          AbstractBuffer& _reader;
    
          Token           _token{dummy()};
    
          int32           _cindent;
    
          int32           _oindent;
    
          LexerOperators  _operators;
    
          Array<Token>    _buffer;
    
          bool            _fmtstr = false;
    
          char            _quote;
    
          int             _quotes = 0;
    
          // shortcuts
    
          int32 line() { return _reader.line(); }
    
          int32 col() { return _reader.col(); }
    
          int32 indent() { return _reader.indent(); }
    
          void  consume() { return _reader.consume(); }
    
          char  peek() { return _reader.peek(); }
    
          bool  empty_line() { return _reader.empty_line(); }
    
          // state
    
          bool desindent_for_comment = false;
    
          char nextc() {
    
      23828
              _reader.consume();
    
      23828
              return _reader.peek();
    
          }
    
          // what characters are allowed in identifiers
    
          bool is_identifier(char c) {
    
        7/10✓ Branch 0 taken 4190 times.
✓ Branch 1 taken 6235 times.
✓ Branch 2 taken 4076 times.
✓ Branch 3 taken 114 times.
✓ Branch 4 taken 4076 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 4076 times.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 4076 times.

      10425
              if (::std::isalnum(c) || c == '_' || c == '?' || c == '!' || c == '-')
    
      6349
                  return true;
    
      4076
              return false;
    
          }
    
      };
    
      }  // namespace lython

Line	Branch	Exec	Source
1			#pragma once
2
3			#include <cctype>
4			#include <ostream>
5
6			#include "ast/nodes.h"
7			#include "lexer/buffer.h"
8			#include "lexer/token.h"
9			#include "utilities/trie.h"
10
11			#include "dtypes.h"
12
13			#include <iostream>
14
15			/*
16			* Lexer is a stream of tokens
17			*
18			* TODO: DocString support
19			*/
20
21			namespace lython {
22
23			template <typename T, typename N>
24			bool in(T const& e, N const& v) {
25		22732	return e == v;
26			}
27
28			template <typename T, typename N, typename... Args>
29			bool in(T const& e, N const& v, Args... args) {
30	4/4 ✓ Branch 0 taken 41104 times. ✓ Branch 1 taken 3000 times. ✓ Branch 3 taken 4644 times. ✓ Branch 4 taken 36460 times.	88208	return e == v \|\| in(e, args...);
31			}
32
33			template <typename T, typename... Args>
34			bool in(T const& e, Args... args) {
35			return in(e, args...);
36			}
37
38			struct OpConfig {
39			int precedence = -1;
40			bool left_associative = true;
41			TokenType type = TokenType::tok_eof;
42			BinaryOperator binarykind = BinaryOperator::None;
43			UnaryOperator unarykind = UnaryOperator::None;
44			BoolOperator boolkind = BoolOperator::None;
45			CmpOperator cmpkind = CmpOperator::None;
46
47			void print(std::ostream& out) const {
48		✗	out << to_string(type) << "(pred: " << precedence << ") "
49		✗	<< "(binary: " << int(binarykind) << ") "
50		✗	<< "(unary: " << int(unarykind) << ") "
51		✗	<< "(bool: " << int(boolkind) << ") "
52		✗	<< "(cmp: " << int(cmpkind) << ") ";
53		✗	}
54			};
55
56			Dict<String, OpConfig> const& default_precedence();
57
58			class LexerOperators {
59			public:
60			LexerOperators() {
61	2/2 ✓ Branch 4 taken 41 times. ✓ Branch 5 taken 1 times.	42	for (auto& c: _precedence_table) {
62	1/2 ✓ Branch 2 taken 41 times. ✗ Branch 3 not taken.	41	_operators.insert(c.first);
63			}
64		1	}
65
66			Trie<128> const* match(int c) const { return _operators.trie().matching(c); }
67
68			Dict<String, OpConfig> const& precedence_table() const { return _precedence_table; }
69
70			TokenType token_type(String const& str) const { return _precedence_table.at(str).type; }
71
72			private:
73			CoWTrie<128> _operators;
74			Dict<String, OpConfig> _precedence_table = default_precedence();
75			};
76
77			class AbstractLexer {
78			public:
79			virtual ~AbstractLexer() {}
80
81			virtual Token const& next_token() = 0;
82
83			virtual Token const& peek_token() = 0;
84
85			virtual Token const& token() = 0;
86
87			virtual char peekc() const { return '\0'; }
88
89			virtual const String& file_name() = 0;
90
91			virtual int get_mode() const { return 0; }
92			virtual void set_mode(int mode) {}
93
94			// print tokens with their info
95			::std::ostream& debug_print(::std::ostream& out);
96
97			// print out tokens as they were inputed
98			::std::ostream& print(::std::ostream& out);
99
100			// extract a token stream into a token vector
101			Array<Token> extract_token() {
102			Array<Token> v;
103
104			Token t = next_token();
105			do {
106			v.push_back(t);
107			} while ((t = next_token()));
108
109			v.push_back(t); // push eof token
110			return v;
111			}
112			};
113
114			class ReplayLexer: public AbstractLexer {
115			public:
116			ReplayLexer(Array<Token>& tokens): tokens(tokens) {
117			Token& last = tokens[tokens.size() - 1];
118			if (last.type() != tok_eof) {
119			tokens.emplace_back(tok_eof, 0, 0);
120			}
121			}
122
123			Token const& next_token() override final {
124			if (i + 1 < tokens.size())
125			i += 1;
126
127			return tokens[i];
128			}
129
130			Token const& peek_token() override final {
131			auto n = i + 1;
132
133			if (n >= tokens.size())
134			n = i;
135
136			return tokens[n];
137			}
138
139			Token const& token() override final { return tokens[i]; }
140
141			const String& file_name() override {
142			static String fakefile = "<replay buffer>";
143			return fakefile;
144			}
145
146			~ReplayLexer() {}
147
148			private:
149			::std::size_t i = 0;
150			Array<Token>& tokens;
151			};
152
153			enum class LexerMode {
154			Default = 0,
155			Character = 1
156			};
157
158			class Lexer: public AbstractLexer {
159			public:
160			Lexer(AbstractBuffer& reader):
161	2/4 ✓ Branch 3 taken 1 times. ✗ Branch 4 not taken. ✓ Branch 8 taken 1 times. ✗ Branch 9 not taken.	1	AbstractLexer(), _reader(reader), _cindent(indent()), _oindent(indent()) {}
162
163			~Lexer() {}
164
165			Token const& token() override final {
166	2/2 ✓ Branch 0 taken 744 times. ✓ Branch 1 taken 107709 times.	108453	if (_count == 0) {
167		744	return next_token();
168			}
169		107709	return _token;
170			}
171
172			int get_mode() const override final;
173			void set_mode(int mode) override final;
174			Token const& format_tokenizer() ;
175			Token const& next_token() override final;
176			Token const& peek_token() override final {
177			// we can only peek ahead once
178	1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 154 times.	154	if (_buffer.size() > 0)
179		✗	return _buffer[_buffer.size() - 1];
180
181			// Save current token a get next
182	1/1 ✓ Branch 1 taken 154 times.	154	Token current_token = _token;
183	2/2 ✓ Branch 1 taken 154 times. ✓ Branch 4 taken 154 times.	154	_buffer.push_back(next_token());
184	1/1 ✓ Branch 1 taken 154 times.	154	_token = current_token;
185		154	return _buffer[_buffer.size() - 1];
186		154	}
187
188			Token const& make_token(int8 t) {
189		7174	_token = Token(t, line(), col());
190		7174	return _token;
191			}
192
193			Token const& make_token(int8 t, const String& identifier) {
194		5166	_token = Token(t, line(), col());
195		5166	_token.identifier() = identifier;
196		5166	return _token;
197			}
198
199			const String& file_name() override { return _reader.file_name(); }
200			char peekc() const { return _reader.peek(); }
201
202			private:
203			int _count = 0;
204			AbstractBuffer& _reader;
205			Token _token{dummy()};
206			int32 _cindent;
207			int32 _oindent;
208			LexerOperators _operators;
209			Array<Token> _buffer;
210			bool _fmtstr = false;
211			char _quote;
212			int _quotes = 0;
213
214			// shortcuts
215
216			int32 line() { return _reader.line(); }
217			int32 col() { return _reader.col(); }
218			int32 indent() { return _reader.indent(); }
219			void consume() { return _reader.consume(); }
220			char peek() { return _reader.peek(); }
221			bool empty_line() { return _reader.empty_line(); }
222
223			// state
224			bool desindent_for_comment = false;
225
226			char nextc() {
227		23828	_reader.consume();
228		23828	return _reader.peek();
229			}
230
231			// what characters are allowed in identifiers
232			bool is_identifier(char c) {
233	7/10 ✓ Branch 0 taken 4190 times. ✓ Branch 1 taken 6235 times. ✓ Branch 2 taken 4076 times. ✓ Branch 3 taken 114 times. ✓ Branch 4 taken 4076 times. ✗ Branch 5 not taken. ✓ Branch 6 taken 4076 times. ✗ Branch 7 not taken. ✗ Branch 8 not taken. ✓ Branch 9 taken 4076 times.	10425	if (::std::isalnum(c) \|\| c == '_' \|\| c == '?' \|\| c == '!' \|\| c == '-')
234		6349	return true;
235		4076	return false;
236			}
237			};
238
239			} // namespace lython
240