Ocean
Loading...
Searching...
No Matches
Scanner.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_IO_SCANNER_H
9#define META_OCEAN_IO_SCANNER_H
10
11#include "ocean/io/IO.h"
12
13#include "ocean/base/Memory.h"
14
15#include "ocean/math/Math.h"
16
17#include <array>
18#include <istream>
19
20namespace Ocean
21{
22
23namespace IO
24{
25
26/**
27 * This class implements a simple scanner.
28 * @ingroup io
29 */
30class OCEAN_IO_EXPORT Scanner
31{
32 public:
33
34 /**
35 * Definition of an invalid keyword or symbol id.
36 */
37 static constexpr uint32_t invalidId = uint32_t(-1);
38
39 /**
40 * This class implements a token for the scanner.
41 */
42 class OCEAN_IO_EXPORT Token final
43 {
44 public:
45
46 /**
47 * Definition of different token types.
48 */
49 enum Type : uint32_t
50 {
51 /// Invalid token.
52 TOKEN_INVALID = 0u,
53 /// Character token.
55 /// End of file token.
57 /// Identifier token.
59 /// Line token.
61 /// Number token.
63 /// Integer token.
65 /// Keyword token.
67 /// String token.
69 /// Symbol token.
70 TOKEN_SYMBOL
71 };
72
73 public:
74
75 /**
76 * Creates an invalid token.
77 */
78 Token() = default;
79
80 /**
81 * Creates a new token with given data and type.
82 * @param data Token data
83 * @param type Token type
84 */
85 Token(std::string&& data, const Type type);
86
87 /**
88 * Creates a new token with given data and type.
89 * @param data Token data
90 * @param type Token type
91 */
92 Token(const std::string& data, const Type type);
93
94 /**
95 * Creates a new token with given keyword or symbol id.
96 * @param data Raw token data
97 * @param id Keyword or symbol id of the token
98 * @param type Token type
99 */
100 Token(std::string&& data, const uint32_t id, const Type type);
101
102 /**
103 * Creates a new token with given keyword or symbol id.
104 * @param data Raw token data
105 * @param id Keyword or symbol id of the token
106 * @param type Token type
107 */
108 Token(const std::string& data, const uint32_t id, const Type type);
109
110 /**
111 * Returns the type of the token.
112 * @return Token type
113 */
114 inline Type type() const;
115
116 /**
117 * Returns whether this token is of a specific type.
118 * @param type The type to check
119 * @return True, if so
120 */
121 inline bool isType(const Type type) const;
122
123 /**
124 * Returns whether this token holds a character.
125 * @return True, if so
126 */
127 inline bool isCharacter() const;
128
129 /**
130 * Returns whether this token holds an identifier.
131 * @return True, if so
132 */
133 inline bool isIdentifier() const;
134
135 /**
136 * Returns whether this token holds an integer.
137 * @return True, if so
138 */
139 inline bool isInteger() const;
140
141 /**
142 * Returns whether this token holds a remaining line.
143 * @return True, if so
144 */
145 inline bool isLine() const;
146
147 /**
148 * Returns whether this token holds a number.
149 * @return True, if so
150 */
151 inline bool isNumber() const;
152
153 /**
154 * Returns whether this token holds an integer or a number.
155 * @return True, if so
156 */
157 inline bool isIntegerOrNumber() const;
158
159 /**
160 * Returns whether this token holds a keyword.
161 * @return True, if so
162 */
163 inline bool isKeyword() const;
164
165 /**
166 * Returns whether this token holds a special keyword.
167 * @param keyword The keyword id to check
168 * @return True, if so
169 */
170 bool isKeyword(const uint32_t keyword) const;
171
172 /**
173 * Returns whether this token holds a string.
174 * @return True, if so
175 */
176 inline bool isString() const;
177
178 /**
179 * Returns whether this token holds a symbol.
180 * @return True, if so
181 */
182 inline bool isSymbol() const;
183
184 /**
185 * Returns whether this token holds a special symbol.
186 * @param symbol The symbol id to check
187 * @return True, if so
188 */
189 bool isSymbol(const uint32_t symbol) const;
190
191 /**
192 * Returns whether this token holds a end of file.
193 * @return True, if so
194 */
195 inline bool isEndOfFile() const;
196
197 /**
198 * Returns the character value of this token.
199 * @return Character value
200 */
201 uint8_t character() const;
202
203 /**
204 * Returns the identifier value of this token.
205 * @return Identifier value
206 */
207 const std::string& identifier() const;
208
209 /**
210 * Returns the integer value of this token.
211 * @return Integer value
212 */
213 int integer() const;
214
215 /**
216 * Returns the float value of this token.
217 * @return Float value
218 */
219 Scalar number() const;
220
221 /**
222 * Returns the integer or float value of this token.
223 * @return Integer or float value as float value
224 */
226
227 /**
228 * Returns the remaining line of this token.
229 * @return Line value
230 */
231 const std::string& line() const;
232
233 /**
234 * Returns the id of the keyword of this token.
235 * @return Keyword id
236 */
237 uint32_t keyword() const;
238
239 /**
240 * Returns the id of the symbol of this token.
241 * @return Symbol id
242 */
243 uint32_t symbol() const;
244
245 /**
246 * Returns the string value of this token.
247 * @return The token's string value
248 */
249 const std::string& string() const;
250
251 /**
252 * Returns the string value of this token and invalidates the token.
253 * @return The token's string value
254 */
255 std::string moveString();
256
257 /**
258 * Returns the raw data of the token.
259 * @return Raw data
260 */
261 inline const std::string& raw() const;
262
263 /**
264 * Returns whether the token is valid.
265 * @return True, if so
266 */
267 explicit inline operator bool() const;
268
269 protected:
270
271 /// Holds the type of the token.
272 Type type_ = TOKEN_INVALID;
273
274 /// Holds the id of the keyword or symbol, if any.
275 uint32_t id_ = invalidId;
276
277 /// Holds the data of the token.
278 std::string data_;
279 };
280
281 protected:
282
283 /**
284 * Definition of an unordered map mapping strings to ids.
285 */
286 typedef std::unordered_map<std::string, uint32_t> IdMap;
287
288 /**
289 * Definition of an unordered set holding line remark symbols.
290 */
291 typedef std::unordered_set<std::string> LineRemarks;
292
293 /**
294 * Definition of an unordered map mapping begin remark symbols to end remark symbols.
295 */
296 typedef std::unordered_map<std::string, std::string> ScopeRemarks;
297
298 /**
299 * Definition of a character table.
300 */
301 typedef std::array<uint16_t, 256> CharTable;
302
303 /**
304 * Definition of first character types.
305 */
306 enum FirstChar : uint16_t
307 {
308 /// Invalid.
309 CHAR_INVALID = 0,
310 /// Character.
311 CHAR_CHARACTER = 1,
312 /// Identifier.
313 CHAR_IDENTIFIER = 2,
314 /// Number.
315 CHAR_NUMBER = 4,
316 /// Integer.
317 CHAR_INTEGER = 8,
318 /// Keyword.
319 CHAR_KEYWORD = 16,
320 /// String.
321 CHAR_STRING = 32,
322 /// Symbol.
323 CHAR_SYMBOL = 64,
324 /// Remark.
325 CHAR_REMARK = 128,
326 /// White space.
327 CHAR_SPACE = 256
328 };
329
330 public:
331
332 /**
333 * Creates a new scanner using a stream as input.
334 * @param stream The stream to be use as input
335 * @param progress Optional resulting scanner progress in percent, with range [0, 1]
336 * @param cancel Optional scanner cancel flag
337 */
338 explicit Scanner(const std::shared_ptr<std::istream>& stream, float* progress = nullptr, bool* cancel = nullptr);
339
340 /**
341 * Creates a new scanner using a file or a memory buffer as input.
342 * @param filename The name of the file to be used as input, `buffer` must be empty
343 * @param buffer The buffer to be used as input, `filename` must be empty
344 * @param progress Optional resulting scanner progress in percent, with range [0, 1]
345 * @param cancel Optional scanner cancel flag
346 */
347 inline Scanner(const std::string& filename, const std::string& buffer, float* progress = nullptr, bool* cancel = nullptr);
348
349 /**
350 * Creates a new scanner using a file or a memory buffer as input.
351 * @param filename The name of the file to be used as input, `buffer` must be empty
352 * @param buffer The buffer to be used as input, `filename` must be empty
353 * @param progress Optional resulting scanner progress in percent, with range [0, 1]
354 * @param cancel Optional scanner cancel flag
355 */
356 inline Scanner(const std::string& filename, std::string&& buffer, float* progress = nullptr, bool* cancel = nullptr);
357
358 /**
359 * Destructs a scanner.
360 */
361 virtual ~Scanner();
362
363 /**
364 * Returns the recent token.
365 * @return Recent token
366 */
367 const Token& token();
368
369 /**
370 * Returns a line token starting at the current position.
371 * A line token does not handle remarks.
372 * @return Line token
373 */
374 const Token& lineToken();
375
376 /**
377 * Return the recent token and pops it afterwards.
378 * @return Recent token.
379 */
381
382 /**
383 * Returns a lookout to the next token.
384 * @return Next token
385 */
386 const Token& look();
387
388 /**
389 * Pops the recent token.
390 */
391 void pop();
392
393 /**
394 * Returns the recent line.
395 * @return Recent line
396 */
397 inline size_t line() const;
398
399 /**
400 * Returns the recent column.
401 * @return Recent column
402 */
403 inline size_t column() const;
404
405 /**
406 * Returns the position of the scanner.
407 * @return Position of the scanner in characters
408 */
409 size_t position() const;
410
411 /**
412 * Returns the size of the scanner.
413 * @return Size of the scanner in characters
414 */
415 size_t size() const;
416
417 /**
418 * Returns the name of the input file, if the input is a file.
419 * @return The scanner's input filename, empty if the input was a buffer
420 */
421 inline const std::string& filename() const;
422
423 /**
424 * Returns whether the scanner is valid and ready to use.
425 * @return True, if so
426 */
427 inline bool isValid() const;
428
429 /**
430 * Finds the next token in a given string starting from a specified position.
431 * A token is enclosed by white characters or by the borders of the given string, the length of the given string is explicitly defined by the parameter 'size'.
432 * @param pointer The pointer to the string in which the next token is to be found, must be valid
433 * @param size The length of the given string in characters, with range [1, infinity)
434 * @param start The first character within the given string that defines the first possible character of the token, with range [0, size - 1]
435 * @param tokenStart The resulting start location within the given string of the found token, with range [start, strlen(pointer) - 1]
436 * @param tokenLength The resulting length of the found token, with range [1, strlen(pointer) - start]
437 * @return True, if a second token may follow; False, if the token is the last token
438 */
439 static bool findNextToken(const char* pointer, const size_t size, const size_t start, size_t& tokenStart, size_t& tokenLength);
440
441 /**
442 * Finds the next token in a given string starting from a specified position.
443 * A token is enclosed by white characters or by the borders of the given string, the end is identified by a null character.
444 * @param pointer The pointer to the string in which the next token is to be found, can be nullptr
445 * @param start The first character within the given string that defines the first possible character of the token, with range [0, strlen(pointer)]
446 * @param tokenStart The resulting start location within the given string of the found token, with range [start, strlen(pointer) - 1]
447 * @param tokenLength The resulting length of the found token, with range [1, strlen(pointer) - start]
448 * @return True, if a second token may follow; False, if the token is the last token
449 */
450 static bool findNextToken(const char* pointer, const size_t start, size_t& tokenStart, size_t& tokenLength);
451
452 /**
453 * Returns whether a given character is a white space character.
454 * A white space character can be one of the following:
455 * <pre>
456 * ' ', '\\t', '\\n', or '\\r'
457 * </pre>
458 * @param character The character to be checked
459 * @return True, if so
460 */
461 static inline bool isWhitespace(const char& character);
462
463 protected:
464
465 /**
466 * Creates a new scanner.
467 * The scanner may forward an entire progress state, if the pointer value is defined.<br>
468 * Beware: Make sure that the value exists during the whole scanning timer!<br>
469 * Further, the scanner may be canceled by an explicit flag.<br>
470 * In the case the scanner is canceled an end of file token is returned.<br>
471 * Beware: As for the progress value, also the cancel object must exist during the whole scanning progress, if provided
472 * @param progress Optional progress parameter to forward the scanning progress with range [0, 1], use nullptr if the progress state is not necessary
473 * @param cancel Optional cancel state to cancel the scanner progress by setting the flag to 'true', use nullptr if the cancel state is not necessary
474 */
475 Scanner(float* progress, bool* cancel);
476
477 /**
478 * Returns one character.
479 * @param offset Offset to the recent position
480 * @return Character
481 */
482 uint8_t get(const size_t offset = 0);
483
484 /**
485 * Returns data of a specified size starting at the recent position.
486 * Beware: Make sure that enough pending buffer is available
487 * @param size Size of the data to receive
488 * @return Requested data
489 */
490 std::string data(const size_t size) const;
491
492 /**
493 * Returns data of a specified size starting at the offset position.
494 * Beware: Make sure that enough pending buffer is available
495 * @param offset Start position relative to the current position
496 * @param size Size of the data to receive
497 * @return Requested data
498 */
499 std::string data(const size_t offset, const size_t size) const;
500
501 /**
502 * Consumes one or more character.
503 * @param chars Number of characters to consume
504 */
505 void consume(const size_t chars = 1);
506
507 /**
508 * Refills the intermediate buffer.
509 * @return True, if the buffer could be refilled with new characters
510 */
512
513 /**
514 * Returns the keyword id of a given string.
515 * @param data Data to convert to a keyword
516 * @return Id of the identifier, otherwise invalidId
517 */
518 uint32_t keywordId(const std::string& data) const;
519
520 /**
521 * Returns the symbol id of a given string.
522 * @param data Data to convert to a symbol
523 * @return Id of the symbol, otherwise invalidId
524 */
525 uint32_t symbolId(const std::string& data) const;
526
527 /**
528 * Sets whether the keywords are case sensitive or not.
529 * As default all keywords are case sensitive.<br>
530 * Beware: This property has to be set before the first keyword is registered!
531 * @param caseSensitive True, if all keywords will be case sensitive
532 */
533 void setKeywordProperty(const bool caseSensitive);
534
535 /**
536 * Registers a new keyword.
537 * @param keyword New keyword
538 * @param id Id of the keyword
539 */
540 void registerKeyword(const std::string& keyword, const uint32_t id);
541
542 /**
543 * Registers a new symbol.
544 * @param symbol New symbol
545 * @param id Id of the symbol
546 */
547 void registerSymbol(const std::string& symbol, const uint32_t id);
548
549 /**
550 * Registers a line remark symbol.
551 * @param lineRemark Line remark symbol
552 */
553 void registerLineRemark(const std::string& lineRemark);
554
555 /**
556 * Registers a scope remark symbol.
557 * @param begin Begin remark symbol
558 * @param end End remark symbol
559 */
560 void registerScopeRemark(const std::string& begin, const std::string& end);
561
562 /**
563 * Registers a white space character.
564 * @param character White space character to register
565 * @return True, if succeeded
566 */
567 bool registerWhiteSpaceCharacter(const uint8_t character);
568
569 /**
570 * Reads and returns the next token.
571 * @param consumeBytes Determines whether the scanner consume the read characters.
572 * @return New token
573 */
574 virtual Token readToken(const bool consumeBytes = true);
575
576 /**
577 * Reads white space.
578 * @param crossLines Determines whether the white space can be separated over several lines
579 * @return Next not-white-space character
580 */
581 uint8_t readWhiteSpace(bool crossLines = true);
582
583 /**
584 * Discards non white space and jumps to the first white space position.
585 * @return Discarded elements
586 */
587 std::string discardNonWhiteSpace();
588
589 /**
590 * Reads remark comments.
591 * @return True, if a comment was read
592 */
594
595 /**
596 * Reads a line remark comment.
597 * @return True, if a comment was read
598 */
600
601 /**
602 * Reads a scope remark comment.
603 * @return True, if a comment was read
604 */
606
607 /**
608 * Tries to read a character as next token.
609 * @param token Returning token
610 * @param consumeBytes Determines whether the scanner consumes the read characters
611 * @return True, if succeeded
612 */
613 bool readCharacter(Token& token, const bool consumeBytes);
614
615 /**
616 * Tries to read a identifier as next token.
617 * @param token Returning token
618 * @param consumeBytes Determines whether the scanner consumes the read characters
619 * @return True, if succeeded
620 */
621 bool readIdentifier(Token& token, const bool consumeBytes);
622
623 /**
624 * Tries to read an integer as next token.
625 * @param token Returning token
626 * @param consumeBytes Determines whether the scanner consumes the read characters
627 * @return True, if succeeded
628 */
629 bool readInteger(Token& token, const bool consumeBytes);
630
631 /**
632 * Tries to read a keyword as next token.
633 * @param token Returning token
634 * @param consumeBytes Determines whether the scanner consumes the read characters
635 * @return True, if succeeded
636 */
637 bool readKeyword(Token& token, const bool consumeBytes);
638
639 /**
640 * Tries to read a remaining line as next token.
641 * @param token Returning token
642 * @param consumeBytes Determines whether the scanner consumes the read characters
643 * @return True, if succeeded
644 */
645 bool readLine(Token& token, const bool consumeBytes);
646
647 /**
648 * Tries to read a number as next token.
649 * @param token Returning token
650 * @param consumeBytes Determines whether the scanner consumes the read characters
651 * @return True, if succeeded
652 */
653 bool readNumber(Token& token, const bool consumeBytes);
654
655 /**
656 * Tries to read a string as next token.
657 * @param token Returning token
658 * @param consumeBytes Determines whether the scanner consumes the read characters
659 * @return True, if succeeded
660 */
661 bool readString(Token& token, const bool consumeBytes);
662
663 /**
664 * Tries to read a symbol as next token.
665 * @param token Returning token
666 * @param consumeBytes Determines whether the scanner consumes the read characters
667 * @return True, if succeeded
668 */
669 bool readSymbol(Token& token, const bool consumeBytes);
670
671 private:
672
673 /**
674 * Returns one character from the extra buffer.
675 * @param offset Offset inside the recent extra buffer
676 * @return Character
677 */
678 uint8_t getExtra(const size_t offset = 0);
679
680 /**
681 * Refills the extra buffer.
682 * @param minIndex Minimal index of the character needed inside the extra buffer
683 * @return True, if enough characters could be read
684 */
685 bool refillExtraBuffer(const size_t minIndex);
686
687 /**
688 * Creates a file input stream or a string input stream depending on the given input.
689 * @param filename The name of the file to be used as input, `buffer` must be empty
690 * @param buffer The buffer to be used as input, `filename` must be empty
691 */
692 static inline std::shared_ptr<std::istream> createInputStream(const std::string& filename, std::string&& buffer);
693
694 /**
695 * Creates a file input stream or a string input stream depending on the given input.
696 * @param filename The name of the file to be used as input, `buffer` must be empty
697 * @param buffer The buffer to be used as input, `filename` must be empty
698 */
699 static inline std::shared_ptr<std::istream> createInputStream(const std::string& filename, const std::string& buffer);
700
701 protected:
702
703 /// Recent token.
705
706 /// Next token.
708
709 /// The input stream from which the scanner receives the data.
710 std::shared_ptr<std::istream> stream_;
711
712 /// The name of the input file, if the input is a file.
713 std::string filename_;
714
715 /// The scanner's progress in percent, with range [0, 1].
716 float* progress_ = nullptr;
717
718 /// Cancel flag.
719 bool* cancel_ = nullptr;
720
721 /// Local intermediate buffer.
723
724 /// The current pointer inside the intermediate buffer.
725 uint8_t* intermediateBufferPointer_ = nullptr;
726
727 /// Number of remaining characters in the intermediate buffer.
728 size_t intermediateBufferSize_ = 0;
729
730 /// Local extra buffer, used if the intermediate buffer is too small.
732
733 /// Pointer inside the extra buffer.
734 uint8_t* extraBufferPointer_ = nullptr;
735
736 /// Number of remaining characters inside the extra buffer.
737 size_t extraBufferSize_ = 0;
738
739 /// Holds the current line.
740 size_t line_ = 1;
741
742 /// Holds the current column.
743 size_t column_ = 1;
744
745 /// Holds the current position of the scanner.
746 size_t position_ = 0;
747
748 /// Map mapping keyword strings to identifier ids.
750
751 /// Determines whether all keywords are case sensitive.
752 bool keywordsAreCaseSensitive_ = true;
753
754 /// Map mapping symbol strings to symbol ids.
756
757 /// Registered line remarks.
759
760 /// Length of the maximal line remark.
761 size_t maximalLengthLineRemarks_ = 0;
762
763 /// Scope remarks.
765
766 /// Length of the maximal scope remarks.
767 size_t maximalLengthScopeRemarks_ = 0;
768
769 /// Table holding the definition of allowed first characters.
771
772 /// Table holding the definition of allowed following characters.
774
775 /// Table holding the definition of not allowed following characters.
777
778 /// Definition of the minimum intermediate buffer size.
779 static constexpr size_t minBufferSize_ = 2048;
780
781 /// Definition of the maximum intermediate buffer size.
782 static constexpr size_t maxBufferSize_ = 8192;
783};
784
786{
787 return type_;
788}
789
790inline bool Scanner::Token::isType(const Type type) const
791{
792 return type_ == type;
793}
794
796{
797 return type_ == TOKEN_CHARACTER;
798}
799
801{
802 return type_ == TOKEN_IDENTIFIER;
803}
804
805inline bool Scanner::Token::isInteger() const
806{
807 return type_ == TOKEN_INTEGER;
808}
809
810inline bool Scanner::Token::isNumber() const
811{
812 return type_ == TOKEN_NUMBER;
813}
814
816{
817 return type_ == TOKEN_INTEGER || type_ == TOKEN_NUMBER;
818}
819
820inline bool Scanner::Token::isLine() const
821{
822 return type_ == TOKEN_LINE;
823}
824
825inline bool Scanner::Token::isKeyword() const
826{
827 return type_ == TOKEN_KEYWORD;
828}
829
830inline bool Scanner::Token::isString() const
831{
832 return type_ == TOKEN_STRING;
833}
834
835inline bool Scanner::Token::isSymbol() const
836{
837 return type_ == TOKEN_SYMBOL;
838}
839
841{
842 return type_ == TOKEN_END_OF_FILE;
843}
844
845inline const std::string& Scanner::Token::raw() const
846{
847 return data_;
848}
849
850inline Scanner::Token::operator bool() const
851{
852 return type_ != TOKEN_INVALID;
853}
854
855inline Scanner::Scanner(const std::string& filename, const std::string& buffer, float* progress, bool* cancel) :
856 Scanner(createInputStream(filename, buffer), progress, cancel)
857{
858 ocean_assert(!filename.empty() || !buffer.empty());
859
860 if (!filename.empty() && stream_)
861 {
863 }
864}
865
866inline Scanner::Scanner(const std::string& filename, std::string&& buffer, float* progress, bool* cancel) :
867 Scanner(createInputStream(filename, std::move(buffer)), progress, cancel)
868{
869 ocean_assert(!filename.empty() || stream_);
870
871 if (!filename.empty() && stream_)
872 {
874 }
875}
876
877size_t Scanner::line() const
878{
879 return line_;
880}
881
882size_t Scanner::column() const
883{
884 return column_;
885}
886
887inline const std::string& Scanner::filename() const
888{
889 return filename_;
890}
891
892inline bool Scanner::isValid() const
893{
894 return bool(stream_);
895}
896
897inline bool Scanner::isWhitespace(const char& character)
898{
899 return character == ' ' || character== '\t' || character == '\n' || character == '\r';
900}
901
902inline std::shared_ptr<std::istream> Scanner::createInputStream(const std::string& filename, std::string&& buffer)
903{
904 ocean_assert(!filename.empty() || !buffer.empty());
905
906 if (!filename.empty())
907 {
908 return std::shared_ptr<std::istream>(new std::ifstream(filename.c_str(), std::ios_base::binary));
909 }
910
911 return std::shared_ptr<std::istream>(new std::istringstream(std::move(buffer)));
912}
913
914inline std::shared_ptr<std::istream> Scanner::createInputStream(const std::string& filename, const std::string& buffer)
915{
916 ocean_assert(!filename.empty() || !buffer.empty());
917
918 if (!filename.empty())
919 {
920 return std::shared_ptr<std::istream>(new std::ifstream(filename.c_str(), std::ios_base::binary));
921 }
922
923 return std::shared_ptr<std::istream>(new std::istringstream(buffer));
924}
925
926}
927
928}
929
930#endif // META_OCEAN_IO_SCANNER_H
This class implements a token for the scanner.
Definition Scanner.h:43
Token(std::string &&data, const Type type)
Creates a new token with given data and type.
bool isKeyword() const
Returns whether this token holds a keyword.
Definition Scanner.h:825
uint32_t keyword() const
Returns the id of the keyword of this token.
std::string data_
Holds the data of the token.
Definition Scanner.h:278
bool isSymbol(const uint32_t symbol) const
Returns whether this token holds a special symbol.
Type
Definition of different token types.
Definition Scanner.h:50
@ TOKEN_END_OF_FILE
End of file token.
Definition Scanner.h:56
@ TOKEN_INTEGER
Integer token.
Definition Scanner.h:64
@ TOKEN_KEYWORD
Keyword token.
Definition Scanner.h:66
@ TOKEN_LINE
Line token.
Definition Scanner.h:60
@ TOKEN_IDENTIFIER
Identifier token.
Definition Scanner.h:58
@ TOKEN_NUMBER
Number token.
Definition Scanner.h:62
@ TOKEN_CHARACTER
Character token.
Definition Scanner.h:54
@ TOKEN_STRING
String token.
Definition Scanner.h:68
bool isKeyword(const uint32_t keyword) const
Returns whether this token holds a special keyword.
bool isType(const Type type) const
Returns whether this token is of a specific type.
Definition Scanner.h:790
bool isLine() const
Returns whether this token holds a remaining line.
Definition Scanner.h:820
bool isSymbol() const
Returns whether this token holds a symbol.
Definition Scanner.h:835
const std::string & raw() const
Returns the raw data of the token.
Definition Scanner.h:845
bool isCharacter() const
Returns whether this token holds a character.
Definition Scanner.h:795
bool isString() const
Returns whether this token holds a string.
Definition Scanner.h:830
uint32_t symbol() const
Returns the id of the symbol of this token.
std::string moveString()
Returns the string value of this token and invalidates the token.
bool isNumber() const
Returns whether this token holds a number.
Definition Scanner.h:810
Type type() const
Returns the type of the token.
Definition Scanner.h:785
Scalar number() const
Returns the float value of this token.
bool isEndOfFile() const
Returns whether this token holds a end of file.
Definition Scanner.h:840
const std::string & string() const
Returns the string value of this token.
const std::string & line() const
Returns the remaining line of this token.
bool isIdentifier() const
Returns whether this token holds an identifier.
Definition Scanner.h:800
bool isIntegerOrNumber() const
Returns whether this token holds an integer or a number.
Definition Scanner.h:815
Type type_
Holds the type of the token.
Definition Scanner.h:272
Token(const std::string &data, const uint32_t id, const Type type)
Creates a new token with given keyword or symbol id.
Token()=default
Creates an invalid token.
uint8_t character() const
Returns the character value of this token.
Token(std::string &&data, const uint32_t id, const Type type)
Creates a new token with given keyword or symbol id.
Token(const std::string &data, const Type type)
Creates a new token with given data and type.
const std::string & identifier() const
Returns the identifier value of this token.
Scalar integerOrNumber() const
Returns the integer or float value of this token.
int integer() const
Returns the integer value of this token.
bool isInteger() const
Returns whether this token holds an integer.
Definition Scanner.h:805
This class implements a simple scanner.
Definition Scanner.h:31
const Token & lineToken()
Returns a line token starting at the current position.
std::unordered_set< std::string > LineRemarks
Definition of an unordered set holding line remark symbols.
Definition Scanner.h:291
std::string filename_
The name of the input file, if the input is a file.
Definition Scanner.h:713
std::string discardNonWhiteSpace()
Discards non white space and jumps to the first white space position.
CharTable followingCharTable_
Table holding the definition of allowed following characters.
Definition Scanner.h:773
bool readKeyword(Token &token, const bool consumeBytes)
Tries to read a keyword as next token.
size_t column_
Holds the current column.
Definition Scanner.h:743
size_t column() const
Returns the recent column.
Definition Scanner.h:882
Token recentToken_
Recent token.
Definition Scanner.h:704
uint32_t symbolId(const std::string &data) const
Returns the symbol id of a given string.
uint8_t get(const size_t offset=0)
Returns one character.
CharTable invalidCharTable_
Table holding the definition of not allowed following characters.
Definition Scanner.h:776
bool readInteger(Token &token, const bool consumeBytes)
Tries to read an integer as next token.
IdMap symbolMap_
Map mapping symbol strings to symbol ids.
Definition Scanner.h:755
static std::shared_ptr< std::istream > createInputStream(const std::string &filename, std::string &&buffer)
Creates a file input stream or a string input stream depending on the given input.
Definition Scanner.h:902
static bool findNextToken(const char *pointer, const size_t start, size_t &tokenStart, size_t &tokenLength)
Finds the next token in a given string starting from a specified position.
bool readIdentifier(Token &token, const bool consumeBytes)
Tries to read a identifier as next token.
bool isValid() const
Returns whether the scanner is valid and ready to use.
Definition Scanner.h:892
size_t position() const
Returns the position of the scanner.
uint32_t keywordId(const std::string &data) const
Returns the keyword id of a given string.
CharTable firstCharTable_
Table holding the definition of allowed first characters.
Definition Scanner.h:770
uint8_t getExtra(const size_t offset=0)
Returns one character from the extra buffer.
FirstChar
Definition of first character types.
Definition Scanner.h:307
void consume(const size_t chars=1)
Consumes one or more character.
bool readCharacter(Token &token, const bool consumeBytes)
Tries to read a character as next token.
std::shared_ptr< std::istream > stream_
The input stream from which the scanner receives the data.
Definition Scanner.h:710
std::unordered_map< std::string, uint32_t > IdMap
Definition of an unordered map mapping strings to ids.
Definition Scanner.h:286
void registerKeyword(const std::string &keyword, const uint32_t id)
Registers a new keyword.
bool readScopeRemark()
Reads a scope remark comment.
std::string data(const size_t offset, const size_t size) const
Returns data of a specified size starting at the offset position.
bool readRemark()
Reads remark comments.
void registerLineRemark(const std::string &lineRemark)
Registers a line remark symbol.
Scanner(float *progress, bool *cancel)
Creates a new scanner.
Token nextToken_
Next token.
Definition Scanner.h:707
const Token & token()
Returns the recent token.
Token tokenPop()
Return the recent token and pops it afterwards.
void setKeywordProperty(const bool caseSensitive)
Sets whether the keywords are case sensitive or not.
bool refillExtraBuffer(const size_t minIndex)
Refills the extra buffer.
std::string data(const size_t size) const
Returns data of a specified size starting at the recent position.
size_t line() const
Returns the recent line.
Definition Scanner.h:877
virtual ~Scanner()
Destructs a scanner.
bool readLine(Token &token, const bool consumeBytes)
Tries to read a remaining line as next token.
void registerSymbol(const std::string &symbol, const uint32_t id)
Registers a new symbol.
static bool isWhitespace(const char &character)
Returns whether a given character is a white space character.
Definition Scanner.h:897
uint8_t readWhiteSpace(bool crossLines=true)
Reads white space.
IdMap keywordMap_
Map mapping keyword strings to identifier ids.
Definition Scanner.h:749
size_t line_
Holds the current line.
Definition Scanner.h:740
const std::string & filename() const
Returns the name of the input file, if the input is a file.
Definition Scanner.h:887
bool readNumber(Token &token, const bool consumeBytes)
Tries to read a number as next token.
ScopeRemarks scopeRemarks_
Scope remarks.
Definition Scanner.h:764
void pop()
Pops the recent token.
virtual Token readToken(const bool consumeBytes=true)
Reads and returns the next token.
std::array< uint16_t, 256 > CharTable
Definition of a character table.
Definition Scanner.h:301
std::unordered_map< std::string, std::string > ScopeRemarks
Definition of an unordered map mapping begin remark symbols to end remark symbols.
Definition Scanner.h:296
bool refillIntermediateBuffer()
Refills the intermediate buffer.
void registerScopeRemark(const std::string &begin, const std::string &end)
Registers a scope remark symbol.
size_t size() const
Returns the size of the scanner.
static bool findNextToken(const char *pointer, const size_t size, const size_t start, size_t &tokenStart, size_t &tokenLength)
Finds the next token in a given string starting from a specified position.
bool readLineRemark()
Reads a line remark comment.
Memory intermediateBuffer_
Local intermediate buffer.
Definition Scanner.h:722
Scanner(const std::shared_ptr< std::istream > &stream, float *progress=nullptr, bool *cancel=nullptr)
Creates a new scanner using a stream as input.
bool registerWhiteSpaceCharacter(const uint8_t character)
Registers a white space character.
bool readSymbol(Token &token, const bool consumeBytes)
Tries to read a symbol as next token.
bool readString(Token &token, const bool consumeBytes)
Tries to read a string as next token.
Memory extraBuffer_
Local extra buffer, used if the intermediate buffer is too small.
Definition Scanner.h:731
const Token & look()
Returns a lookout to the next token.
LineRemarks lineRemarks_
Registered line remarks.
Definition Scanner.h:758
This class implements an object able to allocate memory.
Definition base/Memory.h:22
float Scalar
Definition of a scalar type.
Definition Math.h:129
The namespace covering the entire Ocean framework.
Definition Accessor.h:15