Ocean
Scanner.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 #ifndef META_OCEAN_IO_SCANNER_H
9 #define META_OCEAN_IO_SCANNER_H
10 
11 #include "ocean/io/IO.h"
12 
13 #include "ocean/base/Memory.h"
14 
15 #include "ocean/math/Math.h"
16 
17 #include <array>
18 #include <istream>
19 
20 namespace Ocean
21 {
22 
23 namespace IO
24 {
25 
26 /**
27  * This class implements a simple scanner.
28  * @ingroup io
29  */
30 class OCEAN_IO_EXPORT Scanner
31 {
32  public:
33 
34  /**
35  * Definition of an invalid keyword or symbol id.
36  */
37  static constexpr uint32_t invalidId = uint32_t(-1);
38 
39  /**
40  * This class implements a token for the scanner.
41  */
42  class OCEAN_IO_EXPORT Token final
43  {
44  public:
45 
46  /**
47  * Definition of different token types.
48  */
enum Type : uint32_t
{
	/// Invalid token.
	TOKEN_INVALID = 0u,
	/// Character token.
	TOKEN_CHARACTER,
	/// End of file token.
	TOKEN_END_OF_FILE,
	/// Identifier token.
	TOKEN_IDENTIFIER,
	/// Line token.
	TOKEN_LINE,
	/// Number token.
	TOKEN_NUMBER,
	/// Integer token.
	TOKEN_INTEGER,
	/// Keyword token.
	TOKEN_KEYWORD,
	/// String token.
	TOKEN_STRING,
	/// Symbol token.
	TOKEN_SYMBOL
};
72 
73  public:
74 
75  /**
76  * Creates an invalid token.
77  */
78  Token() = default;
79 
80  /**
81  * Creates a new token with given data and type.
82  * @param data Token data
83  * @param type Token type
84  */
85  Token(std::string&& data, const Type type);
86 
87  /**
88  * Creates a new token with given data and type.
89  * @param data Token data
90  * @param type Token type
91  */
92  Token(const std::string& data, const Type type);
93 
94  /**
95  * Creates a new token with given keyword or symbol id.
96  * @param data Raw token data
97  * @param id Keyword or symbol id of the token
98  * @param type Token type
99  */
100  Token(std::string&& data, const uint32_t id, const Type type);
101 
102  /**
103  * Creates a new token with given keyword or symbol id.
104  * @param data Raw token data
105  * @param id Keyword or symbol id of the token
106  * @param type Token type
107  */
108  Token(const std::string& data, const uint32_t id, const Type type);
109 
110  /**
111  * Returns the type of the token.
112  * @return Token type
113  */
114  inline Type type() const;
115 
116  /**
117  * Returns whether this token is of a specific type.
118  * @param type The type to check
119  * @return True, if so
120  */
121  inline bool isType(const Type type) const;
122 
123  /**
124  * Returns whether this token holds a character.
125  * @return True, if so
126  */
127  inline bool isCharacter() const;
128 
129  /**
130  * Returns whether this token holds an identifier.
131  * @return True, if so
132  */
133  inline bool isIdentifier() const;
134 
135  /**
136  * Returns whether this token holds an integer.
137  * @return True, if so
138  */
139  inline bool isInteger() const;
140 
141  /**
142  * Returns whether this token holds a remaining line.
143  * @return True, if so
144  */
145  inline bool isLine() const;
146 
147  /**
148  * Returns whether this token holds a number.
149  * @return True, if so
150  */
151  inline bool isNumber() const;
152 
153  /**
154  * Returns whether this token holds an integer or a number.
155  * @return True, if so
156  */
157  inline bool isIntegerOrNumber() const;
158 
159  /**
160  * Returns whether this token holds a keyword.
161  * @return True, if so
162  */
163  inline bool isKeyword() const;
164 
165  /**
166  * Returns whether this token holds a special keyword.
167  * @param keyword The keyword id to check
168  * @return True, if so
169  */
170  bool isKeyword(const uint32_t keyword) const;
171 
172  /**
173  * Returns whether this token holds a string.
174  * @return True, if so
175  */
176  inline bool isString() const;
177 
178  /**
179  * Returns whether this token holds a symbol.
180  * @return True, if so
181  */
182  inline bool isSymbol() const;
183 
184  /**
185  * Returns whether this token holds a special symbol.
186  * @param symbol The symbol id to check
187  * @return True, if so
188  */
189  bool isSymbol(const uint32_t symbol) const;
190 
191  /**
192  * Returns whether this token holds an end of file.
193  * @return True, if so
194  */
195  inline bool isEndOfFile() const;
196 
197  /**
198  * Returns the character value of this token.
199  * @return Character value
200  */
201  uint8_t character() const;
202 
203  /**
204  * Returns the identifier value of this token.
205  * @return Identifier value
206  */
207  const std::string& identifier() const;
208 
209  /**
210  * Returns the integer value of this token.
211  * @return Integer value
212  */
213  int integer() const;
214 
215  /**
216  * Returns the float value of this token.
217  * @return Float value
218  */
219  Scalar number() const;
220 
221  /**
222  * Returns the integer or float value of this token.
223  * @return Integer or float value as float value
224  */
226 
227  /**
228  * Returns the remaining line of this token.
229  * @return Line value
230  */
231  const std::string& line() const;
232 
233  /**
234  * Returns the id of the keyword of this token.
235  * @return Keyword id
236  */
237  uint32_t keyword() const;
238 
239  /**
240  * Returns the id of the symbol of this token.
241  * @return Symbol id
242  */
243  uint32_t symbol() const;
244 
245  /**
246  * Returns the string value of this token.
247  * @return The token's string value
248  */
249  const std::string& string() const;
250 
251  /**
252  * Returns the string value of this token and invalidates the token.
253  * @return The token's string value
254  */
255  std::string moveString();
256 
257  /**
258  * Returns the raw data of the token.
259  * @return Raw data
260  */
261  inline const std::string& raw() const;
262 
263  /**
264  * Returns whether the token is valid.
265  * @return True, if so
266  */
267  explicit inline operator bool() const;
268 
269  protected:
270 
271  /// Holds the type of the token.
272  Type type_ = TOKEN_INVALID;
273 
274  /// Holds the id of the keyword or symbol, if any.
275  uint32_t id_ = invalidId;
276 
277  /// Holds the data of the token.
278  std::string data_;
279  };
280 
281  protected:
282 
283  /**
284  * Definition of an unordered map mapping strings to ids.
285  */
286  typedef std::unordered_map<std::string, uint32_t> IdMap;
287 
288  /**
289  * Definition of an unordered set holding line remark symbols.
290  */
291  typedef std::unordered_set<std::string> LineRemarks;
292 
293  /**
294  * Definition of an unordered map mapping begin remark symbols to end remark symbols.
295  */
296  typedef std::unordered_map<std::string, std::string> ScopeRemarks;
297 
298  /**
299  * Definition of a character table.
300  */
301  typedef std::array<uint16_t, 256> CharTable;
302 
303  /**
304  * Definition of first character types.
305  */
enum FirstChar : uint16_t
{
	/// No valid first character.
	CHAR_INVALID = 0,
	/// First character of a character token.
	CHAR_CHARACTER = 1 << 0,
	/// First character of an identifier.
	CHAR_IDENTIFIER = 1 << 1,
	/// First character of a number.
	CHAR_NUMBER = 1 << 2,
	/// First character of an integer.
	CHAR_INTEGER = 1 << 3,
	/// First character of a keyword.
	CHAR_KEYWORD = 1 << 4,
	/// First character of a string.
	CHAR_STRING = 1 << 5,
	/// First character of a symbol.
	CHAR_SYMBOL = 1 << 6,
	/// First character of a remark.
	CHAR_REMARK = 1 << 7,
	/// White space character.
	CHAR_SPACE = 1 << 8
};
329 
330  public:
331 
332  /**
333  * Creates a new scanner using a stream as input.
334  * @param stream The stream to be used as input
335  * @param progress Optional resulting scanner progress in percent, with range [0, 1]
336  * @param cancel Optional scanner cancel flag
337  */
338  explicit Scanner(const std::shared_ptr<std::istream>& stream, float* progress = nullptr, bool* cancel = nullptr);
339 
340  /**
341  * Creates a new scanner using a file or a memory buffer as input.
342  * @param filename The name of the file to be used as input, `buffer` must be empty
343  * @param buffer The buffer to be used as input, `filename` must be empty
344  * @param progress Optional resulting scanner progress in percent, with range [0, 1]
345  * @param cancel Optional scanner cancel flag
346  */
347  inline Scanner(const std::string& filename, const std::string& buffer, float* progress = nullptr, bool* cancel = nullptr);
348 
349  /**
350  * Creates a new scanner using a file or a memory buffer as input.
351  * @param filename The name of the file to be used as input, `buffer` must be empty
352  * @param buffer The buffer to be used as input, `filename` must be empty
353  * @param progress Optional resulting scanner progress in percent, with range [0, 1]
354  * @param cancel Optional scanner cancel flag
355  */
356  inline Scanner(const std::string& filename, std::string&& buffer, float* progress = nullptr, bool* cancel = nullptr);
357 
358  /**
359  * Destructs a scanner.
360  */
361  virtual ~Scanner();
362 
363  /**
364  * Returns the recent token.
365  * @return Recent token
366  */
367  const Token& token();
368 
369  /**
370  * Returns a line token starting at the current position.
371  * A line token does not handle remarks.
372  * @return Line token
373  */
374  const Token& lineToken();
375 
376  /**
377  * Return the recent token and pops it afterwards.
378  * @return Recent token.
379  */
381 
382  /**
383  * Returns a lookout to the next token.
384  * @return Next token
385  */
386  const Token& look();
387 
388  /**
389  * Pops the recent token.
390  */
391  void pop();
392 
393  /**
394  * Returns the recent line.
395  * @return Recent line
396  */
397  inline size_t line() const;
398 
399  /**
400  * Returns the recent column.
401  * @return Recent column
402  */
403  inline size_t column() const;
404 
405  /**
406  * Returns the position of the scanner.
407  * @return Position of the scanner in characters
408  */
409  size_t position() const;
410 
411  /**
412  * Returns the size of the scanner.
413  * @return Size of the scanner in characters
414  */
415  size_t size() const;
416 
417  /**
418  * Returns the name of the input file, if the input is a file.
419  * @return The scanner's input filename, empty if the input was a buffer
420  */
421  inline const std::string& filename() const;
422 
423  /**
424  * Returns whether the scanner is valid and ready to use.
425  * @return True, if so
426  */
427  inline bool isValid() const;
428 
429  /**
430  * Finds the next token in a given string starting from a specified position.
431  * A token is enclosed by white characters or by the borders of the given string, the length of the given string is explicitly defined by the parameter 'size'.
432  * @param pointer The pointer to the string in which the next token is to be found, must be valid
433  * @param size The length of the given string in characters, with range [1, infinity)
434  * @param start The first character within the given string that defines the first possible character of the token, with range [0, size - 1]
435  * @param tokenStart The resulting start location within the given string of the found token, with range [start, strlen(pointer) - 1]
436  * @param tokenLength The resulting length of the found token, with range [1, strlen(pointer) - start]
437  * @return True, if a second token may follow; False, if the token is the last token
438  */
439  static bool findNextToken(const char* pointer, const size_t size, const size_t start, size_t& tokenStart, size_t& tokenLength);
440 
441  /**
442  * Finds the next token in a given string starting from a specified position.
443  * A token is enclosed by white characters or by the borders of the given string, the end is identified by a null character.
444  * @param pointer The pointer to the string in which the next token is to be found, can be nullptr
445  * @param start The first character within the given string that defines the first possible character of the token, with range [0, strlen(pointer)]
446  * @param tokenStart The resulting start location within the given string of the found token, with range [start, strlen(pointer) - 1]
447  * @param tokenLength The resulting length of the found token, with range [1, strlen(pointer) - start]
448  * @return True, if a second token may follow; False, if the token is the last token
449  */
450  static bool findNextToken(const char* pointer, const size_t start, size_t& tokenStart, size_t& tokenLength);
451 
452  /**
453  * Returns whether a given character is a white space character.
454  * A white space character can be one of the following:
455  * <pre>
456  * ' ', '\\t', '\\n', or '\\r'
457  * </pre>
458  * @param character The character to be checked
459  * @return True, if so
460  */
461  static inline bool isWhitespace(const char& character);
462 
463  protected:
464 
465  /**
466  * Creates a new scanner.
467  * The scanner may forward an entire progress state, if the pointer value is defined.<br>
468  * Beware: Make sure that the value exists during the whole scanning time!<br>
469  * Further, the scanner may be canceled by an explicit flag.<br>
470  * In the case the scanner is canceled an end of file token is returned.<br>
471  * Beware: As for the progress value, also the cancel object must exist during the whole scanning progress, if provided
472  * @param progress Optional progress parameter to forward the scanning progress with range [0, 1], use nullptr if the progress state is not necessary
473  * @param cancel Optional cancel state to cancel the scanner progress by setting the flag to 'true', use nullptr if the cancel state is not necessary
474  */
475  Scanner(float* progress, bool* cancel);
476 
477  /**
478  * Returns one character.
479  * @param offset Offset to the recent position
480  * @return Character
481  */
482  uint8_t get(const size_t offset = 0);
483 
484  /**
485  * Returns data of a specified size starting at the recent position.
486  * Beware: Make sure that enough pending buffer is available
487  * @param size Size of the data to receive
488  * @return Requested data
489  */
490  std::string data(const size_t size) const;
491 
492  /**
493  * Returns data of a specified size starting at the offset position.
494  * Beware: Make sure that enough pending buffer is available
495  * @param offset Start position relative to the current position
496  * @param size Size of the data to receive
497  * @return Requested data
498  */
499  std::string data(const size_t offset, const size_t size) const;
500 
501  /**
502  * Consumes one or more characters.
503  * @param chars Number of characters to consume
504  */
505  void consume(const size_t chars = 1);
506 
507  /**
508  * Refills the intermediate buffer.
509  * @return True, if the buffer could be refilled with new characters
510  */
512 
513  /**
514  * Returns the keyword id of a given string.
515  * @param data Data to convert to a keyword
516  * @return Id of the identifier, otherwise invalidId
517  */
518  uint32_t keywordId(const std::string& data) const;
519 
520  /**
521  * Returns the symbol id of a given string.
522  * @param data Data to convert to a symbol
523  * @return Id of the symbol, otherwise invalidId
524  */
525  uint32_t symbolId(const std::string& data) const;
526 
527  /**
528  * Sets whether the keywords are case sensitive or not.
529  * As default all keywords are case sensitive.<br>
530  * Beware: This property has to be set before the first keyword is registered!
531  * @param caseSensitive True, if all keywords will be case sensitive
532  */
533  void setKeywordProperty(const bool caseSensitive);
534 
535  /**
536  * Registers a new keyword.
537  * @param keyword New keyword
538  * @param id Id of the keyword
539  */
540  void registerKeyword(const std::string& keyword, const uint32_t id);
541 
542  /**
543  * Registers a new symbol.
544  * @param symbol New symbol
545  * @param id Id of the symbol
546  */
547  void registerSymbol(const std::string& symbol, const uint32_t id);
548 
549  /**
550  * Registers a line remark symbol.
551  * @param lineRemark Line remark symbol
552  */
553  void registerLineRemark(const std::string& lineRemark);
554 
555  /**
556  * Registers a scope remark symbol.
557  * @param begin Begin remark symbol
558  * @param end End remark symbol
559  */
560  void registerScopeRemark(const std::string& begin, const std::string& end);
561 
562  /**
563  * Registers a white space character.
564  * @param character White space character to register
565  * @return True, if succeeded
566  */
567  bool registerWhiteSpaceCharacter(const uint8_t character);
568 
569  /**
570  * Reads and returns the next token.
571  * @param consumeBytes Determines whether the scanner consumes the read characters
572  * @return New token
573  */
574  virtual Token readToken(const bool consumeBytes = true);
575 
576  /**
577  * Reads white space.
578  * @param crossLines Determines whether the white space can be separated over several lines
579  * @return Next not-white-space character
580  */
581  uint8_t readWhiteSpace(bool crossLines = true);
582 
583  /**
584  * Discards non white space and jumps to the first white space position.
585  * @return Discarded elements
586  */
587  std::string discardNonWhiteSpace();
588 
589  /**
590  * Reads remark comments.
591  * @return True, if a comment was read
592  */
593  bool readRemark();
594 
595  /**
596  * Reads a line remark comment.
597  * @return True, if a comment was read
598  */
600 
/**
 * Reads a scope remark comment.
 * @return True, if a comment was read
 */
bool readScopeRemark();
606 
607  /**
608  * Tries to read a character as next token.
609  * @param token Returning token
610  * @param consumeBytes Determines whether the scanner consumes the read characters
611  * @return True, if succeeded
612  */
613  bool readCharacter(Token& token, const bool consumeBytes);
614 
615  /**
616  * Tries to read an identifier as next token.
617  * @param token Returning token
618  * @param consumeBytes Determines whether the scanner consumes the read characters
619  * @return True, if succeeded
620  */
621  bool readIdentifier(Token& token, const bool consumeBytes);
622 
623  /**
624  * Tries to read an integer as next token.
625  * @param token Returning token
626  * @param consumeBytes Determines whether the scanner consumes the read characters
627  * @return True, if succeeded
628  */
629  bool readInteger(Token& token, const bool consumeBytes);
630 
631  /**
632  * Tries to read a keyword as next token.
633  * @param token Returning token
634  * @param consumeBytes Determines whether the scanner consumes the read characters
635  * @return True, if succeeded
636  */
637  bool readKeyword(Token& token, const bool consumeBytes);
638 
639  /**
640  * Tries to read a remaining line as next token.
641  * @param token Returning token
642  * @param consumeBytes Determines whether the scanner consumes the read characters
643  * @return True, if succeeded
644  */
645  bool readLine(Token& token, const bool consumeBytes);
646 
647  /**
648  * Tries to read a number as next token.
649  * @param token Returning token
650  * @param consumeBytes Determines whether the scanner consumes the read characters
651  * @return True, if succeeded
652  */
653  bool readNumber(Token& token, const bool consumeBytes);
654 
655  /**
656  * Tries to read a string as next token.
657  * @param token Returning token
658  * @param consumeBytes Determines whether the scanner consumes the read characters
659  * @return True, if succeeded
660  */
661  bool readString(Token& token, const bool consumeBytes);
662 
663  /**
664  * Tries to read a symbol as next token.
665  * @param token Returning token
666  * @param consumeBytes Determines whether the scanner consumes the read characters
667  * @return True, if succeeded
668  */
669  bool readSymbol(Token& token, const bool consumeBytes);
670 
671  private:
672 
673  /**
674  * Returns one character from the extra buffer.
675  * @param offset Offset inside the recent extra buffer
676  * @return Character
677  */
678  uint8_t getExtra(const size_t offset = 0);
679 
680  /**
681  * Refills the extra buffer.
682  * @param minIndex Minimal index of the character needed inside the extra buffer
683  * @return True, if enough characters could be read
684  */
685  bool refillExtraBuffer(const size_t minIndex);
686 
687  /**
688  * Creates a file input stream or a string input stream depending on the given input.
689  * @param filename The name of the file to be used as input, `buffer` must be empty
690  * @param buffer The buffer to be used as input, `filename` must be empty
691  */
692  static inline std::shared_ptr<std::istream> createInputStream(const std::string& filename, std::string&& buffer);
693 
694  /**
695  * Creates a file input stream or a string input stream depending on the given input.
696  * @param filename The name of the file to be used as input, `buffer` must be empty
697  * @param buffer The buffer to be used as input, `filename` must be empty
698  */
699  static inline std::shared_ptr<std::istream> createInputStream(const std::string& filename, const std::string& buffer);
700 
701  protected:
702 
703  /// Recent token.
705 
706  /// Next token.
708 
709  /// The input stream from which the scanner receives the data.
710  std::shared_ptr<std::istream> stream_;
711 
712  /// The name of the input file, if the input is a file.
713  std::string filename_;
714 
715  /// The scanner's progress in percent, with range [0, 1].
716  float* progress_ = nullptr;
717 
718  /// Cancel flag.
719  bool* cancel_ = nullptr;
720 
721  /// Local intermediate buffer.
723 
724  /// The current pointer inside the intermediate buffer.
725  uint8_t* intermediateBufferPointer_ = nullptr;
726 
727  /// Number of remaining characters in the intermediate buffer.
728  size_t intermediateBufferSize_ = 0;
729 
730  /// Local extra buffer, used if the intermediate buffer is too small.
732 
733  /// Pointer inside the extra buffer.
734  uint8_t* extraBufferPointer_ = nullptr;
735 
736  /// Number of remaining characters inside the extra buffer.
737  size_t extraBufferSize_ = 0;
738 
739  /// Holds the current line.
740  size_t line_ = 1;
741 
742  /// Holds the current column.
743  size_t column_ = 1;
744 
745  /// Holds the current position of the scanner.
746  size_t position_ = 0;
747 
748  /// Map mapping keyword strings to identifier ids.
750 
751  /// Determines whether all keywords are case sensitive.
752  bool keywordsAreCaseSensitive_ = true;
753 
754  /// Map mapping symbol strings to symbol ids.
756 
757  /// Registered line remarks.
759 
760  /// Length of the maximal line remark.
761  size_t maximalLengthLineRemarks_ = 0;
762 
763  /// Scope remarks.
765 
766  /// Length of the maximal scope remarks.
767  size_t maximalLengthScopeRemarks_ = 0;
768 
769  /// Table holding the definition of allowed first characters.
771 
772  /// Table holding the definition of allowed following characters.
774 
775  /// Table holding the definition of not allowed following characters.
777 
778  /// Definition of the minimum intermediate buffer size.
779  static constexpr size_t minBufferSize_ = 2048;
780 
781  /// Definition of the maximum intermediate buffer size.
782  static constexpr size_t maxBufferSize_ = 8192;
783 };
784 
786 {
787  return type_;
788 }
789 
790 inline bool Scanner::Token::isType(const Type type) const
791 {
792  return type_ == type;
793 }
794 
795 inline bool Scanner::Token::isCharacter() const
796 {
797  return type_ == TOKEN_CHARACTER;
798 }
799 
800 inline bool Scanner::Token::isIdentifier() const
801 {
802  return type_ == TOKEN_IDENTIFIER;
803 }
804 
805 inline bool Scanner::Token::isInteger() const
806 {
807  return type_ == TOKEN_INTEGER;
808 }
809 
810 inline bool Scanner::Token::isNumber() const
811 {
812  return type_ == TOKEN_NUMBER;
813 }
814 
816 {
817  return type_ == TOKEN_INTEGER || type_ == TOKEN_NUMBER;
818 }
819 
820 inline bool Scanner::Token::isLine() const
821 {
822  return type_ == TOKEN_LINE;
823 }
824 
825 inline bool Scanner::Token::isKeyword() const
826 {
827  return type_ == TOKEN_KEYWORD;
828 }
829 
830 inline bool Scanner::Token::isString() const
831 {
832  return type_ == TOKEN_STRING;
833 }
834 
835 inline bool Scanner::Token::isSymbol() const
836 {
837  return type_ == TOKEN_SYMBOL;
838 }
839 
840 inline bool Scanner::Token::isEndOfFile() const
841 {
842  return type_ == TOKEN_END_OF_FILE;
843 }
844 
845 inline const std::string& Scanner::Token::raw() const
846 {
847  return data_;
848 }
849 
850 inline Scanner::Token::operator bool() const
851 {
852  return type_ != TOKEN_INVALID;
853 }
854 
855 inline Scanner::Scanner(const std::string& filename, const std::string& buffer, float* progress, bool* cancel) :
856  Scanner(createInputStream(filename, buffer), progress, cancel)
857 {
858  ocean_assert(!filename.empty() || !buffer.empty());
859 
860  if (!filename.empty() && stream_)
861  {
863  }
864 }
865 
866 inline Scanner::Scanner(const std::string& filename, std::string&& buffer, float* progress, bool* cancel) :
867  Scanner(createInputStream(filename, std::move(buffer)), progress, cancel)
868 {
869  ocean_assert(!filename.empty() || stream_);
870 
871  if (!filename.empty() && stream_)
872  {
874  }
875 }
876 
877 size_t Scanner::line() const
878 {
879  return line_;
880 }
881 
882 size_t Scanner::column() const
883 {
884  return column_;
885 }
886 
887 inline const std::string& Scanner::filename() const
888 {
889  return filename_;
890 }
891 
892 inline bool Scanner::isValid() const
893 {
894  return bool(stream_);
895 }
896 
897 inline bool Scanner::isWhitespace(const char& character)
898 {
899  return character == ' ' || character== '\t' || character == '\n' || character == '\r';
900 }
901 
902 inline std::shared_ptr<std::istream> Scanner::createInputStream(const std::string& filename, std::string&& buffer)
903 {
904  ocean_assert(!filename.empty() || !buffer.empty());
905 
906  if (!filename.empty())
907  {
908  return std::shared_ptr<std::istream>(new std::ifstream(filename.c_str(), std::ios_base::binary));
909  }
910 
911  return std::shared_ptr<std::istream>(new std::istringstream(std::move(buffer)));
912 }
913 
914 inline std::shared_ptr<std::istream> Scanner::createInputStream(const std::string& filename, const std::string& buffer)
915 {
916  ocean_assert(!filename.empty() || !buffer.empty());
917 
918  if (!filename.empty())
919  {
920  return std::shared_ptr<std::istream>(new std::ifstream(filename.c_str(), std::ios_base::binary));
921  }
922 
923  return std::shared_ptr<std::istream>(new std::istringstream(buffer));
924 }
925 
926 }
927 
928 }
929 
930 #endif // META_OCEAN_IO_SCANNER_H
This class implements a token for the scanner.
Definition: Scanner.h:43
const std::string & string() const
Returns the string value of this token.
Token(std::string &&data, const Type type)
Creates a new token with given data and type.
bool isKeyword() const
Returns whether this token holds a keyword.
Definition: Scanner.h:825
uint32_t keyword() const
Returns the id of the keyword of this token.
std::string data_
Holds the data of the token.
Definition: Scanner.h:278
bool isSymbol(const uint32_t symbol) const
Returns whether this token holds a special symbol.
Type
Definition of different token types.
Definition: Scanner.h:50
@ TOKEN_END_OF_FILE
End of file token.
Definition: Scanner.h:56
@ TOKEN_INTEGER
Integer token.
Definition: Scanner.h:64
@ TOKEN_KEYWORD
Keyword token.
Definition: Scanner.h:66
@ TOKEN_LINE
Line token.
Definition: Scanner.h:60
@ TOKEN_IDENTIFIER
Identifier token.
Definition: Scanner.h:58
@ TOKEN_NUMBER
Number token.
Definition: Scanner.h:62
@ TOKEN_CHARACTER
Character token.
Definition: Scanner.h:54
@ TOKEN_STRING
String token.
Definition: Scanner.h:68
bool isKeyword(const uint32_t keyword) const
Returns whether this token holds a special keyword.
bool isType(const Type type) const
Returns whether this token is of a specific type.
Definition: Scanner.h:790
bool isLine() const
Returns whether this token holds a remaining line.
Definition: Scanner.h:820
bool isSymbol() const
Returns whether this token holds a symbol.
Definition: Scanner.h:835
const std::string & raw() const
Returns the raw data of the token.
Definition: Scanner.h:845
bool isCharacter() const
Returns whether this token holds a character.
Definition: Scanner.h:795
bool isString() const
Returns whether this token holds a string.
Definition: Scanner.h:830
uint32_t symbol() const
Returns the id of the symbol of this token.
std::string moveString()
Returns the string value of this token and invalidates the token.
bool isNumber() const
Returns whether this token holds a number.
Definition: Scanner.h:810
Type type() const
Returns the type of the token.
Definition: Scanner.h:785
Scalar number() const
Returns the float value of this token.
bool isEndOfFile() const
Returns whether this token holds a end of file.
Definition: Scanner.h:840
bool isIdentifier() const
Returns whether this token holds an identifier.
Definition: Scanner.h:800
bool isIntegerOrNumber() const
Returns whether this token holds an integer or a number.
Definition: Scanner.h:815
Type type_
Holds the type of the token.
Definition: Scanner.h:272
const std::string & line() const
Returns the remaining line of this token.
Token(const std::string &data, const uint32_t id, const Type type)
Creates a new token with given keyword or symbol id.
Token()=default
Creates an invalid token.
uint8_t character() const
Returns the character value of this token.
Token(std::string &&data, const uint32_t id, const Type type)
Creates a new token with given keyword or symbol id.
const std::string & identifier() const
Returns the identifier value of this token.
Token(const std::string &data, const Type type)
Creates a new token with given data and type.
Scalar integerOrNumber() const
Returns the integer or float value of this token.
int integer() const
Returns the integer value of this token.
bool isInteger() const
Returns whether this token holds an integer.
Definition: Scanner.h:805
This class implements a simple scanner.
Definition: Scanner.h:31
std::unordered_set< std::string > LineRemarks
Definition of an unordered set holding line remark symbols.
Definition: Scanner.h:291
std::string filename_
The name of the input file, if the input is a file.
Definition: Scanner.h:713
std::string discardNonWhiteSpace()
Discards non white space and jumps to the first white space position.
CharTable followingCharTable_
Table holding the definition of allowed following characters.
Definition: Scanner.h:773
bool readKeyword(Token &token, const bool consumeBytes)
Tries to read a keyword as next token.
size_t column_
Holds the current column.
Definition: Scanner.h:743
size_t column() const
Returns the recent column.
Definition: Scanner.h:882
Token recentToken_
Recent token.
Definition: Scanner.h:704
uint32_t symbolId(const std::string &data) const
Returns the symbol id of a given string.
uint8_t get(const size_t offset=0)
Returns one character.
CharTable invalidCharTable_
Table holding the definition of not allowed following characters.
Definition: Scanner.h:776
bool readInteger(Token &token, const bool consumeBytes)
Tries to read an integer as next token.
IdMap symbolMap_
Map mapping symbol strings to symbol ids.
Definition: Scanner.h:755
static std::shared_ptr< std::istream > createInputStream(const std::string &filename, std::string &&buffer)
Creates a file input stream or a string input stream depending on the given input.
Definition: Scanner.h:902
static bool findNextToken(const char *pointer, const size_t start, size_t &tokenStart, size_t &tokenLength)
Finds the next token in a given string starting from a specified position.
bool readIdentifier(Token &token, const bool consumeBytes)
Tries to read a identifier as next token.
bool isValid() const
Returns whether the scanner is valid and ready to use.
Definition: Scanner.h:892
size_t position() const
Returns the position of the scanner.
uint32_t keywordId(const std::string &data) const
Returns the keyword id of a given string.
CharTable firstCharTable_
Table holding the definition of allowed first characters.
Definition: Scanner.h:770
uint8_t getExtra(const size_t offset=0)
Returns one character from the extra buffer.
FirstChar
Definition of first character types.
Definition: Scanner.h:307
void consume(const size_t chars=1)
Consumes one or more characters.
bool readCharacter(Token &token, const bool consumeBytes)
Tries to read a character as next token.
std::shared_ptr< std::istream > stream_
The input stream from which the scanner receives the data.
Definition: Scanner.h:710
std::unordered_map< std::string, uint32_t > IdMap
Definition of an unordered map mapping strings to ids.
Definition: Scanner.h:286
void registerKeyword(const std::string &keyword, const uint32_t id)
Registers a new keyword.
bool readScopeRemark()
Reads a scope remark comment.
std::string data(const size_t offset, const size_t size) const
Returns data of a specified size starting at the offset position.
bool readRemark()
Reads remark comments.
void registerLineRemark(const std::string &lineRemark)
Registers a line remark symbol.
Scanner(float *progress, bool *cancel)
Creates a new scanner.
Token nextToken_
Next token.
Definition: Scanner.h:707
Token tokenPop()
Returns the recent token and pops it afterwards.
void setKeywordProperty(const bool caseSensitive)
Sets whether the keywords are case sensitive or not.
const Token & lineToken()
Returns a line token starting at the current position.
bool refillExtraBuffer(const size_t minIndex)
Refills the extra buffer.
std::string data(const size_t size) const
Returns data of a specified size starting at the recent position.
size_t line() const
Returns the recent line.
Definition: Scanner.h:877
virtual ~Scanner()
Destructs a scanner.
const Token & token()
Returns the recent token.
bool readLine(Token &token, const bool consumeBytes)
Tries to read a remaining line as next token.
void registerSymbol(const std::string &symbol, const uint32_t id)
Registers a new symbol.
static bool isWhitespace(const char &character)
Returns whether a given character is a white space character.
Definition: Scanner.h:897
uint8_t readWhiteSpace(bool crossLines=true)
Reads white space.
IdMap keywordMap_
Map mapping keyword strings to keyword ids.
Definition: Scanner.h:749
size_t line_
Holds the current line.
Definition: Scanner.h:740
const std::string & filename() const
Returns the name of the input file, if the input is a file.
Definition: Scanner.h:887
bool readNumber(Token &token, const bool consumeBytes)
Tries to read a number as next token.
ScopeRemarks scopeRemarks_
Scope remarks.
Definition: Scanner.h:764
void pop()
Pops the recent token.
virtual Token readToken(const bool consumeBytes=true)
Reads and returns the next token.
std::array< uint16_t, 256 > CharTable
Definition of a character table.
Definition: Scanner.h:301
std::unordered_map< std::string, std::string > ScopeRemarks
Definition of an unordered map mapping begin remark symbols to end remark symbols.
Definition: Scanner.h:296
bool refillIntermediateBuffer()
Refills the intermediate buffer.
void registerScopeRemark(const std::string &begin, const std::string &end)
Registers a scope remark symbol.
size_t size() const
Returns the size of the scanner.
static bool findNextToken(const char *pointer, const size_t size, const size_t start, size_t &tokenStart, size_t &tokenLength)
Finds the next token in a given string starting from a specified position.
const Token & look()
Returns a look-ahead to the next token.
bool readLineRemark()
Reads a line remark comment.
Memory intermediateBuffer_
Local intermediate buffer.
Definition: Scanner.h:722
Scanner(const std::shared_ptr< std::istream > &stream, float *progress=nullptr, bool *cancel=nullptr)
Creates a new scanner using a stream as input.
bool registerWhiteSpaceCharacter(const uint8_t character)
Registers a white space character.
bool readSymbol(Token &token, const bool consumeBytes)
Tries to read a symbol as next token.
bool readString(Token &token, const bool consumeBytes)
Tries to read a string as next token.
Memory extraBuffer_
Local extra buffer, used if the intermediate buffer is too small.
Definition: Scanner.h:731
LineRemarks lineRemarks_
Registered line remarks.
Definition: Scanner.h:758
This class implements an object able to allocate memory.
Definition: base/Memory.h:22
float Scalar
Definition of a scalar type.
Definition: Math.h:128
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15