8#ifndef META_OCEAN_IO_SCANNER_H
9#define META_OCEAN_IO_SCANNER_H
37 static constexpr uint32_t invalidId = uint32_t(-1);
42 class OCEAN_IO_EXPORT
Token final
100 Token(std::string&& data,
const uint32_t
id,
const Type type);
108 Token(
const std::string& data,
const uint32_t
id,
const Type type);
114 inline Type type()
const;
121 inline bool isType(
const Type type)
const;
127 inline bool isCharacter()
const;
133 inline bool isIdentifier()
const;
139 inline bool isInteger()
const;
145 inline bool isLine()
const;
151 inline bool isNumber()
const;
157 inline bool isIntegerOrNumber()
const;
163 inline bool isKeyword()
const;
176 inline bool isString()
const;
182 inline bool isSymbol()
const;
195 inline bool isEndOfFile()
const;
231 const std::string&
line()
const;
261 inline const std::string& raw()
const;
267 explicit inline operator bool()
const;
275 uint32_t id_ = invalidId;
286 typedef std::unordered_map<std::string, uint32_t>
IdMap;
338 explicit Scanner(
const std::shared_ptr<std::istream>& stream,
float* progress =
nullptr,
bool* cancel =
nullptr);
347 inline Scanner(
const std::string& filename,
const std::string& buffer,
float* progress =
nullptr,
bool* cancel =
nullptr);
356 inline Scanner(
const std::string& filename, std::string&& buffer,
float* progress =
nullptr,
bool* cancel =
nullptr);
397 inline size_t line()
const;
403 inline size_t column()
const;
421 inline const std::string& filename()
const;
427 inline bool isValid()
const;
439 static bool findNextToken(
const char* pointer,
const size_t size,
const size_t start,
size_t& tokenStart,
size_t& tokenLength);
450 static bool findNextToken(
const char* pointer,
const size_t start,
size_t& tokenStart,
size_t& tokenLength);
461 static inline bool isWhitespace(
const char& character);
482 uint8_t
get(
const size_t offset = 0);
490 std::string
data(
const size_t size)
const;
499 std::string
data(
const size_t offset,
const size_t size)
const;
692 static inline std::shared_ptr<std::istream> createInputStream(
const std::string& filename, std::string&& buffer);
699 static inline std::shared_ptr<std::istream> createInputStream(
const std::string& filename,
const std::string& buffer);
716 float* progress_ =
nullptr;
719 bool* cancel_ =
nullptr;
725 uint8_t* intermediateBufferPointer_ =
nullptr;
728 size_t intermediateBufferSize_ = 0;
734 uint8_t* extraBufferPointer_ =
nullptr;
737 size_t extraBufferSize_ = 0;
746 size_t position_ = 0;
752 bool keywordsAreCaseSensitive_ =
true;
761 size_t maximalLengthLineRemarks_ = 0;
767 size_t maximalLengthScopeRemarks_ = 0;
779 static constexpr size_t minBufferSize_ = 2048;
782 static constexpr size_t maxBufferSize_ = 8192;
792 return type_ == type;
797 return type_ == TOKEN_CHARACTER;
802 return type_ == TOKEN_IDENTIFIER;
807 return type_ == TOKEN_INTEGER;
812 return type_ == TOKEN_NUMBER;
817 return type_ == TOKEN_INTEGER || type_ == TOKEN_NUMBER;
822 return type_ == TOKEN_LINE;
827 return type_ == TOKEN_KEYWORD;
832 return type_ == TOKEN_STRING;
837 return type_ == TOKEN_SYMBOL;
842 return type_ == TOKEN_END_OF_FILE;
850inline Scanner::Token::operator bool()
const
852 return type_ != TOKEN_INVALID;
858 ocean_assert(!
filename.empty() || !buffer.empty());
866inline Scanner::Scanner(
const std::string& filename, std::string&& buffer,
float* progress,
bool* cancel) :
867 Scanner(createInputStream(filename, std::move(buffer)), progress, cancel)
899 return character ==
' ' || character==
'\t' || character ==
'\n' || character ==
'\r';
904 ocean_assert(!
filename.empty() || !buffer.empty());
908 return std::shared_ptr<std::istream>(
new std::ifstream(
filename.c_str(), std::ios_base::binary));
911 return std::shared_ptr<std::istream>(
new std::istringstream(std::move(buffer)));
916 ocean_assert(!
filename.empty() || !buffer.empty());
920 return std::shared_ptr<std::istream>(
new std::ifstream(
filename.c_str(), std::ios_base::binary));
923 return std::shared_ptr<std::istream>(
new std::istringstream(buffer));
This class implements a token for the scanner.
Definition Scanner.h:43
Token(std::string &&data, const Type type)
Creates a new token with given data and type.
bool isKeyword() const
Returns whether this token holds a keyword.
Definition Scanner.h:825
uint32_t keyword() const
Returns the id of the keyword of this token.
std::string data_
Holds the data of the token.
Definition Scanner.h:278
bool isSymbol(const uint32_t symbol) const
Returns whether this token holds a special symbol.
Type
Definition of different token types.
Definition Scanner.h:50
@ TOKEN_END_OF_FILE
End of file token.
Definition Scanner.h:56
@ TOKEN_INTEGER
Integer token.
Definition Scanner.h:64
@ TOKEN_KEYWORD
Keyword token.
Definition Scanner.h:66
@ TOKEN_LINE
Line token.
Definition Scanner.h:60
@ TOKEN_IDENTIFIER
Identifier token.
Definition Scanner.h:58
@ TOKEN_NUMBER
Number token.
Definition Scanner.h:62
@ TOKEN_CHARACTER
Character token.
Definition Scanner.h:54
@ TOKEN_STRING
String token.
Definition Scanner.h:68
bool isKeyword(const uint32_t keyword) const
Returns whether this token holds a special keyword.
bool isType(const Type type) const
Returns whether this token is of a specific type.
Definition Scanner.h:790
bool isLine() const
Returns whether this token holds a remaining line.
Definition Scanner.h:820
bool isSymbol() const
Returns whether this token holds a symbol.
Definition Scanner.h:835
const std::string & raw() const
Returns the raw data of the token.
Definition Scanner.h:845
bool isCharacter() const
Returns whether this token holds a character.
Definition Scanner.h:795
bool isString() const
Returns whether this token holds a string.
Definition Scanner.h:830
uint32_t symbol() const
Returns the id of the symbol of this token.
std::string moveString()
Returns the string value of this token and invalidates the token.
bool isNumber() const
Returns whether this token holds a number.
Definition Scanner.h:810
Type type() const
Returns the type of the token.
Definition Scanner.h:785
Scalar number() const
Returns the float value of this token.
bool isEndOfFile() const
Returns whether this token holds an end of file.
Definition Scanner.h:840
const std::string & string() const
Returns the string value of this token.
const std::string & line() const
Returns the remaining line of this token.
bool isIdentifier() const
Returns whether this token holds an identifier.
Definition Scanner.h:800
bool isIntegerOrNumber() const
Returns whether this token holds an integer or a number.
Definition Scanner.h:815
Type type_
Holds the type of the token.
Definition Scanner.h:272
Token(const std::string &data, const uint32_t id, const Type type)
Creates a new token with given keyword or symbol id.
Token()=default
Creates an invalid token.
uint8_t character() const
Returns the character value of this token.
Token(std::string &&data, const uint32_t id, const Type type)
Creates a new token with given keyword or symbol id.
Token(const std::string &data, const Type type)
Creates a new token with given data and type.
const std::string & identifier() const
Returns the identifier value of this token.
Scalar integerOrNumber() const
Returns the integer or float value of this token.
int integer() const
Returns the integer value of this token.
bool isInteger() const
Returns whether this token holds an integer.
Definition Scanner.h:805
This class implements a simple scanner.
Definition Scanner.h:31
const Token & lineToken()
Returns a line token starting at the current position.
std::unordered_set< std::string > LineRemarks
Definition of an unordered set holding line remark symbols.
Definition Scanner.h:291
std::string filename_
The name of the input file, if the input is a file.
Definition Scanner.h:713
std::string discardNonWhiteSpace()
Discards non white space and jumps to the first white space position.
CharTable followingCharTable_
Table holding the definition of allowed following characters.
Definition Scanner.h:773
bool readKeyword(Token &token, const bool consumeBytes)
Tries to read a keyword as next token.
size_t column_
Holds the current column.
Definition Scanner.h:743
size_t column() const
Returns the recent column.
Definition Scanner.h:882
Token recentToken_
Recent token.
Definition Scanner.h:704
uint32_t symbolId(const std::string &data) const
Returns the symbol id of a given string.
uint8_t get(const size_t offset=0)
Returns one character.
CharTable invalidCharTable_
Table holding the definition of not allowed following characters.
Definition Scanner.h:776
bool readInteger(Token &token, const bool consumeBytes)
Tries to read an integer as next token.
IdMap symbolMap_
Map mapping symbol strings to symbol ids.
Definition Scanner.h:755
static std::shared_ptr< std::istream > createInputStream(const std::string &filename, std::string &&buffer)
Creates a file input stream or a string input stream depending on the given input.
Definition Scanner.h:902
static bool findNextToken(const char *pointer, const size_t start, size_t &tokenStart, size_t &tokenLength)
Finds the next token in a given string starting from a specified position.
bool readIdentifier(Token &token, const bool consumeBytes)
Tries to read an identifier as next token.
bool isValid() const
Returns whether the scanner is valid and ready to use.
Definition Scanner.h:892
size_t position() const
Returns the position of the scanner.
uint32_t keywordId(const std::string &data) const
Returns the keyword id of a given string.
CharTable firstCharTable_
Table holding the definition of allowed first characters.
Definition Scanner.h:770
uint8_t getExtra(const size_t offset=0)
Returns one character from the extra buffer.
FirstChar
Definition of first character types.
Definition Scanner.h:307
void consume(const size_t chars=1)
Consumes one or more characters.
bool readCharacter(Token &token, const bool consumeBytes)
Tries to read a character as next token.
std::shared_ptr< std::istream > stream_
The input stream from which the scanner receives the data.
Definition Scanner.h:710
std::unordered_map< std::string, uint32_t > IdMap
Definition of an unordered map mapping strings to ids.
Definition Scanner.h:286
void registerKeyword(const std::string &keyword, const uint32_t id)
Registers a new keyword.
bool readScopeRemark()
Reads a scope remark comment.
std::string data(const size_t offset, const size_t size) const
Returns data of a specified size starting at the offset position.
bool readRemark()
Reads remark comments.
void registerLineRemark(const std::string &lineRemark)
Registers a line remark symbol.
Scanner(float *progress, bool *cancel)
Creates a new scanner.
Token nextToken_
Next token.
Definition Scanner.h:707
const Token & token()
Returns the recent token.
Token tokenPop()
Returns the recent token and pops it afterwards.
void setKeywordProperty(const bool caseSensitive)
Sets whether the keywords are case sensitive or not.
bool refillExtraBuffer(const size_t minIndex)
Refills the extra buffer.
std::string data(const size_t size) const
Returns data of a specified size starting at the recent position.
size_t line() const
Returns the recent line.
Definition Scanner.h:877
virtual ~Scanner()
Destructs a scanner.
bool readLine(Token &token, const bool consumeBytes)
Tries to read a remaining line as next token.
void registerSymbol(const std::string &symbol, const uint32_t id)
Registers a new symbol.
static bool isWhitespace(const char &character)
Returns whether a given character is a white space character.
Definition Scanner.h:897
uint8_t readWhiteSpace(bool crossLines=true)
Reads white space.
IdMap keywordMap_
Map mapping keyword strings to identifier ids.
Definition Scanner.h:749
size_t line_
Holds the current line.
Definition Scanner.h:740
const std::string & filename() const
Returns the name of the input file, if the input is a file.
Definition Scanner.h:887
bool readNumber(Token &token, const bool consumeBytes)
Tries to read a number as next token.
ScopeRemarks scopeRemarks_
Scope remarks.
Definition Scanner.h:764
void pop()
Pops the recent token.
virtual Token readToken(const bool consumeBytes=true)
Reads and returns the next token.
std::array< uint16_t, 256 > CharTable
Definition of a character table.
Definition Scanner.h:301
std::unordered_map< std::string, std::string > ScopeRemarks
Definition of an unordered map mapping begin remark symbols to end remark symbols.
Definition Scanner.h:296
bool refillIntermediateBuffer()
Refills the intermediate buffer.
void registerScopeRemark(const std::string &begin, const std::string &end)
Registers a scope remark symbol.
size_t size() const
Returns the size of the scanner.
static bool findNextToken(const char *pointer, const size_t size, const size_t start, size_t &tokenStart, size_t &tokenLength)
Finds the next token in a given string starting from a specified position.
bool readLineRemark()
Reads a line remark comment.
Memory intermediateBuffer_
Local intermediate buffer.
Definition Scanner.h:722
Scanner(const std::shared_ptr< std::istream > &stream, float *progress=nullptr, bool *cancel=nullptr)
Creates a new scanner using a stream as input.
bool registerWhiteSpaceCharacter(const uint8_t character)
Registers a white space character.
bool readSymbol(Token &token, const bool consumeBytes)
Tries to read a symbol as next token.
bool readString(Token &token, const bool consumeBytes)
Tries to read a string as next token.
Memory extraBuffer_
Local extra buffer, used if the intermediate buffer is too small.
Definition Scanner.h:731
const Token & look()
Returns a lookahead to the next token.
LineRemarks lineRemarks_
Registered line remarks.
Definition Scanner.h:758
This class implements an object able to allocate memory.
Definition base/Memory.h:22
float Scalar
Definition of a scalar type.
Definition Math.h:129
The namespace covering the entire Ocean framework.
Definition Accessor.h:15