8 #ifndef META_OCEAN_IO_SCANNER_H
9 #define META_OCEAN_IO_SCANNER_H
37 static constexpr uint32_t invalidId = uint32_t(-1);
42 class OCEAN_IO_EXPORT
Token final
100 Token(std::string&& data,
const uint32_t
id,
const Type type);
108 Token(
const std::string& data,
const uint32_t
id,
const Type type);
114 inline Type type()
const;
121 inline bool isType(
const Type type)
const;
127 inline bool isCharacter()
const;
133 inline bool isIdentifier()
const;
139 inline bool isInteger()
const;
145 inline bool isLine()
const;
151 inline bool isNumber()
const;
157 inline bool isIntegerOrNumber()
const;
163 inline bool isKeyword()
const;
176 inline bool isString()
const;
182 inline bool isSymbol()
const;
195 inline bool isEndOfFile()
const;
231 const std::string&
line()
const;
261 inline const std::string& raw()
const;
267 explicit inline operator bool()
const;
275 uint32_t id_ = invalidId;
286 typedef std::unordered_map<std::string, uint32_t>
IdMap;
338 explicit Scanner(
const std::shared_ptr<std::istream>& stream,
float* progress =
nullptr,
bool* cancel =
nullptr);
347 inline Scanner(
const std::string& filename,
const std::string& buffer,
float* progress =
nullptr,
bool* cancel =
nullptr);
356 inline Scanner(
const std::string& filename, std::string&& buffer,
float* progress =
nullptr,
bool* cancel =
nullptr);
397 inline size_t line()
const;
403 inline size_t column()
const;
421 inline const std::string& filename()
const;
427 inline bool isValid()
const;
439 static bool findNextToken(
const char* pointer,
const size_t size,
const size_t start,
size_t& tokenStart,
size_t& tokenLength);
450 static bool findNextToken(
const char* pointer,
const size_t start,
size_t& tokenStart,
size_t& tokenLength);
461 static inline bool isWhitespace(
const char& character);
482 uint8_t
get(
const size_t offset = 0);
490 std::string
data(
const size_t size)
const;
499 std::string
data(
const size_t offset,
const size_t size)
const;
692 static inline std::shared_ptr<std::istream> createInputStream(
const std::string& filename, std::string&& buffer);
699 static inline std::shared_ptr<std::istream> createInputStream(
const std::string& filename,
const std::string& buffer);
716 float* progress_ =
nullptr;
719 bool* cancel_ =
nullptr;
725 uint8_t* intermediateBufferPointer_ =
nullptr;
728 size_t intermediateBufferSize_ = 0;
734 uint8_t* extraBufferPointer_ =
nullptr;
737 size_t extraBufferSize_ = 0;
746 size_t position_ = 0;
752 bool keywordsAreCaseSensitive_ =
true;
761 size_t maximalLengthLineRemarks_ = 0;
767 size_t maximalLengthScopeRemarks_ = 0;
779 static constexpr
size_t minBufferSize_ = 2048;
782 static constexpr
size_t maxBufferSize_ = 8192;
792 return type_ == type;
797 return type_ == TOKEN_CHARACTER;
802 return type_ == TOKEN_IDENTIFIER;
807 return type_ == TOKEN_INTEGER;
812 return type_ == TOKEN_NUMBER;
817 return type_ == TOKEN_INTEGER || type_ == TOKEN_NUMBER;
822 return type_ == TOKEN_LINE;
827 return type_ == TOKEN_KEYWORD;
832 return type_ == TOKEN_STRING;
837 return type_ == TOKEN_SYMBOL;
842 return type_ == TOKEN_END_OF_FILE;
850 inline Scanner::Token::operator bool()
const
852 return type_ != TOKEN_INVALID;
858 ocean_assert(!
filename.empty() || !buffer.empty());
866 inline Scanner::Scanner(
const std::string& filename, std::string&& buffer,
float* progress,
bool* cancel) :
867 Scanner(createInputStream(filename, std::move(buffer)), progress, cancel)
899 return character ==
' ' || character==
'\t' || character ==
'\n' || character ==
'\r';
904 ocean_assert(!
filename.empty() || !buffer.empty());
908 return std::shared_ptr<std::istream>(
new std::ifstream(
filename.c_str(), std::ios_base::binary));
911 return std::shared_ptr<std::istream>(
new std::istringstream(std::move(buffer)));
916 ocean_assert(!
filename.empty() || !buffer.empty());
920 return std::shared_ptr<std::istream>(
new std::ifstream(
filename.c_str(), std::ios_base::binary));
923 return std::shared_ptr<std::istream>(
new std::istringstream(buffer));
This class implements a token for the scanner.
Definition: Scanner.h:43
const std::string & string() const
Returns the string value of this token.
Token(std::string &&data, const Type type)
Creates a new token with given data and type.
bool isKeyword() const
Returns whether this token holds a keyword.
Definition: Scanner.h:825
uint32_t keyword() const
Returns the id of the keyword of this token.
std::string data_
Holds the data of the token.
Definition: Scanner.h:278
bool isSymbol(const uint32_t symbol) const
Returns whether this token holds a special symbol.
Type
Definition of different token types.
Definition: Scanner.h:50
@ TOKEN_END_OF_FILE
End of file token.
Definition: Scanner.h:56
@ TOKEN_INTEGER
Integer token.
Definition: Scanner.h:64
@ TOKEN_KEYWORD
Keyword token.
Definition: Scanner.h:66
@ TOKEN_LINE
Line token.
Definition: Scanner.h:60
@ TOKEN_IDENTIFIER
Identifier token.
Definition: Scanner.h:58
@ TOKEN_NUMBER
Number token.
Definition: Scanner.h:62
@ TOKEN_CHARACTER
Character token.
Definition: Scanner.h:54
@ TOKEN_STRING
String token.
Definition: Scanner.h:68
bool isKeyword(const uint32_t keyword) const
Returns whether this token holds a special keyword.
bool isType(const Type type) const
Returns whether this token is of a specific type.
Definition: Scanner.h:790
bool isLine() const
Returns whether this token holds a remaining line.
Definition: Scanner.h:820
bool isSymbol() const
Returns whether this token holds a symbol.
Definition: Scanner.h:835
const std::string & raw() const
Returns the raw data of the token.
Definition: Scanner.h:845
bool isCharacter() const
Returns whether this token holds a character.
Definition: Scanner.h:795
bool isString() const
Returns whether this token holds a string.
Definition: Scanner.h:830
uint32_t symbol() const
Returns the id of the symbol of this token.
std::string moveString()
Returns the string value of this token and invalidates the token.
bool isNumber() const
Returns whether this token holds a number.
Definition: Scanner.h:810
Type type() const
Returns the type of the token.
Definition: Scanner.h:785
Scalar number() const
Returns the float value of this token.
bool isEndOfFile() const
Returns whether this token holds an end of file.
Definition: Scanner.h:840
bool isIdentifier() const
Returns whether this token holds an identifier.
Definition: Scanner.h:800
bool isIntegerOrNumber() const
Returns whether this token holds an integer or a number.
Definition: Scanner.h:815
Type type_
Holds the type of the token.
Definition: Scanner.h:272
const std::string & line() const
Returns the remaining line of this token.
Token(const std::string &data, const uint32_t id, const Type type)
Creates a new token with given keyword or symbol id.
Token()=default
Creates an invalid token.
uint8_t character() const
Returns the character value of this token.
Token(std::string &&data, const uint32_t id, const Type type)
Creates a new token with given keyword or symbol id.
const std::string & identifier() const
Returns the identifier value of this token.
Token(const std::string &data, const Type type)
Creates a new token with given data and type.
Scalar integerOrNumber() const
Returns the integer or float value of this token.
int integer() const
Returns the integer value of this token.
bool isInteger() const
Returns whether this token holds an integer.
Definition: Scanner.h:805
This class implements a simple scanner.
Definition: Scanner.h:31
std::unordered_set< std::string > LineRemarks
Definition of an unordered set holding line remark symbols.
Definition: Scanner.h:291
std::string filename_
The name of the input file, if the input is a file.
Definition: Scanner.h:713
std::string discardNonWhiteSpace()
Discards non white space and jumps to the first white space position.
CharTable followingCharTable_
Table holding the definition of allowed following characters.
Definition: Scanner.h:773
bool readKeyword(Token &token, const bool consumeBytes)
Tries to read a keyword as next token.
size_t column_
Holds the current column.
Definition: Scanner.h:743
size_t column() const
Returns the recent column.
Definition: Scanner.h:882
Token recentToken_
Recent token.
Definition: Scanner.h:704
uint32_t symbolId(const std::string &data) const
Returns the symbol id of a given string.
uint8_t get(const size_t offset=0)
Returns one character.
CharTable invalidCharTable_
Table holding the definition of not allowed following characters.
Definition: Scanner.h:776
bool readInteger(Token &token, const bool consumeBytes)
Tries to read an integer as next token.
IdMap symbolMap_
Map mapping symbol strings to symbol ids.
Definition: Scanner.h:755
static std::shared_ptr< std::istream > createInputStream(const std::string &filename, std::string &&buffer)
Creates a file input stream or a string input stream depending on the given input.
Definition: Scanner.h:902
static bool findNextToken(const char *pointer, const size_t start, size_t &tokenStart, size_t &tokenLength)
Finds the next token in a given string starting from a specified position.
bool readIdentifier(Token &token, const bool consumeBytes)
Tries to read an identifier as next token.
bool isValid() const
Returns whether the scanner is valid and ready to use.
Definition: Scanner.h:892
size_t position() const
Returns the position of the scanner.
uint32_t keywordId(const std::string &data) const
Returns the keyword id of a given string.
CharTable firstCharTable_
Table holding the definition of allowed first characters.
Definition: Scanner.h:770
uint8_t getExtra(const size_t offset=0)
Returns one character from the extra buffer.
FirstChar
Definition of first character types.
Definition: Scanner.h:307
void consume(const size_t chars=1)
Consumes one or more characters.
bool readCharacter(Token &token, const bool consumeBytes)
Tries to read a character as next token.
std::shared_ptr< std::istream > stream_
The input stream from which the scanner receives the data.
Definition: Scanner.h:710
std::unordered_map< std::string, uint32_t > IdMap
Definition of an unordered map mapping strings to ids.
Definition: Scanner.h:286
void registerKeyword(const std::string &keyword, const uint32_t id)
Registers a new keyword.
bool readScopeRemark()
Reads a scope remark comment.
std::string data(const size_t offset, const size_t size) const
Returns data of a specified size starting at the offset position.
bool readRemark()
Reads remark comments.
void registerLineRemark(const std::string &lineRemark)
Registers a line remark symbol.
Scanner(float *progress, bool *cancel)
Creates a new scanner.
Token nextToken_
Next token.
Definition: Scanner.h:707
Token tokenPop()
Returns the recent token and pops it afterwards.
void setKeywordProperty(const bool caseSensitive)
Sets whether the keywords are case sensitive or not.
const Token & lineToken()
Returns a line token starting at the current position.
bool refillExtraBuffer(const size_t minIndex)
Refills the extra buffer.
std::string data(const size_t size) const
Returns data of a specified size starting at the recent position.
size_t line() const
Returns the recent line.
Definition: Scanner.h:877
virtual ~Scanner()
Destructs a scanner.
const Token & token()
Returns the recent token.
bool readLine(Token &token, const bool consumeBytes)
Tries to read a remaining line as next token.
void registerSymbol(const std::string &symbol, const uint32_t id)
Registers a new symbol.
static bool isWhitespace(const char &character)
Returns whether a given character is a white space character.
Definition: Scanner.h:897
uint8_t readWhiteSpace(bool crossLines=true)
Reads white space.
IdMap keywordMap_
Map mapping keyword strings to identifier ids.
Definition: Scanner.h:749
size_t line_
Holds the current line.
Definition: Scanner.h:740
const std::string & filename() const
Returns the name of the input file, if the input is a file.
Definition: Scanner.h:887
bool readNumber(Token &token, const bool consumeBytes)
Tries to read a number as next token.
ScopeRemarks scopeRemarks_
Scope remarks.
Definition: Scanner.h:764
void pop()
Pops the recent token.
virtual Token readToken(const bool consumeBytes=true)
Reads and returns the next token.
std::array< uint16_t, 256 > CharTable
Definition of a character table.
Definition: Scanner.h:301
std::unordered_map< std::string, std::string > ScopeRemarks
Definition of an unordered map mapping begin remark symbols to end remark symbols.
Definition: Scanner.h:296
bool refillIntermediateBuffer()
Refills the intermediate buffer.
void registerScopeRemark(const std::string &begin, const std::string &end)
Registers a scope remark symbol.
size_t size() const
Returns the size of the scanner.
static bool findNextToken(const char *pointer, const size_t size, const size_t start, size_t &tokenStart, size_t &tokenLength)
Finds the next token in a given string starting from a specified position.
const Token & look()
Returns a lookout to the next token.
bool readLineRemark()
Reads a line remark comment.
Memory intermediateBuffer_
Local intermediate buffer.
Definition: Scanner.h:722
Scanner(const std::shared_ptr< std::istream > &stream, float *progress=nullptr, bool *cancel=nullptr)
Creates a new scanner using a stream as input.
bool registerWhiteSpaceCharacter(const uint8_t character)
Registers a white space character.
bool readSymbol(Token &token, const bool consumeBytes)
Tries to read a symbol as next token.
bool readString(Token &token, const bool consumeBytes)
Tries to read a string as next token.
Memory extraBuffer_
Local extra buffer, used if the intermediate buffer is too small.
Definition: Scanner.h:731
LineRemarks lineRemarks_
Registered line remarks.
Definition: Scanner.h:758
This class implements an object able to allocate memory.
Definition: base/Memory.h:22
float Scalar
Definition of a scalar type.
Definition: Math.h:128
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15