xo-parser: + reader class (tokenizer -> parser pipeline)
This commit is contained in:
parent
07ba966e08
commit
7e311ab0cb
3 changed files with 113 additions and 6 deletions
|
|
@ -6,10 +6,27 @@
|
|||
#pragma once
|
||||
|
||||
#include "parser.hpp"
|
||||
#include "xo/expression/Expression.hpp"
|
||||
#include "xo/tokenizer/tokenizer.hpp"
|
||||
|
||||
namespace xo {
|
||||
namespace scm {
|
||||
/** @class parse_result
|
||||
* @brief Result object returned from reader::read_expr
|
||||
**/
|
||||
struct reader_result {
|
||||
using Expression = xo::ast::Expression;
|
||||
using span_type = span<const char>;
|
||||
|
||||
/** parsed schematica expression **/
|
||||
rp<Expression> expr_;
|
||||
/** span giving text input consumed to construct expr,
|
||||
* including any leading whitespace.
|
||||
* This is the span returned in result of tokenizer<char>::scan()
|
||||
**/
|
||||
span_type rem_;
|
||||
};
|
||||
|
||||
/**
|
||||
* Use:
|
||||
* @code
|
||||
|
|
@ -21,7 +38,7 @@ namespace xo {
|
|||
* // eof: true if no more input will be forthcoming from this stream
|
||||
* eof = ins.eof();
|
||||
*
|
||||
* for (auto rem = input; ; !rem.empty()) {
|
||||
* for (auto rem = input; !rem.empty();) {
|
||||
* // res: (parsed-expr, used)
|
||||
* auto res = rdr.read_expr(rem, eof);
|
||||
*
|
||||
|
|
@ -39,18 +56,32 @@ namespace xo {
|
|||
* @endcode
|
||||
**/
|
||||
class reader {
|
||||
public:
|
||||
using tokenizer_type = tokenizer<char>;
|
||||
using span_type = tokenizer_type::span_type;
|
||||
|
||||
public:
|
||||
reader() = default;
|
||||
|
||||
/** Try to read one expression from @p input.
|
||||
* Return struct containing parsed expression
|
||||
* and span of characters comprising that expression
|
||||
*
|
||||
* @param input Supply this input span of chars
|
||||
* @param eof. True if input stream supplying @p input
|
||||
* reports end-of-file immediately after the last char
|
||||
* in @p input.
|
||||
**/
|
||||
reader_result read_expr(const span_type & input, bool eof);
|
||||
|
||||
private:
|
||||
/** tokenizer: text -> tokens **/
|
||||
tokenizer tokenizer_;
|
||||
tokenizer_type tokenizer_;
|
||||
|
||||
/** parser: tokens -> expressions (TODO: reanme ->reader) **/
|
||||
/** parser: tokens -> expressions **/
|
||||
parser parser_;
|
||||
};
|
||||
} /*namespace scm*/
|
||||
} /*namespace xo*/
|
||||
|
||||
|
||||
/* end Repl.hpp */
|
||||
/* end reader.hpp */
|
||||
|
|
|
|||
|
|
@ -2,7 +2,8 @@
|
|||
|
||||
set(SELF_LIB xo_parser)
|
||||
# translation units compiled into ${SELF_LIB}
set(SELF_SRCS
    parser.cpp
    reader.cpp)
|
||||
|
||||
xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS})
|
||||
xo_dependency(${SELF_LIB} xo_expression)
|
||||
|
|
|
|||
75
src/parser/reader.cpp
Normal file
75
src/parser/reader.cpp
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
/* @file reader.cpp */
|
||||
|
||||
#include "reader.hpp"
|
||||
|
||||
namespace xo {
|
||||
namespace scm {
|
||||
/** Read at most one expression from @p input_arg.
 *
 *  Tokens are scanned one at a time and forwarded to the parser until
 *  either the parser reports a complete expression or the input is
 *  exhausted. Tokenizer and parser state persists on this reader, so an
 *  expression may be completed across several calls.
 *
 *  @param input_arg  text to scan
 *  @param eof        true if no further input will follow @p input_arg
 *  @return parsed expression plus the span of @p input_arg consumed;
 *          expression is null when no expression was completed.
 *  @throws std::runtime_error if @p eof is true and the parser holds an
 *          incomplete expression, or the tokenizer holds an incomplete token.
 **/
reader_result
reader::read_expr(const span_type & input_arg, bool eof)
{
    /* unread portion of input_arg; shrinks as tokens are scanned */
    span_type input = input_arg;

    /* input text-span consumed by this call.
     * Always comprises some number (possibly 0)
     * of complete tokens, along with any leading
     * whitespace
     */
    span_type expr_span = input.prefix(0ul);

    while (!input.empty()) {
        /* read one token from input */
        auto sr = this->tokenizer_.scan2(input, eof);
        const auto & tk = sr.first;
        const span_type & used_span = sr.second;

        /* step past the scanned text exactly once per token.
         * Advancing a second time for tokens that don't complete
         * an expression would risk skipping unread input.
         */
        input = input.after_prefix(used_span);
        expr_span += used_span;

        if (!tk.is_valid()) {
            /* no more tokens in input */
            assert(input.empty());
            break;
        }

        /* forward just-read token to parser */
        auto expr = this->parser_.include_token(tk);

        if (expr) {
            /* token completes an expression -> victory */
            return reader_result{expr, expr_span};
        }

        /* token did not complete an expression
         * (e.g. token for '[')
         *
         * input span may contain more tokens -> iterate
         */
    }

    /* control here: either
     * 1. input.empty (perhaps ate some whitespace, ok)
     * 2. missing or incomplete token (ok unless eof)
     */
    if (eof) {
        if (parser_.has_incomplete_expr()) {
            throw std::runtime_error
                ("reader::read_expr"
                 ": eof reached with incomplete expression");
        }

        if (tokenizer_.has_prefix()) {
            throw std::runtime_error
                ("reader::read_expr"
                 ": unintelligible input recognized at eof");
        }
    }

    return reader_result{nullptr, expr_span};
}
|
||||
|
||||
} /*namespace scm*/
|
||||
} /*namespace xo*/
|
||||
|
||||
/* end reader.cpp */
|
||||
Loading…
Add table
Add a link
Reference in a new issue