xo-parser: + reader class (tokenizer -> parser pipeline)

This commit is contained in:
Roland Conybeare 2024-08-06 09:43:32 -04:00
commit 7e311ab0cb
3 changed files with 113 additions and 6 deletions

View file

@ -6,10 +6,27 @@
#pragma once #pragma once
#include "parser.hpp" #include "parser.hpp"
#include "xo/expression/Expression.hpp"
#include "xo/tokenizer/tokenizer.hpp" #include "xo/tokenizer/tokenizer.hpp"
namespace xo { namespace xo {
namespace scm { namespace scm {
/** @class reader_result
 * @brief Result object returned from reader::read_expr
 *
 * Pairs the expression produced by one read_expr call (if any)
 * with the span of input text that call consumed.
 **/
struct reader_result {
using Expression = xo::ast::Expression;
using span_type = span<const char>;
/** parsed schematica expression.
 * Null when read_expr consumed input without completing an expression.
 **/
rp<Expression> expr_;
/** span giving text input consumed to construct expr,
 * including any leading whitespace.
 * reader::read_expr builds this up from the spans the tokenizer reports
 * for each token it scans, so it may cover several tokens.
 *
 * NOTE(review): the name reads like "remainder", but read_expr stores
 * the consumed text here, not the unread rest -- confirm intended naming.
 **/
span_type rem_;
};
/** /**
* Use: * Use:
* @code * @code
@ -21,7 +38,7 @@ namespace xo {
* // eof: true if no more input will be forthcoming from this stream * // eof: true if no more input will be forthcoming from this stream
* eof = ins.eof(); * eof = ins.eof();
* *
* for (auto rem = input; ; !rem.empty()) { * for (auto rem = input; !rem.empty();) {
* // res: (parsed-expr, used) * // res: (parsed-expr, used)
* auto res = rdr.read_expr(rem, eof); * auto res = rdr.read_expr(rem, eof);
* *
@ -39,18 +56,32 @@ namespace xo {
* @endcode * @endcode
**/ **/
class reader { class reader {
public:
/** tokenizer specialization used by this reader (operates on char input) **/
using tokenizer_type = tokenizer<char>;
/** span type for input text; taken from the tokenizer so the two always agree **/
using span_type = tokenizer_type::span_type;
public: public:
reader() = default; reader() = default;
/** Try to read one expression from @p input.
 * Return struct containing parsed expression
 * and span of characters consumed from @p input by this call.
 *
 * @param input Supply this input span of chars
 * @param eof True if input stream supplying @p input
 * reports end-of-file immediately after the last char
 * in @p input.
 * @return reader_result holding the parsed expression, or a null
 * expression if @p input ran out before an expression was completed,
 * together with the span of input consumed.
 * @throws std::runtime_error if @p eof is true and either the parser
 * reports an incomplete expression or the tokenizer still reports
 * a pending prefix (tokenizer has_prefix()).
 **/
reader_result read_expr(const span_type & input, bool eof);
private: private:
/** tokenizer: text -> tokens **/ /** tokenizer: text -> tokens **/
tokenizer tokenizer_; tokenizer_type tokenizer_;
/** parser: tokens -> expressions (TODO: reanme ->reader) **/ /** parser: tokens -> expressions **/
parser parser_; parser parser_;
}; };
} /*namespace scm*/ } /*namespace scm*/
} /*namespace xo*/ } /*namespace xo*/
/* end reader.hpp */
/* end Repl.hpp */

View file

@ -2,7 +2,8 @@
set(SELF_LIB xo_parser) set(SELF_LIB xo_parser)
set(SELF_SRCS set(SELF_SRCS
parser.cpp) parser.cpp
reader.cpp)
xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS}) xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS})
xo_dependency(${SELF_LIB} xo_expression) xo_dependency(${SELF_LIB} xo_expression)

75
src/parser/reader.cpp Normal file
View file

@ -0,0 +1,75 @@
/* @file reader.cpp */
#include "reader.hpp"
namespace xo {
namespace scm {
/** Try to read one expression from @p input_arg.
 *
 * Scans tokens one at a time, feeding each valid token to the parser,
 * until the parser reports a complete expression or the input is used up.
 * Tokenizer and parser are members, so state from an unfinished
 * expression carries over to the next call.
 *
 * @param input_arg  span of input characters to read from
 * @param eof        true if no input will follow @p input_arg
 * @return reader_result with the completed expression and the span of
 *         text consumed; the expression is null if input ran out before
 *         an expression was completed
 * @throws std::runtime_error if @p eof is true and either the parser
 *         holds an incomplete expression or the tokenizer still reports
 *         a pending prefix
 **/
reader_result
reader::read_expr(const span_type & input_arg, bool eof)
{
    /* portion of caller's input not yet scanned; shrinks as tokens are read */
    span_type input = input_arg;

    /* input text-span consumed by this call.
     * Always comprises some number (possibly 0)
     * of complete tokens, along with any leading
     * whitespace
     */
    span_type expr_span = input.prefix(0ul);

    while (!input.empty()) {
        /* read one token from input: sr = (token, span consumed) */
        auto sr = this->tokenizer_.scan2(input, eof);
        const auto & tk = sr.first;
        const span_type & used_span = sr.second;

        /* advance past the scanned text exactly once per token.
         * (previously repeated below for tokens that did not complete
         *  an expression, applying the same prefix to already-advanced input)
         */
        input = input.after_prefix(used_span);
        expr_span += used_span;

        if (!tk.is_valid()) {
            /* no more tokens in input */
            assert(input.empty());
            break;
        }

        /* forward just-read token to parser */
        auto expr = this->parser_.include_token(tk);

        if (expr) {
            /* token completes an expression -> victory */
            return reader_result(expr, expr_span);
        }

        /* token did not complete an expression
         * (e.g. token for '[')
         *
         * input span may contain more tokens -> iterate
         */
    }

    /* control here: either
     * 1. input.empty (perhaps ate some whitespace, ok)
     * 2. missing or incomplete token (ok unless eof)
     */
    if (eof) {
        if (parser_.has_incomplete_expr()) {
            throw std::runtime_error
                ("reader::read_expr"
                 ": eof reached with incomplete expression");
        }

        if (tokenizer_.has_prefix()) {
            throw std::runtime_error
                ("reader::read_expr"
                 ": unintelligible input recognized at eof");
        }
    }

    /* nothing completed on this call; report what was consumed */
    return reader_result(nullptr, expr_span);
}
} /*namespace scm*/
} /*namespace xo*/
/* end reader.cpp */