diff --git a/CMakeLists.txt b/CMakeLists.txt index 0e7d37ef..114a512a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,6 +57,7 @@ endif() message("-- CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}") message("-- CMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}") message("-- CMAKE_INSTALL_RPATH=${CMAKE_INSTALL_RPATH}") +message("-- XO_ENABLE_EXAMPLES=${XO_ENABLE_EXAMPLES}") # ---------------------------------------------------------------- # diff --git a/pkgs/xo-tokenizer.nix b/pkgs/xo-tokenizer.nix index ff02f2d8..2a4e2c19 100644 --- a/pkgs/xo-tokenizer.nix +++ b/pkgs/xo-tokenizer.nix @@ -12,7 +12,7 @@ stdenv.mkDerivation (finalattrs: src = ../xo-tokenizer; - cmakeFlags = ["-DCMAKE_MODULE_PATH=${xo-cmake}/share/cmake"]; + cmakeFlags = ["-DCMAKE_MODULE_PATH=${xo-cmake}/share/cmake" "-DXO_ENABLE_EXAMPLES=1"]; doCheck = true; propagatedBuildInputs = [ ]; nativeBuildInputs = [ cmake catch2 diff --git a/xo-tokenizer/docs/examples.rst b/xo-tokenizer/docs/examples.rst index 72e890e6..16f963a8 100644 --- a/xo-tokenizer/docs/examples.rst +++ b/xo-tokenizer/docs/examples.rst @@ -45,14 +45,55 @@ See ``xo-tokenizer/examples/tokenrepl`` for (slighly elaborated) version of code if (tk.is_valid()) { cout << tk; + } else if (error.is_error()) { + cout << "parsing error: " << endl; + error.report(cout); } } +Reminder: enable building examples with ``cmake -DXO_ENABLE_EXAMPLES=1 ..`` + .. code-block:: :linenos: - $ .build/xo-tokenizer/utest/utest.tokenizer + $ .build/xo-tokenizer/example/tokenrepl/xo_tokenizer_repl > 123 > 123e5 + > def sq(x: i64) -> i64 { x * x } + + + + + + + + + + + + + + + +Example of error reporting (via ``error.report(cout)`` above) + +.. 
code-block:: + :linenos: + + $ .build/xo-tokenizer/example/tokenrepl/xo_tokenizer_repl + + > 123q + parsing error: + char: 4 + input: 123q + ---^ + unexpected character in numeric constant + + > (8 * 8 * 123fd) + parsing error: + char: 13 + input: (8 * 8 * 123fd) + ---^ + unexpected character in numeric constant diff --git a/xo-tokenizer/docs/input-state-class.rst b/xo-tokenizer/docs/input-state-class.rst index 01d14267..d995868e 100644 --- a/xo-tokenizer/docs/input-state-class.rst +++ b/xo-tokenizer/docs/input-state-class.rst @@ -27,3 +27,51 @@ Context .. code-block:: cpp #include + +.. uml:: + :scale: 99% + :align: center + + allowmixing + + object in1<> + in1 : current_line = input + in1 : current_pos + in1 : whitespace + in1 : debug_flag + + object input + input : (x * y * 123d) + + in1 o-- input + + +Class +----- + +.. doxygenclass:: xo::scm::input_state + +Instance Variables +------------------ + +.. doxygengroup:: input-state-instance-vars + +Constructors +------------ + +.. doxygengroup:: input-state-ctors + +Static Methods +-------------- + +.. doxygengroup:: input-state-static-methods + +Access Methods +-------------- + +.. doxygengroup:: input-state-access-methods + +General Methods +--------------- + +.. doxygengroup:: input-state-general-methods diff --git a/xo-tokenizer/docs/tokenizer-class.rst b/xo-tokenizer/docs/tokenizer-class.rst index 5b29f4b3..4903dae9 100644 --- a/xo-tokenizer/docs/tokenizer-class.rst +++ b/xo-tokenizer/docs/tokenizer-class.rst @@ -35,9 +35,10 @@ Context allowmixing object tkz1<> - tkz : input_state + tkz1 : input_state = ins1 object ins1<> + ins1 : current_line = (9 * 8) tkz1 o-- ins1 @@ -54,9 +55,14 @@ Class Instance Variables ------------------ -.. doxygenclass:: tokenizer-instance-vars +.. doxygengroup:: tokenizer-instance-vars Constructors ------------ .. doxygengroup:: tokenizer-ctors + +Methods +------- + +.. 
doxygengroup:: tokenizer-general-methods diff --git a/xo-tokenizer/example/tokenrepl/tokenrepl.cpp b/xo-tokenizer/example/tokenrepl/tokenrepl.cpp index b1a2fc55..3b70d435 100644 --- a/xo-tokenizer/example/tokenrepl/tokenrepl.cpp +++ b/xo-tokenizer/example/tokenrepl/tokenrepl.cpp @@ -48,7 +48,6 @@ main() { } input = tkz.consume(consumed, input); - //input = input.after_prefix(consumed.size()); } /* here: input.empty() or error encountered */ diff --git a/xo-tokenizer/include/xo/tokenizer/input_state.hpp b/xo-tokenizer/include/xo/tokenizer/input_state.hpp index c17f085b..c5d57a39 100644 --- a/xo-tokenizer/include/xo/tokenizer/input_state.hpp +++ b/xo-tokenizer/include/xo/tokenizer/input_state.hpp @@ -38,7 +38,7 @@ namespace xo { ///@} - /** @defgroup input-state static methods **/ + /** @defgroup input-state-static-methods input_state static methods **/ ///@{ /** recognize the newline character '\n' **/ @@ -80,7 +80,7 @@ namespace xo { void discard_current_line(); /** Add @p z to current position **/ - void consume(size_t z) { current_pos_ += z; } + void consume(size_t z); /** Skip prefix of input comprising whitespace. * Return pointer to first non-whitespace character in @p input, @@ -91,7 +91,7 @@ namespace xo { ///@} private: - /** @defgroup input-state-instance-vars **/ + /** @defgroup input-state-instance-vars input_state instance variables **/ ///@{ /** remember current input line. 
Used only to report errors **/ @@ -128,6 +128,16 @@ namespace xo { return false; } + template + void + input_state::consume(size_t z) { + scope log(XO_DEBUG(debug_flag_)); + + this->current_pos_ += z; + + log && log(xtag("z", z), xtag("current_pos", current_pos_)); + } + template void input_state::discard_current_line() { diff --git a/xo-tokenizer/include/xo/tokenizer/tokenizer.hpp b/xo-tokenizer/include/xo/tokenizer/tokenizer.hpp index 1eda4f10..35fac5a5 100644 --- a/xo-tokenizer/include/xo/tokenizer/tokenizer.hpp +++ b/xo-tokenizer/include/xo/tokenizer/tokenizer.hpp @@ -58,8 +58,16 @@ namespace xo { using result_type = scan_result; public: + /** @defgroup tokenizer-ctors tokenizer constructors **/ + ///@{ + tokenizer(bool debug_flag = false); + ///@} + + /** @defgroup tokenizer-general-methods tokenizer methods **/ + ///@{ + /** identifies punctuation chars. * These are chars that are not permitted to appear within * a symbol token. Instead they force completion of @@ -130,19 +138,26 @@ namespace xo { **/ result_type notify_eof(const span_type & input); + ///@} + private: result_type scan_completion(const span_type & whitespace, const CharT* token_end, const span_type & input); private: + /** @defgroup tokenizer-instance-vars tokenizer instance variables **/ + ///@{ + /** track input state (line#,pos,..) for error messages **/ input_state_type input_state_; /** Accumulate partial token here. * This will happen if input sent to @ref tokenizer::scan - * ends without a determinate token boundary. 
+ * ends without whitespace such that last available token's extent is not determined **/ std::string prefix_; + + ///@} }; /*tokenizer*/ template @@ -338,7 +353,8 @@ namespace xo { //current_line_, //current_pos_, //initial_whitespace, - (ix - tk_start))); + (ix - tk_start) + )); } } else if (*ix == '.') { if (period_flag) { @@ -378,9 +394,6 @@ namespace xo { (error_type(__FUNCTION__ /*src_function*/, "unexpected character in numeric constant" /*error_description*/, input_state_, - //current_line_, - //current_pos_, - //initial_whitespace, (ix - tk_start))); } } diff --git a/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp b/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp index 1f807741..9488b62b 100644 --- a/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp +++ b/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp @@ -36,16 +36,17 @@ namespace xo { tokenizer_error(const char * src_function, const char * error_description, const input_state_type & input_state, - //span_type input_line, - //size_t tk_start, - //size_t whitespace, size_t error_pos) : src_function_{src_function}, error_description_{error_description}, input_state_{input_state}, - //tk_entry_{tk_start}, - //whitespace_{whitespace}, - error_pos_{error_pos} {} + error_pos_{error_pos} + { + scope log(XO_DEBUG(input_state.debug_flag())); + + log && log(xtag("input_state.current_pos", input_state.current_pos()), + xtag("error_pos", error_pos)); + } ///@} /** @defgroup tokenizer-error-access-methods **/ @@ -57,7 +58,6 @@ namespace xo { #pragma GCC diagnostic ignored "-Wchanges-meaning" const input_state_type & input_state() const { return input_state_; } #pragma GCC diagnostic pop - //const span_type& input_line() const { return input_line_; } size_t tk_start() const { return input_state_.current_pos(); } size_t whitespace() const { return input_state_.whitespace(); } size_t error_pos() const { return error_pos_; } @@ -94,8 +94,6 @@ namespace xo { * Sufficient to precisely locate it with 
context. **/ input_state_type input_state_; - /** position (relative to line_.lo) of token start where error encountered **/ - size_t tk_entry_ = 0; - /** position (relative to @ref tk_entry_) of error **/ + /** position of error, relative to start of failing token **/ size_t error_pos_ = 0; @@ -110,7 +108,6 @@ namespace xo { << xtag("message", error_description_) << xtag("input", input_state_.current_line()) << xtag("whitespace", input_state_.whitespace()) - << xtag("tk-start", tk_entry_) << xtag("error-pos", error_pos_) << ">"; } @@ -122,10 +119,13 @@ namespace xo { if (error_description_) { const char * prefix = "input: "; - const size_t tk_indent = strlen(prefix) + tk_entry_ + input_state_.whitespace(); - //const size_t msg_length = strlen(error_description_); - - const size_t error_pos = 1 + tk_entry_ + input_state_.whitespace() + error_pos_; + /* input_state.current_pos: position of first character following preceding token. + * input_state.whitespace: whitespace between current_pos and start of failing token + * error_pos: position (relative to start of failing token) at which failure detected + */ + const size_t tk_start = input_state_.current_pos() + input_state_.whitespace(); + const size_t tk_indent = (strlen(prefix) + tk_start); + const size_t error_pos = 1 + tk_start + error_pos_; os << "char: " << error_pos << endl; os << prefix; diff --git a/xo-tokenizer/src/tokenizer/CMakeLists.txt b/xo-tokenizer/src/tokenizer/CMakeLists.txt index cad846f4..505b2040 100644 --- a/xo-tokenizer/src/tokenizer/CMakeLists.txt +++ b/xo-tokenizer/src/tokenizer/CMakeLists.txt @@ -6,9 +6,6 @@ set(SELF_SRCS token.cpp) xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS}) -#xo_dependency(${SELF_LIB} refcnt) xo_dependency(${SELF_LIB} indentlog) -#xo_dependency(${SELF_LIB} subsys) -#xo_boost_dependency(${SELF_LIB}) # end CMakeLists.txt diff --git a/xo-tokenizer/src/tokenizer/tokentype.cpp b/xo-tokenizer/src/tokenizer/tokentype.cpp index b7172118..54d86540 100644 --- a/xo-tokenizer/src/tokenizer/tokentype.cpp +++ 
b/xo-tokenizer/src/tokenizer/tokentype.cpp @@ -63,5 +63,4 @@ namespace xo { } /*namespace scm*/ } /*namespace xo*/ - /* end tokentype.cpp */