xo-tokenizer: + doc for tokenizer + other doc-related improvements
This commit is contained in:
parent
0bd77b3c52
commit
3b073d013f
11 changed files with 145 additions and 31 deletions
|
|
@ -57,6 +57,7 @@ endif()
|
|||
message("-- CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}")
|
||||
message("-- CMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}")
|
||||
message("-- CMAKE_INSTALL_RPATH=${CMAKE_INSTALL_RPATH}")
|
||||
message("-- XO_ENABLE_EXAMPLES=${XO_ENABLE_EXAMPLES}")
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
#
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ stdenv.mkDerivation (finalattrs:
|
|||
|
||||
src = ../xo-tokenizer;
|
||||
|
||||
cmakeFlags = ["-DCMAKE_MODULE_PATH=${xo-cmake}/share/cmake"];
|
||||
# CMake's -D option requires an explicit VAR=value; a bare -DXO_ENABLE_EXAMPLES is rejected as a parse error.
cmakeFlags = ["-DCMAKE_MODULE_PATH=${xo-cmake}/share/cmake" "-DXO_ENABLE_EXAMPLES=1"];
|
||||
doCheck = true;
|
||||
propagatedBuildInputs = [ ];
|
||||
nativeBuildInputs = [ cmake catch2
|
||||
|
|
|
|||
|
|
@ -45,14 +45,55 @@ See ``xo-tokenizer/examples/tokenrepl`` for (slightly elaborated) version of code
|
|||
|
||||
if (tk.is_valid()) {
|
||||
cout << tk;
|
||||
} else if (error.is_error()) {
|
||||
cout << "parsing error: " << endl;
|
||||
error.report(cout);
|
||||
}
|
||||
}
|
||||
|
||||
Reminder: enable building examples with ``cmake -DXO_ENABLE_EXAMPLES=1 ..``
|
||||
|
||||
.. code-block::
|
||||
:linenos:
|
||||
|
||||
$ .build/xo-tokenizer/utest/utest.tokenizer
|
||||
$ .build/xo-tokenizer/example/tokenrepl/xo_tokenizer_repl
|
||||
> 123
|
||||
<token :type tk_i64 :text 123>
|
||||
> 123e5
|
||||
<token :type tk_f64 :text 123e5>
|
||||
> def sq(x: i64) -> i64 { x * x }
|
||||
<token :type tk_def :text "">
|
||||
<token :type tk_symbol :text sq>
|
||||
<token :type tk_leftparen :text "">
|
||||
<token :type tk_symbol :text x>
|
||||
<token :type tk_colon :text "">
|
||||
<token :type tk_symbol :text i64>
|
||||
<token :type tk_rightparen :text "">
|
||||
<token :type tk_yields :text "">
|
||||
<token :type tk_symbol :text i64>
|
||||
<token :type tk_leftbrace :text "">
|
||||
<token :type tk_symbol :text x>
|
||||
<token :type tk_star :text "">
|
||||
<token :type tk_symbol :text x>
|
||||
<token :type tk_rightbrace :text "">
|
||||
|
||||
Example of error reporting (via ``error.report(cout)`` above)
|
||||
|
||||
.. code-block::
|
||||
:linenos:
|
||||
|
||||
$ .build/xo-tokenizer/example/tokenrepl/xo_tokenizer_repl
|
||||
|
||||
> 123q
|
||||
parsing error:
|
||||
char: 4
|
||||
input: 123q
|
||||
---^
|
||||
unexpected character in numeric constant
|
||||
|
||||
> (8 * 8 * 123fd)
|
||||
parsing error:
|
||||
char: 13
|
||||
input: (8 * 8 * 123fd)
|
||||
---^
|
||||
unexpected character in numeric constant
|
||||
|
|
|
|||
|
|
@ -27,3 +27,51 @@ Context
|
|||
.. code-block:: cpp
|
||||
|
||||
#include <xo/tokenizer/input_state.hpp>
|
||||
|
||||
.. uml::
|
||||
:scale: 99%
|
||||
:align: center
|
||||
|
||||
allowmixing
|
||||
|
||||
object in1<<input_state>>
|
||||
in1 : current_line = input
|
||||
in1 : current_pos
|
||||
in1 : whitespace
|
||||
in1 : debug_flag
|
||||
|
||||
object input
|
||||
input : (x * y * 123d)
|
||||
|
||||
in1 o-- input
|
||||
|
||||
|
||||
Class
|
||||
-----
|
||||
|
||||
.. doxygenclass:: xo::scm::input_state
|
||||
|
||||
Instance Variables
|
||||
------------------
|
||||
|
||||
.. doxygengroup:: input-state-instance-vars
|
||||
|
||||
Constructors
|
||||
------------
|
||||
|
||||
.. doxygengroup:: input-state-ctors
|
||||
|
||||
Static Methods
|
||||
--------------
|
||||
|
||||
.. doxygengroup:: input-state-static-methods
|
||||
|
||||
Access Methods
|
||||
--------------
|
||||
|
||||
.. doxygengroup:: input-state-access-methods
|
||||
|
||||
General Methods
|
||||
---------------
|
||||
|
||||
.. doxygengroup:: input-state-general-methods
|
||||
|
|
|
|||
|
|
@ -35,9 +35,10 @@ Context
|
|||
allowmixing
|
||||
|
||||
object tkz1<<tokenizer>>
|
||||
tkz : input_state
|
||||
tkz1 : input_state = ins1
|
||||
|
||||
object ins1<<input_state>>
|
||||
ins1 : current_line = (9 * 8)
|
||||
|
||||
tkz1 o-- ins1
|
||||
|
||||
|
|
@ -54,9 +55,14 @@ Class
|
|||
Instance Variables
|
||||
------------------
|
||||
|
||||
.. doxygenclass:: tokenizer-instance-vars
|
||||
.. doxygengroup:: tokenizer-instance-vars
|
||||
|
||||
Constructors
|
||||
------------
|
||||
|
||||
.. doxygengroup:: tokenizer-ctors
|
||||
|
||||
Methods
|
||||
-------
|
||||
|
||||
.. doxygengroup:: tokenizer-general-methods
|
||||
|
|
|
|||
|
|
@ -48,7 +48,6 @@ main() {
|
|||
}
|
||||
|
||||
input = tkz.consume(consumed, input);
|
||||
//input = input.after_prefix(consumed.size());
|
||||
}
|
||||
|
||||
/* here: input.empty() or error encountered */
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ namespace xo {
|
|||
|
||||
///@}
|
||||
|
||||
/** @defgroup input-state static methods **/
|
||||
/** @defgroup input-state-static-methods input_state static methods **/
|
||||
///@{
|
||||
|
||||
/** recognize the newline character '\n' **/
|
||||
|
|
@ -80,7 +80,7 @@ namespace xo {
|
|||
void discard_current_line();
|
||||
|
||||
/** Add @p z to current position **/
|
||||
void consume(size_t z) { current_pos_ += z; }
|
||||
void consume(size_t z);
|
||||
|
||||
/** Skip prefix of input comprising whitespace.
|
||||
* Return pointer to first non-whitespace character in @p input,
|
||||
|
|
@ -91,7 +91,7 @@ namespace xo {
|
|||
///@}
|
||||
|
||||
private:
|
||||
/** @defgroup input-state-instance-vars **/
|
||||
/** @defgroup input-state-instance-vars input_state instance variables **/
|
||||
///@{
|
||||
|
||||
/** remember current input line. Used only to report errors **/
|
||||
|
|
@ -128,6 +128,16 @@ namespace xo {
|
|||
return false;
|
||||
}
|
||||
|
||||
/** Advance the current position by @p z characters.
 *
 *  @param z  amount added to @c current_pos_
 *
 *  NOTE(review): the log scope is presumably active only when
 *  @c debug_flag_ is set -- confirm against XO_DEBUG.
 **/
template <typename CharT>
|
||||
void
|
||||
input_state<CharT>::consume(size_t z) {
|
||||
scope log(XO_DEBUG(debug_flag_));
|
||||
|
||||
this->current_pos_ += z;
|
||||
|
||||
/* report increment together with the already-updated position */
log && log(xtag("z", z), xtag("current_pos", current_pos_));
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
void
|
||||
input_state<CharT>::discard_current_line() {
|
||||
|
|
|
|||
|
|
@ -58,8 +58,16 @@ namespace xo {
|
|||
using result_type = scan_result<CharT>;
|
||||
|
||||
public:
|
||||
/** @defgroup tokenizer-ctors tokenizer constructors **/
|
||||
///@{
|
||||
|
||||
tokenizer(bool debug_flag = false);
|
||||
|
||||
///@}
|
||||
|
||||
/** @defgroup tokenizer-general-methods tokenizer methods **/
|
||||
///@{
|
||||
|
||||
/** identifies punctuation chars.
|
||||
* These are chars that are not permitted to appear within
|
||||
* a symbol token. Instead they force completion of
|
||||
|
|
@ -130,19 +138,26 @@ namespace xo {
|
|||
**/
|
||||
result_type notify_eof(const span_type & input);
|
||||
|
||||
///@}
|
||||
|
||||
private:
|
||||
result_type scan_completion(const span_type & whitespace,
|
||||
const CharT* token_end,
|
||||
const span_type & input);
|
||||
|
||||
private:
|
||||
/** @defgroup tokenizer-instance-vars tokenizer instance variables **/
|
||||
///@{
|
||||
|
||||
/** track input state (line#,pos,..) for error messages **/
|
||||
input_state_type input_state_;
|
||||
/** Accumulate partial token here.
|
||||
* This will happen if input sent to @ref tokenizer::scan
|
||||
* ends without a determinate token boundary.
|
||||
* ends without whitespace such that last available token's extent is not determined
|
||||
**/
|
||||
std::string prefix_;
|
||||
|
||||
///@}
|
||||
}; /*tokenizer*/
|
||||
|
||||
template <typename CharT>
|
||||
|
|
@ -338,7 +353,8 @@ namespace xo {
|
|||
//current_line_,
|
||||
//current_pos_,
|
||||
//initial_whitespace,
|
||||
(ix - tk_start)));
|
||||
(ix - tk_start)
|
||||
));
|
||||
}
|
||||
} else if (*ix == '.') {
|
||||
if (period_flag) {
|
||||
|
|
@ -378,9 +394,6 @@ namespace xo {
|
|||
(error_type(__FUNCTION__ /*src_function*/,
|
||||
"unexpected character in numeric constant" /*error_description*/,
|
||||
input_state_,
|
||||
//current_line_,
|
||||
//current_pos_,
|
||||
//initial_whitespace,
|
||||
(ix - tk_start)));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,16 +36,17 @@ namespace xo {
|
|||
tokenizer_error(const char * src_function,
|
||||
const char * error_description,
|
||||
const input_state_type & input_state,
|
||||
//span_type input_line,
|
||||
//size_t tk_start,
|
||||
//size_t whitespace,
|
||||
size_t error_pos)
|
||||
: src_function_{src_function},
|
||||
error_description_{error_description},
|
||||
input_state_{input_state},
|
||||
//tk_entry_{tk_start},
|
||||
//whitespace_{whitespace},
|
||||
error_pos_{error_pos} {}
|
||||
error_pos_{error_pos}
|
||||
{
|
||||
scope log(XO_DEBUG(input_state.debug_flag()));
|
||||
|
||||
log && log(xtag("input_state.current_pos", input_state.current_pos()),
|
||||
xtag("error_pos", error_pos));
|
||||
}
|
||||
///@}
|
||||
|
||||
/** @defgroup tokenizer-error-access-methods tokenizer_error access methods **/
|
||||
|
|
@ -57,7 +58,6 @@ namespace xo {
|
|||
#pragma GCC diagnostic ignored "-Wchanges-meaning"
|
||||
const input_state_type & input_state() const { return input_state_; }
|
||||
#pragma GCC diagnostic pop
|
||||
//const span_type& input_line() const { return input_line_; }
|
||||
size_t tk_start() const { return input_state_.current_pos(); }
|
||||
size_t whitespace() const { return input_state_.whitespace(); }
|
||||
size_t error_pos() const { return error_pos_; }
|
||||
|
|
@ -94,8 +94,6 @@ namespace xo {
|
|||
* Sufficient to precisely locate it with context.
|
||||
**/
|
||||
input_state_type input_state_;
|
||||
/** position (relative to line_.lo) of token start where error encountered **/
|
||||
size_t tk_entry_ = 0;
|
||||
/** position of error, relative to start of the failing token
 *  (i.e. relative to input_state_.current_pos() + input_state_.whitespace())
 **/
|
||||
size_t error_pos_ = 0;
|
||||
|
||||
|
|
@ -110,7 +108,6 @@ namespace xo {
|
|||
<< xtag("message", error_description_)
|
||||
<< xtag("input", input_state_.current_line())
|
||||
<< xtag("whitespace", input_state_.whitespace())
|
||||
<< xtag("tk-start", tk_entry_)
|
||||
<< xtag("error-pos", error_pos_)
|
||||
<< ">";
|
||||
}
|
||||
|
|
@ -122,10 +119,13 @@ namespace xo {
|
|||
|
||||
if (error_description_) {
|
||||
const char * prefix = "input: ";
|
||||
const size_t tk_indent = strlen(prefix) + tk_entry_ + input_state_.whitespace();
|
||||
//const size_t msg_length = strlen(error_description_);
|
||||
|
||||
const size_t error_pos = 1 + tk_entry_ + input_state_.whitespace() + error_pos_;
|
||||
/* input_state.current_pos: position of first character following preceding token.
|
||||
* input_state.whitespace: whitespace between current_pos and start of failing token
|
||||
* error_pos: position (relative to start) at which failure detected
|
||||
*/
|
||||
const size_t tk_start = input_state_.current_pos() + input_state_.whitespace();
|
||||
const size_t tk_indent = (strlen(prefix) + tk_start);
|
||||
const size_t error_pos = 1 + tk_start + error_pos_;
|
||||
|
||||
os << "char: " << error_pos << endl;
|
||||
os << prefix;
|
||||
|
|
|
|||
|
|
@ -6,9 +6,6 @@ set(SELF_SRCS
|
|||
token.cpp)
|
||||
|
||||
xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS})
|
||||
#xo_dependency(${SELF_LIB} refcnt)
|
||||
xo_dependency(${SELF_LIB} indentlog)
|
||||
#xo_dependency(${SELF_LIB} subsys)
|
||||
#xo_boost_dependency(${SELF_LIB})
|
||||
|
||||
# end CMakeLists.txt
|
||||
|
|
|
|||
|
|
@ -63,5 +63,4 @@ namespace xo {
|
|||
} /*namespace scm*/
|
||||
} /*namespace xo*/
|
||||
|
||||
|
||||
/* end tokentype.cpp */
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue