xo-tokenizer: tokenrepl example + docs

This commit is contained in:
Roland Conybeare 2025-06-22 16:18:46 -05:00
commit b24d6d7e8d
8 changed files with 282 additions and 0 deletions

View file

@ -0,0 +1,5 @@
# xo-tokenizer/CMakeLists.txt
#
# NOTE(review): every call below is a documentation (doxygen/sphinx) macro, so
# this is presumably xo-tokenizer/docs/CMakeLists.txt -- confirm and correct
# the path in the header line above.
#
# None of these macros are defined in this file; presumably they are supplied
# by the shared xo-cmake macro package -- confirm.
xo_doxygen_collect_deps()
xo_docdir_doxygen_config()
# NOTE(review): only index.rst and install.rst are passed here, although the
# docs also include examples.rst -- confirm whether it should be listed too.
xo_docdir_sphinx_config(index.rst install.rst)

39
xo-tokenizer/docs/README Normal file
View file

@ -0,0 +1,39 @@
standalone build
+-----------------------------------------------+
| cmake |
| CMakeLists.txt |
| $PREFIX/share/cmake/xo_macros/xo_cxx.cmake |
+-----------------------------------------------+
|
| +----------------------+
+------------------------------------------------->| .build/docs/Doxyfile |
| +----------------------+
| |
| /------------/
| |
| v
| +---------------------------------------+ +-----------------+
+---->| doxygen |--->| .build/docs/dox |
| | $PREFIX/share/xo-macros/Doxyfile.in | | +- html/ |
| +---------------------------------------+ | +- xml/ |
| +-----------------+
| |
| /------------/
| |
| v
| +---------------------------------------+ +--------------------+
\---->| sphinx |--->| .build/docs/sphinx |
| +- conf.py | | +- html/ |
| +- _static/ | +--------------------+
| +- *.rst |
+---------------------------------------+
umbrella build relies on top-level cmake macros
files
README this file
CMakeLists.txt build entry point
conf.py sphinx config
_static static files for sphinx

View file

@ -0,0 +1,49 @@
.. _examples:
.. toctree::
:maxdepth: 2
Examples
========
See ``xo-tokenizer/examples/tokenrepl`` for a (slightly elaborated) version of the code below
.. code-block:: cpp
:linenos:
#include "xo/tokenizer/tokenizer.hpp"
int
main() {
using namespace xo::scm;
using namespace std;
using tokenizer_type = tokenizer<char>;
using span_type = tokenizer_type::span_type;
tokenizer_type tkz;
string input_str;
while (getline(cin, input_str)) {
// we want tokenizer to see newline, it's syntax
input_str.push_back('\n');
span_type input(input_str.begin(), input_str.end());
// input may contain multiple tokens
while (!input.empty()) {
auto [tk, nread] = tkz.scan(input);
if (tk.is_valid()) {
cout << tk;
}
input = input.after_prefix(nread);
}
}
auto tk = tkz.notify_eof();
if (tk.is_valid()) {
cout << tk;
}
}

View file

@ -0,0 +1,19 @@
.. xo-tokenizer documentation master file.
xo-tokenizer documentation
==========================
xo-tokenizer provides a tokenizer for the Schematika language.
Syntax is generally C-like, but with some important differences.
Notably, characters used for arithmetic operators (``+``, ``-``, ``*``, ``/``)
may appear in variable names: ``one-of-those-days`` is an ordinary symbol.
.. toctree::
:maxdepth: 2
:caption: xo-tokenizer contents
install
examples
genindex
search

View file

@ -0,0 +1,96 @@
.. _install:
.. toctree::
:maxdepth: 2
Install
=======
``xo-tokenizer`` uses supporting library ``xo-indentlog`` and cmake macros ``xo-cmake``.
These are on github:
- `xo-tokenizer source`_ (Schematika tokenizer)
- `xo-indentlog source`_ (structured logging)
- `xo-cmake source`_ (shared cmake macros)
.. _xo-tokenizer source: https://github.com/rconybea/xo-tokenizer
.. _xo-indentlog source: https://github.com/rconybea/xo-indentlog
.. _xo-cmake source: https://github.com/rconybea/xo-cmake
Installing from source
----------------------
Install scripts for `xo-tokenizer` and `xo-indentlog` depend on helper scripts installed from `xo-cmake`.
Preamble:
.. code-block:: bash
mkdir -p ~/proj/xo
cd ~/proj/xo
git clone https://github.com/rconybea/xo-cmake
PREFIX=/usr/local # ..or desired installation prefix
# want PREFIX/bin in PATH to use xo-cmake helpers
PATH=$PREFIX/bin:$PATH
Install `xo-cmake`:
.. code-block:: bash
cmake -B xo-cmake/.build -S xo-cmake
cmake --build xo-cmake/.build -j # placeholder, can omit for now
cmake --install xo-cmake/.build
Install `xo-indentlog`:
.. code-block:: bash
xo-build --clone --configure --build --install indentlog
Install `xo-tokenizer`:
.. code-block:: bash
xo-build --clone --configure --build --install xo-tokenizer
Directories under ``PREFIX`` will then contain:
.. code-block::
PREFIX
+- bin
| +- xo-build
| +- xo-cmake-config
| \- xo-cmake-lcov-harness
+- include
| \- xo
| +- indentlog/
| \- tokenizer/
+- lib
| \- cmake
| +- indentlog/
| \- xo_tokenizer/
+- share
\- cmake
\- xo_macros
+- Doxyfile.in
+- gen-ccov.in
\- xo-bootstrap-macros.cmake
Use CMake Support
-----------------
To use built-in cmake support, when using ``xo-tokenizer`` from another project:
Make sure ``PREFIX/lib/cmake`` is searched by cmake (if necessary, include it in ``CMAKE_PREFIX_PATH``)
Add to ``CMakeLists.txt``:
.. code-block:: cmake
find_package(xo_tokenizer CONFIG REQUIRED)
target_link_libraries(mytarget INTERFACE xo_tokenizer)

View file

@ -0,0 +1 @@
# pull in the tokenrepl example's own CMakeLists.txt
add_subdirectory(tokenrepl)

View file

@ -0,0 +1,11 @@
# xo-tokenizer/example/tokenrepl/CMakeLists.txt
#
# Build script for the tokenrepl example program.
# The executable is only declared when XO_ENABLE_EXAMPLES is true.

# name of the executable target
set(SELF_EXE xo_tokenizer_repl)
# source files compiled into that target
set(SELF_SRCS tokenrepl.cpp)
if (XO_ENABLE_EXAMPLES)
# xo_add_executable / xo_dependency are not defined in this file;
# presumably they come from the shared xo-cmake macros -- confirm.
xo_add_executable(${SELF_EXE} ${SELF_SRCS})
# the example depends on the xo_tokenizer library
xo_dependency(${SELF_EXE} xo_tokenizer)
endif()
# end CMakeLists.txt

View file

@ -0,0 +1,62 @@
/** @file tokenrepl.cpp **/
#include "xo/tokenizer/tokenizer.hpp"
#include <iostream>
#include <unistd.h> // for isatty
/** @brief Fetch the next line of REPL input.
 *
 *  When @p interactive is true, first writes the prompt "> " to @p out
 *  and flushes it, so the prompt is visible before blocking on input.
 *
 *  @param interactive  true to emit a prompt before reading
 *  @param in           stream to read a line from
 *  @param out          stream receiving the prompt
 *  @param input        receives the line read (without trailing newline)
 *  @return true if a line was read; false once @p in is in a failed state
 *          (for example at end of input)
 **/
bool repl_getline(bool interactive, std::istream& in, std::ostream& out, std::string& input)
{
    if (interactive)
        out << "> " << std::flush;

    std::getline(in, input);

    /* same test as converting the stream to bool */
    return !in.fail();
}
int
main() {
using namespace xo::scm;
using namespace std;
using tokenizer_type = tokenizer<char>;
using span_type = tokenizer_type::span_type;
xo::log_config::min_log_level = xo::log_level::info;
bool interactive = isatty(STDIN_FILENO);
tokenizer_type tkz(xo::log_config::min_log_level <= xo::log_level::info);
string input_str;
while (repl_getline(interactive, cin, cout, input_str)) {
// we want tokenizer to see newline, it's syntax
input_str.push_back('\n');
span_type input = span_type::from_string(input_str);
// reminder: input may contain multiple tokens
while (!input.empty()) {
auto [tk, consumed, error] = tkz.scan(input);
if (tk.is_valid()) {
cout << tk << endl;
} else if (error.is_error()) {
cout << "parsing error: " << error << endl;
/* discard remainder of input line */
break;
}
input = input.after_prefix(consumed.size());
}
}
auto [tk, consumed, error] = tkz.notify_eof(span_type::from_string(input_str));
if (tk.is_valid()) {
cout << tk << endl;
} else if (error.is_error()) {
cout << "parsing error: " << error << endl;
}
}
/** end tokenrepl.cpp */