.xo-tokenizer subrepo tidy
This commit is contained in:
parent
1f981a0636
commit
c8653b0b38
40 changed files with 0 additions and 5082 deletions
8
.xo-tokenizer/.gitignore
vendored
8
.xo-tokenizer/.gitignore
vendored
|
|
@ -1,8 +0,0 @@
|
|||
# emacs workspace config
|
||||
.projectile
|
||||
# clangd working space (see emacs+lsp)
|
||||
.cache
|
||||
# typical cmake build directory (source-tree-nephew)
|
||||
.build*
|
||||
# symlink to builddir/compile_commands.json; should be set manually in dev sandbox
|
||||
compile_commands.json
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
# xo-tokenizer/CMakeLists.txt
#
# Top-level build script for the xo_tokenizer project.
# Bootstraps the shared xo-cmake macros, then adds the library, example
# and docs subdirectories and exports the cmake package config.

# NOTE(review): cmake/xo-bootstrap-macros.cmake passes REQUIRED to
# find_program(), which CMake only honours from 3.18 onward -- confirm
# whether this minimum should be raised.
cmake_minimum_required(VERSION 3.10)

project(xo_tokenizer VERSION 0.1)

include(GNUInstallDirs)
include(cmake/xo-bootstrap-macros.cmake)

xo_cxx_toplevel_options3()

# ----------------------------------------------------------------
# c++ settings

# Extra compiler flags applied to every target below, for example:
#   set(PROJECT_CXX_FLAGS "-fconcepts-diagnostics-depth=2")
set(PROJECT_CXX_FLAGS "")

# These are compiler options, not -D preprocessor definitions, so they
# belong in add_compile_options() rather than add_definitions().
if (PROJECT_CXX_FLAGS)
    add_compile_options(${PROJECT_CXX_FLAGS})
endif()

# ----------------------------------------------------------------

add_subdirectory(src/tokenizer)
add_subdirectory(example)
#add_subdirectory(utest)  # tests failing, temporarily remove
xo_export_cmake_config(${PROJECT_NAME} ${PROJECT_VERSION} ${PROJECT_NAME}Targets)

# The example REPL target only exists when examples are enabled
# (see example/tokenrepl/CMakeLists.txt).
if (XO_ENABLE_EXAMPLES)
    # CMAKE_INSTALL_BINDIR defaults to "bin", so the default layout is
    # unchanged while still honouring a user-supplied bindir.
    install(TARGETS xo_tokenizer_repl
            DESTINATION ${CMAKE_INSTALL_BINDIR}/xo/example/tokenizer)
endif()

# ----------------------------------------------------------------
# docs targets depend on all the other library/utest targets
#
add_subdirectory(docs)
|
||||
|
|
@ -1,56 +0,0 @@
|
|||
# Schematika tokenizer library
|
||||
|
||||
## Getting Started
|
||||
|
||||
### build + install `xo-cmake` dependency
|
||||
|
||||
- [github/Rconybea/xo-cmake](https://github.com/Rconybea/xo-cmake)
|
||||
|
||||
Installs a few cmake ingredients, along with a build assistant `xo-build` for XO projects such as this one.
|
||||
|
||||
### build + install other required XO dependencies
|
||||
```
|
||||
$ xo-build --clone --configure --build --install xo-indentlog
|
||||
$ xo-build --clone --configure --build --install xo-refcnt
|
||||
$ xo-build --clone --configure --build --install xo-subsys
|
||||
$ xo-build --clone --configure --build --install xo-reflectutil
|
||||
```
|
||||
|
||||
Note: can use `-n` to dry-run here
|
||||
|
||||
### copy `xo-tokenizer` repository locally
|
||||
```
|
||||
$ xo-build --clone xo-tokenizer
|
||||
```
|
||||
|
||||
or equivalently
|
||||
```
|
||||
$ git clone git@github.com:Rconybea/xo-tokenizer.git
|
||||
```
|
||||
|
||||
### build + install `xo-tokenizer`
|
||||
|
||||
```
|
||||
$ xo-build --configure --build --install xo-tokenizer
|
||||
```
|
||||
|
||||
or equivalently:
|
||||
|
||||
```
|
||||
$ PREFIX=/usr/local # or wherever you prefer
|
||||
$ cmake -DCMAKE_INSTALL_PREFIX=${PREFIX} -S xo-tokenizer -B xo-tokenizer/.build
|
||||
$ cmake --build xo-tokenizer/.build
|
||||
$ cmake --install xo-tokenizer/.build
|
||||
```
|
||||
|
||||
### build for unit test coverage
|
||||
```
|
||||
$ cmake -DCMAKE_BUILD_TYPE=coverage -DCMAKE_INSTALL_PREFIX=$PREFIX -S xo-tokenizer -B xo-tokenizer/.build-ccov
|
||||
$ cmake --build xo-tokenizer/.build-ccov
|
||||
```
|
||||
|
||||
### LSP support
|
||||
```
|
||||
$ cd xo-tokenizer
|
||||
$ ln -s .build/compile_commands.json # lsp will look for compile_commands.json in the root of the source tree
|
||||
```
|
||||
|
|
@ -1,41 +0,0 @@
|
|||
# ----------------------------------------------------------------
# Bootstrap for the shared xo-cmake macros.
#
# for example:
#   $ PREFIX=/usr/local  # for example
#   $ cmake -DCMAKE_MODULE_PATH=prefix -DCMAKE_INSTALL_PREFIX=$PREFIX -B .build
#
# will get
#   CMAKE_MODULE_PATH
# from xo-cmake-config --cmake-module-path
#
# and expect .cmake macros in
#   CMAKE_MODULE_PATH/xo_macros/xo_cxx.cmake
#
# An empty CMAKE_MODULE_PATH, or the literal placeholder value "prefix",
# both mean "work out the module path automatically".
# ----------------------------------------------------------------

find_program(XO_CMAKE_CONFIG_EXECUTABLE NAMES xo-cmake-config REQUIRED)

# find_program() leaves the variable as <VAR>-NOTFOUND on failure, which
# evaluates false in if().  REQUIRED already aborts on CMake >= 3.18; this
# explicit check covers older versions that do not understand REQUIRED.
if (NOT XO_CMAKE_CONFIG_EXECUTABLE)
    message(FATAL_ERROR "could not find xo-cmake-config executable")
endif()

message(STATUS "XO_CMAKE_CONFIG_EXECUTABLE=${XO_CMAKE_CONFIG_EXECUTABLE}")

if (("${CMAKE_MODULE_PATH}" STREQUAL "") OR ("${CMAKE_MODULE_PATH}" STREQUAL "prefix"))
    if (XO_SUBMODULE_BUILD)
        # local version of xo-cmake macros
        set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/xo-cmake/cmake")
    else()
        # default to typical install location for xo-project-macros.
        # Strip the trailing newline so the value is usable as a directory.
        execute_process(COMMAND "${XO_CMAKE_CONFIG_EXECUTABLE}" --cmake-module-path
                        OUTPUT_VARIABLE CMAKE_MODULE_PATH
                        RESULT_VARIABLE xo_cmake_config_status
                        OUTPUT_STRIP_TRAILING_WHITESPACE)

        if (NOT xo_cmake_config_status EQUAL 0)
            message(FATAL_ERROR "xo-cmake-config --cmake-module-path failed: ${xo_cmake_config_status}")
        endif()
    endif()

    message(STATUS "CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}")
endif()

# needs to have been installed somewhere on CMAKE_MODULE_PATH,
# (e.g. from xo-cmake with the same value for CMAKE_INSTALL_PREFIX)
#
include(xo_macros/xo_cxx)

xo_cxx_bootstrap_message()
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
# Package config template for this project; the @...@ placeholders are
# substituted when the file is configured.
# NOTE(review): presumably expanded via configure_package_config_file()
# inside xo_export_cmake_config() -- confirm against xo-cmake.
@PACKAGE_INIT@

include(CMakeFindDependencyMacro)
# Dependencies that consumers of this package must also be able to find.
#find_dependency(refcnt)
find_dependency(indentlog)
#find_dependency(subsys)
# Pull in the exported targets and the shared settings installed next to this file.
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Share.cmake")
check_required_components("@PROJECT_NAME@")
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
# xo-tokenizer/docs/CMakeLists.txt
#
# Configures the doxygen and sphinx documentation build for xo-tokenizer.

xo_doxygen_collect_deps()
xo_docdir_doxygen_config()

# .rst sources for the sphinx build.  Keep this list in step with the
# toctree in index.rst; tokenizer-class.rst is referenced there and so
# is listed here too.
# NOTE(review): assumes the arguments are the sphinx source files the docs
# target depends on -- confirm against xo_docdir_sphinx_config() in xo-cmake.
xo_docdir_sphinx_config(
    index.rst install.rst examples.rst implementation.rst
    input-state-class.rst scan-result-class.rst schematika-tokens.rst span-class.rst
    token-class.rst tokenizer-class.rst tokenizer-error-class.rst tokentype-enum.rst
)
|
||||
|
|
@ -1,41 +0,0 @@
|
|||
standalone build
|
||||
|
||||
+-----------------------------------------------+
|
||||
| cmake |
|
||||
| CMakeLists.txt |
|
||||
| $PREFIX/share/cmake/xo_macros/xo_cxx.cmake |
|
||||
+-----------------------------------------------+
|
||||
|
|
||||
| +----------------------+
|
||||
+------------------------------------------------->| .build/docs/Doxyfile |
|
||||
| +----------------------+
|
||||
| ^
|
||||
| (cmake |
|
||||
| /------------/
|
||||
| |
|
||||
| +---------------------------------------+ +-----------------+
|
||||
+---->| doxygen |--->| .build/docs/dox |
|
||||
| | $PREFIX/share/xo-macros/Doxyfile.in | | +- html/ |
|
||||
| +---------------------------------------+ | +- xml/ |
|
||||
| +-----------------+
|
||||
| |
|
||||
| /------------/
|
||||
| |
|
||||
| v
|
||||
| +---------------------------------------+ +--------------------+
|
||||
\---->| sphinx |--->| .build/docs/sphinx |
|
||||
| +- conf.py | | +- html/ |
|
||||
| +- _static/ | +--------------------+
|
||||
| +- *.rst |
|
||||
+---------------------------------------+
|
||||
|
||||
umbrella build relies on top-level cmake macros
|
||||
|
||||
files
|
||||
|
||||
README this file
|
||||
CMakeLists.txt build entry point
|
||||
conf.py sphinx config
|
||||
_static static files for sphinx
|
||||
|
||||
index.rst toplevel sphinx document; entry point
|
||||
1
.xo-tokenizer/docs/_static/README
vendored
1
.xo-tokenizer/docs/_static/README
vendored
|
|
@ -1 +0,0 @@
|
|||
add any static {.html, .js, ..} files for sphinx to pickup here
|
||||
BIN
.xo-tokenizer/docs/_static/img/favicon.ico
vendored
BIN
.xo-tokenizer/docs/_static/img/favicon.ico
vendored
Binary file not shown.
|
Before Width: | Height: | Size: 302 KiB |
|
|
@ -1,39 +0,0 @@
|
|||
# Sphinx configuration for the xo-tokenizer documentation.
#
# Reference for all built-in configuration values:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = "xo tokenizer documentation"
author = "Roland Conybeare"
copyright = "2024-2025, Roland Conybeare"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

extensions = [
    "breathe",
    "sphinx.ext.mathjax",      # inline math
    "sphinx.ext.autodoc",      # generate info from docstrings
    "sphinxcontrib.ditaa",     # diagrams-through-ascii-art
    "sphinxcontrib.plantuml",  # text -> uml diagrams
]

# breathe consumes doxygen xml output, so Doxyfile.in must have
# GENERATE_XML = YES; the name below must match the project name used there.
breathe_default_project = "xodoxxml"

templates_path = ["_templates"]
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

pygments_style = "sphinx"

# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

# alternative theme: 'alabaster'
html_theme = "sphinx_rtd_theme"
html_static_path = ["_static"]
html_favicon = "_static/img/favicon.ico"
|
||||
|
|
@ -1,99 +0,0 @@
|
|||
.. _examples:
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
Examples
|
||||
========
|
||||
|
||||
See ``xo-tokenizer/example/tokenrepl`` for a (slightly elaborated) version of the code below
|
||||
|
||||
.. code-block:: cpp
|
||||
:linenos:
|
||||
|
||||
#include "xo/tokenizer/tokenizer.hpp"
|
||||
|
||||
int
|
||||
main() {
|
||||
using namespace xo::scm;
|
||||
using namespace std;
|
||||
|
||||
using tokenizer_type = tokenizer<char>;
|
||||
using span_type = tokenizer_type::span_type;
|
||||
|
||||
tokenizer_type tkz;
|
||||
string input_str;
|
||||
|
||||
while (getline(cin, input_str)) {
|
||||
// we want tokenizer to see newline, it's syntax
|
||||
input_str.push_back('\n');
|
||||
span_type input(input_str.begin(), input_str.end());
|
||||
|
||||
// input may contain multiple tokens
|
||||
while (!input.empty()) {
|
||||
auto [tk, consumed, error] = tkz.scan(input);
|
||||
|
||||
if (tk.is_valid()) {
|
||||
cout << tk;
|
||||
}
|
||||
|
||||
input = input.after_prefix(consumed.size());
|
||||
}
|
||||
}
|
||||
|
||||
auto [tk, consumed, error] = tkz.notify_eof(span_type::from_string(input_str));
|
||||
|
||||
if (tk.is_valid()) {
|
||||
cout << tk;
|
||||
} else if (error.is_error()) {
|
||||
cout << "parsing error: " << endl;
|
||||
error.report(cout);
|
||||
}
|
||||
}
|
||||
|
||||
Reminder: enable building examples with ``cmake -DXO_ENABLE_EXAMPLES=1 ..``
|
||||
|
||||
.. code-block::
|
||||
:linenos:
|
||||
|
||||
$ .build/xo-tokenizer/example/tokenrepl/xo_tokenizer_repl
|
||||
> 123
|
||||
<token :type tk_i64 :text 123>
|
||||
> 123e5
|
||||
<token :type tk_f64 :text 123e5>
|
||||
> def sq(x: i64) -> i64 { x * x }
|
||||
<token :type tk_def :text "">
|
||||
<token :type tk_symbol :text sq>
|
||||
<token :type tk_leftparen :text "">
|
||||
<token :type tk_symbol :text x>
|
||||
<token :type tk_colon :text "">
|
||||
<token :type tk_symbol :text i64>
|
||||
<token :type tk_rightparen :text "">
|
||||
<token :type tk_yields :text "">
|
||||
<token :type tk_symbol :text i64>
|
||||
<token :type tk_leftbrace :text "">
|
||||
<token :type tk_symbol :text x>
|
||||
<token :type tk_star :text "">
|
||||
<token :type tk_symbol :text x>
|
||||
<token :type tk_rightbrace :text "">
|
||||
|
||||
Example of error reporting (via ``error.report(cout)`` above)
|
||||
|
||||
.. code-block::
|
||||
:linenos:
|
||||
|
||||
$ .build/xo-tokenizer/example/tokenrepl/xo_tokenizer_repl
|
||||
|
||||
> 123q
|
||||
parsing error:
|
||||
char: 4
|
||||
input: 123q
|
||||
---^
|
||||
unexpected character in numeric constant
|
||||
|
||||
> (8 * 8 * 123fd)
|
||||
parsing error:
|
||||
char: 13
|
||||
input: (8 * 8 * 123fd)
|
||||
---^
|
||||
unexpected character in numeric constant
|
||||
|
|
@ -1,38 +0,0 @@
|
|||
.. _implementation:
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
Components
|
||||
==========
|
||||
|
||||
Library dependency tower for *xo-tokenizer*:
|
||||
|
||||
.. ditaa::
|
||||
|
||||
+-----------------+
|
||||
| xo_tokenizer |
|
||||
+-----------------+
|
||||
| xo_indentlog |
|
||||
+-----------------+
|
||||
| xo_cmake |
|
||||
+-----------------+
|
||||
|
||||
Install instructions :doc:`here<install>`
|
||||
|
||||
Abstraction tower for *xo-tokenizer* components:
|
||||
|
||||
.. ditaa::
|
||||
:--scale: 0.85
|
||||
|
||||
+-----------------------------------------+----------+
|
||||
| tokenizer | |
|
||||
+-----------------------------------------+ |
|
||||
| scan_result | |
|
||||
+-----------------+-----------------------+ |
|
||||
| | tokenizer_error | buffer |
|
||||
| token +-----------------------+ |
|
||||
| | input_state | |
|
||||
+-----------------+-----------------------+ |
|
||||
| tokentype | span | |
|
||||
+-----------------+-----------------------+----------+
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
.. xo-tokenizer documentation master file.
|
||||
|
||||
xo-tokenizer documentation
|
||||
==========================
|
||||
|
||||
xo-tokenizer provides a tokenizer for the Schematika language.
|
||||
|
||||
Syntax is generally C-like, but with some important differences.
|
||||
Notably, characters used for arithmetic operators (``+``, ``-``, ``*``, ``/``)
|
||||
may appear in variable names: ``one-of-those-days`` is an ordinary symbol.
|
||||
|
||||
Typically applications would use xo-reader to construct Schematika expressions
|
||||
instead of interacting directly with ``xo::scm::tokenizer``.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: xo-tokenizer contents
|
||||
|
||||
install
|
||||
examples
|
||||
schematika-tokens
|
||||
implementation
|
||||
tokenizer-class
|
||||
scan-result-class
|
||||
token-class
|
||||
tokenizer-error-class
|
||||
input-state-class
|
||||
span-class
|
||||
tokentype-enum
|
||||
|
|
@ -1,77 +0,0 @@
|
|||
|
||||
.. _input-state-class:
|
||||
|
||||
Input State
|
||||
===========
|
||||
|
||||
Track detailed state of input stream to collect information useful for detailed error reporting
|
||||
|
||||
Context
|
||||
-------
|
||||
|
||||
.. ditaa::
|
||||
:--scale: 0.85
|
||||
|
||||
+-----------------------------------------+----------+
|
||||
| tokenizer | |
|
||||
+-----------------------------------------+ |
|
||||
| scan_result | |
|
||||
+-----------------+-----------------------+ |
|
||||
| | tokenizer_error | buffer |
|
||||
| token +-----------------------+ |
|
||||
| |cBLU input_state | |
|
||||
+-----------------+-----------------------+ |
|
||||
| tokentype | span | |
|
||||
+-----------------+-----------------------+----------+
|
||||
|
||||
.. code-block:: cpp
|
||||
|
||||
#include <xo/tokenizer/input_state.hpp>
|
||||
|
||||
.. uml::
|
||||
:scale: 99%
|
||||
:align: center
|
||||
|
||||
allowmixing
|
||||
|
||||
object in1<<input_state>>
|
||||
in1 : current_line = input
|
||||
in1 : current_pos
|
||||
in1 : whitespace
|
||||
in1 : debug_flag
|
||||
|
||||
object input
|
||||
input : (x * y * 123d)
|
||||
|
||||
in1 o-- input
|
||||
|
||||
|
||||
Class
|
||||
-----
|
||||
|
||||
.. doxygenclass:: xo::scm::input_state
|
||||
|
||||
Instance Variables
|
||||
------------------
|
||||
|
||||
.. doxygengroup:: input-state-instance-vars
|
||||
|
||||
Constructors
|
||||
------------
|
||||
|
||||
.. doxygengroup:: input-state-ctors
|
||||
|
||||
Static Methods
|
||||
--------------
|
||||
|
||||
.. doxygengroup:: input-state-static-methods
|
||||
|
||||
Access Methods
|
||||
--------------
|
||||
|
||||
.. doxygengroup:: input-state-access-methods
|
||||
|
||||
General Methods
|
||||
---------------
|
||||
|
||||
.. doxygengroup:: input-state-general-methods
|
||||
|
|
@ -1,111 +0,0 @@
|
|||
.. _install:
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
Source
|
||||
======
|
||||
|
||||
Source code lives on github `here`_
|
||||
|
||||
.. _here: https://github.com/rconybea/xo-tokenizer
|
||||
|
||||
To clone from git:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
git clone https://github.com/rconybea/xo-tokenizer
|
||||
|
||||
Tested with gcc 13.3
|
||||
|
||||
Install
|
||||
=======
|
||||
|
||||
``xo-tokenizer`` uses supporting library ``xo-indentlog`` and cmake macros ``xo-cmake``.
|
||||
These are on github:
|
||||
|
||||
- `xo-tokenizer source`_ (Schematika tokenizer)
|
||||
- `xo-indentlog source`_ (structured logging)
|
||||
- `xo-cmake source`_ (shared cmake macros)
|
||||
|
||||
.. _xo-tokenizer source: https://github.com/rconybea/xo-tokenizer
|
||||
.. _xo-indentlog source: https://github.com/rconybea/indentlog
|
||||
.. _xo-cmake source: https://github.com/rconybea/xo-cmake
|
||||
|
||||
Installing from source
|
||||
----------------------
|
||||
|
||||
Install scripts for `xo-tokenizer` and `xo-indentlog` depend on helper scripts installed from `xo-cmake`.
|
||||
|
||||
Preamble:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
mkdir -p ~/proj/xo
|
||||
cd ~/proj/xo
|
||||
|
||||
git clone https://github.com/rconybea/xo-cmake
|
||||
|
||||
PREFIX=/usr/local # ..or desired installation prefix
|
||||
|
||||
# want PREFIX/bin in PATH to use xo-cmake helpers
|
||||
PATH=$PREFIX/bin:$PATH
|
||||
|
||||
Install `xo-cmake`:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
cmake -B xo-cmake/.build -S xo-cmake
|
||||
cmake --build xo-cmake/.build -j # placeholder, can omit for now
|
||||
cmake --install xo-cmake/.build
|
||||
|
||||
Install `xo-indentlog`:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
xo-build --clone --configure --build --install indentlog
|
||||
|
||||
Install `xo-tokenizer`:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
xo-build --clone --configure --build --install xo-tokenizer
|
||||
|
||||
Directories under ``PREFIX`` will then contain:
|
||||
|
||||
.. code-block::
|
||||
|
||||
PREFIX
|
||||
+- bin
|
||||
| +- xo-build
|
||||
| +- xo-cmake-config
|
||||
| \- xo-cmake-lcov-harness
|
||||
+- include
|
||||
| \- xo
|
||||
| +- indentlog/
|
||||
| \- tokenizer/
|
||||
+- lib
|
||||
| \- cmake
|
||||
| +- indentlog/
|
||||
| \- xo_tokenizer/
|
||||
+- share
|
||||
\- cmake
|
||||
\- xo_macros
|
||||
+- Doxyfile.in
|
||||
+- gen-ccov.in
|
||||
\- xo-bootstrap-macros.cmake
|
||||
|
||||
Use CMake Support
|
||||
-----------------
|
||||
|
||||
To use built-in cmake support, when using ``xo-tokenizer`` from another project:
|
||||
|
||||
Make sure ``PREFIX/lib/cmake`` is searched by cmake (if necessary, include it in ``CMAKE_PREFIX_PATH``)
|
||||
|
||||
Add to ``CMakeLists.txt``:
|
||||
|
||||
.. code-block:: cmake
|
||||
|
||||
find_package(xo_tokenizer CONFIG REQUIRED)
|
||||
|
||||
target_link_libraries(mytarget INTERFACE xo_tokenizer)
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
|
||||
.. _scan-result-class:
|
||||
|
||||
Scan Result
|
||||
===========
|
||||
|
||||
Represent the result of a tokenizer scan call
|
||||
|
||||
Context
|
||||
-------
|
||||
|
||||
.. ditaa::
|
||||
:--scale: 0.85
|
||||
|
||||
+-----------------------------------------+----------+
|
||||
| tokenizer | |
|
||||
+-----------------------------------------+ |
|
||||
|cBLU scan_result | |
|
||||
+-----------------+-----------------------+ |
|
||||
| | tokenizer_error | buffer |
|
||||
| token +-----------------------+ |
|
||||
| | input_state | |
|
||||
+-----------------+-----------------------+ |
|
||||
| tokentype | span | |
|
||||
+-----------------+-----------------------+----------+
|
||||
|
||||
.. code-block:: cpp
|
||||
|
||||
#include <xo/tokenizer/scan_result.hpp>
|
||||
|
|
@ -1,105 +0,0 @@
|
|||
.. _schematika-tokens:
|
||||
|
||||
Schematika Tokens
|
||||
=================
|
||||
|
||||
.. list-table:: Schematika Tokens
|
||||
:widths: 15 30 30
|
||||
:header-rows: 1
|
||||
|
||||
* - tokentype
|
||||
- examples
|
||||
- description
|
||||
* - tk_i64
|
||||
- ``123``, ``-8``
|
||||
- 64-bit integer literal
|
||||
* - tk_f64
|
||||
- ``1.234``, ``-10.``, ``-1.981e-10``, ``3e6``
|
||||
- 64-bit floating-point literal
|
||||
* - tk_string
|
||||
- ``"hello"``, ``"Q: \"what's up?\"\nA: \"parsing!\""``
|
||||
- string literal. Usual escapes ``\n``, ``\r``, ``\t``, ``\"``, ``\\``
|
||||
* - tk_symbol
|
||||
- ``apple``, ``funKy``, ``x123``, ``_mumble``, ``hyphenated-var``
|
||||
- symbol name
|
||||
* - tk_type
|
||||
- ``type``
|
||||
- keyword
|
||||
* - tk_def
|
||||
- ``def``
|
||||
- keyword
|
||||
* - tk_lambda
|
||||
- ``lambda``
|
||||
- keyword
|
||||
* - tk_if
|
||||
- ``if``
|
||||
- keyword
|
||||
* - tk_let
|
||||
- ``let``
|
||||
- keyword
|
||||
* - tk_in
|
||||
- ``in``
|
||||
- keyword
|
||||
* - tk_end
|
||||
- ``end``
|
||||
- keyword
|
||||
* - tk_leftparen
|
||||
- ``(``
|
||||
-
|
||||
* - tk_rightparen
|
||||
- ``)``
|
||||
-
|
||||
* - tk_leftbracket
|
||||
- ``[``
|
||||
-
|
||||
* - tk_rightbracket
|
||||
- ``]``
|
||||
-
|
||||
* - tk_leftbrace
|
||||
- ``{``
|
||||
-
|
||||
* - tk_rightbrace
|
||||
- ``}``
|
||||
-
|
||||
* - tk_leftangle
|
||||
- ``<``
|
||||
-
|
||||
* - tk_rightangle
|
||||
- ``>``
|
||||
-
|
||||
* - tk_dot
|
||||
- ``.``
|
||||
-
|
||||
* - tk_comma
|
||||
- ``,``
|
||||
-
|
||||
* - tk_colon
|
||||
- ``:``
|
||||
-
|
||||
* - tk_doublecolon
|
||||
- ``::``
|
||||
-
|
||||
* - tk_semicolon
|
||||
- ``;``
|
||||
-
|
||||
* - tk_singleassign
|
||||
- ``=``
|
||||
-
|
||||
* - tk_assign
|
||||
- ``:=``
|
||||
-
|
||||
* - tk_yields
|
||||
- ``->``
|
||||
-
|
||||
* - tk_plus
|
||||
- ``+``
|
||||
- allowed in symbol
|
||||
* - tk_minus
|
||||
- ``-``
|
||||
- allowed in symbol
|
||||
* - tk_star
|
||||
- ``*``
|
||||
- allowed in symbol
|
||||
* - tk_slash
|
||||
- ``/``
|
||||
- allowed in symbol
|
||||
|
|
@ -1,87 +0,0 @@
|
|||
|
||||
.. _span-class:
|
||||
|
||||
Span
|
||||
====
|
||||
|
||||
Identify an unowned contiguous memory range
|
||||
|
||||
Context
|
||||
-------
|
||||
|
||||
.. ditaa::
|
||||
:--scale: 0.85
|
||||
|
||||
+-----------------------------------------+----------+
|
||||
| tokenizer | |
|
||||
+-----------------------------------------+ |
|
||||
| scan_result | |
|
||||
+-----------------+-----------------------+ |
|
||||
| | tokenizer_error | buffer |
|
||||
| token +-----------------------+ |
|
||||
| | input_state | |
|
||||
+-----------------+-----------------------+ |
|
||||
| tokentype |cBLU span | |
|
||||
+-----------------+-----------------------+----------+
|
||||
|
||||
.. code-block:: cpp
|
||||
|
||||
#include <xo/tokenizer/span.hpp>
|
||||
|
||||
.. uml::
|
||||
:scale: 99%
|
||||
:align: center
|
||||
|
||||
allowmixing
|
||||
|
||||
object span1<<span>>
|
||||
span1 : lo = p
|
||||
span1 : hi = p+25
|
||||
|
||||
object dest<<memory>>
|
||||
dest : def fact(n : i64) { ... }
|
||||
|
||||
span1 o-- dest
|
||||
|
||||
- Identify a sequence of characters stored in contiguous memory.
|
||||
|
||||
- Lightweight, consists of a pair of pointers.
|
||||
|
||||
- Does not own storage. Lifetime management for target memory is
|
||||
up to the caller.
|
||||
|
||||
|
||||
Class
|
||||
-----
|
||||
|
||||
.. doxygenclass:: xo::scm::span
|
||||
|
||||
Member Variables
|
||||
----------------
|
||||
|
||||
.. doxygengroup:: span-instance-vars
|
||||
|
||||
Type Traits
|
||||
-----------
|
||||
|
||||
.. doxygengroup:: span-type-traits
|
||||
|
||||
Constructors
|
||||
------------
|
||||
|
||||
.. doxygengroup:: span-ctors
|
||||
|
||||
Access Methods
|
||||
--------------
|
||||
|
||||
.. doxygengroup:: span-access-methods
|
||||
|
||||
General Methods
|
||||
---------------
|
||||
|
||||
.. doxygengroup:: span-general-methods
|
||||
|
||||
Operators
|
||||
---------
|
||||
|
||||
.. doxygengroup:: span-operators
|
||||
|
|
@ -1,96 +0,0 @@
|
|||
|
||||
.. _token-class:
|
||||
|
||||
Token
|
||||
=====
|
||||
|
||||
Represent a single lexical token in the Schematika language
|
||||
|
||||
Context
|
||||
-------
|
||||
|
||||
.. ditaa::
|
||||
:--scale: 0.85
|
||||
|
||||
+-----------------------------------------+----------+
|
||||
| tokenizer | |
|
||||
+-----------------------------------------+ |
|
||||
| scan_result | |
|
||||
+-----------------+-----------------------+ |
|
||||
|cBLU | tokenizer_error | buffer |
|
||||
| token +-----------------------+ |
|
||||
| | input_state | |
|
||||
+-----------------+-----------------------+ |
|
||||
| tokentype | span | |
|
||||
+-----------------+-----------------------+----------+
|
||||
|
||||
.. code-block:: cpp
|
||||
|
||||
#include <xo/tokenizer/token.hpp>
|
||||
|
||||
.. uml::
|
||||
:scale: 99%
|
||||
:align: center
|
||||
|
||||
allowmixing
|
||||
|
||||
object tk1<<token>>
|
||||
tk1 : tk_type = tk_i64
|
||||
tk1 : text = "123"
|
||||
|
||||
object tk2<<token>>
|
||||
tk2 : tk_type = tk_string
|
||||
tk2 : text = "the quick brown fox"
|
||||
|
||||
- Represent a single lexical token
|
||||
|
||||
- Does not share any storage with original input stream
|
||||
(maintains a local copy).
|
||||
|
||||
- Remembers copied input extent.
|
||||
Convert on demand to native untagged representation
|
||||
|
||||
Example
|
||||
-------
|
||||
|
||||
.. code-block:: cpp
|
||||
|
||||
void foo() {
|
||||
using namespace xo::scm;
|
||||
|
||||
token<char> tk = token<char>::i64_token("123");
|
||||
|
||||
tk.is_valid(); // -> true
|
||||
tk.text(); // -> "123"s;
|
||||
|
||||
tk.tk_type(); // -> tokentype::tk_i64
|
||||
tk.i64_value(); // -> 123
|
||||
|
||||
cout << tk << endl; // -> <token :type tk_i64 :text 123>
|
||||
}
|
||||
|
||||
Class
|
||||
-----
|
||||
|
||||
.. doxygenclass:: xo::scm::token
|
||||
|
||||
|
||||
Instance Variables
|
||||
------------------
|
||||
|
||||
.. doxygengroup:: token-instance-vars
|
||||
|
||||
Constructors
|
||||
------------
|
||||
|
||||
.. doxygengroup:: token-ctors
|
||||
|
||||
Access Methods
|
||||
--------------
|
||||
|
||||
.. doxygengroup:: token-access-methods
|
||||
|
||||
General Methods
|
||||
---------------
|
||||
|
||||
.. doxygengroup:: token-general-methods
|
||||
|
|
@ -1,68 +0,0 @@
|
|||
|
||||
.. _tokenizer-class:
|
||||
|
||||
Tokenizer
|
||||
=========
|
||||
|
||||
Parse a Schematika character stream into lexical tokens
|
||||
|
||||
Context
|
||||
-------
|
||||
|
||||
.. ditaa::
|
||||
:--scale: 0.85
|
||||
|
||||
+-----------------------------------------+----------+
|
||||
|cBLU tokenizer | |
|
||||
+-----------------------------------------+ |
|
||||
| scan_result | |
|
||||
+-----------------+-----------------------+ |
|
||||
| | tokenizer_error | buffer |
|
||||
| token +-----------------------+ |
|
||||
| | input_state | |
|
||||
+-----------------+-----------------------+ |
|
||||
| tokentype | span | |
|
||||
+-----------------+-----------------------+----------+
|
||||
|
||||
.. code-block:: cpp
|
||||
|
||||
#include <xo/tokenizer/tokenizer.hpp>
|
||||
|
||||
.. uml::
|
||||
:scale: 99%
|
||||
:align: center
|
||||
|
||||
allowmixing
|
||||
|
||||
object tkz1<<tokenizer>>
|
||||
tkz1 : input_state = ins1
|
||||
|
||||
object ins1<<input_state>>
|
||||
ins1 : current_line = (9 * 8)
|
||||
|
||||
tkz1 o-- ins1
|
||||
|
||||
- Assemble a stream of lexical tokens from a text stream.
|
||||
|
||||
- Lexical errors reported via scan_result instance;
|
||||
errors reported with detailed context
|
||||
|
||||
Class
|
||||
-----
|
||||
|
||||
.. doxygenclass:: xo::scm::tokenizer
|
||||
|
||||
Instance Variables
|
||||
------------------
|
||||
|
||||
.. doxygengroup:: tokenizer-instance-vars
|
||||
|
||||
Constructors
|
||||
------------
|
||||
|
||||
.. doxygengroup:: tokenizer-ctors
|
||||
|
||||
Methods
|
||||
-------
|
||||
|
||||
.. doxygengroup:: tokenizer-general-methods
|
||||
|
|
@ -1,54 +0,0 @@
|
|||
|
||||
.. _tokenizer-error-class:
|
||||
|
||||
Tokenizer Error
|
||||
===============
|
||||
|
||||
Represent a possible tokenizer error result, including parsing context
|
||||
|
||||
Context
|
||||
-------
|
||||
|
||||
.. ditaa::
|
||||
:--scale: 0.85
|
||||
|
||||
+-----------------------------------------+----------+
|
||||
| tokenizer | |
|
||||
+-----------------------------------------+ |
|
||||
| scan_result | |
|
||||
+-----------------+-----------------------+ |
|
||||
| |cBLU tokenizer_error | buffer |
|
||||
| token +-----------------------+ |
|
||||
| | input_state | |
|
||||
+-----------------+-----------------------+ |
|
||||
| tokentype | span | |
|
||||
+-----------------+-----------------------+----------+
|
||||
|
||||
.. code-block:: cpp
|
||||
|
||||
#include <xo/tokenizer/tokenizer_error.hpp>
|
||||
|
||||
Class
|
||||
------
|
||||
|
||||
.. doxygenclass:: xo::scm::tokenizer_error
|
||||
|
||||
Instance Variables
|
||||
------------------
|
||||
|
||||
.. doxygengroup:: tokenizer-error-vars
|
||||
|
||||
Constructors
|
||||
------------
|
||||
|
||||
.. doxygengroup:: tokenizer-error-ctors
|
||||
|
||||
Access Methods
|
||||
--------------
|
||||
|
||||
.. doxygengroup:: tokenizer-error-access-methods
|
||||
|
||||
General Methods
|
||||
---------------
|
||||
|
||||
.. doxygengroup:: tokenizer-error-general-methods
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
|
||||
.. _tokentype-enum:
|
||||
|
||||
Tokentype
|
||||
=========
|
||||
|
||||
Distinguish different lexical tokens for the Schematika language.
|
||||
|
||||
Context
|
||||
-------
|
||||
|
||||
.. ditaa::
|
||||
:--scale: 0.85
|
||||
|
||||
+-----------------------------------------+----------+
|
||||
| tokenizer | |
|
||||
+-----------------------------------------+ |
|
||||
| scan_result | |
|
||||
+-----------------+-----------------------+ |
|
||||
| | tokenizer_error | buffer |
|
||||
| token +-----------------------+ |
|
||||
| | input_state | |
|
||||
+-----------------+-----------------------+ |
|
||||
|cBLU tokentype | span | |
|
||||
+-----------------+-----------------------+----------+
|
||||
|
||||
.. code-block:: cpp
|
||||
|
||||
#include <xo/tokenizer/tokentype.hpp>
|
||||
|
||||
Enum
|
||||
----
|
||||
|
||||
.. doxygenfunction:: xo::scm::tokentype_descr
|
||||
|
||||
.. doxygenfunction:: xo::scm::operator<<(std::ostream&,tokentype)
|
||||
|
|
@ -1 +0,0 @@
|
|||
# Descend into the tokenrepl example directory.
add_subdirectory(tokenrepl)
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
# xo-tokenizer/example/tokenrepl/CMakeLists.txt
#
# Declares the xo_tokenizer_repl example executable.
# The target is only created when XO_ENABLE_EXAMPLES is set.

set(SELF_EXE xo_tokenizer_repl)
set(SELF_SRCS tokenrepl.cpp)

# Nothing to build unless examples were requested.
if (NOT XO_ENABLE_EXAMPLES)
    return()
endif()

xo_add_executable(${SELF_EXE} ${SELF_SRCS})
xo_self_dependency(${SELF_EXE} xo_tokenizer)

# end CMakeLists.txt
|
||||
|
|
@ -1,71 +0,0 @@
|
|||
/** @file tokenrepl.cpp **/
|
||||
|
||||
#include "xo/tokenizer/tokenizer.hpp"
|
||||
#include <iostream>
|
||||
#include <unistd.h> // for isatty
|
||||
|
||||
/** Read the next line of input for the repl.
 *
 *  When @p interactive is true, first writes the prompt "> " to @p out
 *  and flushes it. Then reads one line from @p in into @p input.
 *
 *  @return true if a line was read; false once @p in is in a failed
 *          state (for example at end of input).
 **/
bool repl_getline(bool interactive,
                  std::istream & in,
                  std::ostream & out,
                  std::string & input)
{
    if (interactive)
        out << "> " << std::flush;

    std::getline(in, input);

    // same result as converting the stream to bool
    return !in.fail();
}
|
||||
|
||||
int
|
||||
main() {
|
||||
using namespace xo::scm;
|
||||
using namespace std;
|
||||
|
||||
using tokenizer_type = tokenizer<char>;
|
||||
using span_type = tokenizer_type::span_type;
|
||||
|
||||
xo::log_config::min_log_level = xo::log_level::severe;
|
||||
|
||||
bool interactive = isatty(STDIN_FILENO);
|
||||
|
||||
tokenizer_type tkz(xo::log_config::min_log_level <= xo::log_level::info);
|
||||
string input_str;
|
||||
|
||||
size_t line_no = 1;
|
||||
|
||||
constexpr std::size_t c_maxlines = 25;
|
||||
|
||||
while (repl_getline(interactive, cin, cout, input_str)) {
|
||||
// we want tokenizer to see newline, it's syntax
|
||||
input_str.push_back('\n');
|
||||
span_type input = span_type::from_string(input_str);
|
||||
|
||||
// reminder: input may contain multiple tokens
|
||||
while (!input.empty()) {
|
||||
auto [tk, consumed, error] = tkz.scan(input, false /*!eof*/);
|
||||
|
||||
if (tk.is_valid()) {
|
||||
cout << tk << endl;
|
||||
} else if (error.is_error()) {
|
||||
cout << "tokenizer error: " << endl;
|
||||
error.report(cout);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
input = input.after_prefix(consumed);
|
||||
}
|
||||
|
||||
/* here: input.empty() or error encountered */
|
||||
|
||||
++line_no;
|
||||
|
||||
if (line_no > c_maxlines) {
|
||||
cout << "always exit after " << c_maxlines << " lines of input" << endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** end tokenrepl.cpp */
|
||||
|
|
@ -1,328 +0,0 @@
|
|||
/** @file buffer.hpp **/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "span.hpp"
|
||||
#include <utility>
|
||||
#include <cstdint>
|
||||
#include <cassert>
|
||||
#include <new>
|
||||
|
||||
namespace xo {
|
||||
namespace scm {
|
||||
/**
|
||||
* @class buffer buffer.hpp
|
||||
*
|
||||
* @brief Container for a (possibly owned) FIFO queue of chars
|
||||
*
|
||||
* @tparam CharT. buffer element type.
|
||||
*
|
||||
* @code
|
||||
* .buf
|
||||
*
|
||||
* +------------------------------------------+
|
||||
* | | ... | | X| ... | X| | ... | |
|
||||
* +------------------------------------------+
|
||||
* ^ ^ ^ ^
|
||||
* 0 .lo .hi .buf_z
|
||||
*
|
||||
* <-contents-><----avail----->
|
||||
* @endcode
|
||||
*
|
||||
* Buffer does not support wrapped content:
|
||||
* content that has not been consumed always occupies contiguous memory.
|
||||
*
|
||||
* Example:
|
||||
* @code
|
||||
* // 1.
|
||||
* buffer<char> buf(64*1024);
|
||||
* buf.empty() -> true
|
||||
* buf.buf_z() -> 65536
|
||||
* buf.lo_pos() -> 0
|
||||
* buf.hi_pos() -> 0
|
||||
* buf.contents() -> empty span
|
||||
* buf.avail() -> span entire buffer memory
|
||||
*
|
||||
* // write to (a prefix of) buf.avail()
|
||||
* ::strncpy(buf.buf(), "hello, world\n", 13);
|
||||
* buf.produce(span_type(buf.buf(), buf.buf() + 13));
|
||||
*
|
||||
* buf.lo_pos() -> 0
|
||||
* buf.hi_pos() -> 13
|
||||
* buf.contents() -> "hello, world\n";
|
||||
*
|
||||
*
|
||||
* // examine stored content (does not change buffer state)
|
||||
* auto span = buf.contents();
|
||||
* cerr << string_view(span.lo(), span.hi()); // "hello, world\n"
|
||||
*
|
||||
* // consume (a prefix of) stored content
|
||||
* buf.consume(span.prefix(7));
|
||||
*
|
||||
* buf.lo_pos() -> 7
|
||||
* buf.hi_pos() -> 13
|
||||
* buf.contents() -> "world\n"
|
||||
*
|
||||
* // consuming all remaining content resets to original state
|
||||
* buf.consume(buf.contents());
|
||||
*
|
||||
* buf.empty() -> true
|
||||
* buf.hi_pos() -> 0 // not 13!
|
||||
*
|
||||
* // 2.
|
||||
* buffer<char> buf;
|
||||
* buf.empty() -> true
|
||||
* buf.buf_z() -> 0
|
||||
* buf.lo_pos() -> 0
|
||||
* buf.hi_pos() -> 0
|
||||
* buf.contents() -> empty span
|
||||
* buf.avail() -> empty span
|
||||
*
|
||||
* // allocate memory separately from ctor
|
||||
* buf.alloc(64*1024);
|
||||
* @endcode
|
||||
**/
|
||||
template <typename CharT>
|
||||
class buffer {
|
||||
public:
|
||||
/** @brief typealias for span of CharT **/
|
||||
using span_type = span<CharT>;
|
||||
/** @brief typealias for buffer size (counts CharT's, not bytes) **/
|
||||
using size_type = std::uint64_t;
|
||||
|
||||
public:
|
||||
/** @brief create empty buffer.
|
||||
|
||||
Does not allocate any storage; @see alloc
|
||||
**/
|
||||
buffer() = default;
|
||||
/** @brief create empty buffer, and possibly allocate storage.
|
||||
|
||||
@param buf_z Buffer size. allocate storage (owned by this buffer) if >0.
|
||||
@param align_z Align to this value, e.g. 8 to align storage on an 8-byte boundary
|
||||
**/
|
||||
buffer(size_type buf_z,
|
||||
size_type align_z = sizeof(char))
|
||||
: is_owner_{true},
|
||||
buf_{buf_z ? (new (std::align_val_t(align_z)) CharT [buf_z]) : nullptr},
|
||||
buf_z_{buf_z},
|
||||
lo_pos_{0},
|
||||
hi_pos_{0}
|
||||
{}
|
||||
/** @brief buffer is not copyable **/
|
||||
buffer(buffer const & x) = delete;
|
||||
/** @brief destructor. Release storage if owned **/
|
||||
~buffer() { this->reset(); }
|
||||
|
||||
/** @name Access methods **/
|
||||
///@{
|
||||
|
||||
/** @brief start of buffer memory **/
|
||||
CharT * buf() const { return buf_; }
|
||||
/** @brief buffer size (number of characters) **/
|
||||
size_type buf_z() const { return buf_z_; }
|
||||
/** @brief current start position within buffer **/
|
||||
size_type lo_pos() const { return lo_pos_; }
|
||||
/** @brief current end position within buffer **/
|
||||
size_type hi_pos() const { return hi_pos_; }
|
||||
|
||||
///@}
|
||||
|
||||
/** @brief readonly access to a single buffer element.
|
||||
|
||||
Relative to start of buffer (ignores current consume position)
|
||||
**/
|
||||
CharT const & operator[](size_type i) const { return buf_[i]; }
|
||||
|
||||
/** @brief return span for current buffer contents **/
|
||||
span_type contents() const { return span_type(buf_ + lo_pos_,
|
||||
buf_ + hi_pos_); }
|
||||
/** @brief returns span for writable buffer contents (unused prefix following produce position **/
|
||||
span_type avail() const { return span_type(buf_ + hi_pos_,
|
||||
buf_ + buf_z_); }
|
||||
|
||||
/** @brief @c true iff buffer is empty **/
|
||||
bool empty() const { return lo_pos_ == hi_pos_; }
|
||||
|
||||
|
||||
/**
|
||||
@brief update buffer produce position, after (independently) writing contents of span to it
|
||||
|
||||
@pre left endpoint of @p span equals buffer produce position (@c .hi_pos)
|
||||
@pre right endpoint of @p span within bounds of buffer memory range
|
||||
@post right endpoint of @p span equals buffer produce position.
|
||||
**/
|
||||
void produce(span_type const & span) {
|
||||
assert(span.lo() == buf_ + hi_pos_);
|
||||
|
||||
hi_pos_ += span.size();
|
||||
}
|
||||
|
||||
/**
|
||||
@brief update buffer consume position, when done with contents of span
|
||||
|
||||
@pre left endpoint of @p span equals buffer consume position (@c .lo_pos)
|
||||
@pre right endpoint of @p span within bounds of buffer memory range
|
||||
@post Either
|
||||
buffer is empty, with @c .lo_pos = @c .hi_pos = @c 0.
|
||||
buffer is non-empty, right endpoint of @p span equals new buffer consume position.
|
||||
**/
|
||||
void consume(span_type const & span) {
|
||||
if (span.size()) {
|
||||
assert(span.lo() == buf_ + lo_pos_);
|
||||
|
||||
lo_pos_ += span.size();
|
||||
} else {
|
||||
/* since .consume() that arrives at empty contents also resets .lo_pos .hi_pos,
|
||||
* we don't want to blow up when called with an empty span -- argument
|
||||
* may represent some pre-reset location in buffer
|
||||
*/
|
||||
}
|
||||
|
||||
if (lo_pos_ == hi_pos_) {
|
||||
lo_pos_ = 0;
|
||||
hi_pos_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@brief allocate buffer with desired amount of memory
|
||||
|
||||
@param buf_z desired buffer size
|
||||
@param align_z alignment; buffer memory will be aligned on this byte-boundary.
|
||||
**/
|
||||
void alloc(size_type buf_z, size_type align_z = sizeof(char)) {
|
||||
/* properly reset (+ discard) any existing state */
|
||||
this->reset();
|
||||
|
||||
is_owner_ = true;
|
||||
if (buf_z)
|
||||
buf_ = new (std::align_val_t(align_z)) CharT [buf_z];
|
||||
buf_z_ = buf_z;
|
||||
lo_pos_ = 0;
|
||||
hi_pos_ = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@brief attach buffer to (unowned) range of @p buf_z bytes starting at @p buf[0]
|
||||
|
||||
Buffer is not responsible for managing storage.
|
||||
|
||||
@post
|
||||
1. buffer is empty
|
||||
@post
|
||||
2. buffer read position = buffer write position = 0
|
||||
**/
|
||||
void setbuf(CharT * buf, size_type buf_z) {
|
||||
/* properly reset (+ discard) any existing state */
|
||||
this->reset();
|
||||
|
||||
is_owner_ = false;
|
||||
lo_pos_ = 0;
|
||||
hi_pos_ = 0;
|
||||
buf_ = buf;
|
||||
buf_z_ = buf_z;
|
||||
}
|
||||
|
||||
/**
|
||||
@brief revert buffer to empty state and possibly zero it
|
||||
|
||||
@param zero_buffer_flag Zero buffer contents iff this is true
|
||||
|
||||
@post
|
||||
1. buffer is empty
|
||||
@post
|
||||
2. buffer read position = buffer write position = 0
|
||||
**/
|
||||
void clear2empty(bool zero_buffer_flag) {
|
||||
if (buf_ && zero_buffer_flag)
|
||||
explicit_bzero(buf_, buf_z_ * sizeof(CharT));
|
||||
|
||||
lo_pos_ = 0;
|
||||
hi_pos_ = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@brief swap representation with another buffer instance.
|
||||
**/
|
||||
void swap (buffer & x) {
|
||||
std::swap(is_owner_, x.is_owner_);
|
||||
std::swap(buf_, x.buf_);
|
||||
std::swap(buf_z_, x.buf_z_);
|
||||
std::swap(lo_pos_, x.lo_pos_);
|
||||
std::swap(hi_pos_, x.hi_pos_);
|
||||
}
|
||||
|
||||
/**
|
||||
@brief reset buffer to an empty state and recover owned storage
|
||||
**/
|
||||
void reset() {
|
||||
if (is_owner_ && buf_)
|
||||
delete [] buf_;
|
||||
|
||||
is_owner_ = false;
|
||||
buf_ = nullptr;
|
||||
buf_z_ = 0;
|
||||
lo_pos_ = 0;
|
||||
hi_pos_ = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@brief move-assignment operator.
|
||||
@param x right-hand-side to move from.
|
||||
|
||||
@post
|
||||
@p x is in a valid, empty,
|
||||
**/
|
||||
buffer & operator= (buffer && x) {
|
||||
is_owner_ = x.is_owner_;
|
||||
buf_ = x.buf_;
|
||||
buf_z_ = x.buf_z_;
|
||||
lo_pos_ = x.lo_pos_;
|
||||
hi_pos_ = x.hi_pos_;
|
||||
|
||||
x.is_owner_ = false;
|
||||
x.lo_pos_ = 0;
|
||||
x.hi_pos_ = 0;
|
||||
x.buf_ = nullptr;
|
||||
x.buf_z_ = 0;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** @brief buffer is not assignable */
|
||||
buffer & operator= (buffer & x) = delete;
|
||||
|
||||
private:
|
||||
/** @brief true iff buffer is responsible for freeing storage at @c buf_ **/
|
||||
bool is_owner_ = false;
|
||||
/** @brief buffer contents. buffer memory comprises @c buf_[0] to @c buf_[buf_z_] **/
|
||||
CharT * buf_ = nullptr;
|
||||
/** @brief buffer size (in units of CharT) **/
|
||||
size_type buf_z_ = 0;
|
||||
|
||||
/** @brief buffer read (consume) position
|
||||
|
||||
@invariant
|
||||
0 <= lo_pos_ <= hi_pos_ < buf_z_
|
||||
**/
|
||||
size_type lo_pos_ = 0;
|
||||
/** @brief buffer write (produce) position
|
||||
|
||||
@invariant
|
||||
0 <= hi_pos_ < hi_pos_ < buf_z_
|
||||
**/
|
||||
size_type hi_pos_ = 0;
|
||||
};
|
||||
|
||||
/** @brief Overload for @c swap, so that @c buffer<CharT> swappable **/
|
||||
template <typename CharT>
|
||||
inline void
|
||||
swap(buffer<CharT> & lhs,
|
||||
buffer<CharT> & rhs) {
|
||||
lhs.swap(rhs);
|
||||
}
|
||||
} /*namespace scm*/
|
||||
} /*namespace xo*/
|
||||
|
||||
/* end buffer.hpp */
|
||||
|
|
@ -1,363 +0,0 @@
|
|||
/* @file input_state.hpp
|
||||
*
|
||||
* author: Roland Conybeare, Jun 2025
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "span.hpp"
|
||||
|
||||
namespace xo {
|
||||
namespace scm {
|
||||
/** enum to report outcome of @ref capture_current_line **/
|
||||
enum class input_error {
|
||||
/** normal return, input line successfully identified and captured **/
|
||||
ok = 0,
|
||||
/** incomplete input; should not have been submitted to @ref capture_current_line.
|
||||
* note: submit last line of input with eof_flag=true
|
||||
**/
|
||||
incomplete,
|
||||
N
|
||||
};
|
||||
|
||||
/** @class input_state
|
||||
* @brief Track detailed input position for use in error messages
|
||||
*
|
||||
* input characters fall into two categories:
|
||||
* - consumed: memory can be reclaimed/recycled
|
||||
* - buffered: memory will be retained unaltered until consumed
|
||||
*
|
||||
* remarks:
|
||||
* - always in one of two states:
|
||||
* - empty
|
||||
* - contains exactly one line of input
|
||||
* - also record current input position.
|
||||
* Use this for example to identify where tokenizer rejected input.
|
||||
* - .current_pos advances by one token
|
||||
*
|
||||
* - buffered characters always form a single contiguous range.
|
||||
* - input_state does not own any storage; storage is owned elsewhere
|
||||
*
|
||||
* @text
|
||||
*
|
||||
* <------------------.current_line------------------>
|
||||
* > <-- .whitespace
|
||||
* cccccccccccccccccccccccccccccccc__TTTTTTTTxxxxxxxxx
|
||||
* ^ ^ ^
|
||||
* .current_line.lo | .current_line.hi
|
||||
* .current_pos
|
||||
*
|
||||
* <----prev_line----> <----current_line---->
|
||||
* > <--whitespace
|
||||
* ppppppppppppppppppp cccccccccccc__TTTTTTTT
|
||||
* ^
|
||||
*
|
||||
* @endtext
|
||||
**/
|
||||
template <typename CharT>
|
||||
class input_state {
|
||||
public:
|
||||
/** @defgroup input-state-type-traits input-state type traits **/
|
||||
///@{
|
||||
|
||||
/** type representing a contiguous span of tokenizer input characters **/
|
||||
using span_type = span<const CharT>;
|
||||
|
||||
///@}
|
||||
|
||||
public:
|
||||
/** @defgroup input-state-ctors input_state constructors **/
|
||||
///@{
|
||||
|
||||
input_state() = default;
|
||||
explicit input_state(bool debug_flag) : debug_flag_{debug_flag} {}
|
||||
/** Create instance with supplied @p current_line, @p current_pos, @p whitespace.
|
||||
* Introduced for unit tests, not used in tokenizer.
|
||||
**/
|
||||
explicit input_state(const span<const CharT>& current_line,
|
||||
size_t current_pos,
|
||||
size_t whitespace) : current_line_{current_line},
|
||||
current_pos_{current_pos},
|
||||
whitespace_{whitespace} {}
|
||||
|
||||
///@}
|
||||
|
||||
/** @defgroup input-state-static-methods input_state static methods **/
|
||||
///@{
|
||||
|
||||
/** recognize the newline character '\n' **/
|
||||
static bool is_newline(CharT ch);
|
||||
/** identifies whitespace chars.
|
||||
* These are chars that do not belong to any token.
|
||||
* They are not permitted to appear within
|
||||
* a symbol or string token.
|
||||
* Appearance of a whitespace char forces completion of
|
||||
* preceding token.
|
||||
**/
|
||||
static bool is_whitespace(CharT ch);
|
||||
|
||||
///@}
|
||||
|
||||
/** @defgroup input-state-access-methods **/
|
||||
///@{
|
||||
|
||||
#pragma GCC diagnostic push
|
||||
#ifndef __APPLE__
|
||||
#pragma GCC diagnostic ignored "-Wchanges-meaning"
|
||||
#endif
|
||||
const span_type & current_line() const { return current_line_; }
|
||||
#pragma GCC diagnostic pop
|
||||
size_t tk_start() const { return tk_start_; }
|
||||
size_t current_pos() const { return current_pos_; }
|
||||
size_t whitespace() const { return whitespace_; }
|
||||
bool debug_flag() const { return debug_flag_; }
|
||||
|
||||
///@}
|
||||
|
||||
/** @defgroup input-state-general-methods **/
|
||||
///@{
|
||||
|
||||
/** Input state less @p n chars.
|
||||
* Use to recover input state before a complete but error-triggering token
|
||||
**/
|
||||
input_state rewind(std::size_t n) const;
|
||||
|
||||
/** Capture prefix of @p input up to first newline.
|
||||
* Set read position to start of line.
|
||||
*
|
||||
* Alters:
|
||||
* .current_line
|
||||
* .current_pos
|
||||
*
|
||||
* Return pair comprising error code and input span representing first line
|
||||
* (including trailing newline) from @p input.
|
||||
**/
|
||||
std::pair<input_error, span_type> capture_current_line(const span_type & input,
|
||||
bool eof_flag);
|
||||
|
||||
/** atomically return current line while discarding it from input state
|
||||
*
|
||||
* Alters
|
||||
* .current_line
|
||||
* .current_pos
|
||||
* .whitespace
|
||||
**/
|
||||
span_type consume_current_line();
|
||||
|
||||
/** Reset input state for start of next line.
|
||||
* Expression parser may use this to discard remainder of input line
|
||||
* after a parsing error.
|
||||
*
|
||||
* Alters:
|
||||
* .current_line
|
||||
* .current_pos
|
||||
* .whitespace
|
||||
**/
|
||||
void discard_current_line();
|
||||
|
||||
/** Advance input position by @p z
|
||||
*
|
||||
* Alters:
|
||||
* .current_pos
|
||||
**/
|
||||
void advance(size_t z);
|
||||
|
||||
/** Advance .current_pos to pos.
|
||||
* Require: pos in @ref current_line_
|
||||
**/
|
||||
void advance_until(const CharT * pos);
|
||||
|
||||
/** Skip prefix of input, starting at current read position,
|
||||
* comprising only whitespace.
|
||||
*
|
||||
* Presume input position is at end of token;
|
||||
* on return @ref whitespace_ counts number of whitespace characters
|
||||
* skipped.
|
||||
*
|
||||
* Return pointer to first non-whitespace character after @ref current_pos_
|
||||
* or @ref current_line_.hi if reached end of buffered line.
|
||||
*
|
||||
* Alters:
|
||||
* .whitespace
|
||||
**/
|
||||
const CharT * skip_leading_whitespace();
|
||||
|
||||
///@}
|
||||
|
||||
private:
|
||||
/** @defgroup input-state-instance-vars input_state instance variables **/
|
||||
///@{
|
||||
|
||||
/** remember current input line. Used only to report errors **/
|
||||
span<const CharT> current_line_ = span<const CharT>();
|
||||
/** start of last token within @ref current_line_ **/
|
||||
size_t tk_start_ = 0;
|
||||
/** input position within @ref current_line_ **/
|
||||
size_t current_pos_ = 0;
|
||||
/** number of whitespace chars since end of preceding token,
|
||||
* or last newline, whichever is less
|
||||
**/
|
||||
size_t whitespace_ = 0;
|
||||
|
||||
/** true to log input activity */
|
||||
bool debug_flag_ = false;
|
||||
|
||||
///@}
|
||||
}; /*input_state*/
|
||||
|
||||
template <typename CharT>
|
||||
bool
|
||||
input_state<CharT>::is_newline(CharT ch) {
|
||||
return (ch == '\n');
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
bool
|
||||
input_state<CharT>::is_whitespace(CharT ch) {
|
||||
switch(ch) {
|
||||
case ' ': return true;
|
||||
case '\t': return true;
|
||||
case '\n': return true;
|
||||
case '\r': return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
input_state<CharT>
|
||||
input_state<CharT>::rewind(std::size_t n) const {
|
||||
return input_state<CharT>(this->current_line_,
|
||||
(n <= current_pos_) ? current_pos_ - n : 0,
|
||||
0 /*whitespace*/);
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
void
|
||||
input_state<CharT>::advance(size_t z) {
|
||||
scope log(XO_DEBUG(debug_flag_));
|
||||
|
||||
this->current_pos_ += z;
|
||||
|
||||
log && log(xtag("z", z), xtag("current_pos", current_pos_));
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
void
|
||||
input_state<CharT>::advance_until(const CharT * pos) {
|
||||
scope log(XO_DEBUG(debug_flag_));
|
||||
|
||||
assert(current_line_.lo() <= pos && pos <= current_line_.hi());
|
||||
|
||||
this->current_pos_ = pos - current_line_.lo();
|
||||
|
||||
log && log(xtag("current_pos", current_pos_));
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
auto
|
||||
input_state<CharT>::consume_current_line() -> span_type {
|
||||
span_type retval = current_line_;
|
||||
|
||||
this->discard_current_line();
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
void
|
||||
input_state<CharT>::discard_current_line() {
|
||||
this->current_line_ = span_type::make_null();
|
||||
this->current_pos_ = 0;
|
||||
this->whitespace_ = 0;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
auto
|
||||
input_state<CharT>::capture_current_line(const span_type & input,
|
||||
bool eof_flag) -> std::pair<input_error, span_type>
|
||||
{
|
||||
// see also discard_current_line()
|
||||
// note: must capture entirety of first line,
|
||||
// for example including leading whitespace.
|
||||
// See discussion in tokenizer scan() method
|
||||
|
||||
scope log(XO_DEBUG(debug_flag_));
|
||||
|
||||
/* look ahead to {end of line, end of input}, whichever comes first */
|
||||
const CharT * sol = input.lo();
|
||||
const CharT * eol = sol;
|
||||
|
||||
if (sol == current_line_.lo()) {
|
||||
log && log("short-circuit - current line already stashed");
|
||||
|
||||
/* nothing to do here */
|
||||
return std::make_pair(input_error::ok, current_line_);
|
||||
}
|
||||
|
||||
while ((eol < input.hi()) && (*eol != '\n'))
|
||||
++eol;
|
||||
|
||||
if (*eol == '\n') {
|
||||
/* include \n at end-of-line */
|
||||
++eol;
|
||||
} else {
|
||||
if (!eof_flag) {
|
||||
/* caller expected to provide complete line of input. complain and ignore */
|
||||
return std::make_pair(input_error::incomplete,
|
||||
input.prefix(0ul));
|
||||
}
|
||||
}
|
||||
|
||||
this->current_line_ = span_type(sol, eol);
|
||||
this->current_pos_ = 0;
|
||||
this->whitespace_ = 0;
|
||||
|
||||
log && log(xtag("current_line", print::printspan(current_line_)),
|
||||
xtag("current_pos", current_pos_));
|
||||
|
||||
return std::make_pair(input_error::ok,
|
||||
span_type(sol, eol));
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
const CharT *
|
||||
input_state<CharT>::skip_leading_whitespace()
|
||||
{
|
||||
scope log(XO_DEBUG(debug_flag_));
|
||||
|
||||
const CharT * ix = current_line_.lo() + current_pos_;
|
||||
|
||||
this->whitespace_ = 0;
|
||||
|
||||
/* skip whitespace + remember beginning of most recent line */
|
||||
while (is_whitespace(*ix) && (ix != current_line_.hi())) {
|
||||
++ix;
|
||||
|
||||
++(this->whitespace_);
|
||||
}
|
||||
|
||||
this->tk_start_ = ix - current_line_.lo();
|
||||
this->current_pos_ = ix - current_line_.lo();
|
||||
|
||||
return ix;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
inline std::ostream &
|
||||
operator<<(std::ostream & os,
|
||||
const input_state<CharT>& x)
|
||||
{
|
||||
using xo::print::unq;
|
||||
|
||||
os << "<input_state"
|
||||
<< xtag("tk", x.tk_start())
|
||||
<< xtag("pos", x.current_pos())
|
||||
<< xtag("line", unq(std::string_view(x.current_line().lo(), x.current_line().hi())))
|
||||
<< xtag("whitespace", x.whitespace())
|
||||
<< ">";
|
||||
|
||||
return os;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,112 +0,0 @@
|
|||
/* file scan_result.hpp
|
||||
*
|
||||
* author: Roland Conybeare, Jun 2025
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "token.hpp"
|
||||
#include "tokenizer_error.hpp"
|
||||
#include "input_state.hpp"
|
||||
|
||||
namespace xo {
|
||||
namespace scm {
|
||||
/** @class scan_result
|
||||
* @brief Represent result of parsing one input token.
|
||||
*
|
||||
* @code
|
||||
* Possible outcomes fall into several categories
|
||||
* (with T: @c token_.is_valid(), E: @c error_.is_error())
|
||||
*
|
||||
* | T | E | description |
|
||||
* |-------+-------+-------------------------------------|
|
||||
* | false | false | end of input, including end of line |
|
||||
* | true | false | parsed token in T |
|
||||
* | false | true | parse error in E |
|
||||
*
|
||||
* @endcode
|
||||
**/
|
||||
template <typename CharT>
|
||||
class scan_result {
|
||||
public:
|
||||
using token_type = token<CharT>;
|
||||
using span_type = span<const CharT>;
|
||||
using error_type = tokenizer_error<CharT>;
|
||||
using input_state_type = input_state<CharT>;
|
||||
|
||||
public:
|
||||
scan_result(const token_type & token,
|
||||
const span_type & consumed,
|
||||
const error_type & error = error_type())
|
||||
: token_{token}, consumed_{consumed}, error_{error} {}
|
||||
|
||||
static scan_result make_whitespace(const span_type & prefix_input);
|
||||
static scan_result make_partial(const span_type & prefix_input);
|
||||
/**
|
||||
* @p error_src can be __FUNCTION__ from site where error generated.
|
||||
* @p error_msg error message
|
||||
* @p error_pos error position, relative to start of token
|
||||
* @p input_state_ref input state object;
|
||||
* copied into scan_result, and leaving input_state_ref.current_line cleared
|
||||
**/
|
||||
static scan_result make_error_consume_current_line(const char * error_src,
|
||||
std::string error_msg,
|
||||
size_t error_pos,
|
||||
input_state_type & input_state_ref);
|
||||
|
||||
bool is_eof_or_ambiguous() const { return token_.is_invalid() && error_.is_not_an_error(); }
|
||||
bool is_token() const { return token_.is_valid(); }
|
||||
bool is_error() const { return error_.is_error(); }
|
||||
|
||||
const token_type & get_token() const { return token_; }
|
||||
const span_type & consumed() const { return consumed_; }
|
||||
const error_type & error() const { return error_; }
|
||||
|
||||
public:
|
||||
/** Successfully parsed token, whenever tk_type != tokentype::tk_invalid.
|
||||
* Will be tokentype::tk_invalid in normal course of events for valid input,
|
||||
* when consuming whitespace
|
||||
**/
|
||||
token_type token_;
|
||||
/** input span represented by .token, on success. Otherwise not defined **/
|
||||
span_type consumed_;
|
||||
/** error description, whenever .error_.is_error() is true **/
|
||||
error_type error_;
|
||||
};
|
||||
|
||||
template <typename CharT>
|
||||
auto scan_result<CharT>::make_whitespace(const span_type& whitespace_input) -> scan_result
|
||||
{
|
||||
return scan_result(token_type::invalid(), whitespace_input /*consumed*/);
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
auto scan_result<CharT>::make_partial(const span_type& prefix_input) -> scan_result
|
||||
{
|
||||
return scan_result(token_type::invalid(), prefix_input /*consumed*/);
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
auto
|
||||
scan_result<CharT>::make_error_consume_current_line(const char * error_src,
|
||||
std::string error_msg,
|
||||
size_t error_pos,
|
||||
input_state_type & input_state_ref) -> scan_result
|
||||
{
|
||||
/* report+consume entire input line */
|
||||
|
||||
/* copy before altered by .consume_current_line() */
|
||||
input_state_type input_state_copy = input_state_ref;
|
||||
|
||||
return scan_result(token_type::invalid(),
|
||||
input_state_ref.consume_current_line(),
|
||||
error_type(error_src,
|
||||
error_msg,
|
||||
input_state_copy,
|
||||
error_pos));
|
||||
}
|
||||
|
||||
} /*namespace scm*/
|
||||
} /*namespace xo*/
|
||||
|
||||
/* end scan_result.hpp */
|
||||
|
|
@ -1,291 +0,0 @@
|
|||
/** @file span.hpp **/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "xo/indentlog/scope.hpp"
|
||||
#include "xo/indentlog/print/ppdetail_atomic.hpp"
|
||||
#include <ostream>
|
||||
#include <cstdint>
|
||||
#include <cassert>
|
||||
|
||||
namespace xo {
|
||||
namespace scm {
|
||||
/** @class span span.hpp
|
||||
*
|
||||
* @brief A contiguous range of characters, without ownership.
|
||||
*
|
||||
* @tparam CharT type for elements referred to by this span.
|
||||
**/
|
||||
template <typename CharT>
|
||||
class span {
|
||||
public:
|
||||
/** @defgroup span-type-traits span type traits **/
|
||||
///@{
|
||||
|
||||
/** typealias for span size (in units of CharT) **/
|
||||
using size_type = std::uint64_t;
|
||||
|
||||
///@}
|
||||
|
||||
public:
|
||||
/** @defgroup span-ctors span constructors **/
|
||||
///@{
|
||||
|
||||
/** null span **/
|
||||
span() : lo_{nullptr}, hi_{nullptr} {}
|
||||
|
||||
/** Create span for the contiguous memory range [@p lo, @p hi) **/
|
||||
span(CharT * lo, CharT * hi) : lo_{lo}, hi_{hi} {}
|
||||
|
||||
/** implicit conversion from span<CharU>, whenever CharU* converts to CharT* (e.g. span<char> to span<const char>) **/
|
||||
template<typename CharU>
|
||||
span(const span<CharU> & other,
|
||||
std::enable_if_t<std::is_convertible_v<CharU*, CharT*>
|
||||
&& !std::is_same_v<CharU, CharT>> * = nullptr)
|
||||
: lo_{other.lo()}, hi_{other.hi()} {}
|
||||
|
||||
/** copy ctor (explicit to avoid ambiguity with template ctor) **/
|
||||
span(const span & other) = default;
|
||||
span & operator=(const span & other) = default;
|
||||
|
||||
/** Create a null span (i.e. with null @p lo, @p hi pointers)
|
||||
* A null span can be concatenated with any other span
|
||||
* without triggering matching-endpoint asserts.
|
||||
**/
|
||||
static span make_null() { return span(static_cast<CharT*>(nullptr), static_cast<CharT*>(nullptr)); }
|
||||
|
||||
/** @brief create span for C-style string @p cstr **/
|
||||
static span from_cstr(const CharT * cstr) {
|
||||
CharT * lo = cstr;
|
||||
CharT * hi = cstr ? cstr + strlen(cstr) : nullptr;
|
||||
|
||||
return span(lo, hi);
|
||||
}
|
||||
|
||||
/** @brief create span from std::string @p str **/
|
||||
static span from_string(const std::string& str) {
|
||||
CharT * lo = &(*str.begin());
|
||||
CharT * hi = &(*str.end());
|
||||
|
||||
return span(lo, hi);
|
||||
}
|
||||
|
||||
/** @brief concatenate two contiguous spans */
|
||||
static span concat(const span & span1, const span & span2) {
|
||||
if (span1.is_null())
|
||||
return span2;
|
||||
if (span2.is_null())
|
||||
return span1;
|
||||
|
||||
if (span1.hi() != span2.lo()) {
|
||||
scope log(XO_DEBUG(true));
|
||||
|
||||
log && log(xtag("span1.hi", (void*)span1.hi()), xtag("span2.lo", (void*)span2.lo()));
|
||||
}
|
||||
|
||||
assert(span1.hi() == span2.lo());
|
||||
|
||||
CharT * lo = span1.lo();
|
||||
CharT * hi = span2.hi();
|
||||
|
||||
return span(lo, hi);
|
||||
}
|
||||
|
||||
///@}
|
||||
|
||||
/** @defgroup span-access-methods **/
|
||||
///@{
|
||||
|
||||
CharT * lo() const { return lo_; } /* get member span::lo_ */
|
||||
CharT * hi() const { return hi_; } /* get member span::hi_ */
|
||||
|
||||
///@}
|
||||
|
||||
/** @defgroup span-general-methods **/
|
||||
///@{
|
||||
|
||||
/** @brief strip prefix until first occurence of '\n', including the newline **/
|
||||
void discard_until_newline() {
|
||||
for (const CharT * p = lo_; p < hi_; ++p) {
|
||||
if (*p == '\n') {
|
||||
lo_ = p + 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
lo_ = hi_;
|
||||
}
|
||||
|
||||
/** Create new span over supplied type,
|
||||
* with identical (possibly misaligned) endpoints.
|
||||
*
|
||||
* @warning
|
||||
* 1. New span uses exactly the same memory addresses.
|
||||
* Endpoint pointers may not be aligned.
|
||||
* 2. Implementation assumes code compiled with
|
||||
* @code -fno-strict-aliasing @endcode enabled.
|
||||
*
|
||||
* @tparam OtherT element type for new span
|
||||
**/
|
||||
template <typename OtherT>
|
||||
span<OtherT>
|
||||
cast() const { return span<OtherT>(reinterpret_cast<OtherT *>(lo_),
|
||||
reinterpret_cast<OtherT *>(hi_)); }
|
||||
|
||||
/** @brief create span including the first @p z members of this span. **/
|
||||
span prefix(size_type z) const { return span(lo_, lo_ + z); }
|
||||
|
||||
/** @brief create span representing prefix up to (but not including) @p *p
|
||||
**/
|
||||
span prefix_upto(CharT * p) const {
|
||||
if (p <= hi_)
|
||||
return span(lo_, p);
|
||||
else
|
||||
return span(lo_, hi_);
|
||||
}
|
||||
|
||||
/** @brief create span with first @p z members of this span removed **/
|
||||
span after_prefix(size_type z) const {
|
||||
if (lo_ + z > hi_)
|
||||
z = hi_ - lo_;
|
||||
|
||||
return span(lo_ + z, hi_);
|
||||
}
|
||||
|
||||
/** @brief create span with @p prefix of this span removed **/
|
||||
span after_prefix(const span & prefix) const {
|
||||
if (!prefix.is_null() && (prefix.lo() != lo_)) {
|
||||
throw std::runtime_error
|
||||
("after_prefix: expected prefix of this span");
|
||||
}
|
||||
|
||||
return after_prefix(prefix.size());
|
||||
}
|
||||
|
||||
/** Create span starting with position @p p.
|
||||
* Does boundary checking; will return empty span if @p p is outside @c [lo_,hi)
|
||||
**/
|
||||
span suffix_from(CharT * p) const {
|
||||
if ((lo_ <= p) && (p <= hi_))
|
||||
return span(p, hi_);
|
||||
else
|
||||
return span(hi_, hi_);
|
||||
}
|
||||
|
||||
/** true iff this span is null. distinct from empty. **/
|
||||
bool is_null() const { return lo_ == nullptr && hi_ == nullptr; }
|
||||
/** true iff this span is empty (comprises 0 elements). **/
|
||||
bool empty() const { return lo_ == hi_; }
|
||||
/** report the number of elements (of type CharT) in this span. **/
|
||||
size_type size() const { return hi_ - lo_; }
|
||||
|
||||
/** increase extent of this span to include @p x.
|
||||
* Requires @c hi() == @c x.lo()
|
||||
**/
|
||||
span & operator+=(const span & x) {
|
||||
if (hi_ == x.lo_) {
|
||||
hi_ = x.hi_;
|
||||
} else if (!x.is_null()) {
|
||||
assert(false);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** print representation for this span on stream @p os **/
|
||||
void print(std::ostream & os) const {
|
||||
os << "<span"
|
||||
<< xtag("addr", (void*)lo_)
|
||||
<< xtag("size", size())
|
||||
<< " :text " << xo::print::quot(std::string_view(lo_, hi_))
|
||||
<< ">";
|
||||
}
|
||||
///@}
|
||||
|
||||
private:
|
||||
/** @defgroup span-instance-vars **/
|
||||
///@{
|
||||
|
||||
/** start of span.
|
||||
Span comprises memory address between @p lo (inclusive) and @p hi (exclusive)
|
||||
**/
|
||||
CharT * lo_ = nullptr;
|
||||
|
||||
/** @brief end of span.
|
||||
Span comprises memory address between @p lo (inclusive) and @p hi (exclusive)
|
||||
**/
|
||||
CharT * hi_ = nullptr;
|
||||
|
||||
///@}
|
||||
}; /*span*/
|
||||
|
||||
/** @defgroup span-operators **/
|
||||
///@{
|
||||
|
||||
/** compare spans for equality.
|
||||
* Two spans are equal iff both endpoints match exactly.
|
||||
**/
|
||||
template <typename CharT>
|
||||
inline bool
|
||||
operator==(const span<CharT> & lhs, const span<CharT> & rhs) {
|
||||
return ((lhs.lo() == rhs.lo())
|
||||
&& (lhs.hi() == rhs.hi()));
|
||||
}
|
||||
|
||||
/** compare spans for inequality.
|
||||
* Two spans are unequal if either paired endpoint differs.
|
||||
**/
|
||||
template <typename CharT>
|
||||
inline bool
|
||||
operator!=(const span<CharT> & lhs, const span<CharT> & rhs) {
|
||||
return ((lhs.lo() != rhs.lo())
|
||||
|| (lhs.hi() != rhs.hi()));
|
||||
}
|
||||
|
||||
/** print a summary of @p x on stream @p os. Intended for diagnostics **/
|
||||
template <typename CharT>
|
||||
inline std::ostream &
|
||||
operator<<(std::ostream & os,
|
||||
const span<CharT> & x) {
|
||||
x.print(os);
|
||||
return os;
|
||||
}
|
||||
|
||||
///@}
|
||||
} /*namespace scm*/
|
||||
|
||||
namespace print {
|
||||
template <typename CharT>
|
||||
class printspan_impl {
|
||||
public:
|
||||
printspan_impl(xo::scm::span<CharT> x) : span_{x} {}
|
||||
|
||||
xo::scm::span<CharT> span_;
|
||||
};
|
||||
|
||||
template <typename CharT>
|
||||
printspan_impl<CharT> printspan(const xo::scm::span<CharT>& span) {
|
||||
return printspan_impl<CharT>(span);
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
inline std::ostream &
|
||||
operator<< (std::ostream & os,
|
||||
const printspan_impl<CharT> & x)
|
||||
{
|
||||
for (const CharT * p = x.span_.lo(); p < x.span_.hi(); ++p)
|
||||
os << *p;
|
||||
|
||||
return os;
|
||||
}
|
||||
|
||||
#ifndef ppdetail_atomic
|
||||
template <typename CharT> \
|
||||
PPDETAIL_ATOMIC_BODY(printspan_impl<CharT>);
|
||||
|
||||
template <typename CharT> \
|
||||
PPDETAIL_ATOMIC_BODY(xo::scm::span<CharT>);
|
||||
#endif
|
||||
|
||||
}
|
||||
} /*namespace xo*/
|
||||
|
|
@ -1,473 +0,0 @@
|
|||
/* file token.hpp
|
||||
*
|
||||
* author: Roland Conybeare, Jul 2024
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "tokentype.hpp"
|
||||
#include "xo/indentlog/print/tag.hpp"
|
||||
#include <stdexcept>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
|
||||
namespace xo {
|
||||
namespace scm {
|
||||
namespace detail {
    /** Compute @c a * b^p by repeated squaring.
     *
     *  Loop invariant: the value (a * b^p) is the same before and after
     *  each iteration, so when @p p reaches 0 the answer is @p a.
     *
     *  @param a  running multiplier
     *  @param b  base
     *  @param p  exponent; values <= 0 return @p a unchanged
     **/
    constexpr double
    pow_aux(double a, double b, int p) {
        for (int n = p; n > 0; ) {
            if (n % 2 != 0) {
                /* odd: a * b^n = (a * b) * b^(n-1) */
                a *= b;
                --n;
            } else {
                /* even: a * b^n = a * (b^2)^(n/2) */
                b *= b;
                n /= 2;
            }
        }

        return a;
    }

    /** Compute @c 10^p for integer @p p (reciprocal used when @p p is negative) **/
    constexpr double
    pow10(int p) {
        return ((p >= 0)
                ? pow_aux(1.0, 10.0, p)
                : 1.0 / pow_aux(1.0, 10.0, -p));
    }
}
|
||||
|
||||
/** @class token
|
||||
* @brief Represent a Schematika lexical token
|
||||
**/
|
||||
template <typename CharT>
|
||||
class token {
|
||||
public:
|
||||
/** @defgroup token-ctors token constructors **/
|
||||
///@{
|
||||
|
||||
/** default ctor creates token with type @c tk_invalid **/
|
||||
token() = default;
|
||||
/** create token with type @c tk_type and input text @c text **/
|
||||
token(tokentype tk_type, const std::string & text = "")
|
||||
: tk_type_{tk_type}, text_{text} {}
|
||||
|
||||
/** create invalid token (same as null ctor, but explicit) **/
|
||||
static token invalid() { return token(); }
|
||||
/** Create token representing a boolean literal from text @p txt
|
||||
* @p txt must be @c true or @c false
|
||||
**/
|
||||
static token bool_token(const std::string & txt) {
|
||||
return token(tokentype::tk_bool, txt);
|
||||
}
|
||||
/** Create token representing 64-bit signed integer literal parsed from decimal @p txt.
|
||||
* The string @p txt must be a decimal integer literal, since @ref i64_value re-parses @p txt.
|
||||
**/
|
||||
static token i64_token(const std::string & txt) {
|
||||
return token(tokentype::tk_i64, txt);
|
||||
}
|
||||
/** create token representing 64-bit floating-point literal parsed from decimal @p txt
|
||||
* The string @p txt must be a decimal floating-point literal, since @ref f64_value re-parses @p txt.
|
||||
**/
|
||||
static token f64_token(const std::string & txt) {
|
||||
return token(tokentype::tk_f64, txt);
|
||||
}
|
||||
/** create token representing literal string parsed from @p txt **/
|
||||
static token string_token(const std::string & txt) {
|
||||
return token(tokentype::tk_string, txt);
|
||||
}
|
||||
/** create token representing a symbol parsed from @p txt.
|
||||
* Note that not all strings are valid symbol names.
|
||||
**/
|
||||
static token symbol_token(const std::string & txt) {
|
||||
return token(tokentype::tk_symbol, txt);
|
||||
}
|
||||
/** token representing left angle bracket @c "<" **/
|
||||
static token leftangle() { return token(tokentype::tk_leftangle); }
|
||||
/** token representing right angle bracket @c ">" **/
|
||||
static token rightangle() { return token(tokentype::tk_rightangle); }
|
||||
/** token representing left parenthesis @c "(" **/
|
||||
static token leftparen() { return token(tokentype::tk_leftparen); }
|
||||
/** token representing right parenthesis @c ")" **/
|
||||
static token rightparen() { return token(tokentype::tk_rightparen); }
|
||||
/** token representing left bracket @c "[" **/
|
||||
static token leftbracket() { return token(tokentype::tk_leftbracket); }
|
||||
/** token representing right bracket @c "]" **/
|
||||
static token rightbracket() { return token(tokentype::tk_rightbracket); }
|
||||
/** token representing left brace @c "{" **/
|
||||
static token leftbrace() { return token(tokentype::tk_leftbrace); }
|
||||
/** token representing right brace @c "}' **/
|
||||
static token rightbrace() { return token(tokentype::tk_rightbrace); }
|
||||
/** token representing period @c "." **/
|
||||
static token dot() { return token(tokentype::tk_dot); }
|
||||
/** token representing comma @c "," **/
|
||||
static token comma() { return token(tokentype::tk_comma); }
|
||||
/** token representing colon @c ":" **/
|
||||
static token colon() { return token(tokentype::tk_colon); }
|
||||
/** token representing double-colo @c "::" **/
|
||||
static token doublecolon() { return token(tokentype::tk_doublecolon); }
|
||||
/** token representing semicolon @c ";" **/
|
||||
static token semicolon() { return token(tokentype::tk_semicolon); }
|
||||
/** token representing single-assignment @c "=" **/
|
||||
static token singleassign() { return token(tokentype::tk_singleassign); }
|
||||
/** token representing unrestricted assignment @c ":=" **/
|
||||
static token assign_token() { return token(tokentype::tk_assign); }
|
||||
/** token representing indirection @c "->" **/
|
||||
static token yields() { return token(tokentype::tk_yields); }
|
||||
|
||||
/** token for @c "+" **/
|
||||
static token plus_token() { return token(tokentype::tk_plus); }
|
||||
/** token for @c "-" **/
|
||||
static token minus_token() { return token(tokentype::tk_minus); }
|
||||
/** token for @c "*" **/
|
||||
static token star_token() { return token(tokentype::tk_star); }
|
||||
/** token for @c "/" **/
|
||||
static token slash_token() { return token(tokentype::tk_slash); }
|
||||
|
||||
/** token representing keyword @c type **/
|
||||
static token type() { return token(tokentype::tk_type); }
|
||||
/** token representing keyword @c def **/
|
||||
static token def() { return token(tokentype::tk_def); }
|
||||
/** token representing keyword @c lambda **/
|
||||
static token lambda() { return token(tokentype::tk_lambda); }
|
||||
/** token representing keyword @c if **/
|
||||
static token if_token() { return token(tokentype::tk_if); }
|
||||
/** token representing keyword @c else **/
|
||||
static token else_token() { return token(tokentype::tk_else); }
|
||||
/** token representing keyword @c let **/
|
||||
static token let() { return token(tokentype::tk_let); }
|
||||
/** token representing keyword @c in **/
|
||||
static token in() { return token(tokentype::tk_in); }
|
||||
/** token representing keyword @c end **/
|
||||
static token end() { return token(tokentype::tk_end); }
|
||||
|
||||
///@}
|
||||
|
||||
/** @defgroup token-access-methods **/
|
||||
///@{
|
||||
|
||||
tokentype tk_type() const { return tk_type_; }
|
||||
const std::string & text() const { return text_; }
|
||||
|
||||
///@}
|
||||
|
||||
/** @defgroup token-general-methods **/
|
||||
///@{
|
||||
|
||||
/** true if token understood to represent valid input
|
||||
* i.e. any token type except @c tk_invalid
|
||||
**/
|
||||
bool is_valid() const { return tk_type_ != tokentype::tk_invalid; }
|
||||
/** true for sentinel token with type tk_invalid **/
|
||||
bool is_invalid() const { return tk_type_ == tokentype::tk_invalid; }
|
||||
|
||||
/** true for tokens with variable text. false for those with fixed textual representation **/
|
||||
bool has_variable_text() const { return (tk_type_ == tokentype::tk_i64
|
||||
|| tk_type_ == tokentype::tk_f64
|
||||
|| tk_type_ == tokentype::tk_string
|
||||
|| tk_type_ == tokentype::tk_symbol); }
|
||||
|
||||
/** expect input matching @c true or @c false **/
|
||||
bool bool_value() const;
|
||||
|
||||
/** expect input matching @c [+|-][0-9][0-9]* **/
|
||||
std::int64_t i64_value() const;
|
||||
|
||||
/** expect input matching @c [+|-][0-9]*[.][0-9]*[e|E][+|-][0-9]* **/
|
||||
double f64_value() const;
|
||||
|
||||
/** print human-readable token representation on stream @p os **/
|
||||
void print(std::ostream & os) const;
|
||||
|
||||
///@}
|
||||
|
||||
private:
|
||||
/** @defgroup token-instance-vars **/
|
||||
///@{
|
||||
|
||||
/** category for this token **/
|
||||
tokentype tk_type_ = tokentype::tk_invalid;
|
||||
|
||||
/** characters comprising this token.
|
||||
* only provided for certain token types:
|
||||
*
|
||||
* tk_i64
|
||||
* tk_f64
|
||||
* tk_string
|
||||
* tk_symbol
|
||||
**/
|
||||
std::string text_;
|
||||
|
||||
///@}
|
||||
};
|
||||
|
||||
template <typename CharT>
|
||||
bool
|
||||
token<CharT>::bool_value() const {
|
||||
if (tk_type_ != tokentype::tk_bool) {
|
||||
throw (std::runtime_error
|
||||
(tostr("token::bool_value",
|
||||
": token with type tk found where tk_bool expected",
|
||||
xtag("tk", tk_type_))));
|
||||
}
|
||||
|
||||
if (text_ == "true")
|
||||
return true;
|
||||
if (text_ == "false")
|
||||
return false;
|
||||
|
||||
throw (std::runtime_error
|
||||
(tostr("token::bool_value",
|
||||
": unexpected input string tk_bool token",
|
||||
xtag("text", text_))));
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
std::int64_t
|
||||
token<CharT>::i64_value() const {
|
||||
if (tk_type_ != tokentype::tk_i64) {
|
||||
throw (std::runtime_error
|
||||
(tostr("token::i64_value",
|
||||
": token with type tk found where tk_i64 expected",
|
||||
xtag("tk", tk_type_))));
|
||||
}
|
||||
|
||||
if (text_.empty()) {
|
||||
throw (std::runtime_error
|
||||
(tostr("token::i64_value",
|
||||
": unexpected empty input string for tk_i64 token")));
|
||||
}
|
||||
|
||||
int sign = 1;
|
||||
int value = 0;
|
||||
{
|
||||
auto ix = text_.begin();
|
||||
auto end_ix = text_.end();
|
||||
|
||||
CharT ch = *ix;
|
||||
|
||||
if (ch == '+') {
|
||||
++ix;
|
||||
} else if (ch == '-') {
|
||||
sign = -1;
|
||||
++ix;
|
||||
}
|
||||
|
||||
if (ix == end_ix) {
|
||||
throw (std::runtime_error
|
||||
(tostr("token::i64_value",
|
||||
": input text found where at least one digit expected",
|
||||
xtag("text", text_))));
|
||||
}
|
||||
|
||||
for (; ix != end_ix; ++ix) {
|
||||
CharT ch = *ix;
|
||||
|
||||
if ((ch >= '0') && (ch <= '9')) {
|
||||
value *= 10;
|
||||
value += (ch - '0');
|
||||
} else {
|
||||
throw (std::runtime_error
|
||||
(tostr("token::i64_value",
|
||||
": unexpected char ch in integer token",
|
||||
xtag("ch", ch))));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sign * value;
|
||||
} /*i64_value*/
|
||||
|
||||
template <typename CharT>
|
||||
double
|
||||
token<CharT>::f64_value() const {
|
||||
if (tk_type_ != tokentype::tk_f64) {
|
||||
throw (std::runtime_error
|
||||
(tostr("token::f64_value",
|
||||
": token with type tk found where tk_f64 expected",
|
||||
xtag("tk", tk_type_))));
|
||||
}
|
||||
|
||||
if (text_.empty()) {
|
||||
throw (std::runtime_error
|
||||
(tostr("token::f64_value",
|
||||
": unexpected empty input string for tk_f64 token")));
|
||||
}
|
||||
|
||||
int sign = 1;
|
||||
/* integer representing denormalized unsigned mantissa
|
||||
* (mantissa scaled by smallest power of 10 sufficient to make
|
||||
* it an integer)
|
||||
*/
|
||||
std::int64_t mantissa = 0;
|
||||
/* counts #of digits to the right of decimal point '.' */
|
||||
int rh_digits = 0;
|
||||
/* sign of exponent */
|
||||
int exp_sign = 1;
|
||||
/* value of exponenct = integer to the right of 'e' or 'E' */
|
||||
int exponent = 0;
|
||||
|
||||
/* floating-point value will represent
|
||||
* sign * mantissa * 10^(sign*exponent - rh_digits)
|
||||
*/
|
||||
{
|
||||
auto ix = text_.begin();
|
||||
auto end_ix = text_.end();
|
||||
|
||||
CharT ch = *ix;
|
||||
|
||||
if (ch == '+') {
|
||||
++ix;
|
||||
} else if (ch == '-') {
|
||||
sign = -1;
|
||||
++ix;
|
||||
}
|
||||
|
||||
if (ix == end_ix) {
|
||||
throw (std::runtime_error
|
||||
(tostr("token::f64_value",
|
||||
": input text found where at least one digit expected",
|
||||
xtag("text", text_))));
|
||||
}
|
||||
|
||||
/* true iff decimal point '.' present in mantissa */
|
||||
bool have_decimal_point = false;
|
||||
/* true iff exponent prefix 'e' or 'E' present */
|
||||
//bool have_exponent = false;
|
||||
/* counts number of digits in mantissa
|
||||
* (both before and after, but not including, any decimal point
|
||||
*/
|
||||
int m_digits = 0;
|
||||
/* digits to the left of decimal point */
|
||||
int lh_digits = 0;
|
||||
|
||||
/* loop over mantissa digits */
|
||||
for (; ix != end_ix; ++ix) {
|
||||
CharT ch = *ix;
|
||||
|
||||
if (ch == '.') {
|
||||
if (have_decimal_point) {
|
||||
throw (std::runtime_error
|
||||
(tostr("token::f64_value",
|
||||
": input text found where at most one decimal point expected",
|
||||
xtag("text", text_))));
|
||||
}
|
||||
|
||||
have_decimal_point = true;
|
||||
lh_digits = m_digits;
|
||||
} else if ((ch >= '0') && (ch <= '9')) {
|
||||
mantissa *= 10;
|
||||
mantissa += (ch - '0');
|
||||
++m_digits;
|
||||
} else if (ch == 'e' || ch == 'E') {
|
||||
//have_exponent = true;
|
||||
break; // done with mantissa
|
||||
} else {
|
||||
throw (std::runtime_error
|
||||
(tostr("token::i64_value",
|
||||
": unexpected char ch in integer token",
|
||||
xtag("ch", ch))));
|
||||
}
|
||||
}
|
||||
|
||||
if (have_decimal_point)
|
||||
rh_digits = m_digits - lh_digits;
|
||||
|
||||
if (ix != end_ix) {
|
||||
/* continue to read exponent */
|
||||
|
||||
/* skip e|E */
|
||||
++ix;
|
||||
|
||||
if (ix == end_ix) {
|
||||
throw (std::runtime_error
|
||||
(tostr("token::f64_value",
|
||||
": on input text, expect at least one digit following exponent marker e|E",
|
||||
xtag("text", text_))));
|
||||
}
|
||||
|
||||
CharT ch = *ix;
|
||||
|
||||
if (ch == '+') {
|
||||
++ix; /*skip*/
|
||||
} else if (ch == '-') {
|
||||
exp_sign = -1;
|
||||
++ix;
|
||||
}
|
||||
|
||||
for (; ix != end_ix; ++ix) {
|
||||
CharT ch = *ix;
|
||||
|
||||
if ((ch >= '0') && (ch <= '9')) {
|
||||
exponent *= 10;
|
||||
exponent += (ch - '0');
|
||||
} else {
|
||||
throw (std::runtime_error
|
||||
(tostr("token::f64_value",
|
||||
"; on input text, expect only digits following"
|
||||
" (possibly signed) exponenct marker",
|
||||
xtag("text", text_))));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* floating-point value will represent
|
||||
* sign * mantissa * 10^(sign*exponent - rh_digits)
|
||||
*/
|
||||
|
||||
double mantissa_f64 = sign * mantissa;
|
||||
|
||||
#ifdef OBSOLETE_DEBUG
|
||||
std::cerr << xtag("text", text_)
|
||||
<< xtag("rh_digits", rh_digits)
|
||||
<< xtag("mantissa_f64", mantissa_f64)
|
||||
<< xtag("exp_sign", exp_sign)
|
||||
<< xtag("exponent", exponent)
|
||||
<< std::endl;
|
||||
#endif
|
||||
|
||||
double retval = (mantissa_f64
|
||||
* detail::pow10((exp_sign * exponent)
|
||||
- rh_digits));
|
||||
|
||||
return retval;
|
||||
} /*f64_value*/
|
||||
|
||||
template <typename CharT>
|
||||
void
|
||||
token<CharT>::print(std::ostream & os) const {
|
||||
os << "<token"
|
||||
<< xtag("type", tk_type_);
|
||||
if (has_variable_text())
|
||||
os << xtag("text", text_);
|
||||
os << ">";
|
||||
} /*print*/
|
||||
|
||||
template <typename CharT>
|
||||
inline std::ostream &
|
||||
operator<< (std::ostream & os,
|
||||
const token<CharT> & tk)
|
||||
{
|
||||
tk.print(os);
|
||||
return os;
|
||||
}
|
||||
} /*Namespace scm*/
|
||||
|
||||
#ifndef ppdetail_atomic
|
||||
namespace print {
|
||||
PPDETAIL_ATOMIC(xo::scm::token<char>);
|
||||
}
|
||||
#endif
|
||||
|
||||
} /*namespace xo*/
|
||||
|
||||
/* end token.hpp */
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,162 +0,0 @@
|
|||
/* file tokenizer_error.hpp
|
||||
*
|
||||
* author: Roland Conybeare, Jun 2025
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "input_state.hpp"
|
||||
#include "tokentype.hpp"
|
||||
#include "span.hpp"
|
||||
#include <iomanip>
|
||||
|
||||
namespace xo {
|
||||
namespace scm {
|
||||
/** @class tokenizer_error
|
||||
* @brief represent a lexing error, with context
|
||||
*
|
||||
* @tparam CharT representation for single characters
|
||||
**/
|
||||
template <typename CharT>
|
||||
class tokenizer_error {
|
||||
public:
|
||||
using input_state_type = input_state<CharT>;
|
||||
using span_type = span<const CharT>;
|
||||
|
||||
public:
|
||||
/** @defgroup tokenizer-error-ctors **/
|
||||
///@{
|
||||
|
||||
/** Default ctor represents a not-an-error sentinel object **/
|
||||
tokenizer_error() = default;
|
||||
/** Constructor to capture parsing error context
|
||||
* @p input_state tokenizer input state on entry to scanner (supplies current position)
|
||||
* @p error_pos error location relative to token start
|
||||
**/
|
||||
tokenizer_error(const char * src_function,
|
||||
std::string error_description,
|
||||
const input_state_type & input_state,
|
||||
size_t error_pos)
|
||||
: src_function_{src_function},
|
||||
error_description_{std::move(error_description)},
|
||||
input_state_{input_state},
|
||||
error_pos_{error_pos}
|
||||
{
|
||||
scope log(XO_DEBUG(input_state.debug_flag()));
|
||||
|
||||
log && log(xtag("input_state.current_pos", input_state.current_pos()),
|
||||
xtag("error_pos", error_pos));
|
||||
}
|
||||
///@}
|
||||
|
||||
/** @defgroup tokenizer-error-access-methods **/
|
||||
///@{
|
||||
|
||||
const char * src_function() const { return src_function_; }
|
||||
const std::string & error_description() const { return error_description_; }
|
||||
#pragma GCC diagnostic push
|
||||
#ifndef __APPLE__
|
||||
#pragma GCC diagnostic ignored "-Wchanges-meaning"
|
||||
#endif
|
||||
const input_state_type & input_state() const { return input_state_; }
|
||||
#pragma GCC diagnostic pop
|
||||
size_t tk_start() const { return input_state_.current_pos(); }
|
||||
size_t whitespace() const { return input_state_.whitespace(); }
|
||||
size_t error_pos() const { return error_pos_; }
|
||||
|
||||
///@}
|
||||
|
||||
/** @defgroup tokenizer-error-general-methods **/
|
||||
///@{
|
||||
|
||||
/** true, except for a sentinel error object **/
|
||||
bool is_error() const { return !error_description_.empty(); }
|
||||
/** false except for object in sentinel state **/
|
||||
bool is_not_an_error() const { return error_description_.empty(); }
|
||||
|
||||
/** Print representation to stream @p os. Intended for tokenizer diagnostics.
|
||||
* For Schematika errors prefer @ref report
|
||||
**/
|
||||
void print(std::ostream & os) const;
|
||||
|
||||
/** Print human-oriented error report on @p os. **/
|
||||
void report(std::ostream & os) const;
|
||||
|
||||
///@}
|
||||
|
||||
private:
|
||||
/** @defgroup tokenizer-error-vars **/
|
||||
///@{
|
||||
|
||||
/** source location (in tokenizer) at which error identified **/
|
||||
char const * src_function_ = nullptr;
|
||||
/** static error description **/
|
||||
std::string error_description_;
|
||||
/** input state associated with this error.
|
||||
* Sufficient to precisely locate it with context.
|
||||
**/
|
||||
input_state_type input_state_;
|
||||
/** position of error, relative to token start (see @c input_state_.tk_start()) **/
|
||||
size_t error_pos_ = 0;
|
||||
|
||||
///@}
|
||||
}; /*error_token*/
|
||||
|
||||
template <typename CharT>
|
||||
void
|
||||
tokenizer_error<CharT>::print(std::ostream & os) const {
|
||||
os << "<tokenizer-error"
|
||||
<< xtag("src-function", src_function_)
|
||||
<< xtag("message", error_description_)
|
||||
<< xtag("input", input_state_.current_line())
|
||||
<< xtag("whitespace", input_state_.whitespace())
|
||||
<< xtag("error-pos", error_pos_)
|
||||
<< ">";
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
void
|
||||
tokenizer_error<CharT>::report(std::ostream & os) const {
|
||||
using namespace std;
|
||||
|
||||
if (!error_description_.empty()) {
|
||||
const char * prefix = "input: ";
|
||||
/* input_state.tk_start: position of first character in token
|
||||
* input_state.current_pos: position of first character following preceding token.
|
||||
* error_pos: position (relative to start) at which failure detected
|
||||
*/
|
||||
const size_t tk_start = input_state_.tk_start();
|
||||
const size_t tk_indent = (strlen(prefix) + tk_start);
|
||||
const size_t error_pos = 1 + tk_start + error_pos_;
|
||||
|
||||
os << "token col: " << tk_start << ", error col: " << error_pos << "\n";
|
||||
os << prefix;
|
||||
for (const char *p = input_state_.current_line().lo(),
|
||||
*e = input_state_.current_line().hi(); p < e; ++p)
|
||||
{
|
||||
os << *p;
|
||||
}
|
||||
//os << endl;
|
||||
os << std::setw(tk_indent) << " ";
|
||||
|
||||
for (size_t i = 0; i < error_pos_; ++i) {
|
||||
os << '_';
|
||||
}
|
||||
os << '^' << endl;
|
||||
|
||||
os << error_description_ << endl;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
inline std::ostream &
|
||||
operator<< (std::ostream & os,
|
||||
const tokenizer_error<CharT> & tkerr)
|
||||
{
|
||||
tkerr.print(os);
|
||||
return os;
|
||||
}
|
||||
} /*namespace scm*/
|
||||
} /*namespace xo*/
|
||||
|
||||
/* end tokenizer_error.hpp */
|
||||
|
|
@ -1,192 +0,0 @@
|
|||
/** @file tokentype.hpp
|
||||
*
|
||||
* author: Roland Conybeare, Jul 2024
|
||||
**/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "xo/indentlog/print/tag.hpp" // for STRINGIFY
|
||||
#include "xo/indentlog/print/ppdetail_atomic.hpp"
|
||||
#include <ostream>
|
||||
|
||||
namespace xo {
|
||||
namespace scm {
|
||||
/** @enum tokentype
|
||||
* Enum to identify different schematika input token types
|
||||
*
|
||||
* Schematica code examples:
|
||||
*
|
||||
* @code
|
||||
* type point :: { xcoord : f64, ycoord : f64 };
|
||||
* type matrix :: array<double, 2>; // 2-d array
|
||||
*
|
||||
* decl hypot(x : f64, y : f64) -> f64;
|
||||
*
|
||||
* def hypot(x : f64, y : f64) {
|
||||
* let
|
||||
* x2 = (x * x);
|
||||
* y2 = (y * y);
|
||||
* hypot2 = (x2 + y2);
|
||||
* in
|
||||
* sqrt(hypot2);
|
||||
* };
|
||||
*
|
||||
* def someconst 4;
|
||||
*
|
||||
* def foo(v : vec<i32>) {
|
||||
* def (pi : f64) = 3.1415926;
|
||||
* def (h : (f64,f64) -> f64) = hypot;
|
||||
*
|
||||
* h = hypot3;
|
||||
* };
|
||||
*
|
||||
* def matrixproduct(x : matrix, y : matrix) {
|
||||
* [i, j : x.row(i) * y.col(j)];
|
||||
* };
|
||||
* @endcode
|
||||
**/
|
||||
enum class tokentype {
|
||||
/** sentinel value **/
|
||||
tk_invalid = -1,
|
||||
|
||||
/** a boolean constant **/
|
||||
tk_bool,
|
||||
|
||||
/** an integer constant (signed 64-bit integer) **/
|
||||
tk_i64,
|
||||
|
||||
/** a 64-bit floating-point constant **/
|
||||
tk_f64,
|
||||
|
||||
/** a string literal **/
|
||||
tk_string,
|
||||
|
||||
/** a symbol **/
|
||||
tk_symbol,
|
||||
|
||||
/** left-hand parenthesis @c '(' **/
|
||||
tk_leftparen,
|
||||
|
||||
/** right-hand parenthesis @c ')' **/
|
||||
tk_rightparen,
|
||||
|
||||
/** left-hand bracket @c '[' **/
|
||||
tk_leftbracket,
|
||||
|
||||
/** right-hand bracket @c ']' **/
|
||||
tk_rightbracket,
|
||||
|
||||
/** left-hand brace @c '{' **/
|
||||
tk_leftbrace,
|
||||
|
||||
/** right-hand brace @c '}' **/
|
||||
tk_rightbrace,
|
||||
|
||||
/** left-hand angle bracket @c '<' **/
|
||||
tk_leftangle,
|
||||
|
||||
/** right-hand angle bracket @c '>' **/
|
||||
tk_rightangle,
|
||||
|
||||
/** less-equal @c '<=' **/
|
||||
tk_lessequal,
|
||||
|
||||
/** great-equal @c '>=' **/
|
||||
tk_greatequal,
|
||||
|
||||
/** dot @c '.' **/
|
||||
tk_dot,
|
||||
|
||||
/** comma @c ',' **/
|
||||
tk_comma,
|
||||
|
||||
/** colon @c ':' **/
|
||||
tk_colon,
|
||||
|
||||
/** double-colon @c '::' **/
|
||||
tk_doublecolon,
|
||||
|
||||
/** semi-colon @c ';' **/
|
||||
tk_semicolon,
|
||||
|
||||
/** single equals sign @c '=' **/
|
||||
tk_singleassign,
|
||||
|
||||
/** assignment @c ':=' **/
|
||||
tk_assign,
|
||||
|
||||
/** indirection @c '->' **/
|
||||
tk_yields,
|
||||
|
||||
/** note: operators not treated as punctuation
|
||||
* 'do-always' is a legal variable name,
|
||||
* as is 'maybe*2', 'maybe+1', 'path/to/foo'
|
||||
**/
|
||||
|
||||
/** operator @c '+' **/
|
||||
tk_plus,
|
||||
/** operator @c '-' **/
|
||||
tk_minus,
|
||||
/** operator @c '*' **/
|
||||
tk_star,
|
||||
/** operator @c '/' **/
|
||||
tk_slash,
|
||||
|
||||
/** operator @c '==' **/
|
||||
tk_cmpeq,
|
||||
/** operator @c '!=' **/
|
||||
tk_cmpne,
|
||||
|
||||
/** keyword @c 'type' **/
|
||||
tk_type,
|
||||
|
||||
/** keyword @c 'def' **/
|
||||
tk_def,
|
||||
|
||||
/** keyword @c 'lambda' **/
|
||||
tk_lambda,
|
||||
|
||||
/** keyword @c 'if' **/
|
||||
tk_if,
|
||||
|
||||
/** keyword @c 'then' **/
|
||||
tk_then,
|
||||
|
||||
/** keyword @c 'else' **/
|
||||
tk_else,
|
||||
|
||||
/** keyword @c 'let' **/
|
||||
tk_let,
|
||||
|
||||
/** keyword @c 'in' **/
|
||||
tk_in,
|
||||
|
||||
/** keyword @c 'end' **/
|
||||
tk_end,
|
||||
|
||||
/** counts number of entries **/
|
||||
n_tokentype
|
||||
}; /*tokentype*/
|
||||
|
||||
/** String representation for enum value.
|
||||
* For example @c tokentype_descr(tokentype::tk_if) -> @c "tk_if"
|
||||
**/
|
||||
extern char const *
|
||||
tokentype_descr(tokentype tk_type);
|
||||
|
||||
/** Print enum value for @p tk_type on stream @p os **/
|
||||
inline std::ostream &
|
||||
operator<< (std::ostream & os, tokentype tk_type) {
|
||||
os << tokentype_descr(tk_type);
|
||||
return os;
|
||||
}
|
||||
} /*namespace scm*/
|
||||
|
||||
#ifndef ppdetail_atomic
|
||||
namespace print {
|
||||
PPDETAIL_ATOMIC(xo::scm::tokentype);
|
||||
} /*namespace print*/
|
||||
#endif
|
||||
} /*namespace xo*/
|
||||
|
||||
/* end tokentype.hpp */
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
# tokenizer/CMakeLists.txt
|
||||
|
||||
set(SELF_LIB xo_tokenizer)
|
||||
set(SELF_SRCS
|
||||
tokentype.cpp
|
||||
token.cpp)
|
||||
|
||||
xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS})
|
||||
xo_dependency(${SELF_LIB} indentlog)
|
||||
|
||||
# end CMakeLists.txt
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
/** @file token.cpp
|
||||
*
|
||||
* author: Roland Conybeare
|
||||
**/
|
||||
|
||||
#include "token.hpp"
|
||||
#include <xo/indentlog/print/tag.hpp>
|
||||
|
||||
/** end token.cpp **/
|
||||
|
|
@ -1,74 +0,0 @@
|
|||
/* file tokentype.cpp
|
||||
*
|
||||
* author: Roland Conybeare
|
||||
*/
|
||||
|
||||
#include "tokentype.hpp"
|
||||
|
||||
namespace xo {
|
||||
namespace scm {
|
||||
char const *
|
||||
tokentype_descr(tokentype tk_type)
|
||||
{
|
||||
#define CASE(x) case tokentype::x: return STRINGIFY(x)
|
||||
|
||||
switch(tk_type) {
|
||||
CASE(tk_bool);
|
||||
CASE(tk_i64);
|
||||
CASE(tk_f64);
|
||||
CASE(tk_string);
|
||||
CASE(tk_symbol);
|
||||
CASE(tk_leftparen);
|
||||
|
||||
CASE(tk_rightparen);
|
||||
CASE(tk_leftbracket);
|
||||
CASE(tk_rightbracket);
|
||||
CASE(tk_leftbrace);
|
||||
CASE(tk_rightbrace);
|
||||
|
||||
CASE(tk_leftangle);
|
||||
CASE(tk_rightangle);
|
||||
CASE(tk_lessequal);
|
||||
CASE(tk_greatequal);
|
||||
CASE(tk_dot);
|
||||
CASE(tk_comma);
|
||||
CASE(tk_colon);
|
||||
|
||||
CASE(tk_doublecolon);
|
||||
CASE(tk_semicolon);
|
||||
CASE(tk_singleassign);
|
||||
CASE(tk_assign);
|
||||
CASE(tk_yields);
|
||||
|
||||
CASE(tk_plus);
|
||||
CASE(tk_minus);
|
||||
CASE(tk_star);
|
||||
CASE(tk_slash);
|
||||
|
||||
CASE(tk_cmpeq);
|
||||
CASE(tk_cmpne);
|
||||
|
||||
CASE(tk_type);
|
||||
CASE(tk_def);
|
||||
CASE(tk_lambda);
|
||||
CASE(tk_if);
|
||||
CASE(tk_then);
|
||||
CASE(tk_else);
|
||||
CASE(tk_let);
|
||||
|
||||
CASE(tk_in);
|
||||
CASE(tk_end);
|
||||
|
||||
case tokentype::tk_invalid:
|
||||
case tokentype::n_tokentype:
|
||||
return "?tokentype";
|
||||
}
|
||||
|
||||
#undef CASE
|
||||
|
||||
return "???";
|
||||
} /*tokentype_descr*/
|
||||
} /*namespace scm*/
|
||||
} /*namespace xo*/
|
||||
|
||||
/* end tokentype.cpp */
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
# build unittest tokenizer/utest
|
||||
|
||||
set(SELF_EXECUTABLE_NAME utest.tokenizer)
|
||||
set(SELF_SOURCE_FILES
|
||||
tokenizer_utest_main.cpp
|
||||
tokenizer.test.cpp
|
||||
token.test.cpp)
|
||||
|
||||
xo_add_utest_executable(${SELF_EXECUTABLE_NAME} ${SELF_SOURCE_FILES})
|
||||
xo_self_dependency(${SELF_EXECUTABLE_NAME} xo_tokenizer)
|
||||
xo_external_target_dependency(${SELF_EXECUTABLE_NAME} Catch2 Catch2::Catch2)
|
||||
|
||||
# end CMakeLists.txt
|
||||
|
|
@ -1,266 +0,0 @@
|
|||
/* file token.test.cpp
|
||||
*
|
||||
* author: Roland Conybeare
|
||||
*/
|
||||
|
||||
#include "xo/tokenizer/token.hpp"
|
||||
#include <catch2/catch.hpp>
|
||||
#include <memory>
|
||||
|
||||
namespace xo {
|
||||
using token = xo::scm::token<char>;
|
||||
using xo::scm::tokentype;
|
||||
|
||||
namespace ut {
|
||||
// also see tokenizer.test.cpp for syntax
|
||||
|
||||
namespace test2 {
|
||||
struct testcase_i64 {
|
||||
std::string text_;
|
||||
bool expect_throw_;
|
||||
std::int64_t expected_;
|
||||
};
|
||||
|
||||
std::vector<testcase_i64> s_testcase_v = {
|
||||
{"", true, 0},
|
||||
{"0", false, 0},
|
||||
{"-", true, 0},
|
||||
{"+", true, 0},
|
||||
{"-0", false, 0},
|
||||
{"+0", false, 0},
|
||||
{"1", false, 1},
|
||||
{"-1", false, -1},
|
||||
{"9", false, 9},
|
||||
{"-9", false, -9},
|
||||
{"12", false, 12},
|
||||
{"+12", false, 12},
|
||||
{"-12", false, -12},
|
||||
{"99", false, 99},
|
||||
{"-99", false, -99},
|
||||
{"123x", true, 0},
|
||||
};
|
||||
|
||||
TEST_CASE("parse-i64", "[token]") {
|
||||
for (std::size_t i_tc = 0, n_tc = s_testcase_v.size(); i_tc < n_tc; ++i_tc) {
|
||||
INFO(xtag("i_tc", i_tc));
|
||||
|
||||
auto const & testcase = s_testcase_v[i_tc];
|
||||
|
||||
token tk(tokentype::tk_i64,
|
||||
testcase.text_);
|
||||
|
||||
REQUIRE(tk.tk_type() == tokentype::tk_i64);
|
||||
|
||||
bool throw_flag = false;
|
||||
try {
|
||||
std::int64_t x = tk.i64_value();
|
||||
|
||||
REQUIRE(x == testcase.expected_);
|
||||
} catch (std::exception & ex) {
|
||||
throw_flag = true;
|
||||
}
|
||||
|
||||
REQUIRE(throw_flag == testcase.expect_throw_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace test3 {
|
||||
TEST_CASE("error-i64", "[token]") {
|
||||
token tk(tokentype::tk_i64, "+");
|
||||
|
||||
bool throw_flag = false;
|
||||
|
||||
try {
|
||||
tk.i64_value();
|
||||
} catch(std::exception & ex) {
|
||||
throw_flag = true;
|
||||
}
|
||||
|
||||
REQUIRE(throw_flag);
|
||||
}
|
||||
}
|
||||
|
||||
namespace test4 {
|
||||
struct testcase_f64 {
|
||||
std::string text_;
|
||||
bool expect_throw_;
|
||||
double expected_;
|
||||
};
|
||||
|
||||
std::vector<testcase_f64> s_testcase_v = {
|
||||
{"", true, 0},
|
||||
{"0", false, 0},
|
||||
{"-", true, 0},
|
||||
{"+", true, 0},
|
||||
{"-0", false, 0},
|
||||
|
||||
{"+0", false, 0},
|
||||
{"1", false, 1},
|
||||
{"-1", false, -1},
|
||||
{"9", false, 9},
|
||||
{"-9", false, -9},
|
||||
|
||||
{"12", false, 12},
|
||||
{"+12", false, 12},
|
||||
{"-12", false, -12},
|
||||
{"99", false, 99},
|
||||
{"-99", false, -99},
|
||||
|
||||
{"123x", true, 0},
|
||||
{"0.0", false, 0.0},
|
||||
{"0.1", false, 0.1},
|
||||
{"0.12", false, 0.12},
|
||||
{"0.123", false, 0.123},
|
||||
|
||||
{"0.1234", false, 0.1234},
|
||||
{"0.12345", false, 0.12345},
|
||||
{"0.123456", false, 0.123456},
|
||||
{"0.1234567", false, 0.1234567},
|
||||
{"0.12345678", false, 0.12345678},
|
||||
|
||||
{"0.123456789", false, 0.123456789},
|
||||
{"+0.0", false, 0.0},
|
||||
{"+0.1", false, 0.1},
|
||||
{"+0.12", false, 0.12},
|
||||
{"+0.123", false, 0.123},
|
||||
|
||||
{"+0.1234", false, 0.1234},
|
||||
{"+0.12345", false, 0.12345},
|
||||
{"+0.123456", false, 0.123456},
|
||||
{"+0.1234567", false, 0.1234567},
|
||||
{"+0.12345678", false, 0.12345678},
|
||||
|
||||
{"+0.123456789", false, 0.123456789},
|
||||
{"+0.0e0", false, 0.0},
|
||||
{"+0.1e0", false, 0.1},
|
||||
{"+0.12e0", false, 0.12},
|
||||
{"+0.123e0", false, 0.123},
|
||||
|
||||
{"+0.1234e0", false, 0.1234},
|
||||
{"+0.12345e0", false, 0.12345},
|
||||
{"+0.123456e0", false, 0.123456},
|
||||
{"+0.1234567e0", false, 0.1234567},
|
||||
{"+0.12345678e0", false, 0.12345678},
|
||||
|
||||
{"+0.123456789e0", false, 0.123456789},
|
||||
{"+0.0e1", false, 00.},
|
||||
{"+0.1e1", false, 01.},
|
||||
{"+0.12e1", false, 01.2},
|
||||
{"+0.123e1", false, 01.23},
|
||||
|
||||
{"+0.1234e1", false, 01.234},
|
||||
{"+0.12345e1", false, 01.2345},
|
||||
{"+0.123456e1", false, 01.23456},
|
||||
{"+0.1234567e1", false, 01.234567},
|
||||
{"+0.12345678e1", false, 01.2345678},
|
||||
|
||||
{"+0.123456789e1", false, 01.23456789},
|
||||
{"+0.0E1", false, 00.},
|
||||
{"+0.1E1", false, 01.},
|
||||
{"+0.12E1", false, 01.2},
|
||||
{"+0.123E1", false, 01.23},
|
||||
|
||||
{"+0.1234E1", false, 01.234},
|
||||
{"+0.12345E1", false, 01.2345},
|
||||
{"+0.123456E1", false, 01.23456},
|
||||
{"+0.1234567E1", false, 01.234567},
|
||||
{"+0.12345678E1", false, 01.2345678},
|
||||
|
||||
{"+0.123456789E1", false, 01.23456789},
|
||||
{"+0.0e9", false, 0.0},
|
||||
{"+0.1e9", false, 0.1e9},
|
||||
{"+0.12e9", false, 0.12e9},
|
||||
{"+0.123e9", false, 0.123e9},
|
||||
|
||||
{"+0.1234e9", false, 0.1234e9},
|
||||
{"+0.12345e9", false, 0.12345e9},
|
||||
{"+0.123456e9", false, 0.123456e9},
|
||||
{"+0.1234567e9", false, 0.1234567e9},
|
||||
{"+0.12345678e9", false, 0.12345678e9},
|
||||
|
||||
{"+0.123456789e9", false, 0.123456789e9},
|
||||
{"-0.0", false, -0.0},
|
||||
{"-0.1", false, -0.1},
|
||||
{"-0.12", false, -0.12},
|
||||
{"-0.123", false, -0.123},
|
||||
|
||||
{"-0.1234", false, -0.1234},
|
||||
{"-0.12345", false, -0.12345},
|
||||
{"-0.123456", false, -0.123456},
|
||||
{"-0.1234567", false, -0.1234567},
|
||||
{"-0.12345678", false, -0.12345678},
|
||||
|
||||
{"-0.123456789", false, -0.123456789},
|
||||
{"00.", false, 0.0},
|
||||
{"01.", false, 1.0},
|
||||
{"01.2", false, 1.2},
|
||||
{"01.23", false, 1.23},
|
||||
|
||||
{"01.234", false, 1.234},
|
||||
{"01.2345", false, 1.2345},
|
||||
{"01.23456", false, 1.23456},
|
||||
{"01.234567", false, 1.234567},
|
||||
{"01.2345678", false, 1.2345678},
|
||||
|
||||
{"01.23456789", false, 1.23456789},
|
||||
{"0.0", false, 0.0},
|
||||
{"1.2", false, 1.2},
|
||||
{"12.", false, 12.0},
|
||||
{"12.3", false, 12.3},
|
||||
|
||||
{"12.34", false, 12.34},
|
||||
{"12.345", false, 12.345},
|
||||
{"12.3456", false, 12.3456},
|
||||
{"12.34567", false, 12.34567},
|
||||
{"12.345678", false, 12.345678},
|
||||
|
||||
{"12.3456789", false, 12.3456789},
|
||||
{"01.23", false, 1.23},
|
||||
{"12.3", false, 12.3},
|
||||
{"123.", false, 123.0},
|
||||
{"123.4", false, 123.4},
|
||||
|
||||
{"123.45", false, 123.45},
|
||||
{"123.456", false, 123.456},
|
||||
{"123.4567", false, 123.4567},
|
||||
{"123.45678", false, 123.45678},
|
||||
{"123.456789", false, 123.456789},
|
||||
};
|
||||
|
||||
/* For every row of s_testcase_v: build a tk_f64 token from the text and
 * check that f64_value() either matches the expected number (relative
 * tolerance 1e-15) or throws, whichever the row demands.  The message of
 * any caught exception is attached to the final assertion via INFO.
 */
TEST_CASE("parse-f64", "[token]") {
    const std::size_t n_tc = s_testcase_v.size();

    for (std::size_t i_tc = 0; i_tc < n_tc; ++i_tc) {
        const auto & row = s_testcase_v[i_tc];

        INFO(tostr(xtag("i_tc", i_tc),
                   xtag("text", row.text_)));

        token tk(tokentype::tk_f64, row.text_);

        REQUIRE(tk.tk_type() == tokentype::tk_f64);

        std::string ex_msg;
        bool did_throw = false;

        try {
            const double parsed = tk.f64_value();

            REQUIRE(parsed == Approx(row.expected_).epsilon(1.0e-15));
        } catch (std::exception & ex) {
            ex_msg = ex.what();
            did_throw = true;
        }

        INFO(xtag("ex_msg", ex_msg));

        REQUIRE(did_throw == row.expect_throw_);
    }
}
|
||||
} /*namespace*/
|
||||
} /*namespace ut*/
|
||||
} /*namespace xo*/
|
||||
|
||||
/* end token.test.cpp */
|
||||
|
|
@ -1,576 +0,0 @@
|
|||
/* file tokenizer.test.cpp
|
||||
*
|
||||
* author: Roland Conybeare
|
||||
*/
|
||||
|
||||
#include "xo/tokenizer/tokenizer.hpp"
|
||||
#include <catch2/catch.hpp>
|
||||
|
||||
namespace xo {
|
||||
using xo::scm::tokentype;
|
||||
using token = xo::scm::token<char>;
|
||||
using xo::scm::span;
|
||||
|
||||
namespace ut {
|
||||
/** Two-pass test harness.
 *
 *  First pass - verify test assertions.
 *  Second pass only if first pass failed.
 *  On second pass, enable verbose logging
 *
 *  Intended use:
 *    rehearser rh;
 *    for (auto pass : rh) { ... REHEARSE(rh, expr); ... }
 *
 *  The loop body runs once with attention_ == 0, and again with
 *  attention_ == 1 only when ok_flag_ was cleared during the first pass.
 **/
struct rehearser {
    /** @param att  starting pass number (0 = first pass) **/
    rehearser(std::uint32_t att = 0) : attention_{att} {}

    /** Iterator that drives the pass loop.
     *  All state lives in the parent rehearser, so expect at most one
     *  iterator to be advanced per rehearser instance.
     *  An iterator with a null parent_ is the end sentinel.
     **/
    struct iterator {
        explicit iterator(rehearser* parent) : parent_{parent} {}

        /** advance to next pass (or to end); defined out-of-line below **/
        iterator& operator++();
        /** current pass number; must not be called on the end sentinel **/
        std::uint32_t operator*() const { return parent_->attention_; }

        bool operator==(const iterator& ix2) const {
            return (parent_ == ix2.parent_);
        }
        /* range-for compares with !=; spelled out so the harness does not
         * rely on C++20 synthesizing it from operator==
         */
        bool operator!=(const iterator& ix2) const {
            return !(*this == ix2);
        }

        /** rehearser being iterated; nullptr once iteration is finished **/
        rehearser* parent_ = nullptr;
        /** not consulted by the iterator itself: pass number is read from parent_ **/
        std::uint32_t attention_ = 0;
    };

    bool is_first_pass() const { return attention_ == 0; }
    bool is_second_pass() const { return attention_ == 1; }
    /** verbose logging is wanted exactly on the second pass **/
    bool enable_debug() const { return is_second_pass(); }

    iterator begin() { return iterator(this); }
    iterator end() { return iterator(nullptr); }

public:
    /** pass number: 0 or 1 **/
    std::uint32_t attention_ = 0;
    /** @brief set to true when test starts; false if first pass fails **/
    bool ok_flag_ = true;
};
|
||||
|
||||
/** Advance to the next pass.
 *
 *  - from pass 0: go to pass 1 if the first pass recorded a failure
 *    (ok_flag_ false), otherwise skip straight to the end.
 *  - from pass 1 (or later): go to the end.
 *
 *  Reaching the end is represented by clearing parent_, which makes this
 *  iterator compare equal to rehearser::end().
 *  Incrementing an iterator that is already at the end is a no-op.
 **/
auto rehearser::iterator::operator++() -> iterator&
{
    /* end sentinel: nothing to advance, and parent_ must not be dereferenced */
    if (!parent_)
        return *this;

    ++(parent_->attention_);

    if (parent_->ok_flag_ && (parent_->attention_ == 1)) {
        /* skip 2nd pass */
        ++(parent_->attention_);
    }

    /* >= rather than ==, so a rehearser started past pass 1 still terminates */
    if (parent_->attention_ >= 2)
        parent_ = nullptr;

    return *this;
}
|
||||
|
||||
/* Use this instead of REQUIRE(expr) in context of a rehearser loop.
 *
 * First pass:  evaluate expr quietly and fold the result into
 *              rehearser.ok_flag_ (no catch2 assertion is raised).
 * Other pass:  forward to REQUIRE(expr), so catch2 reports the failure.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement:
 * safe under an unbraced if/else, and requires the usual trailing ';'.
 */
#  define REHEARSE(rehearser, expr)                                     \
    do {                                                                \
        if ((rehearser).is_first_pass()) {                              \
            bool rehearse_ok_ = (expr);                                 \
            (rehearser).ok_flag_ = (rehearser).ok_flag_ && rehearse_ok_; \
        } else {                                                        \
            REQUIRE(expr);                                              \
        }                                                               \
    } while (0)
|
||||
|
||||
/* REQUIRE_ORCAPTURE(ok_flag, catch_flag, expr)
 *
 *   catch_flag true:   assert expr through catch2.
 *   catch_flag false:  fold expr into ok_flag instead of asserting.
 *
 * note: trivial REQUIRE() call in else branch bc we still want
 *       catch2 to count assertions when verification succeeds
 *
 * Both macros expand to a single do { } while (0) statement, so they are
 * safe under an unbraced if/else and take the usual trailing ';'.
 */
#  define REQUIRE_ORCAPTURE(ok_flag, catch_flag, expr)  \
    do {                                                \
        if (catch_flag) {                               \
            REQUIRE((expr));                            \
        } else {                                        \
            REQUIRE(true);                              \
            (ok_flag) &= (expr);                        \
        }                                               \
    } while (0)

/* Like REQUIRE_ORCAPTURE, then returns ok_flag from the enclosing
 * function as soon as ok_flag is false.  Only usable inside a function
 * whose return type accepts ok_flag.
 */
#  define REQUIRE_ORFAIL(ok_flag, catch_flag, expr)     \
    do {                                                \
        REQUIRE_ORCAPTURE(ok_flag, catch_flag, expr);   \
        if (!(ok_flag))                                 \
            return ok_flag;                             \
    } while (0)
|
||||
|
||||
namespace {
|
||||
struct testcase_tkz {
|
||||
std::string input_;
|
||||
bool expect_throw_;
|
||||
token expected_tk_;
|
||||
bool consume_all_;
|
||||
};
|
||||
|
||||
std::vector<testcase_tkz>
|
||||
s_testcase_v = {
|
||||
/*
|
||||
*
|
||||
* expect_throw consume_all
|
||||
* v v
|
||||
*/
|
||||
{"<", false, token::leftangle(), true},
|
||||
/* possible prefix of >= */
|
||||
{">", false, token::rightangle(), true},
|
||||
{"> ", false, token::rightangle(), true},
|
||||
|
||||
{"(", false, token::leftparen(), true},
|
||||
{")", false, token::rightparen(), true},
|
||||
|
||||
{"[", false, token::leftbracket(), true},
|
||||
{"]", false, token::rightbracket(), true},
|
||||
|
||||
{"{", false, token::leftbrace(), true},
|
||||
{" {", false, token::leftbrace(), true},
|
||||
|
||||
{"\t{", false, token::leftbrace(), true},
|
||||
{"\n{", false, token::leftbrace(), true},
|
||||
{"}", false, token::rightbrace(), true},
|
||||
|
||||
{"0", false, token::i64_token("0"), true},
|
||||
{"1", false, token::i64_token("1"), true},
|
||||
{"12", false, token::i64_token("12"), true},
|
||||
{"123", false, token::i64_token("123"), true},
|
||||
{"1234", false, token::i64_token("1234"), true},
|
||||
|
||||
{"0 ", false, token::i64_token("0"), false},
|
||||
{"1 ", false, token::i64_token("1"), false},
|
||||
{"12 ", false, token::i64_token("12"), false},
|
||||
{"123 ", false, token::i64_token("123"), false},
|
||||
{"1234 ", false, token::i64_token("1234"), false},
|
||||
|
||||
{"1<", false, token::i64_token("1"), false},
|
||||
{"1>", false, token::i64_token("1"), false},
|
||||
{"1(", false, token::i64_token("1"), false},
|
||||
{"1)", false, token::i64_token("1"), false},
|
||||
{"1[", false, token::i64_token("1"), false},
|
||||
{"1]", false, token::i64_token("1"), false},
|
||||
{"1{", false, token::i64_token("1"), false},
|
||||
{"1}", false, token::i64_token("1"), false},
|
||||
{"1;", false, token::i64_token("1"), false},
|
||||
{"1:", false, token::i64_token("1"), false},
|
||||
{"1,", false, token::i64_token("1"), false},
|
||||
|
||||
{".1", false, token::f64_token(".1"), true},
|
||||
{".12", false, token::f64_token(".12"), true},
|
||||
{".123", false, token::f64_token(".123"), true},
|
||||
|
||||
{"+.1", false, token::f64_token("+.1"), true},
|
||||
{"+.12", false, token::f64_token("+.12"), true},
|
||||
{"+.123", false, token::f64_token("+.123"), true},
|
||||
|
||||
{"-.1", false, token::f64_token("-.1"), true},
|
||||
{"-.12", false, token::f64_token("-.12"), true},
|
||||
{"-.123", false, token::f64_token("-.123"), true},
|
||||
|
||||
{"1.", false, token::f64_token("1."), true},
|
||||
{"1.2", false, token::f64_token("1.2"), true},
|
||||
{"1.23", false, token::f64_token("1.23"), true},
|
||||
|
||||
{"1e0", false, token::f64_token("1e0"), true},
|
||||
{"1e-1", false, token::f64_token("1e-1"), true},
|
||||
{"1e1", false, token::f64_token("1e1"), true},
|
||||
{"1e+1", false, token::f64_token("1e+1"), true},
|
||||
|
||||
{"\"hello\"", false, token::string_token("hello"), true},
|
||||
/* tokenizer sees this input:
|
||||
* "\"hi\", she said"
|
||||
*/
|
||||
{"\"\\\"hi\\\", she said\"", false, token::string_token("\"hi\", she said"), true},
|
||||
/* tokenizer sees this input:
|
||||
* "look ma, newline ->\n<- "
|
||||
*/
|
||||
{"\"look ma, newline ->\\n<- \"", false,
|
||||
token::string_token("look ma, newline ->\n<- "), true},
|
||||
/* tokenizer sees this input:
|
||||
* "tab to the right [\t], to the right [\t]"
|
||||
*/
|
||||
{"\"tab to the right [\\t], to the right [\\t]\"", false,
|
||||
token::string_token("tab to the right [\t], to the right [\t]"), true},
|
||||
|
||||
{".", false, token::dot(), true},
|
||||
{":", false, token::colon(), true},
|
||||
{",", false, token::comma(), true},
|
||||
{"=", false, token::singleassign(), true},
|
||||
{":=", false, token::assign_token(), true},
|
||||
{"->", false, token::yields(), true},
|
||||
|
||||
{"+", false, token::plus_token(), true},
|
||||
{"-", false, token::minus_token(), true},
|
||||
{"*", false, token::star_token(), true},
|
||||
{"/", false, token::slash_token(), true},
|
||||
|
||||
{"symbol", false, token::symbol_token("symbol"), true},
|
||||
{"another-symbol", false, token::symbol_token("another-symbol"), true},
|
||||
|
||||
{"type", false, token::type(), true},
|
||||
{"def", false, token::def(), true},
|
||||
{"lambda", false, token::lambda(), true},
|
||||
{"if", false, token::if_token(), true},
|
||||
{"let", false, token::let(), true},
|
||||
{"in", false, token::in(), true},
|
||||
{"end", false, token::end(), true},
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
/* Single-token scenarios: scan each input in s_testcase_v with eof=true and
 * compare the first token produced against the expected one.  Each row runs
 * under a rehearser, so a failing row is replayed with debug logging on.
 */
TEST_CASE("tokenizer", "[tokenizer]") {
    using tokenizer = xo::scm::tokenizer<char>;

    const std::size_t n_tc = s_testcase_v.size();

    for (std::size_t i_tc = 0; i_tc < n_tc; ++i_tc) {
        const testcase_tkz & testcase = s_testcase_v[i_tc];
        const token & expected = testcase.expected_tk_;

        rehearser rh;

        for (auto _ : rh) {
            scope log(XO_DEBUG2(rh.enable_debug(), "tokenizer"));

            log && log(xtag("i_tc", i_tc), xtag("input", testcase.input_));

            tokenizer tkz(rh.enable_debug());

            const char * input_lo = testcase.input_.c_str();
            const char * input_hi = input_lo + testcase.input_.size();
            tokenizer::span_type in_span(input_lo, input_hi);

            auto sr = tkz.scan(in_span, true /*eof*/);
            const auto & tk = sr.get_token();

            REHEARSE(rh, tk.tk_type() == expected.tk_type());

            /* which further checks apply depends on the kind of token scanned */
            if (tk.tk_type() == tokentype::tk_i64) {
                REHEARSE(rh, !tk.text().empty());
                REHEARSE(rh, tk.i64_value() == expected.i64_value());
            } else if (tk.tk_type() == tokentype::tk_f64) {
                REHEARSE(rh, !tk.text().empty());
                REHEARSE(rh, tk.f64_value() == expected.f64_value());
            } else if (tk.tk_type() == tokentype::tk_string) {
                /* text can legitimately be empty here, consider input "" */
                REHEARSE(rh, tk.text() == expected.text());
            } else if (tk.tk_type() == tokentype::tk_symbol) {
                REHEARSE(rh, !tk.text().empty());
                REHEARSE(rh, tk.text() == expected.text());
            } else {
                /* remaining token kinds are expected to carry no text */
                REHEARSE(rh, tk.text().empty());
            }

            /* row states whether the scan should have consumed the whole input */
            if (testcase.consume_all_) {
                REHEARSE(rh, sr.consumed() == in_span);
            } else {
                REHEARSE(rh, sr.consumed() != in_span);
            }
        }
    }
}
|
||||
|
||||
namespace {
|
||||
struct testcase2_tkz {
|
||||
std::string input_;
|
||||
bool expect_throw_;
|
||||
std::vector<token> expected_tk_v_;
|
||||
};
|
||||
|
||||
std::vector<testcase2_tkz>
|
||||
s_testcase2_v = {
|
||||
{"def foo : f64 = 3.141;",
|
||||
false,
|
||||
{token::def(),
|
||||
token::symbol_token("foo"),
|
||||
token::colon(),
|
||||
token::symbol_token("f64"),
|
||||
token::singleassign(),
|
||||
token::f64_token("3.141"),
|
||||
token::semicolon()
|
||||
}},
|
||||
{"def foo = lambda (x : f64) { def y = x * x; y; }",
|
||||
false,
|
||||
{token::def(),
|
||||
token::symbol_token("foo"),
|
||||
token::singleassign(),
|
||||
token::lambda(),
|
||||
token::leftparen(),
|
||||
token::symbol_token("x"),
|
||||
token::colon(),
|
||||
token::symbol_token("f64"),
|
||||
token::rightparen(),
|
||||
token::leftbrace(),
|
||||
token::def(),
|
||||
token::symbol_token("y"),
|
||||
token::singleassign(),
|
||||
token::symbol_token("x"),
|
||||
token::star_token(),
|
||||
token::symbol_token("x"),
|
||||
token::semicolon(),
|
||||
token::symbol_token("y"),
|
||||
token::semicolon(),
|
||||
token::rightbrace()
|
||||
}},
|
||||
#ifdef TODO
|
||||
{"a.b",
|
||||
false,
|
||||
{token::symbol_token("a"),
|
||||
token::dot(),
|
||||
token::symbol_token("b")
|
||||
}},
|
||||
#endif
|
||||
{"a,b",
|
||||
false,
|
||||
{token::symbol_token("a"),
|
||||
token::comma(),
|
||||
token::symbol_token("b")
|
||||
}},
|
||||
{"a:b",
|
||||
false,
|
||||
{token::symbol_token("a"),
|
||||
token::colon(),
|
||||
token::symbol_token("b")
|
||||
}},
|
||||
{"a;b",
|
||||
false,
|
||||
{token::symbol_token("a"),
|
||||
token::semicolon(),
|
||||
token::symbol_token("b")
|
||||
}},
|
||||
{"a:=b",
|
||||
false,
|
||||
{token::symbol_token("a"),
|
||||
token::assign_token(),
|
||||
token::symbol_token("b")
|
||||
}},
|
||||
{"a=b",
|
||||
false,
|
||||
{token::symbol_token("a"),
|
||||
token::singleassign(),
|
||||
token::symbol_token("b")
|
||||
}},
|
||||
{"p->q",
|
||||
false,
|
||||
{token::symbol_token("p"),
|
||||
token::yields(),
|
||||
token::symbol_token("q")
|
||||
}},
|
||||
{"a + b",
|
||||
false,
|
||||
{token::symbol_token("a"),
|
||||
token::plus_token(),
|
||||
token::symbol_token("b")
|
||||
}},
|
||||
{"a - b",
|
||||
false,
|
||||
{token::symbol_token("a"),
|
||||
token::minus_token(),
|
||||
token::symbol_token("b")
|
||||
}},
|
||||
{"a-b",
|
||||
false,
|
||||
{token::symbol_token("a-b"),
|
||||
}},
|
||||
{"(apple)",
|
||||
false,
|
||||
{token::leftparen(),
|
||||
token::symbol_token("apple"),
|
||||
token::rightparen()
|
||||
}},
|
||||
{"<apple>",
|
||||
false,
|
||||
{token::leftangle(),
|
||||
token::symbol_token("apple"),
|
||||
token::rightangle()
|
||||
}},
|
||||
};
|
||||
}
|
||||
|
||||
/* Token-sequence scenarios: repeatedly scan each input in s_testcase2_v,
 * comparing every token produced against the expected sequence and
 * dropping the consumed prefix between scans.
 */
TEST_CASE("tokenizer2", "[tokenizer]") {
    using tokenizer = xo::scm::tokenizer<char>;

    const std::size_t n_tc = s_testcase2_v.size();

    for (std::size_t i_tc = 0; i_tc < n_tc; ++i_tc) {
        const testcase2_tkz & testcase = s_testcase2_v[i_tc];
        const std::vector<token> & expected_v = testcase.expected_tk_v_;

        rehearser rh;

        for (auto _ : rh) {
            scope log(XO_DEBUG2(rh.enable_debug(), "tokenizer2"));

            log && log(xtag("i_tc", i_tc), xtag("input", testcase.input_));

            tokenizer tkz(rh.enable_debug());

            const char * input_lo = testcase.input_.c_str();
            const char * input_hi = input_lo + testcase.input_.size();
            tokenizer::span_type in_span(input_lo, input_hi);

            const int n_tk = static_cast<int>(expected_v.size());

            for (int i_tk = 0; i_tk < n_tk; ++i_tk) {
                log && log(xtag("i_tk", i_tk));

                const token & expected = expected_v[i_tk];

                /* eof is signalled only once the remaining input is empty */
                auto sr = tkz.scan(in_span, in_span.empty());
                const auto & tk = sr.get_token();

                /* token type is compared only when the scan produced a valid token */
                if (tk.is_valid()) {
                    REHEARSE(rh, tk.tk_type() == expected.tk_type());
                }

                if (tk.tk_type() == tokentype::tk_i64) {
                    REHEARSE(rh, !tk.text().empty());
                    REHEARSE(rh, tk.i64_value() == expected.i64_value());
                } else if (tk.tk_type() == tokentype::tk_f64) {
                    REHEARSE(rh, !tk.text().empty());
                    REHEARSE(rh, tk.f64_value() == expected.f64_value());
                } else if (tk.tk_type() == tokentype::tk_string) {
                    /* tk.text() can be empty, consider input "" */
                    REHEARSE(rh, tk.text() == expected.text());
                } else if (tk.tk_type() == tokentype::tk_symbol) {
                    REHEARSE(rh, !tk.text().empty());
                    REHEARSE(rh, tk.text() == expected.text());
                } else {
                    REHEARSE(rh, tk.text().empty());
                }

                /* continue scanning after whatever this scan consumed */
                in_span = in_span.after_prefix(sr.consumed());
            }
        }
    }
} /*TEST_CASE(tokenizer2)*/
|
||||
|
||||
namespace {
|
||||
using tkz_error_type = xo::scm::tokenizer_error<char>;
|
||||
using input_state_type = xo::scm::input_state<char>;
|
||||
using span_type = xo::scm::span<const char>;
|
||||
|
||||
struct testcase_error {
|
||||
std::string input_;
|
||||
tkz_error_type expect_error_;
|
||||
};
|
||||
|
||||
/** Build one error-handling testcase.
 *
 *  @param input         text to feed to the tokenizer
 *  @param src_function  expected value of error().src_function()
 *  @param error_descr   expected value of error().error_description()
 *  @param tk_start      forwarded to the expected error's input_state
 *  @param whitespace    forwarded to the expected error's input_state
 *  @param error_pos     expected value of error().error_pos()
 *
 *  NOTE(review): the input_state is built from a span over retval.input_.
 *  If that span refers to the string's own buffer it may not survive
 *  retval being copied/moved into the testcase table -- confirm nothing
 *  dereferences it afterwards.
 **/
testcase_error
make_testcase(const char * input, const char * src_function, const char * error_descr,
              size_t tk_start, size_t whitespace, size_t error_pos)
{
    testcase_error retval;

    retval.input_ = input;
    retval.expect_error_ = tkz_error_type(src_function, error_descr,
                                          input_state_type(span_type::from_string(retval.input_),
                                                           tk_start, whitespace),
                                          error_pos);

    return retval;
}
|
||||
|
||||
std::vector<testcase_error>
|
||||
s_testcase3_v = {
|
||||
// 012345678
|
||||
// --------v
|
||||
make_testcase("123.456ez",
|
||||
"assemble_token",
|
||||
"unexpected character in numeric constant",
|
||||
0, 0, 8),
|
||||
// 01
|
||||
// -v
|
||||
make_testcase("1-3",
|
||||
"assemble_token",
|
||||
"improperly placed sign indicator",
|
||||
0, 0, 1),
|
||||
// 012
|
||||
// --v
|
||||
make_testcase("1..2",
|
||||
"assemble_token",
|
||||
"duplicate decimal point in numeric literal",
|
||||
0, 0, 2),
|
||||
// o 0123456
|
||||
// ------v
|
||||
make_testcase("1.23e4e",
|
||||
"assemble_token",
|
||||
"duplicate exponent marker in numeric literal",
|
||||
0, 0, 6),
|
||||
// tokenizer sees string ["\"]
|
||||
// 0 1 2 3
|
||||
// - - - v
|
||||
make_testcase("\"\\\"",
|
||||
"assemble_token",
|
||||
"missing terminating '\"' to complete literal string",
|
||||
//"expect \\ to escape one of n|t|r|\"|\\ in string literal",
|
||||
0, 0, 3),
|
||||
// tokenizer sees literal with embedded newline
|
||||
// 1 2 3
|
||||
// 01234567890123456789012345678901 2
|
||||
// -------------------------------- v
|
||||
make_testcase("\"everything was going fine until\n\"",
|
||||
"scan",
|
||||
"must use \\n or \\r to encode newline/cr in string literal",
|
||||
0, 0, 32),
|
||||
// tokenizer sees string ["\]
|
||||
// 0 1 2
|
||||
// - - v
|
||||
make_testcase("\"\\",
|
||||
"assemble_token",
|
||||
"expecting key following escape character \\",
|
||||
0, 0, 2),
|
||||
// tokenizer sees string ["\q"]
|
||||
// 0 12
|
||||
// - -v
|
||||
make_testcase("\"\\q\"",
|
||||
"assemble_token",
|
||||
"expecting one of n|r|\"|\\ following escape \\",
|
||||
0, 0, 2),
|
||||
//
|
||||
make_testcase("#",
|
||||
"assemble_token",
|
||||
"illegal input character",
|
||||
0, 0, 0),
|
||||
};
|
||||
|
||||
/* Error-handling scenarios: every input in s_testcase3_v must make the
 * tokenizer report an error whose source function, description and
 * positions match the expected error.
 */
TEST_CASE("tokenizer3", "[tokenizer]") {
    using tokenizer = xo::scm::tokenizer<char>;

    /* set to true to get debug logging on the first pass as well */
    constexpr bool c_force_debug = false;

    const std::size_t n_tc = s_testcase3_v.size();

    for (std::size_t i_tc = 0; i_tc < n_tc; ++i_tc) {
        const testcase_error & testcase = s_testcase3_v[i_tc];
        const auto & expected = testcase.expect_error_;

        rehearser rh(0);

        for (auto _ : rh) {
            scope log(XO_DEBUG2(c_force_debug || rh.enable_debug(), "tokenizer3"));

            log && log(xtag("pass", _), xtag("ok(-)", rh.ok_flag_));
            log && log(xtag("i_tc", i_tc), xtag("input", testcase.input_));

            tokenizer tkz(c_force_debug || rh.enable_debug());

            auto in_span = tokenizer::span_type::from_string(testcase.input_);
            auto sr = tkz.scan(in_span, true /*eof*/);

            REHEARSE(rh, sr.is_error());

            auto && err = sr.error();

            /* source function is compared only when the reported one is non-null */
            if (err.src_function()) {
                REHEARSE(rh, std::string(err.src_function()) == std::string(expected.src_function()));
            }
            /* description is compared only when the reported one is non-empty */
            if (!err.error_description().empty()) {
                REHEARSE(rh, std::string(err.error_description()) == std::string(expected.error_description()));
            }

            REHEARSE(rh, err.whitespace() == expected.whitespace());
            REHEARSE(rh, err.tk_start() == expected.tk_start());
            REHEARSE(rh, err.error_pos() == expected.error_pos());

            log && log(xtag("ok(+)", rh.ok_flag_));
        }
    }
}
|
||||
}
|
||||
|
||||
} /*namespace ut*/
|
||||
} /*namespace xo*/
|
||||
|
||||
/* end tokenizer.test.cpp */
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
/* file tokenizer_utest_main.cpp */
|
||||
|
||||
#define CATCH_CONFIG_MAIN
|
||||
#include "catch2/catch.hpp"
|
||||
|
||||
/* end tokenizer_utest_main.cpp */
|
||||
Loading…
Add table
Add a link
Reference in a new issue