From 1f981a06362b6ed34c76bb30892a3966cbfb49bf Mon Sep 17 00:00:00 2001 From: Roland Conybeare Date: Sat, 6 Jun 2026 22:25:50 -0400 Subject: [PATCH] git subrepo clone (merge) git@github.com:Rconybea/xo-tokenizer.git xo-tokenizer subrepo: subdir: "xo-tokenizer" merged: "6e665ed7" upstream: origin: "git@github.com:Rconybea/xo-tokenizer.git" branch: "main" commit: "6e665ed7" git-subrepo: version: "0.4.9" origin: "???" commit: "???" --- xo-tokenizer/.gitignore | 8 + xo-tokenizer/.gitrepo | 12 + xo-tokenizer/CMakeLists.txt | 33 + xo-tokenizer/README.md | 56 + xo-tokenizer/cmake/xo-bootstrap-macros.cmake | 35 + .../cmake/xo_tokenizerConfig.cmake.in | 9 + xo-tokenizer/docs/CMakeLists.txt | 9 + xo-tokenizer/docs/README | 41 + xo-tokenizer/docs/_static/README | 1 + xo-tokenizer/docs/_static/img/favicon.ico | Bin 0 -> 309803 bytes xo-tokenizer/docs/conf.py | 39 + xo-tokenizer/docs/examples.rst | 99 ++ xo-tokenizer/docs/implementation.rst | 38 + xo-tokenizer/docs/index.rst | 29 + xo-tokenizer/docs/input-state-class.rst | 77 ++ xo-tokenizer/docs/install.rst | 111 ++ xo-tokenizer/docs/scan-result-class.rst | 29 + xo-tokenizer/docs/schematika-tokens.rst | 105 ++ xo-tokenizer/docs/span-class.rst | 87 ++ xo-tokenizer/docs/token-class.rst | 96 ++ xo-tokenizer/docs/tokenizer-class.rst | 68 ++ xo-tokenizer/docs/tokenizer-error-class.rst | 54 + xo-tokenizer/docs/tokentype-enum.rst | 36 + xo-tokenizer/example/CMakeLists.txt | 1 + xo-tokenizer/example/tokenrepl/CMakeLists.txt | 11 + xo-tokenizer/example/tokenrepl/tokenrepl.cpp | 71 ++ xo-tokenizer/include/xo/tokenizer/buffer.hpp | 328 +++++ .../include/xo/tokenizer/input_state.hpp | 363 ++++++ .../include/xo/tokenizer/scan_result.hpp | 112 ++ xo-tokenizer/include/xo/tokenizer/span.hpp | 291 +++++ xo-tokenizer/include/xo/tokenizer/token.hpp | 473 ++++++++ .../include/xo/tokenizer/tokenizer.hpp | 1057 +++++++++++++++++ .../include/xo/tokenizer/tokenizer_error.hpp | 162 +++ .../include/xo/tokenizer/tokentype.hpp | 192 +++ 
xo-tokenizer/src/tokenizer/CMakeLists.txt | 11 + xo-tokenizer/src/tokenizer/token.cpp | 9 + xo-tokenizer/src/tokenizer/tokentype.cpp | 74 ++ xo-tokenizer/utest/CMakeLists.txt | 13 + xo-tokenizer/utest/token.test.cpp | 266 +++++ xo-tokenizer/utest/tokenizer.test.cpp | 576 +++++++++ xo-tokenizer/utest/tokenizer_utest_main.cpp | 6 + 41 files changed, 5088 insertions(+) create mode 100644 xo-tokenizer/.gitignore create mode 100644 xo-tokenizer/.gitrepo create mode 100644 xo-tokenizer/CMakeLists.txt create mode 100644 xo-tokenizer/README.md create mode 100644 xo-tokenizer/cmake/xo-bootstrap-macros.cmake create mode 100644 xo-tokenizer/cmake/xo_tokenizerConfig.cmake.in create mode 100644 xo-tokenizer/docs/CMakeLists.txt create mode 100644 xo-tokenizer/docs/README create mode 100644 xo-tokenizer/docs/_static/README create mode 100644 xo-tokenizer/docs/_static/img/favicon.ico create mode 100644 xo-tokenizer/docs/conf.py create mode 100644 xo-tokenizer/docs/examples.rst create mode 100644 xo-tokenizer/docs/implementation.rst create mode 100644 xo-tokenizer/docs/index.rst create mode 100644 xo-tokenizer/docs/input-state-class.rst create mode 100644 xo-tokenizer/docs/install.rst create mode 100644 xo-tokenizer/docs/scan-result-class.rst create mode 100644 xo-tokenizer/docs/schematika-tokens.rst create mode 100644 xo-tokenizer/docs/span-class.rst create mode 100644 xo-tokenizer/docs/token-class.rst create mode 100644 xo-tokenizer/docs/tokenizer-class.rst create mode 100644 xo-tokenizer/docs/tokenizer-error-class.rst create mode 100644 xo-tokenizer/docs/tokentype-enum.rst create mode 100644 xo-tokenizer/example/CMakeLists.txt create mode 100644 xo-tokenizer/example/tokenrepl/CMakeLists.txt create mode 100644 xo-tokenizer/example/tokenrepl/tokenrepl.cpp create mode 100644 xo-tokenizer/include/xo/tokenizer/buffer.hpp create mode 100644 xo-tokenizer/include/xo/tokenizer/input_state.hpp create mode 100644 xo-tokenizer/include/xo/tokenizer/scan_result.hpp create mode 100644 
xo-tokenizer/include/xo/tokenizer/span.hpp create mode 100644 xo-tokenizer/include/xo/tokenizer/token.hpp create mode 100644 xo-tokenizer/include/xo/tokenizer/tokenizer.hpp create mode 100644 xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp create mode 100644 xo-tokenizer/include/xo/tokenizer/tokentype.hpp create mode 100644 xo-tokenizer/src/tokenizer/CMakeLists.txt create mode 100644 xo-tokenizer/src/tokenizer/token.cpp create mode 100644 xo-tokenizer/src/tokenizer/tokentype.cpp create mode 100644 xo-tokenizer/utest/CMakeLists.txt create mode 100644 xo-tokenizer/utest/token.test.cpp create mode 100644 xo-tokenizer/utest/tokenizer.test.cpp create mode 100644 xo-tokenizer/utest/tokenizer_utest_main.cpp diff --git a/xo-tokenizer/.gitignore b/xo-tokenizer/.gitignore new file mode 100644 index 00000000..3d3a7826 --- /dev/null +++ b/xo-tokenizer/.gitignore @@ -0,0 +1,8 @@ +# emacs workspace config +.projectile +# clangd working space (see emacs+lsp) +.cache +# typical cmake build directory (source-tree-nephew) +.build* +# symlink to builddir/compile_commands.json; should be set manually in dev sandbox +compile_commands.json diff --git a/xo-tokenizer/.gitrepo b/xo-tokenizer/.gitrepo new file mode 100644 index 00000000..d927af06 --- /dev/null +++ b/xo-tokenizer/.gitrepo @@ -0,0 +1,12 @@ +; DO NOT EDIT (unless you know what you are doing) +; +; This subdirectory is a git "subrepo", and this file is maintained by the +; git-subrepo command. 
See https://github.com/ingydotnet/git-subrepo#readme +; +[subrepo] + remote = git@github.com:Rconybea/xo-tokenizer.git + branch = main + commit = 6e665ed77673806f34c9923bbbc43eb629296fe6 + parent = dcdbd3b4f53f1ccbd13be201cdc8108f8cce644d + method = merge + cmdver = 0.4.9 diff --git a/xo-tokenizer/CMakeLists.txt b/xo-tokenizer/CMakeLists.txt new file mode 100644 index 00000000..896c1b97 --- /dev/null +++ b/xo-tokenizer/CMakeLists.txt @@ -0,0 +1,33 @@ +# xo-tokenizer/CMakeLists.txt + +cmake_minimum_required(VERSION 3.10) + +project(xo_tokenizer VERSION 0.1) + +include(GNUInstallDirs) +include(cmake/xo-bootstrap-macros.cmake) + +xo_cxx_toplevel_options3() + +# ---------------------------------------------------------------- +# c++ settings + +set(PROJECT_CXX_FLAGS "") +#set(PROJECT_CXX_FLAGS "-fconcepts-diagnostics-depth=2") +add_definitions(${PROJECT_CXX_FLAGS}) + +# ---------------------------------------------------------------- + +add_subdirectory(src/tokenizer) +add_subdirectory(example) +#add_subdirectory(utest) # tests failing, temporarily remove +xo_export_cmake_config(${PROJECT_NAME} ${PROJECT_VERSION} ${PROJECT_NAME}Targets) + +if (XO_ENABLE_EXAMPLES) + install(TARGETS xo_tokenizer_repl DESTINATION bin/xo/example/tokenizer) +endif() + +# ---------------------------------------------------------------- +# docs targets depend on all the other library/utest targets +# +add_subdirectory(docs) diff --git a/xo-tokenizer/README.md b/xo-tokenizer/README.md new file mode 100644 index 00000000..3f0befba --- /dev/null +++ b/xo-tokenizer/README.md @@ -0,0 +1,56 @@ +# schematica tokenizer library + +## Getting Started + +### build + install `xo-cmake` dependency + +- [github/Rconybea/xo-cmake](https://github.com/Rconybea/xo-cmake) + +Installs a few cmake ingredients, along with a build assistant `xo-build` for XO projects such as this one. 
+ +### build + install other required XO dependencies +``` +$ xo-build --clone --configure --build --install xo-indentlog +$ xo-build --clone --configure --build --install xo-refcnt +$ xo-build --clone --configure --build --install xo-subsys +$ xo-build --clone --configure --build --install xo-reflectutil +``` + +Note: can use `-n` to dry-run here + +### copy `xo-tokenizer` repository locally +``` +$ xo-build --clone xo-tokenizer +``` + +or equivalently +``` +$ git clone git@github.com:Rconybea/xo-tokenizer.git +``` + +### build + install `xo-tokenizer` + +``` +$ xo-build --configure --build --install xo-tokenizer +``` + +or equivalently: + +``` +$ PREFIX=/usr/local # or wherever you prefer +$ cmake -DCMAKE_INSTALL_PREFIX=${PREFIX} -S xo-tokenizer -B xo-tokenizer/.build +$ cmake --build xo-tokenizer/.build +$ cmake --install xo-tokenizer/.build +``` + +### build for unit test coverage +``` +$ cmake -DCMAKE_BUILD_TYPE=coverage -DCMAKE_INSTALL_PREFIX=$PREFIX -S xo-tokenizer -B xo-tokenizer/.build-ccov +$ cmake --build xo-tokenizer/.build-ccov +``` + +### LSP support +``` +$ cd xo-tokenizer +$ ln -s .build/compile_commands.json # lsp will look for compile_commands.json in the root of the source tree +``` diff --git a/xo-tokenizer/cmake/xo-bootstrap-macros.cmake b/xo-tokenizer/cmake/xo-bootstrap-macros.cmake new file mode 100644 index 00000000..aba31169 --- /dev/null +++ b/xo-tokenizer/cmake/xo-bootstrap-macros.cmake @@ -0,0 +1,35 @@ +# ---------------------------------------------------------------- +# for example: +# $ PREFIX=/usr/local # for example +# $ cmake -DCMAKE_MODULE_PATH=prefix -DCMAKE_INSTALL_PREFIX=$PREFIX -B .build +# +# will get +# CMAKE_MODULE_PATH +# from xo-cmake-config --cmake-module-path +# +# and expect .cmake macros in +# CMAKE_MODULE_PATH/xo_macros/xo_cxx.cmake +# ---------------------------------------------------------------- + +find_program(XO_CMAKE_CONFIG_EXECUTABLE NAMES xo-cmake-config REQUIRED) + +if ("${XO_CMAKE_CONFIG_EXECUTABLE}" STREQUAL 
"XO_CMAKE_CONFIG_EXECUTABLE-NOTFOUND") + message(FATAL_ERROR "could not find xo-cmake-config executable") +endif() + +message(STATUS "XO_CMAKE_CONFIG_EXECUTABLE=${XO_CMAKE_CONFIG_EXECUTABLE}") + +if (NOT XO_SUBMODULE_BUILD) + if (("${CMAKE_MODULE_PATH}" STREQUAL "") OR ("${CMAKE_MODULE_PATH}" STREQUAL prefix)) + # default to typical install location for xo-project-macros + execute_process(COMMAND ${XO_CMAKE_CONFIG_EXECUTABLE} --cmake-module-path OUTPUT_VARIABLE CMAKE_MODULE_PATH) + message(STATUS "CMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}") + endif() +endif() + +# needs to have been installed somewhere on CMAKE_MODULE_PATH, +# (e.g. from xo-cmake with the same value for CMAKE_INSTALL_PREFIX) +# +include(xo_macros/xo_cxx) + +xo_cxx_bootstrap_message() diff --git a/xo-tokenizer/cmake/xo_tokenizerConfig.cmake.in b/xo-tokenizer/cmake/xo_tokenizerConfig.cmake.in new file mode 100644 index 00000000..e1b8fe7a --- /dev/null +++ b/xo-tokenizer/cmake/xo_tokenizerConfig.cmake.in @@ -0,0 +1,9 @@ +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) +#find_dependency(refcnt) +find_dependency(indentlog) +#find_dependency(subsys) +include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Share.cmake") +check_required_components("@PROJECT_NAME@") diff --git a/xo-tokenizer/docs/CMakeLists.txt b/xo-tokenizer/docs/CMakeLists.txt new file mode 100644 index 00000000..30289162 --- /dev/null +++ b/xo-tokenizer/docs/CMakeLists.txt @@ -0,0 +1,9 @@ +# xo-tokenizer/docs/CMakeLists.txt + +xo_doxygen_collect_deps() +xo_docdir_doxygen_config() +xo_docdir_sphinx_config( + index.rst install.rst examples.rst implementation.rst + input-state-class.rst scan-result-class.rst schematika-tokens.rst span-class.rst + token-class.rst tokenizer-error-class.rst tokentype-enum.rst +) diff --git a/xo-tokenizer/docs/README b/xo-tokenizer/docs/README new file mode 100644 index 00000000..ea8a9a25 --- /dev/null +++ b/xo-tokenizer/docs/README @@ -0,0 +1,41 @@ 
+standalone build + + +-----------------------------------------------+ + | cmake | + | CMakeLists.txt | + | $PREFIX/share/cmake/xo_macros/xo_cxx.cmake | + +-----------------------------------------------+ + | + | +----------------------+ + +------------------------------------------------->| .build/docs/Doxyfile | + | +----------------------+ + | ^ + | (cmake | + | /------------/ + | | + | +---------------------------------------+ +-----------------+ + +---->| doxygen |--->| .build/docs/dox | + | | $PREFIX/share/xo-macros/Doxyfile.in | | +- html/ | + | +---------------------------------------+ | +- xml/ | + | +-----------------+ + | | + | /------------/ + | | + | v + | +---------------------------------------+ +--------------------+ + \---->| sphinx |--->| .build/docs/sphinx | + | +- conf.py | | +- html/ | + | +- _static/ | +--------------------+ + | +- *.rst | + +---------------------------------------+ + +umbrella build relies on top-level cmake macros + +files + + README this file + CMakeLists.txt build entry point + conf.py sphinx config + _static static files for sphinx + + index.rst toplevel sphinx document; entry point diff --git a/xo-tokenizer/docs/_static/README b/xo-tokenizer/docs/_static/README new file mode 100644 index 00000000..8230095c --- /dev/null +++ b/xo-tokenizer/docs/_static/README @@ -0,0 +1 @@ +add any static {.html, .js, ..} files for sphinx to pickup here \ No newline at end of file diff --git a/xo-tokenizer/docs/_static/img/favicon.ico b/xo-tokenizer/docs/_static/img/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..15da2145f93eb26e6995c7868ab7883fb2361288 GIT binary patch literal 309803 zcmZQzU}WH800Bk@1%|zv3=GQ{7#I#50EsIwXaq4aBx^A+G&Df@9E=RzHB1Z%2@w8@ zDGUsoTbLOf93XrRCkBRSNfrhJ0|#lWE5$ikqY0O79?U|^U$ zn}tC_0>ba$WMHsf!@?jS0O4n_Ffbh6%EHhY;OEZEB?WRQucwDg5J(n;IoKE&7-EW6 zq%kmPws^WYhE&{od$+tM=IYE3AMfuhPcxaMb8?f;X*TKP$89fXBpnJ8vRbI#B6o_- ziF3Bot52JDKjHOOI6Y~T;X;+!0$vIem=+3GZDn3IE3F`ZQfjI1?(g&dKJGbi!+6`N 
zGjlZ0fB!sFK*jL$otf41EdMqLIB_VpkW6qn-{f=XSa@OegN62s`>Yx+?3Nd*tCv)? zcj(PObi3H>UYp*YTt4L)H=5#ho-5mO)L*DUBWnL%p?k4SR?jP5ZO=@zUOMBU)tLer zHTOeXSQvXX`#Y}fSSWCEg7^OC8xLMz!^I?4E%!eEMcT(trfaM}%=gSOmkaX<`XS=U zuxQ1f@2MZwz7EiqUs(7i;~7KyF23K^e3z#t@dyM(@60a!RljAKJx9cA_B{(9b}Bqr zDf+si{+XQ0xjh_t^%uTfy`|3SAksRuzu}sNm*LiZ(rVxN`0MS2Ch9vir>{Bw79 zl(7G@ik0@~dUIuM7MR)^Z(o1LMS;^HqVUwig5Os^+zWcOUHZJ_MXi5FOJ`2~E*rfa%hDf78m zVJE+(wBTfmb7juM*bUEDZc{!b@5!L_{)-dCs^*5%Qy*D-32v0VEcTeyroZ2;q_FNZ zpUuWMvN!%^+zx!&Zo=pix3`Gh<`2_CS+$S7t2m}>yH0&OW1AaepL25Ql)crrD|e=f zW#5b7c(1orHnMJAXvdz4KW-LJm|j#*3|_tZ$JECc60Dak>{73&%C%0vdVSNPmkGBo zKAES&Fi~9pzudQ*l8-pc_}hOjer=HZ*ziZtLenKSc`;W{nZI@yl&;?Jwxh`V8Dqzu z88(d5)+{<$_@n%KqEtXptGt6IN4rJ!9SOhOS&Xu|BRONJda%Zo7<22_5J0}_+ zQnbsro;NY5E5qc__S<3xx!YfETYOU2lR@b@-(2pw=Z$oujxO~Q+$QU8 z@6(0h_G#9$Wa6g3WpJsC{K;V=(;yXk)^goxo!cLubOi|%S?1nS`6dy|UbXLSN0GBo z!-gqmDTQ zGf$RnH=3=r*>dTD(wpCZ&3|lHx%q8|+p;Dzd!e5p@%**5KIJ04+mba|o^E)&k=-yV z`&RC8BcVwQE_p5rlTy?~i~L(-kNMAYC_bV4mPNsr!7zqv^L!T$j+L_6cPg4rKE3zv zzDM(W<9lTS%AtZcqj+X=Ut##_AZ(!bH-qgM!{Ww32a#fa%de&@Q#Jc#bJ*iFqJpp9 zJ-*}FqId^GhOPSqgKsG3EIcMrwC8%mfp14ex`K8rek~FE z%2dviVXAB5zOCv^){MKEs}s2XRh%*Zw_#SR*F>G^Q|p^|Cvdc`(ERHARfTbiy?Aix zebyi93GzM<<@tANZ+N&tn!A0EkUn4Y^nZ`aKjgX_8BJn1HM#rewEO%PEHkijYP$4qT04inDsUkol23k-6<)id?6@UbkLF#mi+@~z)>zvkx|9o_XR}7mLF;Mt%Oc>u1sRaM{#ctyJrti4$1@qGWuc|8>u3+HdUqpQm<# zZ19Qa%3eIe4fm`HBI*Lk2d+913rwu1R|K9+A0TRb1=ot zF4OsY{!WthERb{Oh%DPMpLb64&tvjE3=xbrjDJ{XynnesRDb4o$*OLB(?(xuAvQ52z!~v;4BtHY@q;RX+c%hnt^}!$%HgcE~)=@&%8!CtO|p;J1zGy3XgT56NU%FPjr6kn!iA#Qyv{ z6M2`<$+TW@N8@8AkE&+R%x%^D1$NPTe>Q%(Ker)MX^-1Pv44!`_i;^LcHyf0<7Em8 zY1Z>%KG{6mUiRU6D))nT?m6z0r6)0PW%CrUF5}w&TkN0SfiJJ-cT2L>@EMD4e=+x0 zZNyHITe~Yx@K*#(|CRF7WNqhCn>xeV@~HdqS=O`eO!PZ&=qBe6{|BEp9L-gUYLsq_ zPrUU)B87rh(vlWo7Z#cY$!aGtKTR`F%w^GQ?Z#EUM>e`UB$`IPKR#wYWcWti`@ zuKbm*{l_lN<9+vmsZ$CbvRC{`mH2M??6^?Odeb;hv416RRU67DX|9{K$G1|nZUW1M z{J;l756ovsIhd_Ius_*~W&Y9=w|_Pl-Z4*jdicdx**lNe?*te5HkS0NKE3=~-uy$_ 
z-+x&pml*}Prk>n*Aam)A>E5o&1(pSdDYAT*XK>ntGx!IyvHlMh-!8DL%29LKwXovZ zuQ$wpJA+N!BJG6!f&W+U>ioS{+Vj;my?3>R{;E6T)lHLL^ht_%GW;|Y+`+cFNI!b2 zecMZ(JzNo))(>hT{~4I{G)*~v@9Wk-dNN7ww+(YIIy}7oV)nlkzxGw{<@Gnw%$L*4r$(;;uI#bN$^kh1TysuKYZ9ptphPVc7?Lg>dP@DIMX^Fo|=Pf$0TCdpGjj?re?u z`+@t*mm@!#+v<|IbC%$us|xzI^Y! z76=Hn?-Eu@+$Udjb8W)b#$Gj5)>#n(1^MCn;pVbO7S-S3tFAXXZ_g9M_J@7ud7F&G zXRg#-coc1^G>O42&Hq9D$$gX7OaD;(VDE6KY{SvCpUwuE@e!YxCLiy8*z`&xUGwM? zr#n*`kA1d@^IP(x@;2K)v5a5KeVMi%Ppgk&j^T~*diLoPTiTZ8D%oX|7~1mH&#=tT z$^FOYBl=0qYtP&n`K#qx`||Fr6ux<})Z%!d#l_Maa;=k3ONY$<-}#e~yXyDOt!tZq zFOzq?oxi@d_c2F1=LHH&P4_w;_~o!! z<-(qMKA)1}=Ul%RcV-py^9P}Cjo)nGJFN2cY@5O1Q;|=F=PY_qxxeJpew7AADb|?w zmyG9@2!B5NUwX-h=RUuKKU|t^VZ43$I;}G|p1DN{dCpp;WYQM1sIPgk>B@C+RfiR= z{xn?c%Zr-6Hu13If1jq-jIGZ*4l@MD?c9qg0b01!8 zeyK4jcG{`i8fW5qX1-(O5B@Ej`r`DAxW7K~HB3M1d;SFY1xlqQeoHx1qOYU;yG9^z z5#N!T4KwWu2JwgEa$we`jMH&V%|XQJ0cHMjp{E*?Pxxj`&9Rox5(w| zlV8qz!MZQO+QG1>bjH0uIj#Ra+sfkRYv@}uzMuT_UWHi3HrWLdcC9nnX05&aJ~pB6 ztwL+*^rUN5DX#?n%zFFa$cI|K&3X305A)T3uQ3RWoYk;@@=v=;*_DdZmgE{n7pDIT z2tL3+Vd<4e!p9Ta8qN2HGd#?q2llLLxP7vQ z-?r_GtmxXbGkWW1+U+oVAaI+pYf>T0Om~*5#>2Vn>PElK(){X|u@s0JJyB;{&$n5} z;_}`S~x6l?mJ}#i=JGaSex?F==QsP*A-FfHa zH6D(J&$;aEs&$)Q8pv|Lnf>s&VS-$m)dqo`7n*0f?bB?HG*_71#I=TVb9nT!{QvtC zjkXEw{9<{=Ba-4+8e8wlo(AIdi!3kUC+9TDS&Gd*RL%zYqv6fK7WMe!s|2J|Gi9+tFGm{{9E5; zpU#mSSJOf+&FIkBVtaB=@`u$k`VO30VA#4}RPE=7 z)d#-Z5#RD4-Gil54ML`*P9u#C-mFdI_cV8#)sf zvbWptWi`Jw_ntC4>rBDtWtng{FsC9$2rk>2v9Gc`$od zkXlYhO{e9|?{il?n=HDDb?Rr94+;Ux7b>2dc}i!mZsjSPG)8fS&&)?YEM?j(dSlrt zjp-~K>@U1ERAqUt6S>#Ud_C#1z^Tr#?*Q z@}AxFM2FF((elEYg#SkC{xL5|%$MIb`67cMPu#Tm(UxjD>6`~l`)6`3yRl;e)9eQa z);*o~!X++cm4%kh+=~-0i(lPfEn2W|p3&8M$IrUsQ&~u>av?tRIjZbrPVp>!; zo3^Mjb|{_-yZSwsW%6k$hmeCI(&CBEzZxcOTK~3V&gl}frE0~`6Yf1-!?xgT$gx5* zy+drgXKf8`mB}o6lJe@U{#%v__uj7$3lC_#5a8RgtK#X~7kX~XR%9_)-fUHw>$>RF zr4%-4k&vsZr*nkaC+6$TZz|`=TJ`7u=5iGYS$ibuJ4{_p-U@+T+r3#`^z~ z6MNhg-WOftT%pUC8nLJ}HFmv(<@7h42fTO{)O{NQ)AE9ayqzVdRX7|f6IisQ<>a1s z^$#L`H=TZJkZ|UjYs91rGt0{bm1avB@yi}JP-hX1Ik!#e+J)t3`j$^!q`vM&*=2G4 
zBMMh0?c}`ByHx-D994!?qZ?8nUte1Iw%4$PohxIrY_Eo3X^J01Y)YKkoN`M~22TYh zPhNS4pKqC`d8#}(yfXGn*y%|Qh84*t_Jr&c_WtW>`I1>e;!pp<*&%+dKYFK${M+UA z>YIr2Ri|ex8>YQE-SSy0eQtxDfFZlgl)r&hw@Pm9Sv9ft(9sD!>cy;;x=Z&+6+GyS ziQw6j{n&{s$5Te^q{IS=MY)^)+=;p$>Gz#EV43-6&uH;r4X$~s+qOR4^M2-uodO1$ zhtCHe$clD+x`l1Ry2Cb0PFGk2INLO^GjnJy+#LMG$22Yar|;KbK2C-+o$R(BtUffY zTSm?OR?osp4X^f z(pw;9#_)N|Qc=I33l91p&=s}`+Y)xlhkc^A^@PetciSUoF*8(pFg!M#+sGxy@J@gF zrbQ25#_cF(el}Soi9_UB$x9!x+dMk@)f259_Ak=syPg%1_L-^J@o?3H<7Y4IyHsYd zNaQNhW#d14~e_b{FQbzdR|hM^wiCVJf9q0niKcKv_SajzKXEVAGbZc%v^D+?BTsz ziXHC$7vnhTI({J&uQ)u#ER}seGFPdGvK=upkg1i3p9;+Ml z6+9hy&SyMOHPio7J7bw+Du==3mY@yI{PnyF8)6@r>;8E@Bmb|@%qrf3C{v56Ti)+H z(HYxNnp5Ymd(4M#mCX5N>uvZRGyT|UcDZe>cpYz*P=*>IorLb4*`5t|>M*{1K5n$xMaCDw4Yo$O z81PGE|F#uz%{4!hwcq#s{L}*`w-1W;_THPhi=mJA!8wL&Pj)RfXei@<_xZsUKZ(VS z>6_Y3Whmc@HUCdOlOqy5C6&N3By7|v|{d#`iRW2XJ;pKXg<-#%ay zd3$5qu`@~~3wCjB3tbkquiGqidFKI%RIx~xEjuTOEcb6<93GKd@_(Cxhz(sT~rNIixoK>1F>T7<}~ig|Y)l#rJ<6YroPmeTQ@syAN~U zH|FmV3Fb@ZxCPAD;(s~oY1mR>XQ%AFB74-C_kZ`_e&6cvlYbS*f6kVdd3q-z*7nPEl-~rX?EHXtu>NUz1WVZ=d4KH)<1K zxV&cYiNAO?;toqm@Fa$|bjxMJ=}c*>m;2Oqn;vjH;5Xy)q-!_%c3p}o>OA)O*Q&iP ze{`;_hZ%zsnR-H$Klna~H;`?;S<{kfcBk>EY=ZXDT>IVAo{P`zdi`TEv*|;wjyVfO zE;`+N7gc|0M*vriedqhH`@4(wUBC6~7qhA9<3HB#W|(dgKgsCHpe)YjcjV05dHY)T zdndGUu0LkN-L*(~#hsXz?cXRS^39 zb^UC&e_NCld>bZmH*7bY%(eaKncx#wR5%i+waGQU2kA+vTZSGE58&ENTjK8ydk5#=(8p(tJU1IMz%`QkqE z4r;vz7S5=Aq;2^85NmTKt)>k_wZdiUM? zk#0J1_LL$u@ywh5XIwiK==FAGsm3Mw*HwAz_v(F*tNl8w>9``N!grHp4)#qilV|dV zr2f1el~C`~d*FD9*r^E*1!OCKlvb>q7rWq>T0y#4^~3f?KBuP{b1WAIELp9#DdnHt z?>_|#6rT3-5v z;s0hXd4``eZcVm-`Rss9ytz)$!e8IF%i7N^zyCY$ZyC&n_-htCttG@_$|9`}U;%(h%MiH|thC{@8Y`*1Bn( zcI4%_8P}(=1W4RH5PYEK%*W2jC;#1ZH#pn&*iifAEd$w}NgQTHKg*{J`)_@)gSXw` zht%3Rw}N#37;Jdv&>P;hsw3*hi*J|f9_%;y|D3fl*OQ@Dn8CYYf8xHIyo!dUt!T6st4Hb?9}{EMF#WvQNxH_r3hi^RsP? 
z|IgjCf166fp>F2?406Xro@LHBcOWOL;jSV3ls8Q?J$Hz0{_p7e>)%p)#bXaMWbJ}i zo>=O&a*s?=w1-z?o0j{|WYL57lWH{gB^>1c{BPGJhBr4E{>U0Pt`NI1Pxrl9OmAdE z3DbuBU+J$6elH7|V*72Uc>BJ>Q!D>+THK!cY5V_UQ-T&;aVhyww!kT0*2ewgA^Ul! zUT06Md8Sh+mnQw`bHinWtxaornfsXfnEr`Z2>g)Spe$Mx*BHm=xt`B{g{4<^y5CNb zfBozIS8GlAoEpA#VGEF9TgG<3KT`cdOJvZVrA}{`{eLFlvo_)Rm(N>* zj}>!T?O{nY4_(|D!|CVQ^OL_dXo0Ejfz7jasd9_``M+o1yekY{-sT17X_qH{Ht=m( zE1K4Nv;LjTnLHQ%Bs1lk-EW-xR8RApL@h5nD)T64S;*XL7au=6%e~rLuf}@+`jp>6 zN5xDN(m3aJ-;7SsXrH(xxlLC7z?{MjGlbSl&Z+x#d&`zHYBRsAWBA_wwB~o#+x?yI zEM+3B-?tqTw2wCnV%Eruow1qSk8vNzonsHoC+8gcBKn7^f@d@D)$5J6niqq64?V29 zb$j}CHb(ZTyQ|1xv-%DX43 z$;-CSOx?k_IpSsJgUT~v>1(GP_|ue8qT@K>mY-(&wzm zarcy<#M>XAL<^|=yLJEbo9Qy!?SAgxf4lsN|BS{f3|_$vkp?Bg1=a@a#>pQz&a63j zP^?et%$CPD4@@`o?dxx7ozd%m-|v65^fYt*nUY~ z)_q-Y{rT%s6~-z1?9QYtdfxE5f!W|Uk4(qS{)7$Ad$}ZfUn<`a{*iuW&0*66)iY8) z@RgfXbOK4-XqW zem?o++m~AymziDQUBiA$*>8oeQ16<)qrap7HkkQ5PZqz>?5V_g#_+Y~85TAE%a=tH z-g&d`SU5qtYzm)huw%-Z8FOd#Tff^lJ1blLP1bK;U6!?5+2`=Q(*OMI>GBm`ybJy; zHMoC4K2c4w_xKs9KRjvXi{>_@xtVESIk+Z$#nGG^C#?@2-|Dmch3ETo)d)>yF!N)w zTlRrpg@L`GHom>Kgrg(x#)cJTPfzVPKQpUTZI{uv5I)CMVJlCZn6LSpyMyZy^Xg0e z?uK(4ZYNKZec$`iXHMfxXM;u6%FaJa47c_vUbVTT{;KBBn!S6qb@!IGE;T&dn0UQ7 z+0&Cj>AT4?=8tc`JofXMU2lEn(V|$x&P26m(X}G328B9u`d!YYpYs+RlFw+WDXa8g z5@0r2R%U?=Oo_Eibe~~P~(iz8kbbOAfbew!ypU-xTp>)Qpx)vF=j(hv~p8oc9 zTG+Ltbn=4Tc6WJBt?jB1clA>>wEZP*P|U<;G2`v|Y{Sh$#}l&nng8b;(#gy6_`k2s zU&uwGSU>NM;7UVfW>4{{K?2-UZX4&_nPa=jk@Wk5A6$>UG<1n)c_)zIFRnO|(xGo5bL9GN6i?Z*R9^M4I)qn#;2s zCeFFPP3+jUKbHCHl{p=z^z3?NcFd)2k6gvhIq6KZcpbl3xmT{=;n(vz@5l5#d#9-| z1RB+S+H6_!QkrS)-{TI`f9~Ws_$MRfZ2QCuv-a%kQeg-TlDXQu_|bW7(LM3&wlcKx ziP&7YYHw_)HS2!&6cz<9uLtEqnVAu--`<6LO@F#bq4#^^rQCkEJEk!|A7@t9>UuIP zT5{(1)s#KkL>>lw(!20;t$g%?t!f;9s_TB8<~+U2dj5&uuRWOrRMqeO=bHPrd|#*E z-RQcl4D~DDAAh&w$057Cy^qiA**{B#A+Sj4YVk_h+P?R@{`p3|PJOz`al$nzkH6oZ z%|5nLbJ!w-`-rWdmglOQTxI<59jnIgjQYZ4_i_<&1=uc)vE$@empD{7kTq_DVu`V zwa5MQJEupTzWctf_FCbj@T|4JbB>#Ycx4`_S9ntvFW!01?{2~4Gfn&UsW1dy%G*&W zr0(;2$@=w=!#=F{f4?MJ=*)}O?Fv%w{`Dc>#(kUEwlZp 
z)>9Y#FfY;C&tchrr@cRZuG=kJY|A#iN9U}|wJLZfF}U2cc)ur%$^KqI?A;&E$Jg%V z==%D{!B>D~efHbfCl(LI8K0fqJbC>RPK8s)^6QNIcKz~jXzxJCWb(25cy;o^&a`%a$>imud#IA z)Bn$|M=f5!|Mc$r-<|VW=Bn=zTeVkV#`o93Ir~lJC$lJ=y0`QBx<$*k`{r8Lo%*sw z{8v}d#Neq#>dzMIEiheRlJV8!=E{XcK@#{SB?Yg{1Zcgd=Tsill<5%bQ36|6}Ks*8daY`y07^6HBRp7tN!bNyMjoY{2U^S$Qh zAKets-(34s(vwL*^!bs+{>gVYSzmp}z9!?+s!Vwf`DYs6Skq^@^)kjx_2Woer6>7s zzvfS`i|hxrTtBd>%y;>29~yl1t7CpmPS!o%-fjA&t3i$_`}Okugilkl@~-fnnrjgG z`T|cuTE?NtQ;XCzmcH8J=eFfq-{T#c97nfYd%g8rxJFX9tooOItF7#LN`CJ;$Gdw; z-rmI$ZqjV)Tbx7}8SMLUR6Jtmr}yX9?T!;$wf5T`m;0v|M%WY_Gj-h?mtVhsadP3S zKI=5rKH-aw1}irEwvfE65u_vV<3E1YZ>IfMvd(mX7Lr+;?+SjaXuU}bNIe)XVI{swmO#haZHTOq$ z&a8jg*?oVqvdX??Wrcpt&M|zElP`F^sGRX?QN{2Mpi$oq%MkdOlEQE|w~*~)W|qC@5XP^F+oipiMc8=$ltSrI*|Nk=_ z2@W2XVKwTh;S&NMvvV0<<`r^$%FOimm7TrfZ+7-;Txp#c@qgLb-~MH1AN`e;mGLPf zL-cEIKEt1^tl<-2qdplHA@I1Qg5h{$H{-YLY@r z!)~XfaeU0m9v0y?>WQHr0^hT<8D8ZTa{tQC$^Dm|{fLA#jk6tu%Zz{7*Fz$q)ebZNFvbFnr0$mH3^VGy6Y*wmq&iO^*1#?Cj&eGBYjz zcXTk^i;W$U;Wz5&p&J6v5)&ETWn?nENlTaiot?eue|Gjia?&v+dj4f)-TsxC>HfdI zp5bF^>d+0$QI`*?5HJ=IVfdPv#qc>ROZscZ=Fr|9`WoO_2o|WbNKQfSETl>@@ z6^^5h9hxEVF(;SdU2Z=A-|XzAl%`*5=>MCYbM0GBuHpZj9EK}3EkiR(T3v;R;d{Zhk>KiSzk-)ClveHtAX8WQPfC_2(7Fq+|ib{50;tSq;G z+1YQXk#4DI#=jrg*)5y03YqR^=MBZ+9d+{{4}njaSq$&9a)f?oW$&Y6x+QrG6qnuq zva?@)%gVL|jZ5_dSOl+|7B-?{F0gJ{68~;f#Jyi!5yfht{V~| z&}L!50BTeG%+3k_m!16+U-}yia=){)XWuI-XZn<1G$g`z)RBWb1YV@3G2BZ^VfmAl zy=*X~Z$fVPmz{m@OK!f>x7^Ww@Ze5aL&J67va%VzW@jsc(iNe!H%R3F{L0RW0?h{u zjSwDnJL4&YJO(JKU)7#JAX85kJ&7#J8t85kJk7#JAT7#J9I z85kH$7#J8V85kIB7#J9Aq1cjvfzg(U|{$Hr9u4nj0{Y#85kI!RvH_BgQcyZK>y3me*Y^w+vIol z=seh9NSHKm0|&GW2bI^L$(~*Y28Im`3=HQO7#N;H)9@b#1_sa|IK{Zq)b!u~oE)mn z)zTpkegBu8?E)INr9s$_8cE*}V1%|?bQl;Iq8S(%x)~T4c0$W=P?`OYTIn2@Ig{<} z|Nqa;9Wv?vUv~DdZ+ZD%|FTEh|MX3&RI?4#R##zQUcB?|-B1AF3rt zIvB*mz`$S(ZL@D-U|@K}z`*beSDB1Q{4cZ}|D1t=;R+)o^FcON_En5bOjAJDoii{n zfF_3G85kIX7#J8l7#J8F7#J9=7#JAL85kH$85kJMKzwFq7Ld4yJTGs;{kXXML#_Su zH!I6|H2u>d)ly~!7c|XPGcYjhXJBA>%fP?@Dw~OIvwvq`V0Z%U*Di*p?-&LK20I1@ 
z26fO_4-+FZm#UZ)_<*&UB_$-e_HJ@2!~K+0u0L5>gL2I$u71S7?Cf{nGBb^Sj-Hi5 zQhd`%F(@s9#vVa~6QFiHD9sa--a+O1I|c@ZvkVLjptN4hz`)=N9q$2+Yl6l&sp^Hx zk&z5*JUp0wXJ#(JmDUEM_`mGzyWg^N6zU3 z(=)MoYgCMWApjb?k!E0ENQbs>Kz&MlX&%(}dCkDUu$O^>0n{e4XJB9uV_;yQOM1r^ zETA#OpBWiG^gQ1LSGp!v{7-iF#Q#l|4Cmt#v3Y4!jP@Y_O0S?cP8|aS!&L&~Hn23m zje&t7pMinFfPsO5oAyp5&W11P84RD()8+qVXWu3^{SC0XU%zs4LjRAp{fUcU>Z$^z zdo2bAh7JY>hWiBivY@))7y|=C6$1l<5d#AQCv}}d9n)SGl`>o|t6~0=le1ue(=}1^ z4}Y_>Z+_0qlKYl9dQK>HQUFn=fZ92r^1dC~w#U`R0hRTSq3wB4UqFB;(+615pX_Xg z-`P2!HK5-Idmi|mojoNnEP?T2>S)`aPRRz;z6Z6{@8Bu#LF4%685kHU85kHe7#J8p zV?cCr9_7}2%*bH)kdYzyH#_^#U{3#_v(ta%@9<_4glS+@ii;U6x3E* zn^#WAJEQXCg@7yr149=B0|RJ`5?i_lwR1q_eFSvP8hN%1LR~ICv=3_kTwq{e0L@W>=BH@j zgn>8mRaOqejO;STpV>J@gQ*?#H#>X%hm1@?&>39=A4erO1Q`m8n3^_ZYC9vOq$KNlGo7$OJjoE6j$G^36*^)md< z$YA)IlPmEzD+{zXj_zkc|I5z4_#-FR`hQL~!>e4-v^mYZJJJjWjnA)TU|{%-WsN;( zj|k|Z1%;6oDA@e_JtKqRXGVtL@9gZJf7#hzXkPY%_F(4Z{QZ-awd-3(hTi|wWQOMj zq^;@3=8sV^a0r0bE`a7_LF+A%%X=96G6Mrc2m=EHs4ooGFiJ2m+{(^lxR8;<{v#_p z7PR(&>UjW^=5urZf6vJHe=0Qe|2$W>!rO!u1Fr;dMmYR&l0u@34wBHc49v`&F z5SHFy^iSyd37~Tos9@FbGh$hKJ_AF@OolJnIqE;MvZwx~;{5>65)%H;c69vjqoD9# zgoEQhBO@c|{0hn87m5SsBSGk%@^4t$J1-a*7z!8|7(jay2h4#(#p2r;*$k(W(^)@c zWn2Er&YAu%JNv=^?Cif}*9kc}Z~JX+uQD+*g7zxI%A+5Q3=DolB}}N}PO#S*85sUE zGcf#TWnlQv!ocvK330p!*|vbjESKR}pA0%n1hl^j)Gi|1f|024M_xX|kNg7W@40!Z zKeMvZe`RN{`kS3|19~quuKe)tUv~EQe_2^ier0Cv1)n*Wn`>?)A#2LS$O<~s1)K;# zWfUU=L;vs%SI}`x&lnjP{_ry}{5ND^_#ef<@V}mc;s0y~hX1P>82)c&VEDh0f#Lr$ z28MqV7#M#RGBE!1WMKHE#K7=}je!AHj1vqY(Eiw6pzuV)pt5E?^n5gehKwq}9RmML zOBp_-XD~d;D`b6{n=kb(E8FUKR#w#a?407i+1btiv$MPYWoNg2$;_<$k(rhJB|Y8s zb6T3}?YKCu0AouAUUm)!7G@R>1_nmZS`&EsXJBAB$-uw>s<&}lKBQzoUIQ&1_{+t> z@YkAw;cq(w!~f$94FBISF#P|+!0`V+0sMo3;r{~$hW{HF82+a-F#J$rVE7HKAFu`l zXubUjv~&-01ZY2bAM_kGtQtoJ2P6bQ3wl8D1H+&~(qcedLRCv)-uchP!0_LXf#Lrq z28RD12&8pFdEqYu!@s)>41c>B82)H8F#KYK93ujEIOwcP(D*&va%esP?JX~4U|;~9 ztp-;-$`}+O06Lo#H1>=fXSsvIWn?=7>=jU(-VEBV`AT8B$CfW1FfjhEVqp3t#=w*Y zaw+IskZaKN4t4;P{gi=$5j1%Yn)3yz8^wbw1VCr@g4S0d#~EnueTc>v*c;po4FB^P 
z82&$@LApoI3!wJX9tMWQ302+f>04=M) ziHCuK0W?noIvW72U}z8^r-Pc@@w`2Ul}X&;&oe*R}*h{racdyRpC9kiBz zXawKLb|q*-BPapE@COD4h9T4kfI0iWECa*;^$ZOE|IsJy!}`v*|1&VS!W{uEKR|tf zk)5iBj_<9}*5HEHVWkWm=Yx{oUkT_K4YXW_rA>O#_y036c>QN!6nezKC~V2V2s&bR z=!D=1cPVJj_c~e|2y`~^5C|4fR{g`n!0>M_ean1o`Reh128PRT85sWsfalM|K-+hR zKoE{P5wz}VBU<_gof8H+Lv`?m4)Zex2A;Ky3=IED7#RNl8u;`N%M*XEGcf+pWncvL zM+R^3jk*ja1lrKjKWOX%v?mos(ct0yXJB9i&1?O(VPN?8Z1ALgSRVMlihIyU=%Ffjax z2e*U3TiORUv_@?w7y_X29?%^fptKLdpacfW69hHVUjgWfHINhkgflSw|2_!IerSI9 zdy;|qtvCY{=qe%lhtjBxB!vKIFE*&n14>vh+(42EbW;q_|n!A5CkbVGV z5j3is<{LNfWssdM30gw`Ta~DZb)A117#M$o%F|CnC;k85%E0i49dgAe)dOVIK zlR-`bnU3}ounL^^LHpEK4yp7H zUYqe3v6li?vkG)i<{oId56c4! z85kG_$>vp1clOO$lf#md0Iuo*4=uO2*i3WBpF=v;@z zLoEIOU|{&|F`EAAn3Bn}3Uubf8npBe+7|=L7vz~pRbAlp4{~115KI4{v$pg{(?3-c z11$^#tpQpNN?=GBbUqwte;6(7Bg0H^9sso^{tlr$aEXE8zu0K{CnLJ)s|D1~2d()8 zCo*Ur*g3e)Edx6ZRF=LPLg{}w1H)ft1_sc&J^DuIsBI*Lzz|6P;65-f1H-?ALn!@c zLdt&7F=Hgf$EafZh5)Gj13LQ+T*yMp|LqJ644?zd=<5=kwt0^?z4E(>*AWfiP%1Xb`pkK>-D7NBjYuU;bhUt}rn$yg_mGC}&WI0Jindp!;D4r=wZ!L#wLaKG6G& zVC#(sn%>U9@E^2Cd2oi?sH<>=fHngI!z*Z_0u`_zJc#zcU~@L;e8JD4`?v}i82*17 z1ogm028MsSqwOASaW&}0K=(C#K}!Fi12%mJy{j2O+4MhXUrZwd!++3ybpuuY-(+C; zXA2qc0R`jW54lknqJ%&=0|Nu{8K9u^5Y16k3|bB-5B%X|VE7L@YvjuS)B~V3KDMLf z{-8}D#5k-Jsq6i)q|A# zqy62%krv2u#dNgve|G5I1q>R#nhLt(+M0pk|853`|G#Nh7yNs}!0@}Bf#H`JP$62!Q6kKx;igkqE<}yUGX2+J9042;_XwIdOl5q4z*uXJGhG&Gpis85sU; zU|{;{$iVQ41#&MuDefCpI|xHSih+UQ23q;wJqX=Nf|Ec;8GU4AVEm=P!1x<L>O0lj=^XJiB| z860LoG%~#dy6+L3IG}Byn+yyL(jzk{u=-k*fq~&9G;Cnu1iCLo4Xf%=!J!lapmTrF z@4f)7gC1n-{s%lr^U&r;K>1*h-jOrlVKtN-pv%C(fPU_0?NIUno!kn#5AzaQ9dMh0 zfnl@`pi>0U$eJXyw*NN<1_pN;*)`OS1)YZqT6hX7`(e0csC#MzyC1Z-3v|XCEG|Jb zXdHC3?T1A`;|oG@w=6+!@1 z#?MEK$Hxo|4C+)cd$=2s$beY%11mc=F)%Rj4R?X z)GERu07~Pa`7KcVz%ZyCGz89hAndsTQ~^3W1~hLD3sVpcnpY_veCJ0FU?>f|)vj39 zv4hSy2dx1ec-IYW7l6jjKy80e+J|A#xqnxlcm4TuJn{XxUVhXd;o z(0Xz7eH5TD25rjM99XA~+Rnhh5XZp4@B^)%2Feehd@&k8)CmF5K5F!FWl$d2!oa{F zP8};oO(7!$MZ(Jahy84McR0hRTjG!Mgf7#J9IaA_D7r#J*a 
zYYVHetdRrtw?Jb;pfZVKOGXVK7y^n63=C(`%6`x~AJAS_f|^GaP!s~(3=9m@(AHRi z${*0Vs#y#S44^zhkp)9e^O`FR3~R11F&z0M#BlVp0mJdHfefdASvLF^1z`0~k*Kn#OSM?-_=3|K2d1 z`}dpS{Qv)0@wtD$7|#8D&2aY5X@*ljr!pM>>d&zMwFtw(Hw+ACKt(v-$N*jFQh+|* z!_3I?fSHNigqaaE4Te|$s3i450JNrX2WmM4KC~T_2S96Zhvy+$^N%nvL=`YHocOND zaO!6j!`Z**8P5ItfiUm!!`>G{ z49C9~Fr52)gW>%De`Ke7B8T1YT(7w8ZSn2`L+Bwi!t3$s{df*KML(_Z)hGSpM8P5FKfwz4FPRlsj zKE%oWVmR|>6T|VZh7AAzGcd4$ixALy+~Zin7*zI*p6NjE6pv{iD1P>0i62mdcJYwj ze{uK&1H-;oEDWc9#xR`w`;dC&J%K!O_U|2rlRttO#bgy37@5$|Fb9?Upz|F-bpmE= zjEW3I2!O`qK=Z+%IWbWDAY)MdU^)^8lz{4BC$e$|p3joGONb#vws%@6*2$7|#9sI3Q^s zl>R~Z;{X2)M?U;w@QHiI$iVQ9iIJs{fq`Lko{=gMO*2D4owi&o_s)Xy1E?GWolER8 zDCY?}S1~YL{m;N~{HqPaxqpubH0^`ZKL~^Jz=7AV8A38j*%=sE*%(3705prcQG=-y z0-*Z^JQ)}m(9hNdwN+zE=zkJD-R$oc^_UP^5iO`Uhc9 zTj>1X4Gf3g3osl7Rio7K?5G)Z2m#ReC}^%2v`!9`?vXL5E;z-&z!1j3zyO-pr-O6w zSg{&(-Rpk_hEqQ)7|#FuJE+n>C=ZuW{}~u|Kf@Q^qjG~Y1cVqE7(jQDgYF?mPyaAJXnY$quWw1WeFewAg8IC24Cnq` zAKdBx|9^&af6p--`6NEt4;Y*YoCsHe1~3d57#KizG=T0Tgr$3Q8dS-H&UEsl>7C^% zb&xTi)4$>v&j0^2_|yNtpA5&o1TvflO*Ik`+M`MaYY2eGNkM1!?M7eIi`Gv7-Q9DH zfq?EJVHmV&my4A$r!br+E#09qp# z!@$4*nil|_H;rBo!1$m757ai=$jro&$;`|QI-iL0wU40rt~0--hD6&3mj2KFJ;!kH zoiM|}cSHv9sInmt0-(MD=)9qQIOdOG`QSez10!hc6f~X$+JgYvlj#hdOCWvU0jTeL z>W2-(`Tw7XO!*JmXLItqA;T$9aWMp9cGQWagaBx~6x8Miwf#Z!Wbky4)-FO82aTbE z&Z+|qjMp(RFa$#HFB5~dmq2|E>~6RK8uR~^K;8BH(D6!Gc};!#H^Zr)!3?0hi``eF zqC-CfKpXmu85kIv7#JANGB7ZJ_7oted3bh5fX?g%)e)dQv!Hv8GZ+{cK=)&T z&d34HH&`-(&P+J=rFqDd{jj`r>PG>?t^W)RexPf6hJIv^`T%DLfXZ&rUG<>-f1thh zptIT0&pgAI7trNE>r+AcLFYPtVP;@B$GG+V9Fo5<9 zg2s$MXHfBIJ|!~OpZ44t51f{_rtqkbea1VCqUfzHAQ zwVOceQ9*k#LHDJA)@S=NGcdR@Zn`~j$hH4Zd@o|S`=5a!6x5|BGh{}!jfTLL{|pQ# zzb6l|@t;2oXa9yWfW~e|V{jBu5(1}xf!4k`Fr54MWys`#bAR769QkOZg9 z`akn~E<@D}CI-;iyA*kTRP%@rf!uZmhMWHx7*2dk9i00f(dOhp=j)#M7CE~9dBmq& z>IDF3Px;}GN(`WL*9LrE3oX5)yXVZG%M5#8NHOdKP5V(V2u4jB!6C5tBm;w_A|u0@ z-@Svr4Rq#rBX|$|GSHH!5gccuUZ-vdfc62L`k_B)&iz3z_d)z~|L!mx|Ek7t0<;v6 zy51i(ZG?xw?&k~);kk?qCw~mqwV;0)PW>zcm;Jj&_r8zt)JhxggU&!X_*RU`HhI9X34VpY~_U~hc<6mvT_hPgz 
z8sU*N>V2Aq!1hNB3{$p&?l{V2IQQ?{faig8|2{Ds`;rXy^XPa#I2c9=8ixRAUUl1j zc80Tm8yU|3C;xsgP@k0&eD3czhT~r=81_7889ncZ#&JGsJZcCW|H8m<_yZ@ynLmvK zR1bV+IPtB5Vbw);hE2Co-8RY_4FP(D!1jj>44ZDVGo1VZY74!jTU+St-)9Ubf21%R z|H?W#)=7_u9<>8!2!Q$n>#s909RKRdaOO8?T!@xuLxb*tJjihLvmHatEJlXq=Wse} zRAMv)=oSLa3mF*R{%2s=`%;SGtDrhvDp>I~1N7`iJ4%zncuFezr60f33wZeK#Y+D$tc7 zM7d^E(P#(^Ob9GI#=tOfGZVw1_lgXseOe*lSX4Dkj?ssYCrvN;fA{)5Gl;_(jy zBoTnq1Csa$h%x`c;t(a^MDzgaW^kAyi8ny?g2j4E%qfz6Cj|ff34iz`*zesstqd|9>NdYG7a}fI9~y1Q7@ML;)$J{{L?PNkV)M zcP2;(A`T8Il%nVWND|~=2BZ-A|NlQk927%Pmx0y6#6dBQCJr_goMNCVA=CqeIG6wHAr6WORC6FD9jZ8( zhnY^m;R(u^XjLmj9F#s$(=$RGHQhqQLFpVN+d<+BoG(za9aJ0?0Vu%<6$b?(7Qrky0MicfAWRsVuR*0AsyiY1 z{r~^}KT!PzDL3$l|3M8ONVxrlnvq5cmwoNO$50C>0NgSNAL1ut366XVQz#)nMKn@6Su>{ii|NkF^7yti1fN;b=WDg@b z`X91~kv$2kUqIC@iW;by4;UCx&4hZafq|h8*$oe%9$;W#ut6680Pz4QZA(Cy5D!2r zDo|cPDeIsW8+vUGs|Z1P4Yf>$RGeUOsFNWUG=Loh&dadW43>aY%;35PMf?Le5 zG0cZHx~L!yaS=4Bf;|f%@Qb6wFGK^_ow&qN(ho#EX8M7MgVP~|i9+HM2gMmUL87RJ zgeY?Mf=Pqo5;=Rp#6bxHGx+~wsZ*dj8qlglsPO*>=zSNc1gPAAGEu3|sN7NB0EK`C z0|Ns%RWdR#Ffu5B6Euj$kix(K#!!(_YBU5!Ltqq)hQMeDjE2By2#kinXb6mkz-S1J zhQMeDjE2By2#kinXb6mkz-S1JhQMeDjD`U9Lf}?R48!m29EQ&s8LVHkvgLkeW!eAD z%8LG#ot^z7D?8_Jc6P$gtSq;0**U83a`QR=NBJ{c%rBvy14d06kPx_%n9T71%SVO} zS=r)0v$A9VW@oSdmz{n0Uv~D_|Jm99{%2?ZhvI+#va`Sc%g%oCH#>Xh@9gY?pP5<8 z57ILj-(+SDh}%akB|ilIXJ<3~%+6u>lAX)*J3Bk+Z+7;Xf7#i;p>Yk1b9^)?J>311 zon7-KGfU!sejdZ)#6J19JRmM*(?8NXa6K2-qF*<-@n<} zM}KE$+dT*mW%!n!PB(vyT10vXyiG}A_+MDW@I5oj{BL&lSqkDEB^~_B&VKwSD=X|p zQWE3$j11BpHmZZ>A+V#mmEnI@Cd03+Y@@%~*_Wvm_b_w*WoN(qm7Np(KP8sobyhab z{WEF-X(8|}JDcH4R<`ut?CgWEIHNZGFFX6*kL+x-|GBvglcPu51Ej?e?Nq-`&tSNh zn#TMmE4!E4agJ`@pX}^yU$S!qzGdgo&O@UnlN17fvvU}JWoO&|%g%m-9$z%z|NfPg zmHxk=fZ;)097*mRRZRU5c%Pfc@HDTGX_xN;?o{{@?8E6Q8oOL_TGWjtx*h zMoBaAS5`K|&#Wx1f7#hjVevx;`p55_oB+@`!=>6*(i}Xhnrb1i(l3DFe@+g=@2u=J zwDCNw`IegL;!k$=)c@_V4EHle`vO#pP!bJ#m6gSi85GX&Co5|XHRF~Tv;JjgAOD<{ z&G&hjm zU?^r_V9eoX<4C_05pnr{P7clUJaW1KjR}3v%2xg{8vl4B6o(`u0|Ns$0|SFH0|SFI 
z0|P@Q0|P@J0|Uc)1_p+c3=9l+85kH|GcYiGVPIhR#lXM-+EoVHG>3s185#f2cXs*r zKR1`I@&7+N`~BbSY~z2~qvb!2I7S!dWnf^?V_;wab(*FyFfbfsU|@L6z`*d0fq?-d zUWpZBV`BQh#mDDAz2pC1cJ}9=xp`KfR_K2Qrz7A zw_;*m)4TltpPl{jPj)tFo&n<9Q4$sc!VC-yo(v2Opgew=fq~&WmiPvx0ni5YD+~+_ z8yFZE+8G!aKm)1{3=9k!3=E87+?>4JY}`Dc_5jRW4EkMKI>Y<4bl$&NS;vvVIN5Y~%^BLd*7A1_p-HILh=-3=9nCq4_zLfq}t{fq_Acfq?;}m#h$b zk(kJEH#U~}Z&ua{dc--h9e=a4FMiI-78&8~e^49Fje&t-5d#Cm3$%O=D$n0AFfi<4 zU|^^M9a_P_zyK;6K=lGy@r|x&vYj2n|NMMV|E~-=Zs@`Oo1MM%Mn)F%lMGPP72RdS zo)2ov1u-x%Y-M0z_>30spf>vv1_p*o1_lOW1_lN$T874-9MIT*j^)4X?DzDDcc>kI ze`jZ>{Lji_sG0)`0$TcTP?^mM?T>FqE8{_V9n_Ye&A`9_Di8SS}-b5S=l^)v$NOJI{raxi@s%LDUE<}KXwKNhA?QG9aO%< z@;vC^Bv3nFe!wCe&EhAeRSbV1Yko~YYjkLs20s1H%8K}3kk4>DAsNjT!-@-v+v#X! zJ*dAvlYxOj8*~ioVDs(M%p8XQ83_!(v$MnhXJ@~qK^pk^H!HjDQ+fu=kKwx~0Ms{g zXJBABgI3mq#_qfr7#P?FTf8H?=WA9r!>jCEra#%)N&mC6UsEv+eE*xB)BZIlm+Qyy z9q;9V*5{yk7g$*j>c2Nb$8nK^deE|eW@a+{OiN|@m6h%PH#_?p*=Zmr=l|dA?EjzA z(%${d%q;kpmBsNRbJ+Ivr5P9)<}olZ{6xz4rx+L*KxF~l+w_zst{2(44FB_U8NO#{ zYy8g6oN0Hp;` z8(>g6m{>=>$jD-Nn4Zb@Ej!2VZ+6ZMP`~{jp8Y`ova|pE&(41OGAU`jBA>t=2FSeR zH%0~qw*l?oV0ajG=IA$O28Mr<3=IEm85sV@Gcf!wWnlPU%fRrzjDg{AA_Li5#w37}yY5C-Ml|1}H@|KAW9-?-AoTLy+n%b@FS zz=?{1f$K7^GBP{{XF7 zqp=r2dGQx71H=FAG>&^{o$>bo1JfrV1}4zbTN-<9=ot?h`&8hQa_#Xn{ShX0G` zllS52Y8?Z^Z#D)7P!gx1_lBm?ptZ`NhymegwDkh0um4w$f#LrhI>-NG28MrXkZo49 z_2CdS9kiDLRxod*WW73Jzk}L+e?aZN?{tp;Ukr?Y1HkP-!j2qiDohv{7~VtM0LK^@ z7(i>EX%q&aBIbW4o#P(ThG6^+nm>Xr<)o4KhNiKg@v*1S_y?_d1npU&zE4;{%jUuH z-!LHY&+xwolK&r3-)}?PL`en)22kH06gdwW7#I|3;vFIAG%Vxajsc4QsgU>wZPTKO z4~MFuf(#4{p!qRS{J(^*bEm#vm_dCnkQM)%1|yaSH^+yRMyhJTgd_-FT{o%e>O$)G(?`=RjgZ4UF)6OHH{{Mev28RC+=^Xzr7#RL*kH$aNm_?3%P$S(2 zt17Apg2(>Z7#RMqp>zC$<{CIh;~#4zg3AAW(EJZ7|1GhqqKY6m|AUS@ie_N=|BFs( z;4cHi|1?P4gX$lucw{&k0$K|T+RqLO98miYw5Oeh{sSGC@?DsL;olKD#s3)whX0^- z#5DBNP&FE~9+DB%|2qrK|DZjvv|R_t0vclY&%nU=H-wCJo{+IT2Kc-VA^vv;hJW#p zc^^>n9;)#~D|dtAA9?%-G&n|^@gHO_F#cj-VEoR;!0>M}jmvD5AaqC{94_!G8)24F9)Kc?~Ei?$sb|KF}5i>brgznHazTSs)3DkTvu@;{;?S 
zXidf+2?mD0(-|24Q@SST2Lr>u`3wwyWg+<hOwqX#IH+I$nt|cnNd|^#TNoI0r!g=v3qp?9M)KV# z6JH2`&aQ)v{e#Xa2Ax%nPdy#vK*l(r6h)x($w7TSP@sd(J)(Wzk0j?2r5JR62gv;( zyajshI#C)&6@l*_fUW%l%>jYNf`?kLXfiM`fYL4~{z2uy=manEG1vy}_kqF=#3o+n z;86)0#Ogxw2k5|38m?g+JRYaeC7>`m1PwP(CmFOCkU}E|Ogrdo&ihcmfF>BC2FxWx z$70Yrk9W{8y8*qEX6U#c)1{42zktfJ6ATOt5|~OxMUXa}`=Q&-1 z`U%v5s7CVZC=)dVKl1B zg%jw`3eY_^L((H?jt1SWwh3AWfcys9&qTMg+R@xHSh=9}UZ8u2K;Z+z4TII8WV#D< zjyvp31JHsq&>7yKJ_4D>4LfZvNb`N5G7@w?;&AdLD6T>Km_ZXup!5wo+iDPuYYvJa z2958mMk*gccUTSeb|1EY1)XaFJKGAB20(2cC2R^um>B3>JkS^!C`>?QB#qbl4QS|s z?qUS>Ye9Ym;gt*w44}1c1L~*&v>ddr6LiiwC@er2G}k*E^FDGA2Qx4*z{WU0o6mb0 z7#Kk9A7nKnfeoq?LH&JD7=Yp)bf*ny-PdsUJ1BjF?ox*J2|;}x(7j!tvk!-k-$Bb= z7aV0|IQp5HVe5S+2336qhQJh3-0#G|zyR9Y35t7ASfrDpd5Eh8&2dj+U|@iyeNg=e zTJthE4+C9zl!4*Te+GsFZ@3vw{xD)V{X2=_%xJR52#V(gP(S}?WMI6-!o;e_0y+nm3p7*$S2q$EpfN7c z`bJP02*RLp0Cdk2sC*b${~!O#z;N&#JHyEz5e#Slo@O}z{}0yq#+430;^+Q8VmR}= zmf^%VQHFZZmD1q5Y?_ePc7oRUMh>h;2i$hhUI$S52bv!Mr2$Y~2?#CK7KGk+#Cod5TgS^mQR{|pD- zK4vhq1?}Yn#r;1921d~Q9;hzB<(iQu4jLZ<^?yNY1wdg0!dDp>7(nwP^ywR%`pLj> z@`n(^**_~O%=4IO0+a^UUHiqLW%7ZMfnhZR0~09CGq5m?CIO^`4jKo@Md}xU(g3KA z1dS2MAgQB1bJsHlhE*4t8P5D}V>ti+FV*88R32RX|DR#=?dyyQWx5P`9n|;3@GucH zJ^;GU8MglwlmP=pRF{Fdr8id^NF%ncCfYuFy4ypr{0iZMh!l3cvg$xV~pmYFIL%xsC|7TzT z;WK|c8P5OzLYMdlrGYblTESc9*Mp`*$anq-(Fs~d2C6VY11X^Rhhb2AdOh^cXwX;! 
zXiS~7AldeifuVCHBg5IhGwBxhp!h%g_X5MA4`K|5K}YX*~gcukYu#dBx{R4`BbB6Q( zKhQP)L1n<1-$e{p{xdK%FD5S}Mz$_cc>p>Wr;34r0d$cJs4)d93y?8rJPA}*fYJkK zj1g3afcDdZ_Sb{j4#tcO3|frqZ=^6>`2U;k@qhNuRB#($#-5QKla%`&l)pjuM)yO{ z%>|vAh@1}4*`Puf)RzE_#XM&!>fOz7iHz}mP#&fPpZ&X%Var`6hP|L^V9Enw)C2}l zoex?^54vNxgn@x!6$1mqWd;TY&_TzbI7d$hApUR0?3POd694D_uV*;>n}y*l$Z4ao zN3{@O1SJEoX`nhEbjhS4bR4;ufq`Kn0|NtS-PukC1_sdCI!74uyLK^L9Ekk?XDP$f z9ZU?HM$f+n#}@qv&=@1=UPW#u1_n;X-OoZ8&j0^K|N8%LFT<1np#CTQd_HOuY6yVm ze7@^3ocs5h?(u)>XBxxB{|pS*K-CbcvqpJz3IWjg%i$0F3}^oyqig(w<{3_WH)8;e zKhVjmqt;*vfz_887(V}JU^w%;g|6{`_Rj%^18;d54uXcVvAAngh<+ge8iPCe!-V17 zzn64M1AiG#|H@&w`=5bfwC_*9c*bQDXwA=|_bd!&|IlX+;M~7+3@3j`Gn@h~eaGdf zQE|G40H_^!{HqSbxqr85TL%35#&G&qEW^$J40zUu)79&vmN76eYO9N+qPiHvzj)UPSXozw&_+u=5Ek!`VM|4CnvEFoc+6&;qV7phLhhJ7`j1IQAl=fUW)HPT_VACB22G>vqhGSo} z8P5Kl!Eo;1E7H=zzpo5u{_J2l^}~ZEafj7JiH~up)h1dL-`u1~fhc1BwNpZN&0O>e2b2t)%=& z@`U)HwS5Q&gG^&U^1uObOv2?shA|+?gYM2l<})xbBb)b!f%!i&{{w^le~=L{^A0fo z2lHX>WspbV{{aOI$VE{9{$Tw7|34D{0oWWw_#6PMYk-FrNc(>f9}>R*K-&NR2OpLR z3ZOrbc=^Bp5&!=K;*1{<{sZO@VB`M(gYX*|4nX7i;RC2MK!$)Yi2sKHq726W!GMVN z5C8xF|9})~AL{@Ae}F8Hg?|9aJdk>1{*QXFuNXjffiMQ&9!-7_@KM}v4~_+Bu>b%6 z!yYv}uM$~x!$50Q-jd1z@ z4g8?o3QyPn4=|$gKQPFHPND=G1|mR?Lvj!3tZPtt1#>Uhb_V3)6jXR1^Fi4XwKzsG z|37Lz0$B+v5J5ByBd2SS7&;$0pM&H-Amw`~9~=!JHV7l9YY-nfU4!_@=^DiU-+%}% zF#iE47Qw8cL})NDFeETAFo1%WkwF0ziwq1C7#J8pN0oyxL}Zj44S~@R7!85Z5Eu=C z(GVC7fzc2c4S~@R7!83D9|EtlvKcO?R(9y`?ChXFSy^^J zvUB9$<>a#c&o5y3m6bi>!;4x0@Gdip;Yvmh!}qLg#oyW4rT?!6=0@1YV_QGW_ppW%!ewUGP6U`xk+-hG0DW%g%oAGdtVlPj)uL zyCTqa<2bDzm0(~1g*(Hq>}(xSeM)KA!}R~n&R+E?J%jU0M#g9u;|YOJ**OgVvr-uT zW@mT6!j2mBzwGQ!zq7NQ|72z1v3*qPXI2)&kE|^5f7#iWs1feSX8g_0p7=j2p5aAS z&S*g62mw&~{GF9$|1UfH3v$>|gZ(c%`^2}bZ2oT}uWtrACm(bcvN{6;1L!>D5C#SY zP@^1lUV1wNWEK!~&*EeT28OO81A}$HGBb&3=M$`Zk>l-OcJ`Cs+1V<;vxjxx7-=)Rgx1_p*r3=9mQJ1Jf;Fff45rU%WDgAV${ihUFn|NqL&Bq?l>!x<0z z-JhHs!@oJhuKWR={|~xH8FZgK=q~9K&~v_#!x?msBHHxB1H*g< z1_n?*kY!+CASt}jf}i1MRyM=WtSqsA+1VGU9sYl_v-|#Mr82zE$w#+#F!Mp@vxCwy z=&mQw-3_2R6SUyr<;Bm6=4 
z5rFPD1>IEyDq}#Gf`FFof!a4b)C_BEW_<$Xf%Fv6I8YXuI}_W$-sU zo8ew8=o&6;whkIG(EWll85kHqcZY$>nwty^44^?1nx%JaKL3`T!|*vfhx2cC_5|qs z7D|}H!j=g7)!*#wkpKDl4BrR$8~~_2>B7Lka2i_HfDXq4)wOEa!iOGWpmyS~>>R$o z+1Wk+v$MYu5!R46%E|fvKPTruXx#Zvc6Ruiyh5h$*@L>z1!{|f90d(XK^B{cp3d_#dySSqC!vWo|Br zq5}p69jMs&$HKtypNE0rKR*M*UoHlgH%tr+9~l^uKwV7?XCU8o59*_CVPIg;#ZWbX z0*^AX82D`a5! zXTZSlla+x16wF{Jf%>nD7$5@Q85kHqcgBJ43k6FKHsVckGQlc2Ew4@&E<3=ID_F);l9 z2-@g@1OH=S`2UiD;om$4#{UKk4D2!t3=Hd_=>&A~E$BWjP&q_(R}O%I$_xw)pbOnW z?KVH!*bmy5^Oui-;eQhY!~gdc3=Ge{GBAFPVPF7lu?0DmYOWj%27&sIAWvOq zU|^7-j)OqMiy$Nad6Hh{fWjOrzRAGwTZ4fCRGv}Cm4n0-BL)Tr&=@!fgT}9@;4Dy; z{s+{q+=jhe#~Qvk>HuW5e|s3Xk1#SYg1TH(aOU7JLXv@j0n~Q~71p4+2P(J5(@sBHMh5AD-}(kzwnzeNlzPZ=2)K;7WM6Mz)D1hl>dl&QB;+y-U(#lXPyOPGP- z-$k0Fe}@077#M#sF)$9Rb`WTx4=DT(F)%QI>H`X*6P*4*eXf(V3IBid85BAh7+7CX z;Ko6x6?EJKDEvWveLl)u2Qmgc)&m+_qjK2;Zi_a6%YTMp)dmHPp@RYrv<`;C`6G}q zU|dCm^#7BA;lCdw{3%N%gG@JQ{T(RKs1W`jpMlFD3kHV&Z>byyml+uTOAW*DkB7E@ zKy!MaHIbk|qyU44Q-88EF#KCVQ2Ia2z`(#wx$|y7!|1OS z1H=Dk)QN+CXBZg%%0i}KD0kr?GXd8A+(vQx6Xr3-?+gqqcNiHM{^c<+{Qpj29K2v) z_zxP(V_;x-1G8Z`(V+P}Q2Jj?-7$8Mk^k8l82;BYF#P{YW*od^VE7-y!0>}{n5KQu zI>=Sf0t2)Lp33P8R2TmR)dgt`44{)9!EH!l+NFOP82+7OU;vF_{~fMj4+?8gd;d5z z{6X~qNQ}xDG$!;5R2JzmF#Ma!!0`Veo;jJH3=E$hGBE62!@zi2g@Iu*0|PT?-i69e z8a&2<=71kFFfjaNU|?{gt+W0!FfjdKW?=ZM%E0h1mVx1aF9XAWP#bAI1H=C%kU8Rg zxeN?SCm9$RL2Cl6XzQLqVY)js?4K|&Fo4zw)5%fbx&f5!m>3v9>wx~jmcW44`5XjE zFfcI8VqjnZt>dSYn+C+1YN%iLQ>X1o6+eK+LqG#;pg|rBs+c%94FRox0JS+miLq-C zIT5r65!4p}xoaWS+Qfs%gHWevL-&Y)%6=cH*kGX|85kHq`zJmzFfa@@w^1vMK>I>K zY3vFE0|RItm|Esi!yM51{SDBv5wxaXlp2N(A~Qhk&V5k-f%;#A$Z2q=fc8Mc);iZx zw{HlyW1tvb3=H5wbkJT`yMb~90eeAn(~ZzH0*VJ`0_ul~G^pNP4fWSX1_lPsLF{lb z1_lPuWIL#w1nrlG9tb{&T};?%p!)1HbdSA1VdVp^0@M!!trG^t0cea0RCW%yqsVjs zsBH&JPwNL!o1c((lAwFNK>Y&HSs0+bcZBo|M)@-6K8Dwz%UTAb197+sv_GT;bnFxZ zWbsKk1I_o0U~$R*Hw+BBo-#6=_$J73>ZcyVncuDq=l=OJoc`s)aPo&P!?7;{4BH+s zGVBIzg~iZs4{h&*@^J?P0|Tgj!%#Gc1$Y@47(ja-L4&HG4NRaz(x|s)8=uFId}Lra z@Rph3^e15bTOR!_mbiK|NjK>xqmOf@o@P+sQmyc^I`2BP(5aY!?vL!4Dt_X 
zlOCup0G+1+8mFUv|8e6j28L_ku-9QY_irD=`Tze2hC6E9{9`!#cNc?!%}QvQ4{Gbj zF;aH)#9)ad&^m!0Xk7p*PeJ2{ps_j%*H1#d*R+6v;mjWfhSNXQ7|#AZL1MTg^2de$ z{~6{T`pY1v`i7B#p_q}8g=Lt8KPaF;{TR@uSy0~$6bGO&bkG`Z&=@vIo_u`lGXuks zk9-Vg|E?iB>|uEU6bBn`9b_!%5oN3eZ51cqS%Xj~s0{(?7lZm{pgIAx_yu%k6=+_A zyf*5trwk0&|1&V0{+$Y~TM_9Q7N(@q7ykcXIP*J>;l_UkhRt^eVK7qcB+x!pP!|7il4CnrlU+!RwpL2f?GaUaa$Z%pYsM&z* z>|w_Sja`HGmV?S6P#YB#51?}C1|tI_sQ&=ka|k-G19Po6sLcZ^d(Qo9qqyA17EfpY z)Pvhp3y%%EK*!=CP=5_H&I{@}pM@=r;}bjgZx_RfZ|n>wKu2^9i?Ait6QE8%=nPN~1_lPu7!v4Q z57537&>m3Gd3_%kIe8v4th@GvcH#e>eHholq{auR{R$et0PQ^o!Lq{%8I)LizxpV_=70@KMD_p!VnK zUw+WBP0IUT@cQ82H-^){JQ&V`sv4@eba)y9>K~o>CJG*7hKC&$>f&?%jxikjBE)bE zG;}^ZgN`b`I{uY`;S#tE%3?VG|2LJ&p-}S(4o|P9ydWe$le{eqd3z~nFV>t6?GQ;_QZ;6VB zbN}8joc%L_;lwu?h7120M%Z`*>ETRB$I0&u49C8(GMxTp&2aYbWQKG9E-{?{_W?RT z@E1&<`}dyV+`o$q=l)J)IQ7$#VgGAZh9jWm!-NbUl_xp`j)LdKm>5oc7h^d6%aq~F zA76%(Kf=KDsh=hc$G(U%9C*XTa2Rw{CefyjDjW@g(GVC7fzc2c4S~@R7!85Z5Eu=C z(GVC70lXpbpHAQ(1Hz&o4EAu&2L^sP`vC(xoP7X=%?uZBU|__`Wi_@${|DsX9}EXTcKm1L|Ns91%;Eh1|3eM? 
z|DPXZ3CNiL|Npmx90Cm_FdHuZj{#yiNDVIbAGn&s|Np=P2`?MumH+?$AO4Rp6N3$P z!(ouykl9do9tOD|%5Dew3lsnlzdT?71p|cr1Bv~g1DYt>L7@a?gN*nC)eB<(LuP{n z{{M#vLEM1MhG=UBhw~q>aR&xjNP4fX#SV9EA@JR5=f3mZULHDBm$<8+W zm7OE`DLa?>>)^gCkL+Omo|D6HsjUv~COe11og`}j9IJK=v$2E&Ie(AA7YSv4pXf$q`#ot-WF zFFX4jlK=4D;RsUqFFX70@2qTX(0#yz(qlw8>P==A!~g7T(0$VRAU~0c|7K^m{m)Ee z_>u!U(ufEP2agia`d`rcc$UpRew=@^vbK`ycaZvj+1V$5WoHWv(tDdhds0DXc7VDp zpmpbjc(0o|QENTL>WE&ymf zcN_x)18CFb83qOh(B(X!5tNq<42+<=V?p-~fX)H|-Mg__f{W|Gjp*oaWcvTrpX?m1 zzXRJQ0L3%tZcUJ%L1&wQ7MX(%Ah^iDz_5UUfdRBv)|G*QL6eDzS)7T93A9fNv_}(^ z{+ag&28#X7$|9-kgQbyw+1dAgXJ^ZS%FqFbe~`abp6IgUW8uLhc>V zb9Bh|H!QI3$Hg)H%Lcdql>cRCAHW+1pu59!b3u29pZ}YkZ2*e<7wH)=qi9Kk&cp+i z<)A(PMGOq!F>tEc2fD`_REPb^&Q|=Jojvb=cJ?Q1;qWve;s0_sw->tu0xX-&%oslB z=9B3LP|^hr@Pp0>|F6Qp@ZW)f;lCRL!(USdhQA^V44`wXL0h=NXZ1udFff4jO@jOn zI?D%C?vZIPer?~fvKhW+Wpn(=&UOUdD-TNJ|Fg4?|I5za^FKFtLb{I5WF|&NkmD&h zyAL#q{GSziw*MLihX0Ql82*1_U;vFG|Np?i@c#k>!~aGGh9^o43=FBzd=A=64dUbX z0F`8)=j1Uw%E@Q^o|Vo1Co7xxOGXB$+yR-c32je+?oI&lNyH$(|Knv~_}@zSKI+SN z7#Ji#`}!9$FffV^l52QDd*(oAEg-*x84b+b&;oeqo{996RH}Oga z#$U_~jG*H@hzxStDFgK@uQD(&fX=WW(iU+3mtbJ{e}&BWF9YZQ!EpyO$p4_T%cl|P zdB`zvjG#2|uY(N#KVe|_uM6=%@ix*>B`E!a&TRnQpG$%@pfvCo)YiX5LKuMV1FB}aI_t_%$S?-LOQe;644&u3uxFEEJW9u%;kb`PljO9OF8 z!=O9>I)ddN=#IKw&^`}(d*~kn!|xXi41YlPcL_k+Js^Fg*+fItpmVH2?H|yXG8xwV zWME(fwTJ(L{1(W-@E>$H(@q8k(2An}s~8xzu3})YKEc2ssL8-ULKB+|myoFibbbqH zOc^w;PNoqsZJ;s%bO#UkzBVofhX0_xy*c#4JJ1>QFolDK28}nZV_;wajkAKzd7_28 zK>bS4cqV9UlL}!>RVRWLe1Y6WLfeNd`^^{_7(ioPlLy&dOrSecL3tl^&LdeKBT^G6 zAArs~2hC%65~+vQ%0TDFZD(L$ILW}kK;eDKL^)NOfq?-uR|UeLv!jU8L_rIE=1<}gDz(8Kz3%W-DG{kh~j|IcIe>)k@ z|Nn{oT!Vja87}^zUHt!tVcO2O45Ct?zMjnh#XGuJK=Tlw1y`UsU(gr? 
zXf6~~FQKc$%zY`3}=2@4#M-xK;tl=x)?Ml4r-Hu>ZfEz zMn=$mqM&s!h^PUb?R@69FvIzOXGr!xC_F%A;?4gI450D@Vd`LEf!0ERCSpN-5>PmR z=1M{BG*JI347wIXlc8gU2gCXQ#GT^;@-s39)eFbJvV+eE8!W!YbO)%70xGXT^#f>k z4(MEbPquBwP_8}D5gHV3nVFreCe?jMp*)g04-(N)h`JU(g?PfUr zOOOH7ei(#aBic!z^EE+d6rKHBM)Vn{=l@eLfJ)V1EE4|Bv}USp5%v2>rqSKiIqjpz|93|8Hmn9q#e}ANT+W4#+tl z|9{l~0BQaIr~U`%+>if%>LKTyAk(0uD?og78g#q`LLW^3AE)va@UdWA5Mn_dh%P-oNbZlwVm{tbekyXygj=jQyFF&G02FoB3aM zcH{r-?BCe-+(5-X|IN-$1npD*nmu^Z6lmYn-|TG5|Jm8Eq3*=FZ|q-o_Vqv6*$RVT zzc^@5BxrsBG)BtyBO|k&klUg1fB%8@{L?r5LGA>tH*{lQU;vHrPh?&JLaV0YHIBD+BX1I|Hy6%<;myo6eG_MZ2(-m}YA85`OwC4)sR?u1` z(B0F_$i{uo%w+hVlLOj2*bGe{IMY5<9+ZCKKzjy}&7lGtl%7Fr=9WX(XLK_#Fqjat zP5_#JE&gX`zk<3EpC7LN&CZqw?GvPen_)(P=I}uK2tezp`xzJ*)Ilp{VUmPsP`&jv zI|o#L*8R`U{)N>Kpgo&8IiT}f;{NC6Fnpz69iR=p^K>8dNHmo6LCEbec~E%%$iv|gMXkr@ST7kHZd^#6Qo1fgWL;B|Df|G zl0Y2%803ENUWN(=hW~G|_yJUhZDwEqpLqbvFZhk4jx4B+1zH^4M_qT@O>>n$JpP;i@7(iv_W(EevX$%aEp#4}F#!x{3w4Y-V z0|Nu7A4c4HGN5%_Yp*ggoc-$sUB~H)1{3xqtr|de%H)P}MbKuqDuB#_24Q zBtYX;Aq)%*r=jC&pn4KBzk$|%p83tsaPHq8oNkA$7rglY8^gu_!QgvuLF?{Gat_&w zL3^Y?YhOWUgMr#Bp!fjg4bZw+CWcc#L2Kab8P5MFZVl+Ue@ht-e`Kcf-Lath1~f|u zYAb^L06N3(2m=GdN(Kgoi3}A}S20}t54!gdkzQb7j!B4Xgo7ykbvn0~;1`8Uw+NA)~t4dB^74h-l2e<8~K=l(5cIP!_!=?B%# z$UIOwIQ>h2;oQG{M7aMu!`Z)K;5$qQ%eu!y?-{`EIs4a*;r##Sc>Qqh-#ms>Ke-t| zYuu2-V4&F`_Z<5Ss=xfecb1;V$jcuY&i$LhaQc@R_}=7g4+q-CnD&F}F;Msa%x_tS zbAMA9&i$JPURQnYUn9fWzjh2Kzq1dzaL4ooM5Ji}14GMV28QNE5I$&)G=w!uj)nkT z0Aw{0Xru(hLZyF#M>_xiKVT0LZ;*%33^1A>%>Mv74HJC74w(M|d^+a;{|DqD;*9^n z^gs6hV10k&|AWl=!%+`57rf+5o`K;5=<*zS28IXdH0aVOkT^OGRR`1e0dxzKJOcyx zGAVfmhJT=?Rv>pi{{R0U$o&tY^hYTDp!|&{D(ZAW*9{;kllm2FB2mH;> zmiwNS#rTuHYb;QL|5sKP!|Swk#=qIwp8vA5PyEl${sEnn1+C8k-2syF8@!f?{_Ai+ z=k$W+CjVw<+yBqbegbtD`kejef8e!3ISk)3=`z0v4?oa3=)bbFIsRp5ufyRsuv`CS zXJ7b}oh?b@wLqYLy(j|%gDwLD185F3l!1XE#$8S>?@M|*p?f~Q{magF2Ca{PdyYB` z&|Cs&9uu^08FZ!x=newVS+a{67#Kk3$@SSvNiBY#n)(f2`2Ek${{1gII}nr>sN*c~ z*fc17mO|Gwfaci!85kHqXCCk|GqJF6@dz;d&dLJal_B*%JNr5T_r3p{ooz;i@B@v# 
zfyQG%W5%F4upkBo2GCj@tls{eox|`hJBRUKc6JXw_x;Px-UM26L1FrnU|?VXoyP!* z!$1ZG22g(=tGkc|LF+yKLhf7Hh3>|G+1da9W@TOZmz`|_id+2W%7f-j|A6K<1Q{6q zD>5+r1D&tP!oUC;@@4>?Q3^U205nDlawoDA@UlT=Gsum9va_ZCWoNhi%g#RkKRf&J z|D2r5i(OoA_$ew?s&a8LJg){_Gl0z~@EjOuen^ag;eRy)!~fF^4F4Z7F#Ny4!0>-I z14CI10|NtS9tL!_H7I_u8A4DDR7d>C&SCtWoh|e?J6rL8LxV6U3kzt!J81nDp0zk2 z_x%I84RjXoKTr!26`%aYz>ro8Zfr4#5OfEL3P9_dKz_%u<_0wP26Eqjkp19R8>-tN zEjI>+|9co1{(;VqBhh+-%0Y8fpmlGcG75`gko*1#F);i;gTq}gH-BYd_~!%(LoCLS zF9eELP+5s%P66ybB?gB7pffXJZoxtSi>HD6Kt2Plsfqw z*?^cvP%Z+R%LC;-EGB`%4>b4te<1<)-GY=kApc-7hBBcE3=9l7)|!Bh-Ur$F&y<1T z|6Lqz1g#y&gTyV!Ka@EJRX2DM8|V%S(0l=^6r2ZgAE<2p@5sRL{}AHL=YM}07(TpW zVE6`FBf|l4A6yf)7@#@`bUvXCegi?_0&*ufFN4k_O=4hp8_B?6xSoN5&7Oe)G_8Z* z1Ttk685kHq>IcnRg35PLJBmyr@M;5%)q~0xa_c2f+XPg8g6bi>7Lg$dYR`h^ z^>E}3qD=&?B?rw7g6bloH4&i@bmtK03?54796@tHp!pKenjswZBo^0z_qBgwWH|F% zfZ^<4MTRqfBpFWrU}exW1}WxcU|;~XAE|Xw4=A63+J2z+GJgNg2c13f4>aE_!f@_i z1;hD&rx?!vf5dS9|4oMT{}wVFd}qnPz*x(`z_5ssfr*z9eB}wq$rNMI{4!`h5Y#@k z1P||EGZJzpf-u9me=8Zzqs>iS`2U~b@P{`Ho>8FmqR7Ams(!H;5g*8*BE1~CvcXLO;yCr1Sg0bI4%+Plj`UOBr@PW*m@t zsgpk#7_R+iU^x5Nh5<773OZ*5lomm2SMD>M`&-O#>L=%bxD67^3=E(--b>&$L4pkD z{yH<9`OUsr`TWp8-sNr~^$yG5r4_ z2ciEkK*=o~~j5&9~J+7k0;{;=k#qZj6UA_KRf%&zwGRI&_2!|)Y}&b@(QRA z18UoX#?$y7$0tPp&(8jd#eUFy$k9L9+5DjR0coTFgF5Y?HMfZj3=GZCb}DF2Vah~X z+oO2)*gpQ3ovlQ{SUqTb8DuBOF3>sEpxkWBz`&@?z`zJ<4MtmpmrsSF(7~a&CXW-pPhXTBm8r6KzDhzzsbr5 zjU%JzBftTzwFLDCKxdMJ#x!s$2klV>?YaJ!o$d8MJNwH2?CifFcYI1s{d*%idfWfp zJh6W{pe8%I5#Ul3bZ49g1H=Cw28RDr7#RM?GB9{pK<{=2tv@GfoZ@?SHp86^@Exb} z|FW|q|L5kWPqMSi=4E9CRWVpPb)bIKKQ;!2|4j@G|GyxOv;AUV*zk~n!8U|}VH>)G zN#%p?5SYclzyL}=2)%5et#V+$doeKl|BPfe@;GiSWZxJ_9YQOKEKq+5lwMKCx*7j~ zYBdn<#bGzd9fujHvv&>D?+2ank8nD|{%LsZKh41KkDm(rpFrzuL33&d`ysVC$p489 z4F5rQ5P|%Qj%P72{AZ%V7$(Sm(3l&N{U9bd{RuNL{NI2j3_$za3?Oz>6#pQLKzq+Y z<6$5+5(e21G8?pC8k`4?F);i;%fN8(ECa((7Z5;1Q{IA7&<8Z zld2UoPYo)!K=mD|dSU88^)krs;K4zdAdxhv%&lf%U;s^k;@*D_8oN6F4Ybx(jp5wC zIEHioG8oSNvtroy3e@}uEgk`x0V;!tv=&7fXlxI(H>ibyfdN#|>a|`vk*9@c0YNPGtHL 
z!`Z*WG#f7hO%tE`nFEh^^e{O0Z#~1QpKRcCg60vbaLwnOS+6LzTe;Anme_&w# zf8Yo6|K>mJ5BUGEAF%&_9E|1vKR*D&_Wz$hsQ-WbL;e5b|Lp&t|1bZ)|3Ci&{{QU< z-B^{_&1HwZj zs6l6o{mjZ@_>-N@{yRIH{ZmdJ!`GZ#s<(Tpt*cd zdj;$v&>Bn7c+S7<>@1|Ush~bJXx(QNNbU!G{Vb@B3c6GAzZ?U@e>(<-|7r{j(x72D z(3v)%&6QZ&FVH?S=fCXiwEx-J+drnJ?(Md=p4V+_%e~nTvwQ=ceV3ih@INn)z01l9)RqGsN(Rb$Aj3iZ4-f{epudjV2l)Gr zf#IJu8GRp6yBV|}64`vv%KWdW=7TCv@Hs3{bwpF3{y5S;UU1lJU|EX<3MbHA*hCu) zRS0S?gYGwhazWw$7t{v>-HrSa;)g%K85sV~WMKHu1L+?@l@dpR&SV3fBZj0JWCwU0 z0F-tz7#KSG7#K`#Alt@4(S@WGKNHl>1@-knX%C-DQ1J^&o1hXGpE`6o&>c7+eW12A zx*R$m)J6lH$pFf~=yD9;Hqq%{ObloLa57x{&&;3>stFiCbJWD%_X=`5XnijzzCe8i zh#jCim_Y5~vwuPNkaRPg|G%H%;{SOJ`JJHk9H6iP>4T^xOoH+YD1(98yJ`#!j4TXG zPlN8a;9)rbZwdra4CntJWH|qCF~hmP<_y>WgYF)~a1vPp zpgs#IeV+LZnlohq?~^?Ji!7rk(e(fSe}@16+Zq1m{{emm(5%=0Kk^Kq%~Bw#p02i21Vi~^N0pnNT~ z%EQC#Uv~CQXnO$EmI(sY)j0HndWpzpri>JZ}wnh03{8G5;PK2wuAOwfZQkmsyG?W|4U#v|NjNU z`Ty@3<{nzYASo}zpbMG?K+}ofg65t;b3s820>Z%zdtaP@kA+owfBoXzdmVi!z-5m& uml diagrams + ] + +# note: breathe requires doxygen xml output -> must have GENERATE_XML = YES in Doxyfile.in +# match project name in Doxyfile.in +breathe_default_project = "xodoxxml" + +templates_path = ['_templates'] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +pygments_style = 'sphinx' + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +#html_theme = 'alabaster' +html_theme = 'sphinx_rtd_theme' +html_static_path = ['_static'] +html_favicon = '_static/img/favicon.ico' diff --git a/xo-tokenizer/docs/examples.rst b/xo-tokenizer/docs/examples.rst new file mode 100644 index 00000000..16f963a8 --- /dev/null +++ b/xo-tokenizer/docs/examples.rst @@ -0,0 +1,99 @@ +.. _examples: + +.. toctree:: + :maxdepth: 2 + +Examples +======== + +See ``xo-tokenizer/examples/tokenrepl`` for (slighly elaborated) version of code below + +.. 
code-block:: cpp + :linenos: + + #include "xo/tokenizer/tokenizer.hpp" + + int + main() { + using namespace xo::scm; + using namespace std; + + using tokenizer_type = tokenizer; + using span_type = tokenizer_type::span_type; + + tokenizer_type tkz; + string input_str; + + while (getline(cin, input_str)) { + // we want tokenizer to see newline, it's syntax + input_str.push_back('\n'); + span_type input(input_str.begin(), input_str.end()); + + // input may contain multiple tokens + while (!input.empty()) { + auto [tk, consumed, error] = tkz.scan(input); + + if (tk.is_valid()) { + cout << tk; + } + + input = input.after_prefix(consumed.size()); + } + } + + auto [tk, consumed, error] = tkz.notify_eof(span_type::from_string(input_str)); + + if (tk.is_valid()) { + cout << tk; + } else if (error.is_error()) { + cout << "parsing error: " << endl; + error.report(cout); + } + } + +Reminder: enable building examples with ``cmake -DXO_ENABLE_EXAMPLES=1 ..`` + +.. code-block:: + :linenos: + + $ .build/xo-tokenizer/example/tokenrepl/xo_tokenizer_repl + > 123 + + > 123e5 + + > def sq(x: i64) -> i64 { x * x } + + + + + + + + + + + + + + + +Example of error reporting (via ``error.report(cout)`` above) + +.. code-block:: + :linenos: + + $ .build/xo-tokenizer/example/tokenrepl/xo_tokenizer_repl + + > 123q + parsing error: + char: 4 + input: 123q + ---^ + unexpected character in numeric constant + + > (8 * 8 * 123fd) + parsing error: + char: 13 + input: (8 * 8 * 123fd) + ---^ + unexpected character in numeric constant diff --git a/xo-tokenizer/docs/implementation.rst b/xo-tokenizer/docs/implementation.rst new file mode 100644 index 00000000..1063f4fa --- /dev/null +++ b/xo-tokenizer/docs/implementation.rst @@ -0,0 +1,38 @@ +.. _implementation: + +.. toctree:: + :maxdepth: 2 + +Components +========== + +Library dependency tower for *xo-tokenizer*: + +.. 
ditaa:: + + +-----------------+ + | xo_tokenizer | + +-----------------+ + | xo_indentlog | + +-----------------+ + | xo_cmake | + +-----------------+ + +Install instructions :doc:`here` + +Abstraction tower for *xo-tokenizer* components: + +.. ditaa:: + :--scale: 0.85 + + +-----------------------------------------+----------+ + | tokenizer | | + +-----------------------------------------+ | + | scan_result | | + +-----------------+-----------------------+ | + | | tokenizer_error | buffer | + | token +-----------------------+ | + | | input_state | | + +-----------------+-----------------------+ | + | tokentype | span | | + +-----------------+-----------------------+----------+ diff --git a/xo-tokenizer/docs/index.rst b/xo-tokenizer/docs/index.rst new file mode 100644 index 00000000..6efad465 --- /dev/null +++ b/xo-tokenizer/docs/index.rst @@ -0,0 +1,29 @@ +.. xo-tokenizer documentation master file. + +xo-tokenizer documentation +========================== + +xo-tokenizer provides a tokenizer for the Schematika language. + +Syntax is generally C-like, but with some important differences. +Notably, characters used for arithmetic operators (``+``, ``-``, ``*``, ``/``) +may appear in variable names: ``one-of-those-days`` is an ordinary symbol. + +Typically applications would use xo-reader to construct Schematika expressions +instead of interacting directly with ``xo::scm::tokenizer``. + +.. toctree:: + :maxdepth: 2 + :caption: xo-tokenizer contents + + install + examples + schematika-tokens + implementation + tokenizer-class + scan-result-class + token-class + tokenizer-error-class + input-state-class + span-class + tokentype-enum diff --git a/xo-tokenizer/docs/input-state-class.rst b/xo-tokenizer/docs/input-state-class.rst new file mode 100644 index 00000000..d995868e --- /dev/null +++ b/xo-tokenizer/docs/input-state-class.rst @@ -0,0 +1,77 @@ + +.. 
_input-state-class: + +Input State +=========== + +Track detailed state of input stream to collect information useful for detailed error reporting + +Context +------- + +.. ditaa:: + :--scale: 0.85 + + +-----------------------------------------+----------+ + | tokenizer | | + +-----------------------------------------+ | + | scan_result | | + +-----------------+-----------------------+ | + | | tokenizer_error | buffer | + | token +-----------------------+ | + | |cBLU input_state | | + +-----------------+-----------------------+ | + | tokentype | span | | + +-----------------+-----------------------+----------+ + +.. code-block:: cpp + + #include + +.. uml:: + :scale: 99% + :align: center + + allowmixing + + object in1<> + in1 : current_line = input + in1 : current_pos + in1 : whitespace + in1 : debug_flag + + object input + input : (x * y * 123d) + + input o-- sp1 + + +Class +----- + +.. doxygenclass:: xo::scm::input_state + +Instance Variables +------------------ + +.. doxygengroup:: input-state-instance-vars + +Constructors +------------ + +.. doxygengroup:: input-state-ctors + +Static Methods +-------------- + +.. doxygengroup:: input-state-static-methods + +Access Methods +-------------- + +.. doxygengroup:: input-state-access-methods + +General Methods +--------------- + +.. doxygengroup:: input-state-general-methods diff --git a/xo-tokenizer/docs/install.rst b/xo-tokenizer/docs/install.rst new file mode 100644 index 00000000..c9ab8598 --- /dev/null +++ b/xo-tokenizer/docs/install.rst @@ -0,0 +1,111 @@ +.. _install: + +.. toctree:: + :maxdepth: 2 + +Source +====== + +Source code lives on github `here`_ + +.. _here: https://github.com/rconybea/xo-tokenizer + +To clone from git: + +.. code-block:: bash + + git clone https://github.com/rconybea/xo-tokenizer + +Tested with gcc 13.3 + +Install +======= + +``xo-tokenizer`` uses supporting library ``xo-indentlog`` and cmake macros ``xo-cmake``. 
+These are on github: + +- `xo-tokenizer source`_ (Schematika tokenizer) +- `xo-indentlog source`_ (structured logging) +- `xo-cmake source`_ (shared cmake macros) + +.. _xo-tokenizer source: https://github.com/rconybea/xo-tokenizer +.. _xo-indentlog source: https://github.com/rconybea/indentlog +.. _xo-cmake source: https://github.com/rconybea/xo-cmake + +Installing from source +---------------------- + +Install scripts for `xo-tokenizer` and `xo-indentlog` depend on helper scripts installed from `xo-cmake`. + +Preamble: + +.. code-block:: bash + + mkdir -p ~/proj/xo + cd ~/proj/xo + + git clone https://github.com/rconybea/xo-cmake + + PREFIX=/usr/local # ..or desired installation prefix + + # want PREFIX/bin in PATH to use xo-cmake helpers + PATH=$PREFIX/bin:$PATH + +Install `xo-cmake`: + +.. code-block:: bash + + cmake -B xo-cmake/.build -S xo-cmake + cmake --build xo-cmake/.build -j # placeholder, can omit for now + cmake --install xo-cmake/.build + +Install `xo-indentlog`: + +.. code-block:: bash + + xo-build --clone --configure --build --install indentlog + +Install `xo-tokenizer`: + +.. code-block:: bash + + xo-build --clone --configure --build --install xo-tokenizer + +Directories under ``PREFIX`` will then contain: + +.. code-block:: + + PREFIX + +- bin + | +- xo-build + | +- xo-cmake-config + | \- xo-cmake-lcov-harness + +- include + | \- xo + | +- indentlog/ + | \- tokenizer/ + +- lib + | \- cmake + | +- indentlog/ + | \- xo_tokenizer/ + +- share + \- cmake + \- xo_macros + +- Doxyfile.in + +- gen-ccov.in + \- xo-bootstrap-macros.cmake + +Use CMake Support +----------------- + +To use built-in cmake support, when using ``xo-tokenizer`` from another project: + +Make sure ``PREFIX/lib/cmake`` is searched by cmake (if necessary, include it in ``CMAKE_PREFIX_PATH``) + +Add to ``CMakeLists.txt``: + +.. 
code-block:: cmake + + find_package(xo_tokenizer CONFIG REQUIRED) + + target_link_libraries(mytarget INTERFACE xo_tokenizer) diff --git a/xo-tokenizer/docs/scan-result-class.rst b/xo-tokenizer/docs/scan-result-class.rst new file mode 100644 index 00000000..6581a839 --- /dev/null +++ b/xo-tokenizer/docs/scan-result-class.rst @@ -0,0 +1,29 @@ + +.. _scan-result-class: + +Scan Result +=========== + +Represent the result of a tokenizer scan call + +Context +------- + +.. ditaa:: + :--scale: 0.85 + + +-----------------------------------------+----------+ + | tokenizer | | + +-----------------------------------------+ | + |cBLU scan_result | | + +-----------------+-----------------------+ | + | | tokenizer_error | buffer | + | token +-----------------------+ | + | | input_state | | + +-----------------+-----------------------+ | + | tokentype | span | | + +-----------------+-----------------------+----------+ + +.. code-block:: cpp + + #include diff --git a/xo-tokenizer/docs/schematika-tokens.rst b/xo-tokenizer/docs/schematika-tokens.rst new file mode 100644 index 00000000..3d99a7ee --- /dev/null +++ b/xo-tokenizer/docs/schematika-tokens.rst @@ -0,0 +1,105 @@ +.. _schematika-tokens: + +Schematika Tokens +================= + +.. list-table:: Schematika Tokens + :widths: 15 30 30 + :header-rows: 1 + + * - tokentype + - examples + - description + * - tk_i64 + - ``123``, ``-8`` + - 64-bit integer literal + * - tk_f64 + - ``1.234``, ``-10.``, ``-1.981e-10``, ``3e6`` + - 64-bit floating-point literal + * - tk_string + - ``"hello"``, ``"Q: \"what's up?\"\nA: \"parsing!\""`` + - string literal. 
Usual escapes ``\n``, ``\r``, ``\t``, ``\"``, ``\\`` + * - tk_symbol + - ``apple``, ``funKy``, ``x123``, ``_mumble``, ``hyphenated-var`` + - symbol name + * - tk_type + - ``type`` + - keyword + * - tk_def + - ``def`` + - keyword + * - tk_lambda + - ``lambda`` + - keyword + * - tk_if + - ``if`` + - keyword + * - tk_let + - ``let`` + - keyword + * - tk_in + - ``in`` + - keyword + * - tk_end + - ``end`` + - keyword + * - tk_leftparen + - ``(`` + - + * - tk_rightparen + - ``)`` + - + * - tk_leftbracket + - ``[`` + - + * - tk_rightbracket + - ``]`` + - + * - tk_leftbrace + - ``{`` + - + * - tk_rightbrace + - ``}`` + - + * - tk_leftangle + - ``<`` + - + * - tk_rightangle + - ``>`` + - + * - tk_dot + - ``.`` + - + * - tk_comma + - ``,`` + - + * - tk_colon + - ``:`` + - + * - tk_doublecolon + - ``::`` + - + * - tk_semicolon + - ``;`` + - + * - tk_singleassign + - ``=`` + - + * - tk_assign + - ``:=`` + - + * - tk_yields + - ``->`` + - + * - tk_plus + - ``+`` + - allowed in symbol + * - tk_minus + - ``-`` + - allowed in symbol + * - tk_star + - ``*`` + - allowed in symbol + * - tk_slash + - ``/`` + - allowed in symbol diff --git a/xo-tokenizer/docs/span-class.rst b/xo-tokenizer/docs/span-class.rst new file mode 100644 index 00000000..b641ca1f --- /dev/null +++ b/xo-tokenizer/docs/span-class.rst @@ -0,0 +1,87 @@ + +.. _span-class: + +Span +==== + +Identify an unowned contiguous memory range + +Context +------- + +.. ditaa:: + :--scale: 0.85 + + +-----------------------------------------+----------+ + | tokenizer | | + +-----------------------------------------+ | + | scan_result | | + +-----------------+-----------------------+ | + | | tokenizer_error | buffer | + | token +-----------------------+ | + | | input_state | | + +-----------------+-----------------------+ | + | tokentype |cBLU span | | + +-----------------+-----------------------+----------+ + +.. code-block:: cpp + + #include + +.. 
uml:: + :scale: 99% + :align: center + + allowmixing + + object span1<> + span1 : lo = p + span1 : hi = p+25 + + object dest<> + dest : def fact(n : i64) { ... } + + span1 o-- dest + +- Identify a sequence of characters stored in contiguous memory. + +- Lightweight, consists of a pair of pointers. + +- Does not own storage. Lifetime management for target memory is + up to the caller. + + +Class +----- + +.. doxygenclass:: xo::scm::span + +Member Variables +---------------- + +.. doxygengroup:: span-instance-vars + +Type Traits +----------- + +.. doxygengroup:: span-type-traits + +Constructors +------------ + +.. doxygengroup:: span-ctors + +Access Methods +-------------- + +.. doxygengroup:: span-access-methods + +General Methods +--------------- + +.. doxygengroup:: span-general-methods + +Operators +--------- + +.. doxygengroup:: span-operators diff --git a/xo-tokenizer/docs/token-class.rst b/xo-tokenizer/docs/token-class.rst new file mode 100644 index 00000000..8d19a852 --- /dev/null +++ b/xo-tokenizer/docs/token-class.rst @@ -0,0 +1,96 @@ + +.. _token-class: + +Token +===== + +Represent a single lexical token in the Schematika language + +Context +------- + +.. ditaa:: + :--scale: 0.85 + + +-----------------------------------------+----------+ + | tokenizer | | + +-----------------------------------------+ | + | scan_result | | + +-----------------+-----------------------+ | + |cBLU | tokenizer_error | buffer | + | token +-----------------------+ | + | | input_state | | + +-----------------+-----------------------+ | + | tokentype | span | | + +-----------------+-----------------------+----------+ + +.. code-block:: cpp + + #include + +.. uml:: + :scale: 99% + :align: center + + allowmixing + + object tk1<> + tk1 : tk_type = tk_i64 + tk1 : text = "123" + + object tk2<> + tk2 : tk_type = tk_string + tk2 : text = "the quick brown fox" + +- Represent a single lexical token + +- Does not share any storage with original input stream + (maintains a local copy). 
+ +- Remembers copied input extent. + Convert on demand to native untagged representation + +Example +------- + +.. code-block:: cpp + + void foo() { + using namespace xo::scm; + + token tk = token::i64_token("123"); + + tk.is_valid(); // -> true + tk.text(); // -> "123"s; + + tk.tk_type(); // -> tokentype::tk_i64 + tk.i64_value(); // -> 123 + + cout << tk << endl; // -> + } + +Class +----- + +.. doxygenclass:: xo::scm::token + + +Instance Variables +------------------ + +.. doxygengroup:: token-instance-vars + +Constructors +------------ + +.. doxygengroup:: token-ctors + +Access Methods +-------------- + +.. doxygengroup:: token-access-methods + +General Methods +--------------- + +.. doxygengroup:: token-general-methods diff --git a/xo-tokenizer/docs/tokenizer-class.rst b/xo-tokenizer/docs/tokenizer-class.rst new file mode 100644 index 00000000..4903dae9 --- /dev/null +++ b/xo-tokenizer/docs/tokenizer-class.rst @@ -0,0 +1,68 @@ + +.. _tokenizer-class: + +Tokenizer +========= + +Parse a Schematika character stream into lexical tokens + +Context +------- + +.. ditaa:: + :--scale: 0.85 + + +-----------------------------------------+----------+ + |cBLU tokenizer | | + +-----------------------------------------+ | + | scan_result | | + +-----------------+-----------------------+ | + | | tokenizer_error | buffer | + | token +-----------------------+ | + | | input_state | | + +-----------------+-----------------------+ | + | tokentype | span | | + +-----------------+-----------------------+----------+ + +.. code-block:: cpp + + #include + +.. uml:: + :scale: 99% + :align: center + + allowmixing + + object tkz1<> + tkz1 : input_state = ins1 + + object ins1<> + ins1 : current_line = (9 * 8) + + tkz1 o-- ins1 + +- Assemble a stream of lexical tokens from a text stream. + +- Lexical errors reported via scan_result instance; + errors reported with detailed context + +Class +----- + +.. doxygenclass:: xo::scm::tokenizer + +Instance Variables +------------------ + +.. 
doxygengroup:: tokenizer-instance-vars + +Constructors +------------ + +.. doxygengroup:: tokenizer-ctors + +Methods +------- + +.. doxygengroup:: tokenizer-general-methods diff --git a/xo-tokenizer/docs/tokenizer-error-class.rst b/xo-tokenizer/docs/tokenizer-error-class.rst new file mode 100644 index 00000000..848f2e98 --- /dev/null +++ b/xo-tokenizer/docs/tokenizer-error-class.rst @@ -0,0 +1,54 @@ + +.. _tokenizer-error-class: + +Tokenizer Error +=============== + +Represent a possible tokenizer error result, including parsing context + +Context +------- + +.. ditaa:: + :--scale: 0.85 + + +-----------------------------------------+----------+ + | tokenizer | | + +-----------------------------------------+ | + | scan_result | | + +-----------------+-----------------------+ | + | |cBLU tokenizer_error | buffer | + | token +-----------------------+ | + | | input_state | | + +-----------------+-----------------------+ | + | tokentype | span | | + +-----------------+-----------------------+----------+ + +.. code-block:: cpp + + #include + +Class +------ + +.. doxygenclass:: xo::scm::tokenizer_error + +Instance Variables +------------------ + +.. doxygengroup:: tokenizer-error-vars + +Constructors +------------ + +.. doxygengroup:: tokenizer-error-ctors + +Access Methods +-------------- + +.. doxygengroup:: tokenizer-error-access-methods + +General Methods +--------------- + +.. doxygengroup:: tokenizer-error-general-methods diff --git a/xo-tokenizer/docs/tokentype-enum.rst b/xo-tokenizer/docs/tokentype-enum.rst new file mode 100644 index 00000000..0f371dda --- /dev/null +++ b/xo-tokenizer/docs/tokentype-enum.rst @@ -0,0 +1,36 @@ + +.. _tokentype-enum: + +Tokentype +========= + +Distinguish different lexical tokens for the Schematika language. + +Context +------- + +.. 
ditaa:: + :--scale: 0.85 + + +-----------------------------------------+----------+ + | tokenizer | | + +-----------------------------------------+ | + | scan_result | | + +-----------------+-----------------------+ | + | | tokenizer_error | buffer | + | token +-----------------------+ | + | | input_state | | + +-----------------+-----------------------+ | + |cBLU tokentype | span | | + +-----------------+-----------------------+----------+ + +.. code-block:: cpp + + #include + +Enum +---- + +.. doxygenfunction:: xo::scm::tokentype_descr + +.. doxygenfunction:: xo::scm::operator<<(std::ostream&,tokentype) diff --git a/xo-tokenizer/example/CMakeLists.txt b/xo-tokenizer/example/CMakeLists.txt new file mode 100644 index 00000000..e761ade5 --- /dev/null +++ b/xo-tokenizer/example/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(tokenrepl) diff --git a/xo-tokenizer/example/tokenrepl/CMakeLists.txt b/xo-tokenizer/example/tokenrepl/CMakeLists.txt new file mode 100644 index 00000000..60243b7e --- /dev/null +++ b/xo-tokenizer/example/tokenrepl/CMakeLists.txt @@ -0,0 +1,11 @@ +# xo-tokenizer/example/tokenrepl/CMakeLists.txt + +set(SELF_EXE xo_tokenizer_repl) +set(SELF_SRCS tokenrepl.cpp) + +if (XO_ENABLE_EXAMPLES) + xo_add_executable(${SELF_EXE} ${SELF_SRCS}) + xo_self_dependency(${SELF_EXE} xo_tokenizer) +endif() + +# end CMakeLists.txt diff --git a/xo-tokenizer/example/tokenrepl/tokenrepl.cpp b/xo-tokenizer/example/tokenrepl/tokenrepl.cpp new file mode 100644 index 00000000..61f6ea74 --- /dev/null +++ b/xo-tokenizer/example/tokenrepl/tokenrepl.cpp @@ -0,0 +1,71 @@ +/** @file tokenrepl.cpp **/ + +#include "xo/tokenizer/tokenizer.hpp" +#include +#include // for isatty + +bool repl_getline(bool interactive, + std::istream & in, + std::ostream & out, + std::string & input) +{ + if (interactive) { + out << "> "; + std::flush(out); + } + + return static_cast(std::getline(in, input)); +} + +int +main() { + using namespace xo::scm; + using namespace std; + + using tokenizer_type = 
tokenizer; + using span_type = tokenizer_type::span_type; + + xo::log_config::min_log_level = xo::log_level::severe; + + bool interactive = isatty(STDIN_FILENO); + + tokenizer_type tkz(xo::log_config::min_log_level <= xo::log_level::info); + string input_str; + + size_t line_no = 1; + + constexpr std::size_t c_maxlines = 25; + + while (repl_getline(interactive, cin, cout, input_str)) { + // we want tokenizer to see newline, it's syntax + input_str.push_back('\n'); + span_type input = span_type::from_string(input_str); + + // reminder: input may contain multiple tokens + while (!input.empty()) { + auto [tk, consumed, error] = tkz.scan(input, false /*!eof*/); + + if (tk.is_valid()) { + cout << tk << endl; + } else if (error.is_error()) { + cout << "tokenizer error: " << endl; + error.report(cout); + + break; + } + + input = input.after_prefix(consumed); + } + + /* here: input.empty() or error encountered */ + + ++line_no; + + if (line_no > c_maxlines) { + cout << "always exit after " << c_maxlines << " lines of input" << endl; + break; + } + } +} + +/** end tokenrepl.cpp */ diff --git a/xo-tokenizer/include/xo/tokenizer/buffer.hpp b/xo-tokenizer/include/xo/tokenizer/buffer.hpp new file mode 100644 index 00000000..7b19316b --- /dev/null +++ b/xo-tokenizer/include/xo/tokenizer/buffer.hpp @@ -0,0 +1,328 @@ +/** @file buffer.hpp **/ + +#pragma once + +#include "span.hpp" +#include +#include +#include +#include + +namespace xo { + namespace scm { + /** + * @class buffer buffer.hpp + * + * @brief Container for a (possibly owned) FIFO queue of chars + * + * @tparam CharT. buffer element type. + * + * @code + * .buf + * + * +------------------------------------------+ + * | | ... | | X| ... | X| | ... | | + * +------------------------------------------+ + * ^ ^ ^ ^ + * 0 .lo .hi .buf_z + * + * <-contents-><----avail-----> + * @endcode + * + * Buffer does not support wrapped content: + * content that has not been consumed always occupies contiguous memory. 
+ * + * Example: + * @code + * // 1. + * buffer buf(64*1024); + * buf.empty() -> true + * buf.buf_z() -> 65536 + * buf.lo_pos() -> 0 + * buf.hi_pos() -> 0 + * buf.contents() -> empty span + * buf.avail() -> span entire buffer memory + * + * // write to (a prefix of) buf.avail() + * ::strncpy(buf.buf(), "hello, world\n", 13); + * buf.produce(span_type(buf.buf(), buf.buf() + 13)); + * + * buf.lo_pos() -> 0 + * buf.hi_pos() -> 13 + * buf.contents() -> "hello, world\n"; + * + * + * // examine stored content (does not change buffer state) + * auto span = buf.contents(); + * cerr << string_view(span.lo(), span.hi()); // "hello, world\n" + * + * // consume (a prefix of) stored content + * buf.consume(span.prefix(7)); + * + * buf.lo_pos() -> 7 + * buf.hi_pos() -> 13 + * buf.contents() -> "world\n" + * + * // consuming all remaining content resets to original state + * buf.consume(buf.contents()); + * + * buf.empty() -> true + * buf.hi_pos() -> 0 // not 13! + * + * // 2. + * buffer buf; + * buf.empty() -> true + * buf.buf_z() -> 0 + * buf.lo_pos() -> 0 + * buf.hi_pos() -> 0 + * buf.contents() -> empty span + * buf.avail() -> empty span + * + * // allocate memory separately from ctor + * buf.alloc(64*1024); + * @endcode + **/ + template + class buffer { + public: + /** @brief typealias for span of CharT **/ + using span_type = span; + /** @brief typealias for buffer size (counts CharT's, not bytes) **/ + using size_type = std::uint64_t; + + public: + /** @brief create empty buffer. + + Does not allocate any storage; @see alloc + **/ + buffer() = default; + /** @brief create empty buffer, and possibly allocate storage. + + @param buf_z Buffer size. allocate storage (owned by this buffer) if >0. + @param align_z Align to this value, e.g. 8 to align storage on an 8-byte boundary + **/ + buffer(size_type buf_z, + size_type align_z = sizeof(char)) + : is_owner_{true}, + buf_{buf_z ?
(new (std::align_val_t(align_z)) CharT [buf_z]) : nullptr}, + buf_z_{buf_z}, + lo_pos_{0}, + hi_pos_{0} + {} + /** @brief buffer is not copyable **/ + buffer(buffer const & x) = delete; + /** @brief destructor. Release storage if owned **/ + ~buffer() { this->reset(); } + + /** @name Access methods **/ + ///@{ + + /** @brief start of buffer memory **/ + CharT * buf() const { return buf_; } + /** @brief buffer size (number of characters) **/ + size_type buf_z() const { return buf_z_; } + /** @brief current start position within buffer **/ + size_type lo_pos() const { return lo_pos_; } + /** @brief current end position within buffer **/ + size_type hi_pos() const { return hi_pos_; } + + ///@} + + /** @brief readonly access to a single buffer element. + + Relative to start of buffer (ignores current consume position) + **/ + CharT const & operator[](size_type i) const { return buf_[i]; } + + /** @brief return span for current buffer contents **/ + span_type contents() const { return span_type(buf_ + lo_pos_, + buf_ + hi_pos_); } + /** @brief returns span for writable buffer contents (unused prefix following produce position **/ + span_type avail() const { return span_type(buf_ + hi_pos_, + buf_ + buf_z_); } + + /** @brief @c true iff buffer is empty **/ + bool empty() const { return lo_pos_ == hi_pos_; } + + + /** + @brief update buffer produce position, after (independently) writing contents of span to it + + @pre left endpoint of @p span equals buffer produce position (@c .hi_pos) + @pre right endpoint of @p span within bounds of buffer memory range + @post right endpoint of @p span equals buffer produce position. 
+ **/ + void produce(span_type const & span) { + assert(span.lo() == buf_ + hi_pos_); + + hi_pos_ += span.size(); + } + + /** + @brief update buffer consume position, when done with contents of span + + @pre left endpoint of @p span equals buffer consume position (@c .lo_pos) + @pre right endpoint of @p span within bounds of buffer memory range + @post Either + buffer is empty, with @c .lo_pos = @c .hi_pos = @c 0. + buffer is non-empty, right endpoint of @p span equals new buffer consume position. + **/ + void consume(span_type const & span) { + if (span.size()) { + assert(span.lo() == buf_ + lo_pos_); + + lo_pos_ += span.size(); + } else { + /* since .consume() that arrives at empty contents also resets .lo_pos .hi_pos, + * we don't want to blow up when called with an empty span -- argument + * may represent some pre-reset location in buffer + */ + } + + if (lo_pos_ == hi_pos_) { + lo_pos_ = 0; + hi_pos_ = 0; + } + } + + /** + @brief allocate buffer with desired amount of memory + + @param buf_z desired buffer size + @param align_z alignment; buffer memory will be aligned on this byte-boundary. + **/ + void alloc(size_type buf_z, size_type align_z = sizeof(char)) { + /* properly reset (+ discard) any existing state */ + this->reset(); + + is_owner_ = true; + if (buf_z) + buf_ = new (std::align_val_t(align_z)) CharT [buf_z]; + buf_z_ = buf_z; + lo_pos_ = 0; + hi_pos_ = 0; + } + + /** + @brief attach buffer to (unowned) range of @p buf_z bytes starting at @p buf[0] + + Buffer is not responsible for managing storage. + + @post + 1. buffer is empty + @post + 2. buffer read position = buffer write position = 0 + **/ + void setbuf(CharT * buf, size_type buf_z) { + /* properly reset (+ discard) any existing state */ + this->reset(); + + is_owner_ = false; + lo_pos_ = 0; + hi_pos_ = 0; + buf_ = buf; + buf_z_ = buf_z; + } + + /** + @brief revert buffer to empty state and possibly zero it + + @param zero_buffer_flag Zero buffer contents iff this is true + + @post + 1. 
buffer is empty + @post + 2. buffer read position = buffer write position = 0 + **/ + void clear2empty(bool zero_buffer_flag) { + if (buf_ && zero_buffer_flag) + explicit_bzero(buf_, buf_z_ * sizeof(CharT)); + + lo_pos_ = 0; + hi_pos_ = 0; + } + + /** + @brief swap representation with another buffer instance. + **/ + void swap (buffer & x) { + std::swap(is_owner_, x.is_owner_); + std::swap(buf_, x.buf_); + std::swap(buf_z_, x.buf_z_); + std::swap(lo_pos_, x.lo_pos_); + std::swap(hi_pos_, x.hi_pos_); + } + + /** + @brief reset buffer to an empty state and recover owned storage. NOTE(review): owned storage is obtained with aligned new[] (see constructor and alloc), which plain delete[] does not match; confirm, and record the alignment so the aligned operator delete[] can be used here. + **/ + void reset() { + if (is_owner_ && buf_) + delete [] buf_; + + is_owner_ = false; + buf_ = nullptr; + buf_z_ = 0; + lo_pos_ = 0; + hi_pos_ = 0; + } + + /** + @brief move-assignment operator. Releases any storage owned by this buffer, then takes over the representation of @p x. + @param x right-hand-side to move from. + + @post + @p x is in a valid, empty state (unless @p x is this buffer, in which case nothing changes). + **/ + buffer & operator= (buffer && x) { + /* self-move: nothing to do, and we must not release our own storage */ + if (this == &x) + return *this; + + /* release storage we own; overwriting buf_ without this would leak it */ + this->reset(); + + /* this buffer is now empty and unowned, so swapping hands the state of x to us + * and leaves x in the valid, empty state promised above + */ + this->swap(x); + + return *this; + } + + /** @brief buffer is not assignable */ + buffer & operator= (buffer & x) = delete; + + private: + /** @brief true iff buffer is responsible for freeing storage at @c buf_ **/ + bool is_owner_ = false; + /** @brief buffer contents.
buffer memory comprises @c buf_[0] to @c buf_[buf_z_] **/ + CharT * buf_ = nullptr; + /** @brief buffer size (in units of CharT) **/ + size_type buf_z_ = 0; + + /** @brief buffer read (consume) position + + @invariant + 0 <= lo_pos_ <= hi_pos_ <= buf_z_ + **/ + size_type lo_pos_ = 0; + /** @brief buffer write (produce) position + + @invariant + 0 <= lo_pos_ <= hi_pos_ <= buf_z_ + **/ + size_type hi_pos_ = 0; + }; + + /** @brief Overload for @c swap, so that @c buffer is swappable **/ + template + inline void + swap(buffer & lhs, + buffer & rhs) { + lhs.swap(rhs); + } + } /*namespace scm*/ +} /*namespace xo*/ + +/* end buffer.hpp */ diff --git a/xo-tokenizer/include/xo/tokenizer/input_state.hpp b/xo-tokenizer/include/xo/tokenizer/input_state.hpp new file mode 100644 index 00000000..fbff7e57 --- /dev/null +++ b/xo-tokenizer/include/xo/tokenizer/input_state.hpp @@ -0,0 +1,363 @@ +/* @file input_state.hpp + * + * author: Roland Conybeare, Jun 2025 + */ + +#pragma once + +#include "span.hpp" + +namespace xo { + namespace scm { + /** enum to report outcome of @ref capture_current_line **/ + enum class input_error { + /** normal return, input line successfully identified and captured **/ + ok = 0, + /** incomplete input; should not have been submitted to @ref capture_current_line. + * note: submit last line of input with eof_flag=true + **/ + incomplete, + N + }; + + /** @class input_state + * @brief Track detailed input position for use in error messages + * + * input characters fall into two categories: + * - consumed: memory can be reclaimed/recycled + * - buffered: memory will be retained unaltered until consumed + * + * remarks: + * - always in one of two states: + * - empty + * - contains exactly one line of input + * - also record current input position. + * Use this for example to identify where tokenizer rejected input. + * - .current_pos advances by one token + * + * - buffered characters always form a single contiguous range.
+ * - input_state does not own any storage; storage is owned elsewhere + * + * @text + * + * <------------------.current_line------------------> + * > <-- .whitespace + * cccccccccccccccccccccccccccccccc__TTTTTTTTxxxxxxxxx + * ^ ^ ^ + * .current_line.lo | .current_line.hi + * .current_pos + * + * <----prev_line----> <----current_line----> + * > <--whitespace + * ppppppppppppppppppp cccccccccccc__TTTTTTTT + * ^ + * + * @endtext + **/ + template + class input_state { + public: + /** @defgroup input-state-type-traits input-state type straits **/ + ///@{ + + /** type representing a contiguous span of tokenizer input characters **/ + using span_type = span; + + ///@} + + public: + /** @defgroup input-state-ctors input_state constructors **/ + ///@{ + + input_state() = default; + explicit input_state(bool debug_flag) : debug_flag_{debug_flag} {} + /** Create instance with supplied @p current_line, @p current_pos, @p whitespace. + * Introduced for unit tests, not used in tokenizer. + **/ + explicit input_state(const span& current_line, + size_t current_pos, + size_t whitespace) : current_line_{current_line}, + current_pos_{current_pos}, + whitespace_{whitespace} {} + + ///@} + + /** @defgroup input-state-static-methods input_state static methods **/ + ///@{ + + /** recognize the newline character '\n' **/ + static bool is_newline(CharT ch); + /** identifies whitespace chars. + * These are chars that do not belong to any token. + * They are not permitted to appear within + * a symbol or string token. + * Appearance of a whitespace char forces completioon of + * preceding token. 
+ **/ + static bool is_whitespace(CharT ch); + + ///@} + + /** @defgroup input-state-access-methods **/ + ///@{ + +#pragma GCC diagnostic push +#ifndef __APPLE__ +#pragma GCC diagnostic ignored "-Wchanges-meaning" +#endif + const span_type & current_line() const { return current_line_; } +#pragma GCC diagnostic pop + size_t tk_start() const { return tk_start_; } + size_t current_pos() const { return current_pos_; } + size_t whitespace() const { return whitespace_; } + bool debug_flag() const { return debug_flag_; } + + ///@} + + /** @defgroup input-state-general-methods **/ + ///@{ + + /** Input state less @p n chars. + * Use to recover input state before a complete but error-triggering token + **/ + input_state rewind(std::size_t n) const; + + /** Capture prefix of @p input up to first newline. + * Set read position to start of line. + * + * Alters: + * .current_line + * .current_pos + * + * Return pair comprising error code and input span representing first line + * (including trailing newline) from @p input. + **/ + std::pair capture_current_line(const span_type & input, + bool eof_flag); + + /** atomically return current line while discarding it from input state + * + * Alters + * .current_line + * .current_pos + * .whitespace + **/ + span_type consume_current_line(); + + /** Reset input state for start of next line. + * Expression parser may use this to discard remainder of input line + * after a parsing error. + * + * Alters: + * .current_line + * .current_pos + * .whitespace + **/ + void discard_current_line(); + + /** Advance input position by @p z + * + * Alters: + * .current_pos + **/ + void advance(size_t z); + + /** Advance .current_pos to pos. + * Require: pos in @ref current_line_ + **/ + void advance_until(const CharT * pos); + + /** Skip prefix of input, starting at current read position, + * comprising only whitespace. 
+ * + * Presume input position is at end of token; + * on return @ref whitespace_ counts number of whitespace characters + * skipped. + * + * Return pointer to first non-whitespace character after @ref current_pos_ + * or @ref current_line_.hi if reached end of buffered line. + * + * Alters: + * .whitespace + **/ + const CharT * skip_leading_whitespace(); + + ///@} + + private: + /** @defgroup input-state-instance-vars input_state instance variables **/ + ///@{ + + /** remember current input line. Used only to report errors **/ + span current_line_ = span(); + /** start of last token within @ref current_line_ **/ + size_t tk_start_ = 0; + /** input position within @ref current_line_ **/ + size_t current_pos_ = 0; + /** number of whitespace chars since end of preceding token, + * or last newline, whichever is less + **/ + size_t whitespace_ = 0; + + /** true to log input activity */ + bool debug_flag_ = false; + + ///@} + }; /*input_state*/ + + template + bool + input_state::is_newline(CharT ch) { + return (ch == '\n'); + } + + template + bool + input_state::is_whitespace(CharT ch) { + switch(ch) { + case ' ': return true; + case '\t': return true; + case '\n': return true; + case '\r': return true; + } + + return false; + } + + template + input_state + input_state::rewind(std::size_t n) const { + return input_state(this->current_line_, + (n <= current_pos_) ? 
current_pos_ - n : 0, + 0 /*whitespace*/); + } + + template + void + input_state::advance(size_t z) { + scope log(XO_DEBUG(debug_flag_)); + + this->current_pos_ += z; + + log && log(xtag("z", z), xtag("current_pos", current_pos_)); + } + + template + void + input_state::advance_until(const CharT * pos) { + scope log(XO_DEBUG(debug_flag_)); + + assert(current_line_.lo() <= pos && pos <= current_line_.hi()); + + this->current_pos_ = pos - current_line_.lo(); + + log && log(xtag("current_pos", current_pos_)); + } + + template + auto + input_state::consume_current_line() -> span_type { + span_type retval = current_line_; + + this->discard_current_line(); + + return retval; + } + + template + void + input_state::discard_current_line() { + this->current_line_ = span_type::make_null(); + this->current_pos_ = 0; + this->whitespace_ = 0; + } + + template + auto + input_state::capture_current_line(const span_type & input, + bool eof_flag) -> std::pair + { + // see also discard_current_line() + // note: must capture entirety of first line, + // for example including leading whitespace. + // See discussion in tokenizer scan() method + + scope log(XO_DEBUG(debug_flag_)); + + /* look ahead to {end of line, end of input}, whichever comes first */ + const CharT * sol = input.lo(); + const CharT * eol = sol; + + if (sol == current_line_.lo()) { + log && log("short-circuit - current line already stashed"); + + /* nothing to do here */ + return std::make_pair(input_error::ok, current_line_); + } + + while ((eol < input.hi()) && (*eol != '\n')) + ++eol; + + if ((eol < input.hi()) && (*eol == '\n')) { + /* include \n at end-of-line. bounds test comes first: when the scan stopped at input.hi() there is no char to read */ + ++eol; + } else { + if (!eof_flag) { + /* caller expected to provide complete line of input.
complain and ignore */ + return std::make_pair(input_error::incomplete, + input.prefix(0ul)); + } + } + + this->current_line_ = span_type(sol, eol); + this->current_pos_ = 0; + this->whitespace_ = 0; + + log && log(xtag("current_line", print::printspan(current_line_)), + xtag("current_pos", current_pos_)); + + return std::make_pair(input_error::ok, + span_type(sol, eol)); + } + + template + const CharT * + input_state::skip_leading_whitespace() + { + scope log(XO_DEBUG(debug_flag_)); + + const CharT * ix = current_line_.lo() + current_pos_; + + this->whitespace_ = 0; + + /* skip whitespace + remember beginning of most recent line. bounds test comes first: ix may already equal current_line_.hi() (null for an empty input_state), where it must not be dereferenced */ + while ((ix != current_line_.hi()) && is_whitespace(*ix)) { + ++ix; + + ++(this->whitespace_); + } + + this->tk_start_ = ix - current_line_.lo(); + this->current_pos_ = ix - current_line_.lo(); + + return ix; + } + + template + inline std::ostream & + operator<<(std::ostream & os, + const input_state& x) + { + using xo::print::unq; + + os << ""; + + return os; + } + } +} diff --git a/xo-tokenizer/include/xo/tokenizer/scan_result.hpp b/xo-tokenizer/include/xo/tokenizer/scan_result.hpp new file mode 100644 index 00000000..975edf63 --- /dev/null +++ b/xo-tokenizer/include/xo/tokenizer/scan_result.hpp @@ -0,0 +1,112 @@ +/* file scan_result.hpp + * + * author: Roland Conybeare, Jun 2025 + */ + +#pragma once + +#include "token.hpp" +#include "tokenizer_error.hpp" +#include "input_state.hpp" + +namespace xo { + namespace scm { + /** @class scan_result + * @brief Represent result of parsing one input token.
+ * + * @code + * Possible outcomes fall into several categories + * (with T: @c token_.is_valid(), E: @cerror_.is_error()) + * + * | T | E | description | + * |-------+-------+-------------------------------------| + * | false | false | end of input, including end of line | + * | true | false | parsed token in T | + * | false | true | parse error in E | + * + * @endcode + **/ + template + class scan_result { + public: + using token_type = token; + using span_type = span; + using error_type = tokenizer_error; + using input_state_type = input_state; + + public: + scan_result(const token_type & token, + const span_type & consumed, + const error_type & error = error_type()) + : token_{token}, consumed_{consumed}, error_{error} {} + + static scan_result make_whitespace(const span_type & prefix_input); + static scan_result make_partial(const span_type & prefix_input); + /** + * @p error_src can be __FUNCTION__ from site where error generated. + * @p error_msg error message + * @p error_pos error position, relative to start of token + * @p input_state_ref input state object; + * copied into scan_result, and leaving input_state_ref.current_line cleared + **/ + static scan_result make_error_consume_current_line(const char * error_src, + std::string error_msg, + size_t error_pos, + input_state_type & input_state_ref); + + bool is_eof_or_ambiguous() const { return token_.is_invalid() && error_.is_not_an_error(); } + bool is_token() const { return token_.is_valid(); } + bool is_error() const { return error_.is_error(); } + + const token_type & get_token() const { return token_; } + const span_type & consumed() const { return consumed_; } + const error_type & error() const { return error_; } + + public: + /** Successfully parsed token, whenever tk_type != tokentype::tk_invalid. + * Will be tokentype::tk_invalid in normal cause of events for valid input, + * when consuming whitespace + **/ + token_type token_; + /** input span represented by .token, on success. 
Otherwise not defined **/ + span_type consumed_; + /** error description, whenever .error_.is_error() is true **/ + error_type error_; + }; + + template + auto scan_result::make_whitespace(const span_type& whitespace_input) -> scan_result + { + return scan_result(token_type::invalid(), whitespace_input /*consumed*/); + } + + template + auto scan_result::make_partial(const span_type& prefix_input) -> scan_result + { + return scan_result(token_type::invalid(), prefix_input /*consumed*/); + } + + template + auto + scan_result::make_error_consume_current_line(const char * error_src, + std::string error_msg, + size_t error_pos, + input_state_type & input_state_ref) -> scan_result + { + /* report+consume entire input line */ + + /* copy before altered by .consume_current_line() */ + input_state_type input_state_copy = input_state_ref; + + return scan_result(token_type::invalid(), + input_state_ref.consume_current_line(), + error_type(error_src, + error_msg, + input_state_copy, + error_pos)); + } + + } /*namespace scm*/ +} /*namespace xo*/ + +/* end scan_result.hpp */ diff --git a/xo-tokenizer/include/xo/tokenizer/span.hpp b/xo-tokenizer/include/xo/tokenizer/span.hpp new file mode 100644 index 00000000..8cf7a4a7 --- /dev/null +++ b/xo-tokenizer/include/xo/tokenizer/span.hpp @@ -0,0 +1,291 @@ +/** @file span.hpp **/ + +#pragma once + +#include "xo/indentlog/scope.hpp" +#include "xo/indentlog/print/ppdetail_atomic.hpp" +#include +#include +#include + +namespace xo { + namespace scm { + /** @class span compression/span.hpp + * + * @brief A contiguous range of characters, without ownership. + * + * @tparam CharT type for elements referred to by this span. 
+ **/ + template + class span { + public: + /** @defgroup span-type-traits span type traits **/ + ///@{ + + /** typealias for span size (in units of CharT) **/ + using size_type = std::uint64_t; + + ///@} + + public: + /** @defgroup span-ctors span constructors **/ + ///@{ + + /** null span **/ + span() : lo_{nullptr}, hi_{nullptr} {} + + /** Create span for the contiguous memory range [@p lo, @p hi) **/ + span(CharT * lo, CharT * hi) : lo_{lo}, hi_{hi} {} + + /** explicit conversion from span **/ + template + span(const span & other, + std::enable_if_t + && !std::is_same_v> * = nullptr) + : lo_{other.lo()}, hi_{other.hi()} {} + + /** copy ctor (explicit to avoid ambiguity with template ctor) **/ + span(const span & other) = default; + span & operator=(const span & other) = default; + + /** Create a null span (i.e. with null @p lo, @p hi pointers) + * A null span can be concatenated with any other span + * without triggering matching-endpoint asserts. + **/ + static span make_null() { return span(static_cast(nullptr), static_cast(nullptr)); } + + /** @brief create span for C-style string @p cstr **/ + static span from_cstr(const CharT * cstr) { + CharT * lo = cstr; + CharT * hi = cstr ? 
cstr + strlen(cstr) : nullptr; + + return span(lo, hi); + } + + /** @brief create span from std::string @p str. The span refers to storage owned by @p str; no copy is made. **/ + static span from_string(const std::string& str) { + CharT * lo = str.data(); + CharT * hi = str.data() + str.size(); + + return span(lo, hi); + } + + /** @brief concatenate two contiguous spans */ + static span concat(const span & span1, const span & span2) { + if (span1.is_null()) + return span2; + if (span2.is_null()) + return span1; + + if (span1.hi() != span2.lo()) { + scope log(XO_DEBUG(true)); + + log && log(xtag("span1.hi", (void*)span1.hi()), xtag("span2.lo", (void*)span2.lo())); + } + + assert(span1.hi() == span2.lo()); + + CharT * lo = span1.lo(); + CharT * hi = span2.hi(); + + return span(lo, hi); + } + + ///@} + + /** @defgroup span-access-methods **/ + ///@{ + + CharT * lo() const { return lo_; } /* get member span::lo_ */ + CharT * hi() const { return hi_; } /* get member span::hi_ */ + + ///@} + + /** @defgroup span-general-methods **/ + ///@{ + + /** @brief strip prefix until first occurrence of '\n', including the newline **/ + void discard_until_newline() { + for (CharT * p = lo_; p < hi_; ++p) { + if (*p == '\n') { + lo_ = p + 1; + return; + } + } + + lo_ = hi_; + } + + /** Create new span over supplied type, + * with identical (possibly misaligned) endpoints. + * + * @warning + * 1. New span uses exactly the same memory addresses. + * Endpoint pointers may not be aligned. + * 2. Implementation assumes code compiled with + * @code -fno-strict-aliasing @endcode enabled. + * + * @tparam OtherT element type for new span + **/ + template + span + cast() const { return span(reinterpret_cast(lo_), + reinterpret_cast(hi_)); } + + /** @brief create span including the first @p z members of this span.
**/ + span prefix(size_type z) const { return span(lo_, lo_ + z); } + + /** @brief create span representing prefix up to (but not including) @p *p + **/ + span prefix_upto(CharT * p) const { + if (p <= hi_) + return span(lo_, p); + else + return span(lo_, hi_); + } + + /** @brief create span with first @p z members of this span removed **/ + span after_prefix(size_type z) const { + if (lo_ + z > hi_) + z = hi_ - lo_; + + return span(lo_ + z, hi_); + } + + /** @brief create span with @p prefix of this span removed **/ + span after_prefix(const span & prefix) const { + if (!prefix.is_null() && (prefix.lo() != lo_)) { + throw std::runtime_error + ("after_prefix: expected prefix of this span"); + } + + return after_prefix(prefix.size()); + } + + /** Create span starting with position @p p. + * Does boundary checking; will return empty span if @p p is outside @c [lo_,hi) + **/ + span suffix_from(CharT * p) const { + if ((lo_ <= p) && (p <= hi_)) + return span(p, hi_); + else + return span(hi_, hi_); + } + + /** true iff this span is null. distinct from empty. **/ + bool is_null() const { return lo_ == nullptr && hi_ == nullptr; } + /** true iff this span is empty (comprises 0 elements). **/ + bool empty() const { return lo_ == hi_; } + /** report the number of elements (of type CharT) in this span. **/ + size_type size() const { return hi_ - lo_; } + + /** increase extent of this spans to include @p x. + * Requires @c hi() == @c x.lo() + **/ + span & operator+=(const span & x) { + if (hi_ == x.lo_) { + hi_ = x.hi_; + } else if (!x.is_null()) { + assert(false); + } + + return *this; + } + + /** print representation for this span on stream @p os **/ + void print(std::ostream & os) const { + os << ""; + } + ///@} + + private: + /** @defgroup span-instance-vars **/ + ///@{ + + /** start of span. + Span comprises memory address between @p lo (inclusive) and @p hi (exclusive) + **/ + CharT * lo_ = nullptr; + + /** @brief end of span. 
+ Span comprises memory address between @p lo (inclusive) and @p hi (exclusive) + **/ + CharT * hi_ = nullptr; + + ///@} + }; /*span*/ + + /** @defgroup span-operators **/ + ///@{ + + /** compare spans for equality. + * Two spans are equal iff both endpoints match exactly. + **/ + template + inline bool + operator==(const span & lhs, const span & rhs) { + return ((lhs.lo() == rhs.lo()) + && (lhs.hi() == rhs.hi())); + } + + /** compare spans for inequality. + * Two spans are unequal if either paired endpoint differs. + **/ + template + inline bool + operator!=(const span & lhs, const span & rhs) { + return ((lhs.lo() != rhs.lo()) + || (lhs.hi() != rhs.hi())); + } + + /** print a summary of @p x on stream @p os. Intended for diagnostics **/ + template + inline std::ostream & + operator<<(std::ostream & os, + const span & x) { + x.print(os); + return os; + } + + ///@} + } /*namespace scm*/ + + namespace print { + template + class printspan_impl { + public: + printspan_impl(xo::scm::span x) : span_{x} {} + + xo::scm::span span_; + }; + + template + printspan_impl printspan(const xo::scm::span& span) { + return printspan_impl(span); + } + + template + inline std::ostream & + operator<< (std::ostream & os, + const printspan_impl & x) + { + for (const CharT * p = x.span_.lo(); p < x.span_.hi(); ++p) + os << *p; + + return os; + } + +#ifndef ppdetail_atomic + template \ + PPDETAIL_ATOMIC_BODY(printspan_impl); + + template \ + PPDETAIL_ATOMIC_BODY(xo::scm::span); +#endif + + } +} /*namespace xo*/ diff --git a/xo-tokenizer/include/xo/tokenizer/token.hpp b/xo-tokenizer/include/xo/tokenizer/token.hpp new file mode 100644 index 00000000..689a4512 --- /dev/null +++ b/xo-tokenizer/include/xo/tokenizer/token.hpp @@ -0,0 +1,473 @@ +/* file token.hpp + * + * author: Roland Conybeare, Jul 2024 + */ + +#pragma once + +#include "tokentype.hpp" +#include "xo/indentlog/print/tag.hpp" +#include +#include +#include +#include + +namespace xo { + namespace scm { + namespace detail { + /* 
compute a * b^p, p >= 0 */ + constexpr double + pow_aux(double a, double b, int p) { + while (p > 0) { + if (p % 2 == 1) { + /* a * b^p = a * b^(2q + 1) = (a*b) * b^(2q) */ + a *= b; + p -= 1; + } else { + /* a * b^p = a * b^(2q) = a * (b^2)^q */ + b = b * b; + p /= 2; + } + } + + /* a * b^0 = a */ + return a; + } + + constexpr double + pow10(int p) { + if (p >= 0) + return pow_aux(1.0, 10.0, p); + else + return 1.0 / pow_aux(1.0, 10.0, -p); + } + } + + /** @class token + * @brief Represent a Schematika lexical token + **/ + template + class token { + public: + /** @defgroup token-ctors token constructors **/ + ///@{ + + /** default ctor creates token with type @c tk_invalid **/ + token() = default; + /** create token with type @c tk_type and input text @c text **/ + token(tokentype tk_type, const std::string & text = "") + : tk_type_{tk_type}, text_{text} {} + + /** create invalid token (same as null ctor, but explicit) **/ + static token invalid() { return token(); } + /** Create token representing a boolean literal from text @p txt + * @p txt must be @c true or @c false + **/ + static token bool_token(const std::string & txt) { + return token(tokentype::tk_bool, txt); + } + /** Create token representing 64-bit signed integer literal parsed from decimal @p txt. + * The string @p txt must be a decimal integer literal, since @ref i64_value re-parses @p txt.
+ **/ + static token f64_token(const std::string & txt) { + return token(tokentype::tk_f64, txt); + } + /** create token representing literal string parsed from @p txt **/ + static token string_token(const std::string & txt) { + return token(tokentype::tk_string, txt); + } + /** create token representing a symbol parsed from @p txt. + * Note that not all strings are valid symbol names. + **/ + static token symbol_token(const std::string & txt) { + return token(tokentype::tk_symbol, txt); + } + /** token representing left angle bracket @c "<" **/ + static token leftangle() { return token(tokentype::tk_leftangle); } + /** token representing right angle bracket @c ">" **/ + static token rightangle() { return token(tokentype::tk_rightangle); } + /** token representing left parenthesis @c "(" **/ + static token leftparen() { return token(tokentype::tk_leftparen); } + /** token representing right parenthesis @c ")" **/ + static token rightparen() { return token(tokentype::tk_rightparen); } + /** token representing left bracket @c "[" **/ + static token leftbracket() { return token(tokentype::tk_leftbracket); } + /** token representing right bracket @c "]" **/ + static token rightbracket() { return token(tokentype::tk_rightbracket); } + /** token representing left brace @c "{" **/ + static token leftbrace() { return token(tokentype::tk_leftbrace); } + /** token representing right brace @c "}' **/ + static token rightbrace() { return token(tokentype::tk_rightbrace); } + /** token representing period @c "." 
**/ + static token dot() { return token(tokentype::tk_dot); } + /** token representing comma @c "," **/ + static token comma() { return token(tokentype::tk_comma); } + /** token representing colon @c ":" **/ + static token colon() { return token(tokentype::tk_colon); } + /** token representing double-colon @c "::" **/ + static token doublecolon() { return token(tokentype::tk_doublecolon); } + /** token representing semicolon @c ";" **/ + static token semicolon() { return token(tokentype::tk_semicolon); } + /** token representing single-assignment @c "=" **/ + static token singleassign() { return token(tokentype::tk_singleassign); } + /** token representing unrestricted assignment @c ":=" **/ + static token assign_token() { return token(tokentype::tk_assign); } + /** token representing indirection @c "->" **/ + static token yields() { return token(tokentype::tk_yields); } + + /** token for @c "+" **/ + static token plus_token() { return token(tokentype::tk_plus); } + /** token for @c "-" **/ + static token minus_token() { return token(tokentype::tk_minus); } + /** token for @c "*" **/ + static token star_token() { return token(tokentype::tk_star); } + /** token for @c "/" **/ + static token slash_token() { return token(tokentype::tk_slash); } + + /** token representing keyword @c type **/ + static token type() { return token(tokentype::tk_type); } + /** token representing keyword @c def **/ + static token def() { return token(tokentype::tk_def); } + /** token representing keyword @c lambda **/ + static token lambda() { return token(tokentype::tk_lambda); } + /** token representing keyword @c if **/ + static token if_token() { return token(tokentype::tk_if); } + /** token representing keyword @c else **/ + static token else_token() { return token(tokentype::tk_else); } + /** token representing keyword @c let **/ + static token let() { return token(tokentype::tk_let); } + /** token representing keyword @c in **/ + static token in() { return token(tokentype::tk_in); } + 
/** token representing keyword @c end **/ + static token end() { return token(tokentype::tk_end); } + + ///@} + + /** @defgroup token-access-methods **/ + ///@{ + + tokentype tk_type() const { return tk_type_; } + const std::string & text() const { return text_; } + + ///@} + + /** @defgroup token-general-methods **/ + ///@{ + + /** true if token understood to represent valid input + * i.e. any token type except @c tk_invalid + **/ + bool is_valid() const { return tk_type_ != tokentype::tk_invalid; } + /** true for sentinel token with type tk_invalid **/ + bool is_invalid() const { return tk_type_ == tokentype::tk_invalid; } + + /** true for tokens with variable text. false for those with fixed textual representation **/ + bool has_variable_text() const { return (tk_type_ == tokentype::tk_i64 + || tk_type_ == tokentype::tk_f64 + || tk_type_ == tokentype::tk_string + || tk_type_ == tokentype::tk_symbol); } + + /** expect input matching @c true or @c false **/ + bool bool_value() const; + + /** expect input matching @c [+|-][0-9][0-9]* **/ + std::int64_t i64_value() const; + + /** expect input matching @c [+|-][0-9]*[.][0-9]*[e|E][+|-][0-9]* **/ + double f64_value() const; + + /** print human-readable token representation on stream @p os **/ + void print(std::ostream & os) const; + + ///@} + + private: + /** @defgroup token-instance-vars **/ + ///@{ + + /** category for this token **/ + tokentype tk_type_ = tokentype::tk_invalid; + + /** characters comprising this token. 
+ * only provided for certain token types: + * + * tk_i64 + * tk_f64 + * tk_string + * tk_symbol + **/ + std::string text_; + + ///@} + }; + + template + bool + token::bool_value() const { + if (tk_type_ != tokentype::tk_bool) { + throw (std::runtime_error + (tostr("token::bool_value", + ": token with type tk found where tk_bool expected", + xtag("tk", tk_type_)))); + } + + if (text_ == "true") + return true; + if (text_ == "false") + return false; + + throw (std::runtime_error + (tostr("token::bool_value", + ": unexpected input string tk_bool token", + xtag("text", text_)))); + + return false; + } + + template + std::int64_t + token::i64_value() const { + if (tk_type_ != tokentype::tk_i64) { + throw (std::runtime_error + (tostr("token::i64_value", + ": token with type tk found where tk_i64 expected", + xtag("tk", tk_type_)))); + } + + if (text_.empty()) { + throw (std::runtime_error + (tostr("token::i64_value", + ": unexpected empty input string for tk_i64 token"))); + } + + int sign = 1; + int value = 0; + { + auto ix = text_.begin(); + auto end_ix = text_.end(); + + CharT ch = *ix; + + if (ch == '+') { + ++ix; + } else if (ch == '-') { + sign = -1; + ++ix; + } + + if (ix == end_ix) { + throw (std::runtime_error + (tostr("token::i64_value", + ": input text found where at least one digit expected", + xtag("text", text_)))); + } + + for (; ix != end_ix; ++ix) { + CharT ch = *ix; + + if ((ch >= '0') && (ch <= '9')) { + value *= 10; + value += (ch - '0'); + } else { + throw (std::runtime_error + (tostr("token::i64_value", + ": unexpected char ch in integer token", + xtag("ch", ch)))); + } + } + } + + return sign * value; + } /*i64_value*/ + + template + double + token::f64_value() const { + if (tk_type_ != tokentype::tk_f64) { + throw (std::runtime_error + (tostr("token::f64_value", + ": token with type tk found where tk_f64 expected", + xtag("tk", tk_type_)))); + } + + if (text_.empty()) { + throw (std::runtime_error + (tostr("token::f64_value", + ": unexpected 
empty input string for tk_f64 token"))); + } + + int sign = 1; + /* integer representing denormalized unsigned mantissa + * (mantissa scaled by smallest power of 10 sufficient to make + * it an integer) + */ + std::int64_t mantissa = 0; + /* counts #of digits to the right of decimal point '.' */ + int rh_digits = 0; + /* sign of exponent */ + int exp_sign = 1; + /* value of exponent = integer to the right of 'e' or 'E' */ + int exponent = 0; + + /* floating-point value will represent + * sign * mantissa * 10^(exp_sign*exponent - rh_digits) + */ + { + auto ix = text_.begin(); + auto end_ix = text_.end(); + + CharT ch = *ix; + + if (ch == '+') { + ++ix; + } else if (ch == '-') { + sign = -1; + ++ix; + } + + if (ix == end_ix) { + throw (std::runtime_error + (tostr("token::f64_value", + ": input text found where at least one digit expected", + xtag("text", text_)))); + } + + /* true iff decimal point '.' present in mantissa */ + bool have_decimal_point = false; + /* true iff exponent prefix 'e' or 'E' present */ + //bool have_exponent = false; + /* counts number of digits in mantissa + * (both before and after, but not including, any decimal point) + */ + int m_digits = 0; + /* digits to the left of decimal point */ + int lh_digits = 0; + + /* loop over mantissa digits */ + for (; ix != end_ix; ++ix) { + CharT ch = *ix; + + if (ch == '.') { + if (have_decimal_point) { + throw (std::runtime_error + (tostr("token::f64_value", + ": input text found where at most one decimal point expected", + xtag("text", text_)))); + } + + have_decimal_point = true; + lh_digits = m_digits; + } else if ((ch >= '0') && (ch <= '9')) { + mantissa *= 10; + mantissa += (ch - '0'); + ++m_digits; + } else if (ch == 'e' || ch == 'E') { + //have_exponent = true; + break; // done with mantissa + } else { + /* report under this function's own name (message was copied from i64_value) */ + throw (std::runtime_error + (tostr("token::f64_value", + ": unexpected char ch in floating-point token", + xtag("ch", ch)))); + } + } + + if (have_decimal_point) + rh_digits = m_digits - lh_digits; + + if 
(ix != end_ix) { + /* continue to read exponent */ + + /* skip e|E */ + ++ix; + + if (ix == end_ix) { + throw (std::runtime_error + (tostr("token::f64_value", + ": on input text, expect at least one digit following exponent marker e|E", + xtag("text", text_)))); + } + + CharT ch = *ix; + + if (ch == '+') { + ++ix; /*skip*/ + } else if (ch == '-') { + exp_sign = -1; + ++ix; + } + + for (; ix != end_ix; ++ix) { + CharT ch = *ix; + + if ((ch >= '0') && (ch <= '9')) { + exponent *= 10; + exponent += (ch - '0'); + } else { + throw (std::runtime_error + (tostr("token::f64_value", + "; on input text, expect only digits following" + " (possibly signed) exponenct marker", + xtag("text", text_)))); + } + } + } + } + + /* floating-point value will represent + * sign * mantissa * 10^(sign*exponent - rh_digits) + */ + + double mantissa_f64 = sign * mantissa; + +#ifdef OBSOLETE_DEBUG + std::cerr << xtag("text", text_) + << xtag("rh_digits", rh_digits) + << xtag("mantissa_f64", mantissa_f64) + << xtag("exp_sign", exp_sign) + << xtag("exponent", exponent) + << std::endl; +#endif + + double retval = (mantissa_f64 + * detail::pow10((exp_sign * exponent) + - rh_digits)); + + return retval; + } /*f64_value*/ + + template + void + token::print(std::ostream & os) const { + os << ""; + } /*print*/ + + template + inline std::ostream & + operator<< (std::ostream & os, + const token & tk) + { + tk.print(os); + return os; + } + } /*Namespace scm*/ + +#ifndef ppdetail_atomic + namespace print { + PPDETAIL_ATOMIC(xo::scm::token); + } +#endif + +} /*namespace xo*/ + +/* end token.hpp */ diff --git a/xo-tokenizer/include/xo/tokenizer/tokenizer.hpp b/xo-tokenizer/include/xo/tokenizer/tokenizer.hpp new file mode 100644 index 00000000..e589b400 --- /dev/null +++ b/xo-tokenizer/include/xo/tokenizer/tokenizer.hpp @@ -0,0 +1,1057 @@ +/* file tokenizer.hpp + * + * author: Roland Conybeare, Jul 2024 + */ + +#pragma once + +#include "token.hpp" +#include "input_state.hpp" +#include "span.hpp" +#include 
"scan_result.hpp" +#include "xo/indentlog/scope.hpp" +#include "xo/indentlog/print/ppdetail_atomic.hpp" +#include + +namespace xo { + namespace scm { + /** @class tokenizer + * @brief Parse a Schematika character stream into lexical tokens + * + * Use: + * + * @code + * // see xo-tokenizer/example/tokenrepl/tokenrepl.cpp + * // for exact working code + * + * using tokenizer_type = tokenizer; + * using span_type = tokenizer_type::span_type; + * + * tokenizer_type tkz; + * span_type input = ...; + * + * while (!input.empty()) { + * auto [tk, consumed, error] = tkz.scan(input); + * + * if (tk.is_valid()) { + * // do something with tk + * } else if (error.is_error()) { + * error.report(cout); + * break; + * } + * + * input = tkz.consume(consumed, input); + * } + * + * if endofinput { + * auto [tk, consumed, error] = tzk.notify_eof() + * + * // do something with (final) tk if tk.is_valid() + * } + * + * @endcode + * + * See tokentype.hpp for token types + **/ + template + class tokenizer { + public: + using token_type = token; + using error_type = tokenizer_error; + using span_type = span; + using input_state_type = input_state; + using result_type = scan_result; + + public: + /** @defgroup tokenizer-ctors tokenizer constructors **/ + ///@{ + + tokenizer(bool debug_flag = false); + + ///@} + + /** @defgroup tokenizer-access-methods tokenizer access methods **/ + ///@{ + +#pragma GCC diagnostic push +#ifndef __APPLE__ +#pragma GCC diagnostic ignored "-Wchanges-meaning" +#endif + const input_state & input_state() const { return input_state_; } +#pragma GCC diagnostic pop + + ///@} + + /** @defgroup tokenizer-general-methods tokenizer methods **/ + ///@{ + + /** identifies punctuation chars. + * These are chars that are not permitted to appear within + * a symbol token. Instead they force completion of + * a preceding token, and start a new token with themselves + **/ + static bool is_1char_punctuation(CharT ch); + + /** more-relazed version of is_1char_punctuation. 
+ * Chars that are not permitted to appear within a symbol token, + * but may form token combined with next character + **/ + static bool is_2char_punctuation(CharT ch); + + /** assemble token from text @p token_text. + * @p initial_whitespace Amount of whitespace input being consumed from input. + * @p token_text subset of input_line representing a single token. + * @p input_state input state containing input_line + * + * retval.consumed will represent some possibly-empty prefix of @p input + **/ + static result_type assemble_token(std::size_t initial_whitespace, + const span_type & token_text, + input_state_type & input_state); + + /** degenerate version of assemble_token() on reaching end-of-file **/ + static result_type assemble_final_token(const span_type & token_text, + const input_state_type & input_state); + + /** true if tokenizer contains stored prefix of + * possibly-incomplete token + **/ + bool has_prefix() const { return !prefix_.empty(); } + + /** scan for next input token, given @p input. + * Note: + * - tokenizer can consume input (e.g. whitespace) + * without completing a token + * - input will remember the extent of the last line of input + * for which parsing has begun, but not completed. + * It's required that at least that portion of the input span + * remain valid across scan(), scan2() calls + * + * @return {parsed token, consumed span} + **/ + result_type scan(const span_type & input, + bool eof_flag); + + /** discard current line after error. Just cleans up error-reporting state **/ + void discard_current_line(); + + ///@} + + private: + /** @defgroup tokenizer-instance-vars tokenizer instance variables **/ + ///@{ + + /** track input state (line#,pos,..) for error messages. + * There's an ordering problem here: + * 1. input_state_.skip_leading_whitespace() advances current line automagically + * when it sees \n + * 2. need to capture value of @ref input_state_ _before_ newline + * 3. 
but neeed newline to end token + * Also recall input_state_type needed for reporting errors. + **/ + input_state_type input_state_; + /** Accumulate partial token here. + * This will happen if input sent to @ref tokenizer::scan + * ends without whitespace such that last available token's extent is not determined + **/ + std::string prefix_; + + ///@} + }; /*tokenizer*/ + + template + tokenizer::tokenizer(bool debug_flag) + : input_state_{debug_flag} + {} + + template + bool + tokenizer::is_1char_punctuation(CharT ch) { + switch(ch) { + case '(': + return true; + case ')': + return true; + case '[': + return true; + case ']': + return true; + case '{': + return true; + case '}': + return true; + case '<': + /* can't be 1char punctuation -- can begin lessequal token */ + return false; + case '>': + /* can't be 1char punctuation -- can begin greatequal token, + * and appears in tk_yields token + */ + return false; + case ',': + return true; + case ';': + return true; + case ':': + /* can't be 1char punctuation -- can begin assignment token */ + return false; + case '=': + /* can't be 1char punctuation -- can begin comparison token '==' */ + return false; + case '!': + /* can't be 1char punctuation -- can begin comparison token '!=' */ + return false; + case '-': + /* can't be punctuation + * - can appear inside f64 token: e.g. 1.23e-9. + * - begins tk_yields token: -> + */ + return false; + case '+': + /* can't be punctuation -- can appear inside f64 token: e.g. 1.23e+4 */ + return false; + case '*': + /* not punctuation -- allowed in symbol */ + return false; + case '/': + /* not punctuation -- for symmetry with +,- */ + return false; + case '.': + /* can't be punctuation -- can appear inside f64 token: e.g. 
1.23 */ + return false; + } + + return false; + } + + template + bool + tokenizer::is_2char_punctuation(CharT ch) { + /* can't put '-' here, because of the way it appears in numeric literals + * characters here may not appear in symbol names + */ + + switch(ch) { + case '<': + /* can begin <= */ + return true; + case '>': + /* can begin >= */ + return true; + case ':': + /* can begin := */ + return true; + case '=': + /* can begin == */ + return true; + case '!': + /* can begin != */ + return true; + } + + return false; + } + + template + auto + tokenizer::assemble_token(std::size_t initial_whitespace, + const span_type & token_text, + input_state_type & input_state_ref) -> result_type + { + /* literal|pretty|streamlined */ + log_config::style = function_style::streamlined; + + scope log(XO_DEBUG(input_state_ref.debug_flag())); + log && log(xtag("token_text", token_text), + xtag("initial_whitespace", initial_whitespace), + xtag("input_state", input_state_ref)); + + tokentype tk_type = tokentype::tk_invalid; + std::string tk_text; + + const CharT * tk_start = token_text.lo(); + const CharT * tk_end = token_text.hi(); + + const CharT * ix = tk_start; + + /* switch here applies to the first character in a token */ + switch (*ix) { + case '-': + case '+': + if (token_text.size() == 1) { + /* standalone '+' or '-' */ + if (*ix == '+') + tk_type = tokentype::tk_plus; + else if(*ix == '-') + tk_type = tokentype::tk_minus; + } + + /** fall through to numeric literal code below **/ + [[fallthrough]]; + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + /* examples of valid floating-point numbers: + * .0 + * 1e0 + * 1e + * 0. + * +1e0 + * -1e0 + * +1E+2 + * -1E+2 + * -0.123e-10 + * non-examples: + * . 
+ * - + * + + * e0 + * .e0 + * -.e-0 + * +.e+0 + * + * in particular: to be recognized as a number, + * must contain at least one digit + */ + + log && log("possible number-token"); + + /* true if initial sign -/+ encountered */ + bool sign_flag = false; + /* true if '.' encountered */ + bool period_flag = false; + /* true if 'e' | 'E' encountered. + */ + bool exponent_flag = false; + /* true when sign '-' | '+' precedes exponenct digits */ + bool exponent_sign_flag = false; + /* true when at least one digit follows exponent marker */ + bool exponent_digit_flag = false; + /* true if at least one digit encountered */ + bool number_flag = false; + + log && log(xtag("*ix", *ix), + xtag("tk.length", token_text.size())); + if (log && (ix + 1 < tk_end)) + log(xtag("*(ix+1)", *(ix + 1))); + + if ((*ix == '-') && (ix + 2 == token_text.hi()) && (*(ix + 1) == '>')) { + /* composing exactly '->' */ + tk_type = tokentype::tk_yields; + } else { + /* token (if valid) will be one of: {tk_i64, tk_f64, tk_dot}: */ + for (; ix != token_text.hi(); ++ix) { + if ((*ix == '-') || (*ix == '+')) { + /* sign allowed: + * 1. before period and before first digit + * 2. 
after exponent + */ + if (!period_flag && !number_flag && !sign_flag) { + sign_flag = true; + } else if (exponent_flag && !exponent_digit_flag) { + exponent_sign_flag = true; + } else { + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "improperly placed sign indicator", + (ix - tk_start), + input_state_ref); + } + } else if (*ix == '.') { + if (period_flag) { + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "duplicate decimal point in numeric literal", + (ix - tk_start), + input_state_ref); + } + + period_flag = true; + } else if ((*ix == 'e') || (*ix == 'E')) { + if (exponent_flag) { + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "duplicate exponent marker in numeric literal", + (ix - tk_start), + input_state_ref); + } + + exponent_flag = true; + } else if (isdigit(*ix)) { + if (exponent_flag) { + /* need digit before exponent to recognize as number */ + exponent_digit_flag = true; + } else { + number_flag = true; + } + } else { + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "unexpected character in numeric constant" /*error_description*/, + (ix - tk_start), + input_state_ref); + } + } + + if (number_flag) { + if (period_flag || exponent_flag) { + tk_type = tokentype::tk_f64; + } else { + tk_type = tokentype::tk_i64; + } + } else if (period_flag && !exponent_flag) { + tk_type = tokentype::tk_dot; + } else { + /* not a valid token */ + } + + log && log(xtag("sign_flag", sign_flag)); + log && log(xtag("period_flag", period_flag), + xtag("exponent_flag", exponent_flag), + xtag("exponent_sign_flag", exponent_sign_flag), + xtag("number_flag", number_flag)); + log && log(xtag("tk_type", tk_type)); + } + + break; + } + case '*': + if (token_text.size() == 1) { + /* standalone '*' */ + tk_type = tokentype::tk_star; + ++ix; + } else { + /* '*' isn't punctuation -- but may allow appearance in a longer token + 
* + * thinking that x*y is a symbol with an embedded '*' character; + * in particular want to support kebab-case symbols like 'foo-config' + */ + } + break; + case '/': + if (token_text.size() == 1) { + /* standalone '/' */ + tk_type = tokentype::tk_slash; + ++ix; + } + break; + case '=': + log && log("singleassign or cmpeq token"); + + if (*(ix + 1) == '=') { + tk_type = tokentype::tk_cmpeq; + ++ix; + ++ix; + } else { + /* standalone '=' */ + tk_type = tokentype::tk_singleassign; + ++ix; + } + break; + case '!': + if (*(ix + 1) == '=') { + tk_type = tokentype::tk_cmpne; + ++ix; + ++ix; + } else { + /* standlone '!' */ + + // TODO + } + break; + case '"': + { + log && log("recognize string-token"); + + tk_type = tokentype::tk_string; + + tk_text.reserve(token_text.hi() - token_text.lo()); + + ++ix; /*skip initial " char*/ + + /* true on final " */ + bool endofstring = false; + + for (; ix != token_text.hi(); ++ix) { + log && log(xtag("*ix", *ix)); + + switch(*ix) { + case '"': + endofstring = true; + + /* skip final " char, don't capture */ + ++ix; + + break; + case '\\': + /* skip escape char, don't capture */ + ++ix; + + if (ix == token_text.hi()) { + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "expecting key following escape character \\", + (ix - tk_start), + input_state_ref); + } + + switch(*ix) { + case '\\': + log && log(xtag("*ix", *ix), xtag("escaped", "t")); + tk_text.push_back(*ix); + break; + case 'n': + log && log(xtag("*ix", *ix), xtag("newline", "t")); + tk_text.push_back('\n'); + break; + case 't': + log && log(xtag("*ix", *ix), xtag("tab", "t")); + tk_text.push_back('\t'); + break; + case 'r': + log && log(xtag("*ix", *ix), xtag("cr", "t")); + tk_text.push_back('\r'); + break; + case '"': + log && log(xtag("*ix", *ix), xtag("quote", "t")); + tk_text.push_back('"'); + break; + default: + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "expecting one of n|r|\"|\\ 
following escape \\", + (ix - tk_start), + input_state_ref); + } + break; + default: + tk_text.push_back(*ix); + break; + } + + if (endofstring) + break; + } + + if (!endofstring) { + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "missing terminating '\"' to complete literal string", + (ix - tk_start), + input_state_ref); + } + + log && log(tostr("tokenizer::assemble_token", + xtag("tk_text", tk_text))); + + break; + } + case 'a': case 'A': + case 'b': case 'B': + case 'c': case 'C': + case 'd': case 'D': + case 'e': case 'E': + case 'f': case 'F': + case 'g': case 'G': + case 'h': case 'H': + case 'i': case 'I': + case 'j': case 'J': + case 'k': case 'K': + case 'l': case 'L': + case 'm': case 'M': + case 'n': case 'N': + case 'o': case 'O': + case 'p': case 'P': + case 'q': case 'Q': + case 'r': case 'R': + case 's': case 'S': + case 't': case 'T': + case 'u': case 'U': + case 'v': case 'V': + case 'w': case 'W': + case 'x': case 'X': + case 'y': case 'Y': + case 'z': case 'Z': + { + /* symbol/identifier must begin with a letter? + * we want to accept some other chars too. + * specifically want to allow identifiers: + * this-is-the-way + * this+is+also+the+way + * how/much/is/that/doggy + * put*an*asterisk*in*that + * something%special% + * + * like pure lisp, we don't allow: + * - identifier beginning with digit + * - period . 
+ * + * unlike pure lisp, we don't allow anywhere in a symbol: + * - colon : + * - semicolon ; + * - comma , + * + * also we don't allow symbols to begin with special chars + */ + + tk_type = tokentype::tk_symbol; + break; + } + case '<': + { + log && log("leftangle or lessequal token"); + + if (*(ix + 1) == '=') { + tk_type = tokentype::tk_lessequal; + ++ix; + ++ix; + } else { + tk_type = tokentype::tk_leftangle; + ++ix; + } + break; + } + case '>': + { + log && log("rightangle or greatequal token"); + + if (*(ix + 1) == '=') { + tk_type = tokentype::tk_greatequal; + ++ix; + ++ix; + } else { + tk_type = tokentype::tk_rightangle; + ++ix; + } + break; + } + case '(': + tk_type = tokentype::tk_leftparen; + ++ix; + break; + case ')': + tk_type = tokentype::tk_rightparen; + ++ix; + break; + case '[': + tk_type = tokentype::tk_leftbracket; + ++ix; + break; + case ']': + tk_type = tokentype::tk_rightbracket; + ++ix; + break; + case '{': + tk_type = tokentype::tk_leftbrace; + ++ix; + break; + case '}': + tk_type = tokentype::tk_rightbrace; + ++ix; + break; + case ',': + tk_type = tokentype::tk_comma; + ++ix; + break; + case ';': + tk_type = tokentype::tk_semicolon; + ++ix; + break; + case ':': + { + log && log("colon or assignment token"); + + if (*(ix + 1) == '=') { + tk_type = tokentype::tk_assign; + ++ix; + ++ix; + } else { + tk_type = tokentype::tk_colon; + ++ix; + } + break; + } + default: + break; + } + + if (tk_type == tokentype::tk_invalid) { + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "illegal input character", + (ix - tk_start), + input_state_ref); + } + + if ((tk_type == tokentype::tk_i64) + || (tk_type == tokentype::tk_f64) + || (tk_type == tokentype::tk_symbol)) + { + /* note: capturing token text here; + * for numeric literals will re-parse in token::i64_value() / token::f64_value() + */ + tk_text = std::string(tk_start, tk_end); + } else if (tk_type == tokentype::tk_string) { + ; /* nothing to do here -- desired 
tk_text already constructed */ + } + + if (tk_type == tokentype::tk_symbol) { + /* check for keywords */ + + bool keep_text = false; + + if ((tk_text == "true") || (tk_text == "false")) { + tk_type = tokentype::tk_bool; + keep_text = true; + } else if (tk_text == "type") { + tk_type = tokentype::tk_type; + } else if (tk_text == "def") { + tk_type = tokentype::tk_def; + } else if (tk_text == "lambda") { + tk_type = tokentype::tk_lambda; + } else if (tk_text == "if") { + tk_type = tokentype::tk_if; + } else if (tk_text == "then") { + tk_type = tokentype::tk_then; + } else if (tk_text == "else") { + tk_type = tokentype::tk_else; + } else if (tk_text == "let") { + tk_type = tokentype::tk_let; + } else if (tk_text == "in") { + tk_type = tokentype::tk_in; + } else if (tk_text == "end") { + tk_type = tokentype::tk_end; + } else { + /* keep as symbol */ + keep_text = true; + } + + if (!keep_text) + tk_text.clear(); + } + + /* input.prefix(0): + * require caller preserves current input line until it's entirely exhausted + */ + return result_type(token_type(tk_type, std::move(tk_text)), + input_state_ref.current_line().prefix(0)); + } /*assemble_token*/ + + /* TODO: input_state_ as argument ? */ + template + auto + tokenizer::assemble_final_token(const span_type & token_text, + const input_state_type & input_state) -> result_type + { + return assemble_token(0 /*initial_whitespace*/, + token_text, + input_state); + } + + template + auto + tokenizer::scan(const span_type & input, + bool eof_flag) -> result_type + { + scope log(XO_DEBUG(input_state_.debug_flag())); + + log && log(xtag("input", input)); + + /* - Always at beginning of token when scan() invoked + * - scan will not report any portion of line as consumed until it has + * emitted all tokens in that line. + * rationale: caller is allowed to discard storage that + * scan() reports as consumed. But will be holding that line + * until all tokens have been read. 
+ * - this means caller will typically call scan() + * with the same input span multiple times + */ + + /* automagically no-ops when the same input presented twice */ + this->input_state_.capture_current_line(input, eof_flag); + + const CharT * ix = this->input_state_.skip_leading_whitespace(); + + if(ix == input.hi()) { + log && log("end input -> consume current line"); + + /* entirety of current line has been tokenized + * -> caller may consume it + */ + return result_type::make_whitespace(this->input_state_.consume_current_line()); + } + + /* ix: if ix < input.hi: first non-whitespace character after input_state_.current_pos_ */ + + // TODO: + // 1. hoist complete_flag up here + // 2. use in each branch + // 3. common check for prefix-capturing after if-cascade below done + + /* here: *ix is not whitespace */ + + auto whitespace_z = input_state_.whitespace(); + + log && log(xtag("whitespace_z", whitespace_z)); + + /* tk_start points to known beginning of token + * (after any whitespace) + * + * goal is to leave ix pointing to 1 char past the end of the token + */ + const CharT * tk_start = ix; + + if (is_1char_punctuation(*ix)) { + /* 1-character token */ + ++ix; + } else if (is_2char_punctuation(*ix)) { + CharT ch1 = *ix; + + (void)ch1; + + ++ix; + +#ifdef OBSOLETE // no longer a thing. either input ends in whitespace, or ends translation unit + if (ix == input.hi()) { + /* need more input to know if/when token complete */ + this->prefix_ += std::string(tk_start, input.hi()); + + log && log(xtag("captured-prefix1", this->prefix_)); + } else +#endif + { + /* extend to a 2-char token only when the next char really completes one: + * each of <= >= := == != ends in '='. + * Any other char (e.g. '(' in "<(", or the second ':' in "::") begins its own token + * and must not be swallowed into this one. + * Also never dereference ix at input.hi(). + */ + if ((ix != input.hi()) && (*ix == '=')) { + /* include next char */ + ++ix; + } + } + } else if (*ix == '"') { + bool complete_flag = false; + + /* 1. embedded space/tab allowed in string literal. + * 2. embedded newline/cr not allowed. 
+ */ + CharT prev_ch = '"'; + + ++ix; + + for (; ix != input.hi(); ++ix) { + /* looking for unescaped " char to end literal */ + if (*ix == '"') { + if (prev_ch != '\\') { + ++ix; /* include terminating " for assemble_token */ + complete_flag = true; + break; + } + } else if ((*ix == '\n') || (*ix == '\r')) { + log && log ("string literal with naked newline or CR"); + + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "must use \\n or \\r to encode newline/cr in string literal", + (ix - tk_start), + this->input_state_); + } + + /* a backslash that is itself escaped must not escape the following char; + * otherwise a literal ending in an escaped backslash is never seen to terminate + */ + if ((prev_ch == '\\') && (*ix == '\\')) + prev_ch = CharT('\0'); + else + prev_ch = *ix; + } + + if (!complete_flag) { + log && log("unterminated string literal"); + + return result_type::make_error_consume_current_line + (__FUNCTION__ /*src_function*/, + "unterminated string literal", + (ix - tk_start), + this->input_state_); + } + } else { + /* ix is start of some token */ + + if (*ix == '-') { + /* this section load-bearing for input '->' scanning from beginning of token */ + ++ix; + + if (ix == input.hi()) { + /* need more input to know if/when token complete -- see captured-prefix5 below */ + } else { + CharT ch2 = *ix; + + if (ch2 == '>') { + /* include next char and complete token */ + ++ix; + + log && log("complete '->' token"); + + this->input_state_.advance_until(ix); + + return assemble_token(whitespace_z, + span_type(tk_start, ix) /*token*/, + input_state_); + } + + /* here: -123, -.5e-21 for example */ + } + } else if (*ix == '>') { + /* this section load-bearing for input '>=' scanning from beginning of token. 
+ * Need this because '>' necessarily excluded from is_1char_punctuation() + */ + ++ix; + + if (ix == input.hi()) { + /* need more input to know if/when token complete -- see captured-prefix5 below */ + } else { + CharT ch2 = *ix; + + if (ch2 != '=') { + log && log("complete '>=' token"); + + this->input_state_.advance_until(ix); + + /* ignore next char and complete token */ + return assemble_token(whitespace_z, + span_type(tk_start, ix) /*token*/, + this->input_state_); + } + + /* here: >= for example */ + } + } + + /* scan until: + * - whitespace + * - punctuation + */ + for (; ix != input.hi(); ++ix) { + if (input_state_type::is_whitespace(*ix) + || is_1char_punctuation(*ix) + || is_2char_punctuation(*ix)) + { + break; + } + + /* this section load-bearing for input '>' after beginning of a token, e.g. p> */ + if ((ix > tk_start) && (*ix == '>')) + break; + + /* this section load-bearing for input '->' at the end of another token, e.g. p->q */ + if (*ix == '-') { + if (ix + 1 == input.hi()) { + /* need more input to know if/when token complete + * + * apple-banana parses as: {tk_symbol: apple-banana} + * apple-> parses as: {tk_symbol: apple} {tk_yields} + * apple- illegal (may not end symbol with '-') + */ + break; + } + + if (*(ix + 1) == '>') { + /* treat '->' as punctuation; complete preceding token */ + break; + } + } + } + +#ifdef OBSOLETE + if (ix == input.hi()) { + /* need more input to know if/when token complete */ + this->prefix_ += std::string(tk_start, input.hi()); + + log && log(xtag("captured-prefix5", this->prefix_)); + } +#endif + } + + log && log("assemble token z", xtag("token_z", ix - tk_start)); + + assert(tk_start < ix); + + this->input_state_.advance_until(ix); + + return assemble_token(whitespace_z, + span_type(tk_start, ix) /*token*/, + this->input_state_); + } /*scan*/ + +#ifdef OBSOLETE + template + auto + tokenizer::scan2(const span_type & input, bool eof) -> result_type { + scope log(XO_DEBUG(input_state_.debug_flag())); + + auto sr = 
this->scan(input); + + if (sr.is_token() || sr.is_error() || !eof) + return sr; + + /* control here only if input contains no unambiguous tokens. + * This implies it contains _at most one_ final token. + */ + + span_type input2 = input.after_prefix(sr.consumed()); + + /* need to include src.consumed() in retval */ + + auto sr2 = this->notify_eof(input2); + + return result_type(sr2.get_token(), + span_type::concat(sr.consumed(), sr2.consumed()), + sr2.error()); + } +#endif + +#ifdef OBSOLETE + template + auto + tokenizer::consume(const span_type & consumed, + const span_type & input) -> span_type + { + this->input_state_.consume(consumed.size()); + + return input.after_prefix(consumed); + } +#endif + + template + void + tokenizer::discard_current_line() + { + this->input_state_.discard_current_line(); + } + +#ifdef OBSOLETE + template + auto + tokenizer::notify_eof(const span_type & input) -> result_type { + scope log(XO_DEBUG(input_state_.debug_flag())); + + log && log(xtag("prefix_", prefix_), xtag("prefix_.size", prefix_.size()), xtag("input", input)); + + /* almost meretricious to include input here, + * when called from scan2() it can only be whitespace + */ + return result_type::make_whitespace(input); + } /*notify_eof*/ +#endif + } /*namespace scm*/ +} /*namespace xo*/ + +/* end tokenizer.hpp */ diff --git a/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp b/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp new file mode 100644 index 00000000..6a673e53 --- /dev/null +++ b/xo-tokenizer/include/xo/tokenizer/tokenizer_error.hpp @@ -0,0 +1,162 @@ +/* file tokenizer_error.hpp + * + * author: Roland Conybeare, Jun 2025 + */ + +#pragma once + +#include "input_state.hpp" +#include "tokentype.hpp" +#include "span.hpp" +#include + +namespace xo { + namespace scm { + /** @class tokenizer_error + * @brief represent a lexing error, with context + * + * @tparam CharT representation for single characters + **/ + template + class tokenizer_error { + public: + using 
input_state_type = input_state; + using span_type = span; + + public: + /** @defgroup tokenizer-error-ctors **/ + ///@{ + + /** Default ctor represents a not-an-error sentinel object **/ + tokenizer_error() = default; + /** Constructor to capture parsing error context + * @p src_function and @p error_description are stored as given; @p input_state is copied into this error + * @p error_pos error location relative to token start + **/ + tokenizer_error(const char * src_function, + std::string error_description, + const input_state_type & input_state, + size_t error_pos) + : src_function_{src_function}, + error_description_{std::move(error_description)}, + input_state_{input_state}, + error_pos_{error_pos} + { + scope log(XO_DEBUG(input_state.debug_flag())); + + log && log(xtag("input_state.current_pos", input_state.current_pos()), + xtag("error_pos", error_pos)); + } + ///@} + + /** @defgroup tokenizer-error-access-methods **/ + ///@{ + + const char * src_function() const { return src_function_; } + const std::string & error_description() const { return error_description_; } +#pragma GCC diagnostic push +#ifndef __APPLE__ +#pragma GCC diagnostic ignored "-Wchanges-meaning" +#endif + const input_state_type & input_state() const { return input_state_; } /* NOTE(review): presumably the suppression is needed because this accessor reuses the name input_state -- confirm */ +#pragma GCC diagnostic pop + size_t tk_start() const { return input_state_.current_pos(); } /* NOTE(review): returns current_pos(), whereas report() reads input_state_.tk_start() -- confirm this is intended */ + size_t whitespace() const { return input_state_.whitespace(); } + size_t error_pos() const { return error_pos_; } + + ///@} + + /** @defgroup tokenizer-error-general-methods **/ + ///@{ + + /** true iff error description is non-empty, i.e. for anything but a sentinel error object **/ + bool is_error() const { return !error_description_.empty(); } + /** true iff error description is empty, i.e. only for an object in sentinel state **/ + bool is_not_an_error() const { return error_description_.empty(); } + + /** Print representation to stream @p os. Intended for tokenizer diagnostics. + * For Schematika errors prefer @ref report + **/ + void print(std::ostream & os) const; + + /** Print human-oriented error report on @p os. 
**/ + void report(std::ostream & os) const; + + ///@} + + private: + /** @defgroup tokenizer-error-vars **/ + ///@{ + + /** source location (in tokenizer) at which error identified **/ + char const * src_function_ = nullptr; + /** static error description; empty for the not-an-error sentinel **/ + std::string error_description_; + /** input state associated with this error. + * Sufficient to precisely locate it with context. + **/ + input_state_type input_state_; + /** position of error, relative to token start **/ + size_t error_pos_ = 0; + + ///@} + }; /*tokenizer_error*/ + + template + void + tokenizer_error::print(std::ostream & os) const { + os << ""; /* NOTE(review): writes an empty string literal, so nothing is printed -- looks like the intended payload is missing; confirm */ + } + + template + void + tokenizer_error::report(std::ostream & os) const { + using namespace std; + + if (!error_description_.empty()) { + const char * prefix = "input: "; + /* input_state.tk_start: position of first character in token + * input_state.current_pos: position of first character following preceding token. + * error_pos: position (relative to start) at which failure detected + */ + const size_t tk_start = input_state_.tk_start(); + const size_t tk_indent = (strlen(prefix) + tk_start); + const size_t error_pos = 1 + tk_start + error_pos_; + + os << "token col: " << tk_start << ", error col: " << error_pos << "\n"; + os << prefix; + for (const char *p = input_state_.current_line().lo(), + *e = input_state_.current_line().hi(); p < e; ++p) + { + os << *p; + } + /* no newline written here: presumably current_line() already ends with one -- TODO confirm */ + os << std::setw(tk_indent) << " "; + + for (size_t i = 0; i < error_pos_; ++i) { + os << '_'; + } + os << '^' << endl; + + os << error_description_ << endl; + } + } + + template + inline std::ostream & + operator<< (std::ostream & os, + const tokenizer_error & tkerr) + { + tkerr.print(os); + return os; + } + } /*namespace scm*/ +} /*namespace xo*/ + +/* end tokenizer_error.hpp */ diff --git a/xo-tokenizer/include/xo/tokenizer/tokentype.hpp b/xo-tokenizer/include/xo/tokenizer/tokentype.hpp new file mode 100644 index 00000000..eeeb7dd0 --- /dev/null +++ 
b/xo-tokenizer/include/xo/tokenizer/tokentype.hpp @@ -0,0 +1,192 @@ +/** @file tokentype.hpp + * + * author: Roland Conybeare, Jul 2024 + **/ + +#pragma once + +#include "xo/indentlog/print/tag.hpp" // for STRINGIFY +#include "xo/indentlog/print/ppdetail_atomic.hpp" +#include + +namespace xo { + namespace scm { + /** @enum tokentype + * Enum to identify different schematika input token types + * + * Schematica code examples: + * + * @code + * type point :: { xcoord : f64, ycoord : f64 }; + * type matrix :: array; // 2-d array + * + * decl hypot(x : f64, y : f64) -> f64; + * + * def hypot(x : f64, y : f64) { + * let + * x2 = (x * x); + * y2 = (y * y); + * hypot2 = (x2 + y2); + * in + * sqrt(hypot2); + * }; + * + * def someconst 4; + * + * def foo(v : vec) { + * def (pi : f64) = 3.1415926; + * def (h : (f64,f64) -> f64) = hypot; + * + * h = hypot3; + * }; + * + * def matrixproduct(x : matrix, y : matrix) { + * [i, j : x.row(i) * y.col(j)]; + * }; + * @endcode + **/ + enum class tokentype { + /** sentinel value **/ + tk_invalid = -1, + + /** a boolean constant **/ + tk_bool, + + /** an integer constant (signed 64-bit integer) **/ + tk_i64, + + /** a 64-bit floating-point constant **/ + tk_f64, + + /** a string literal **/ + tk_string, + + /** a symbol **/ + tk_symbol, + + /** left-hand parenthesis @c '(' **/ + tk_leftparen, + + /** right-hand parenthesis @c ')' **/ + tk_rightparen, + + /** left-hand bracket @c '[' **/ + tk_leftbracket, + + /** right-hand bracket @c ']' **/ + tk_rightbracket, + + /** left-hand brace @c '{' **/ + tk_leftbrace, + + /** right-hand brace @c '}' **/ + tk_rightbrace, + + /** left-hand angle bracket @c '<' **/ + tk_leftangle, + + /** right-hand angle bracket @c '>' **/ + tk_rightangle, + + /** less-equal @c '<=' **/ + tk_lessequal, + + /** great-equal @c '>=' **/ + tk_greatequal, + + /** dot @c '.' 
**/ + tk_dot, + + /** comma @c ',' **/ + tk_comma, + + /** colon @c ':' **/ + tk_colon, + + /** double-colon @c '::' **/ + tk_doublecolon, + + /** semi-colon @c ';' **/ + tk_semicolon, + + /** single equals sign @c '=' **/ + tk_singleassign, + + /** assignment @c ':=' **/ + tk_assign, + + /** indirection @c '->' **/ + tk_yields, + + /** note: operators not treated as punctuation + * 'do-always' is a legal variable name, + * as is 'maybe*2', 'maybe+1', 'path/to/foo' + **/ + + /** operator @c '+' **/ + tk_plus, + /** operator @c '-' **/ + tk_minus, + /** operator @c '*' **/ + tk_star, + /** operator @c '/' **/ + tk_slash, + + /** operator @c '==' **/ + tk_cmpeq, + /** operator @c '!=' **/ + tk_cmpne, + + /** keyword @c 'type' **/ + tk_type, + + /** keyword @c 'def' **/ + tk_def, + + /** keyword @c 'lambda' **/ + tk_lambda, + + /** keyword @c 'if' **/ + tk_if, + + /** keyword @c 'then' **/ + tk_then, + + /** keyword @c 'else' **/ + tk_else, + + /** keyword @c 'let' **/ + tk_let, + + /** keyword @c 'in' **/ + tk_in, + + /** keyword @c 'end' **/ + tk_end, + + /** not a token type: one past the last real enumerator, so it counts the entries above (excluding tk_invalid) **/ + n_tokentype + }; /*tokentype*/ + + /** String representation for enum value. 
+ * For example @c tokentype_descr(tokentype::tk_if) -> @c "if" + **/ + extern char const * + tokentype_descr(tokentype tk_type); + + /** Print enum value for @p tk_type on stream @p os **/ + inline std::ostream & + operator<< (std::ostream & os, tokentype tk_type) { + os << tokentype_descr(tk_type); + return os; + } + } /*namespace scm*/ + +#ifndef ppdetail_atomic + namespace print { + PPDETAIL_ATOMIC(xo::scm::tokentype); + } /*namespace print*/ +#endif +} /*namespace xo*/ + +/* end tokentype.hpp */ diff --git a/xo-tokenizer/src/tokenizer/CMakeLists.txt b/xo-tokenizer/src/tokenizer/CMakeLists.txt new file mode 100644 index 00000000..505b2040 --- /dev/null +++ b/xo-tokenizer/src/tokenizer/CMakeLists.txt @@ -0,0 +1,11 @@ +# tokenizer/CMakeLists.txt + +set(SELF_LIB xo_tokenizer) +set(SELF_SRCS + tokentype.cpp + token.cpp) + +xo_add_shared_library4(${SELF_LIB} ${PROJECT_NAME}Targets ${PROJECT_VERSION} 1 ${SELF_SRCS}) +xo_dependency(${SELF_LIB} indentlog) + +# end CMakeLists.txt diff --git a/xo-tokenizer/src/tokenizer/token.cpp b/xo-tokenizer/src/tokenizer/token.cpp new file mode 100644 index 00000000..2ed92ad5 --- /dev/null +++ b/xo-tokenizer/src/tokenizer/token.cpp @@ -0,0 +1,9 @@ +/** @file token.cpp + * + * author: Roland Conybeare + **/ + +#include "token.hpp" +#include + +/** end token.cpp **/ diff --git a/xo-tokenizer/src/tokenizer/tokentype.cpp b/xo-tokenizer/src/tokenizer/tokentype.cpp new file mode 100644 index 00000000..33d683de --- /dev/null +++ b/xo-tokenizer/src/tokenizer/tokentype.cpp @@ -0,0 +1,74 @@ +/* file tokentype.cpp + * + * author: Roland Conybeare + */ + +#include "tokentype.hpp" + +namespace xo { + namespace scm { + char const * + tokentype_descr(tokentype tk_type) + { +#define CASE(x) case tokentype::x: return STRINGIFY(x) + + switch(tk_type) { + CASE(tk_bool); + CASE(tk_i64); + CASE(tk_f64); + CASE(tk_string); + CASE(tk_symbol); + CASE(tk_leftparen); + + CASE(tk_rightparen); + CASE(tk_leftbracket); + CASE(tk_rightbracket); + 
CASE(tk_leftbrace); + CASE(tk_rightbrace); + + CASE(tk_leftangle); + CASE(tk_rightangle); + CASE(tk_lessequal); + CASE(tk_greatequal); + CASE(tk_dot); + CASE(tk_comma); + CASE(tk_colon); + + CASE(tk_doublecolon); + CASE(tk_semicolon); + CASE(tk_singleassign); + CASE(tk_assign); + CASE(tk_yields); + + CASE(tk_plus); + CASE(tk_minus); + CASE(tk_star); + CASE(tk_slash); + + CASE(tk_cmpeq); + CASE(tk_cmpne); + + CASE(tk_type); + CASE(tk_def); + CASE(tk_lambda); + CASE(tk_if); + CASE(tk_then); + CASE(tk_else); + CASE(tk_let); + + CASE(tk_in); + CASE(tk_end); + + case tokentype::tk_invalid: + case tokentype::n_tokentype: + return "?tokentype"; + } + +#undef CASE + + return "???"; + } /*tokentype_descr*/ + } /*namespace scm*/ +} /*namespace xo*/ + +/* end tokentype.cpp */ diff --git a/xo-tokenizer/utest/CMakeLists.txt b/xo-tokenizer/utest/CMakeLists.txt new file mode 100644 index 00000000..cc080294 --- /dev/null +++ b/xo-tokenizer/utest/CMakeLists.txt @@ -0,0 +1,13 @@ +# build unittest tokenizer/utest + +set(SELF_EXECUTABLE_NAME utest.tokenizer) +set(SELF_SOURCE_FILES + tokenizer_utest_main.cpp + tokenizer.test.cpp + token.test.cpp) + +xo_add_utest_executable(${SELF_EXECUTABLE_NAME} ${SELF_SOURCE_FILES}) +xo_self_dependency(${SELF_EXECUTABLE_NAME} xo_tokenizer) +xo_external_target_dependency(${SELF_EXECUTABLE_NAME} Catch2 Catch2::Catch2) + +# end CMakeLists.txt diff --git a/xo-tokenizer/utest/token.test.cpp b/xo-tokenizer/utest/token.test.cpp new file mode 100644 index 00000000..80ee6e4f --- /dev/null +++ b/xo-tokenizer/utest/token.test.cpp @@ -0,0 +1,266 @@ +/* file token.test.cpp + * + * author: Roland Conybeare + */ + +#include "xo/tokenizer/token.hpp" +#include +#include + +namespace xo { + using token = xo::scm::token; + using xo::scm::tokentype; + + namespace ut { + // also see tokenizer.test.cpp for syntax + + namespace test2 { + struct testcase_i64 { + std::string text_; + bool expect_throw_; + std::int64_t expected_; + }; + + std::vector s_testcase_v = { + {"", 
true, 0}, + {"0", false, 0}, + {"-", true, 0}, + {"+", true, 0}, + {"-0", false, 0}, + {"+0", false, 0}, + {"1", false, 1}, + {"-1", false, -1}, + {"9", false, 9}, + {"-9", false, -9}, + {"12", false, 12}, + {"+12", false, 12}, + {"-12", false, -12}, + {"99", false, 99}, + {"-99", false, -99}, + {"123x", true, 0}, + }; + + TEST_CASE("parse-i64", "[token]") { + for (std::size_t i_tc = 0, n_tc = s_testcase_v.size(); i_tc < n_tc; ++i_tc) { + INFO(xtag("i_tc", i_tc)); + + auto const & testcase = s_testcase_v[i_tc]; + + token tk(tokentype::tk_i64, + testcase.text_); + + REQUIRE(tk.tk_type() == tokentype::tk_i64); + + bool throw_flag = false; + try { + std::int64_t x = tk.i64_value(); + + REQUIRE(x == testcase.expected_); + } catch (std::exception & ex) { + throw_flag = true; + } + + REQUIRE(throw_flag == testcase.expect_throw_); + } + } + } + + namespace test3 { + TEST_CASE("error-i64", "[token]") { + token tk(tokentype::tk_i64, "+"); + + bool throw_flag = false; + + try { + tk.i64_value(); + } catch(std::exception & ex) { + throw_flag = true; + } + + REQUIRE(throw_flag); + } + } + + namespace test4 { + struct testcase_f64 { + std::string text_; + bool expect_throw_; + double expected_; + }; + + std::vector s_testcase_v = { + {"", true, 0}, + {"0", false, 0}, + {"-", true, 0}, + {"+", true, 0}, + {"-0", false, 0}, + + {"+0", false, 0}, + {"1", false, 1}, + {"-1", false, -1}, + {"9", false, 9}, + {"-9", false, -9}, + + {"12", false, 12}, + {"+12", false, 12}, + {"-12", false, -12}, + {"99", false, 99}, + {"-99", false, -99}, + + {"123x", true, 0}, + {"0.0", false, 0.0}, + {"0.1", false, 0.1}, + {"0.12", false, 0.12}, + {"0.123", false, 0.123}, + + {"0.1234", false, 0.1234}, + {"0.12345", false, 0.12345}, + {"0.123456", false, 0.123456}, + {"0.1234567", false, 0.1234567}, + {"0.12345678", false, 0.12345678}, + + {"0.123456789", false, 0.123456789}, + {"+0.0", false, 0.0}, + {"+0.1", false, 0.1}, + {"+0.12", false, 0.12}, + {"+0.123", false, 0.123}, + + {"+0.1234", false, 
0.1234}, + {"+0.12345", false, 0.12345}, + {"+0.123456", false, 0.123456}, + {"+0.1234567", false, 0.1234567}, + {"+0.12345678", false, 0.12345678}, + + {"+0.123456789", false, 0.123456789}, + {"+0.0e0", false, 0.0}, + {"+0.1e0", false, 0.1}, + {"+0.12e0", false, 0.12}, + {"+0.123e0", false, 0.123}, + + {"+0.1234e0", false, 0.1234}, + {"+0.12345e0", false, 0.12345}, + {"+0.123456e0", false, 0.123456}, + {"+0.1234567e0", false, 0.1234567}, + {"+0.12345678e0", false, 0.12345678}, + + {"+0.123456789e0", false, 0.123456789}, + {"+0.0e1", false, 00.}, + {"+0.1e1", false, 01.}, + {"+0.12e1", false, 01.2}, + {"+0.123e1", false, 01.23}, + + {"+0.1234e1", false, 01.234}, + {"+0.12345e1", false, 01.2345}, + {"+0.123456e1", false, 01.23456}, + {"+0.1234567e1", false, 01.234567}, + {"+0.12345678e1", false, 01.2345678}, + + {"+0.123456789e1", false, 01.23456789}, + {"+0.0E1", false, 00.}, + {"+0.1E1", false, 01.}, + {"+0.12E1", false, 01.2}, + {"+0.123E1", false, 01.23}, + + {"+0.1234E1", false, 01.234}, + {"+0.12345E1", false, 01.2345}, + {"+0.123456E1", false, 01.23456}, + {"+0.1234567E1", false, 01.234567}, + {"+0.12345678E1", false, 01.2345678}, + + {"+0.123456789E1", false, 01.23456789}, + {"+0.0e9", false, 0.0}, + {"+0.1e9", false, 0.1e9}, + {"+0.12e9", false, 0.12e9}, + {"+0.123e9", false, 0.123e9}, + + {"+0.1234e9", false, 0.1234e9}, + {"+0.12345e9", false, 0.12345e9}, + {"+0.123456e9", false, 0.123456e9}, + {"+0.1234567e9", false, 0.1234567e9}, + {"+0.12345678e9", false, 0.12345678e9}, + + {"+0.123456789e9", false, 0.123456789e9}, + {"-0.0", false, -0.0}, + {"-0.1", false, -0.1}, + {"-0.12", false, -0.12}, + {"-0.123", false, -0.123}, + + {"-0.1234", false, -0.1234}, + {"-0.12345", false, -0.12345}, + {"-0.123456", false, -0.123456}, + {"-0.1234567", false, -0.1234567}, + {"-0.12345678", false, -0.12345678}, + + {"-0.123456789", false, -0.123456789}, + {"00.", false, 0.0}, + {"01.", false, 1.0}, + {"01.2", false, 1.2}, + {"01.23", false, 1.23}, + + {"01.234", false, 
1.234}, + {"01.2345", false, 1.2345}, + {"01.23456", false, 1.23456}, + {"01.234567", false, 1.234567}, + {"01.2345678", false, 1.2345678}, + + {"01.23456789", false, 1.23456789}, + {"0.0", false, 0.0}, + {"1.2", false, 1.2}, + {"12.", false, 12.0}, + {"12.3", false, 12.3}, + + {"12.34", false, 12.34}, + {"12.345", false, 12.345}, + {"12.3456", false, 12.3456}, + {"12.34567", false, 12.34567}, + {"12.345678", false, 12.345678}, + + {"12.3456789", false, 12.3456789}, + {"01.23", false, 1.23}, + {"12.3", false, 12.3}, + {"123.", false, 123.0}, + {"123.4", false, 123.4}, + + {"123.45", false, 123.45}, + {"123.456", false, 123.456}, + {"123.4567", false, 123.4567}, + {"123.45678", false, 123.45678}, + {"123.456789", false, 123.456789}, + }; + + TEST_CASE("parse-f64", "[token]") { + for (std::size_t i_tc = 0, n_tc = s_testcase_v.size(); i_tc < n_tc; ++i_tc) { + auto const & testcase = s_testcase_v[i_tc]; + + INFO(tostr(xtag("i_tc", i_tc), + xtag("text", testcase.text_) + )); + + token tk(tokentype::tk_f64, + testcase.text_); + + REQUIRE(tk.tk_type() == tokentype::tk_f64); + + bool throw_flag = false; + std::string ex_msg; + + try { + double x = tk.f64_value(); + + REQUIRE(x == Approx(testcase.expected_).epsilon(1.0e-15)); + } catch (std::exception & ex) { + ex_msg = ex.what(); + + throw_flag = true; + } + + INFO(xtag("ex_msg", ex_msg)); + + REQUIRE(throw_flag == testcase.expect_throw_); + } + } + } /*namespace*/ + } /*namespace ut*/ +} /*namespace xo*/ + +/* end token.test.cpp */ diff --git a/xo-tokenizer/utest/tokenizer.test.cpp b/xo-tokenizer/utest/tokenizer.test.cpp new file mode 100644 index 00000000..604b9d25 --- /dev/null +++ b/xo-tokenizer/utest/tokenizer.test.cpp @@ -0,0 +1,576 @@ +/* file tokenizer.test.cpp + * + * author: Roland Conybeare + */ + +#include "xo/tokenizer/tokenizer.hpp" +#include + +namespace xo { + using xo::scm::tokentype; + using token = xo::scm::token; + using xo::scm::span; + + namespace ut { + /** Two-pass test harness. 
+ * + * First pass - verify test assertions. + * Second pass only if first pass failed. + * On second pass, enable verbose logging + **/ + struct rehearser { + rehearser(std::uint32_t att = 0) : attention_{att} {} + + /* expect at most one iterator to exist per rehearser instance **/ + struct iterator { + explicit iterator(rehearser* parent) : parent_{parent} {} + + iterator& operator++(); + std::uint32_t operator*() { return parent_->attention_; } + + bool operator==(const iterator& ix2) const { + return (parent_ == ix2.parent_); + } + + rehearser* parent_ = nullptr; + std::uint32_t attention_ = 0; + + }; + + bool is_first_pass() const { return attention_ == 0; } + bool is_second_pass() const { return attention_ == 1; } + bool enable_debug() const { return is_second_pass(); } + + iterator begin() { return iterator(this); } + iterator end() { return iterator(nullptr); } + + public: + /** pass number: 0 or 1 while iterating; operator++ leaves it at 2 once iteration is finished **/ + std::uint32_t attention_ = 0; + /** @brief set to true when test starts; false if first pass fails **/ + bool ok_flag_ = true; + }; + + auto rehearser::iterator::operator++() -> iterator& /* advance to next pass; pass 1 is skipped when pass 0 left ok_flag_ set */ + { + if (!parent_) /* already the end iterator: nothing to advance, and parent_ must not be dereferenced */ + return *this; + ++(parent_->attention_); + if (parent_->ok_flag_ && (parent_->attention_ == 1)) { + /* skip 2nd pass */ + ++(parent_->attention_); + } + + if (parent_->attention_ == 2) + parent_ = nullptr; + + return *this; + } + + /* use this instead of REQUIRE(expr) in context of a rehearser: first pass only records failure in ok_flag_, second pass asserts via REQUIRE */ +# define REHEARSE(rehearser, expr) \ + if (rehearser.is_first_pass()) { \ + bool _f = (expr); \ + rehearser.ok_flag_ = rehearser.ok_flag_ && _f; \ + } else { \ + REQUIRE(expr); \ + } + + /* note: trivial REQUIRE() call in else branch bc we still want + * catch2 to count assertions when verification succeeds + */ +# define REQUIRE_ORCAPTURE(ok_flag, catch_flag, expr) \ + if (catch_flag) { \ + REQUIRE((expr)); \ + } else { \ + REQUIRE(true); \ + ok_flag &= (expr); \ + } + +# define REQUIRE_ORFAIL(ok_flag, catch_flag, expr) \ + REQUIRE_ORCAPTURE(ok_flag, catch_flag, 
expr); \ + if (!ok_flag) \ + return ok_flag + + namespace { + struct testcase_tkz { + std::string input_; + bool expect_throw_; + token expected_tk_; + bool consume_all_; + }; + + std::vector + s_testcase_v = { + /* + * + * expect_throw consume_all + * v v + */ + {"<", false, token::leftangle(), true}, + /* possible prefix of >= */ + {">", false, token::rightangle(), true}, + {"> ", false, token::rightangle(), true}, + + {"(", false, token::leftparen(), true}, + {")", false, token::rightparen(), true}, + + {"[", false, token::leftbracket(), true}, + {"]", false, token::rightbracket(), true}, + + {"{", false, token::leftbrace(), true}, + {" {", false, token::leftbrace(), true}, + + {"\t{", false, token::leftbrace(), true}, + {"\n{", false, token::leftbrace(), true}, + {"}", false, token::rightbrace(), true}, + + {"0", false, token::i64_token("0"), true}, + {"1", false, token::i64_token("1"), true}, + {"12", false, token::i64_token("12"), true}, + {"123", false, token::i64_token("123"), true}, + {"1234", false, token::i64_token("1234"), true}, + + {"0 ", false, token::i64_token("0"), false}, + {"1 ", false, token::i64_token("1"), false}, + {"12 ", false, token::i64_token("12"), false}, + {"123 ", false, token::i64_token("123"), false}, + {"1234 ", false, token::i64_token("1234"), false}, + + {"1<", false, token::i64_token("1"), false}, + {"1>", false, token::i64_token("1"), false}, + {"1(", false, token::i64_token("1"), false}, + {"1)", false, token::i64_token("1"), false}, + {"1[", false, token::i64_token("1"), false}, + {"1]", false, token::i64_token("1"), false}, + {"1{", false, token::i64_token("1"), false}, + {"1}", false, token::i64_token("1"), false}, + {"1;", false, token::i64_token("1"), false}, + {"1:", false, token::i64_token("1"), false}, + {"1,", false, token::i64_token("1"), false}, + + {".1", false, token::f64_token(".1"), true}, + {".12", false, token::f64_token(".12"), true}, + {".123", false, token::f64_token(".123"), true}, + + {"+.1", false, 
token::f64_token("+.1"), true}, + {"+.12", false, token::f64_token("+.12"), true}, + {"+.123", false, token::f64_token("+.123"), true}, + + {"-.1", false, token::f64_token("-.1"), true}, + {"-.12", false, token::f64_token("-.12"), true}, + {"-.123", false, token::f64_token("-.123"), true}, + + {"1.", false, token::f64_token("1."), true}, + {"1.2", false, token::f64_token("1.2"), true}, + {"1.23", false, token::f64_token("1.23"), true}, + + {"1e0", false, token::f64_token("1e0"), true}, + {"1e-1", false, token::f64_token("1e-1"), true}, + {"1e1", false, token::f64_token("1e1"), true}, + {"1e+1", false, token::f64_token("1e+1"), true}, + + {"\"hello\"", false, token::string_token("hello"), true}, + /* tokenizer sees this input: + * "\"hi\", she said" + */ + {"\"\\\"hi\\\", she said\"", false, token::string_token("\"hi\", she said"), true}, + /* tokenizer sees this input: + * "look ma, newline ->\n<- " + */ + {"\"look ma, newline ->\\n<- \"", false, + token::string_token("look ma, newline ->\n<- "), true}, + /* tokenizer sees this input: + * "tab to the right [\t], to the right [\t]" + */ + {"\"tab to the right [\\t], to the right [\\t]\"", false, + token::string_token("tab to the right [\t], to the right [\t]"), true}, + + {".", false, token::dot(), true}, + {":", false, token::colon(), true}, + {",", false, token::comma(), true}, + {"=", false, token::singleassign(), true}, + {":=", false, token::assign_token(), true}, + {"->", false, token::yields(), true}, + + {"+", false, token::plus_token(), true}, + {"-", false, token::minus_token(), true}, + {"*", false, token::star_token(), true}, + {"/", false, token::slash_token(), true}, + + {"symbol", false, token::symbol_token("symbol"), true}, + {"another-symbol", false, token::symbol_token("another-symbol"), true}, + + {"type", false, token::type(), true}, + {"def", false, token::def(), true}, + {"lambda", false, token::lambda(), true}, + {"if", false, token::if_token(), true}, + {"let", false, token::let(), true}, + 
{"in", false, token::in(), true}, + {"end", false, token::end(), true}, + + }; + } + + TEST_CASE("tokenizer", "[tokenizer]") { + for (std::size_t i_tc = 0, n_tc = s_testcase_v.size(); i_tc < n_tc; ++i_tc) { + + const testcase_tkz & testcase = s_testcase_v[i_tc]; + + rehearser rh; + + for (auto _ : rh) { + scope log(XO_DEBUG2(rh.enable_debug(), "tokenizer")); + + log && log(xtag("i_tc", i_tc), xtag("input", testcase.input_)); + + using tokenizer + = xo::scm::tokenizer; + + tokenizer tkz(rh.enable_debug()); + tokenizer::span_type + in_span(testcase.input_.c_str(), + testcase.input_.c_str() + testcase.input_.size()); + + auto sr = tkz.scan(in_span, true /*eof*/); + + REHEARSE(rh, sr.get_token().tk_type() == testcase.expected_tk_.tk_type()); + if (sr.get_token().tk_type() == tokentype::tk_i64) + { + REHEARSE(rh, !sr.get_token().text().empty()); + REHEARSE(rh, sr.get_token().i64_value() == testcase.expected_tk_.i64_value()); + } else if (sr.get_token().tk_type() == tokentype::tk_f64) + { + REHEARSE(rh, !sr.get_token().text().empty()); + REHEARSE(rh, sr.get_token().f64_value() == testcase.expected_tk_.f64_value()); + } else if(sr.get_token().tk_type() == tokentype::tk_string) + { + /* sr.get_token().text() can be empty, consider input "" */ + REHEARSE(rh, sr.get_token().text() == testcase.expected_tk_.text()); + } else if(sr.get_token().tk_type() == tokentype::tk_symbol) + { + REHEARSE(rh, !sr.get_token().text().empty()); + REHEARSE(rh, sr.get_token().text() == testcase.expected_tk_.text()); + } else { + REHEARSE(rh, sr.get_token().text().empty()); + } + + /* must consume all input for tests we're doing here */ + if (testcase.consume_all_) { + REHEARSE(rh, sr.consumed() == in_span); + } else { + REHEARSE(rh, sr.consumed() != in_span); + } + } + } + } + + namespace { + struct testcase2_tkz { + std::string input_; + bool expect_throw_; + std::vector expected_tk_v_; + }; + + std::vector + s_testcase2_v = { + {"def foo : f64 = 3.141;", + false, + {token::def(), + 
token::symbol_token("foo"), + token::colon(), + token::symbol_token("f64"), + token::singleassign(), + token::f64_token("3.141"), + token::semicolon() + }}, + {"def foo = lambda (x : f64) { def y = x * x; y; }", + false, + {token::def(), + token::symbol_token("foo"), + token::singleassign(), + token::lambda(), + token::leftparen(), + token::symbol_token("x"), + token::colon(), + token::symbol_token("f64"), + token::rightparen(), + token::leftbrace(), + token::def(), + token::symbol_token("y"), + token::singleassign(), + token::symbol_token("x"), + token::star_token(), + token::symbol_token("x"), + token::semicolon(), + token::symbol_token("y"), + token::semicolon(), + token::rightbrace() + }}, +#ifdef TODO + {"a.b", + false, + {token::symbol_token("a"), + token::dot(), + token::symbol_token("b") + }}, +#endif + {"a,b", + false, + {token::symbol_token("a"), + token::comma(), + token::symbol_token("b") + }}, + {"a:b", + false, + {token::symbol_token("a"), + token::colon(), + token::symbol_token("b") + }}, + {"a;b", + false, + {token::symbol_token("a"), + token::semicolon(), + token::symbol_token("b") + }}, + {"a:=b", + false, + {token::symbol_token("a"), + token::assign_token(), + token::symbol_token("b") + }}, + {"a=b", + false, + {token::symbol_token("a"), + token::singleassign(), + token::symbol_token("b") + }}, + {"p->q", + false, + {token::symbol_token("p"), + token::yields(), + token::symbol_token("q") + }}, + {"a + b", + false, + {token::symbol_token("a"), + token::plus_token(), + token::symbol_token("b") + }}, + {"a - b", + false, + {token::symbol_token("a"), + token::minus_token(), + token::symbol_token("b") + }}, + {"a-b", + false, + {token::symbol_token("a-b"), + }}, + {"(apple)", + false, + {token::leftparen(), + token::symbol_token("apple"), + token::rightparen() + }}, + {"", + false, + {token::leftangle(), + token::symbol_token("apple"), + token::rightangle() + }}, + }; + } + + TEST_CASE("tokenizer2", "[tokenizer]") { + /* this time testing token 
sequences */ + + using tokenizer = xo::scm::tokenizer; + + for (std::size_t i_tc = 0, n_tc = s_testcase2_v.size(); i_tc < n_tc; ++i_tc) { + const testcase2_tkz & testcase = s_testcase2_v[i_tc]; + + rehearser rh; + + for (auto _ : rh) { + scope log(XO_DEBUG2(rh.enable_debug(), "tokenizer2")); + + log && log(xtag("i_tc", i_tc), xtag("input", testcase.input_)); + + tokenizer tkz(rh.enable_debug()); + + tokenizer::span_type + in_span(testcase.input_.c_str(), + testcase.input_.c_str() + testcase.input_.size()); + + for (int i_tk = 0, n_tk = testcase.expected_tk_v_.size(); + i_tk < n_tk; ++i_tk) + { + log && log(xtag("i_tk", i_tk)); + + auto sr = tkz.scan(in_span, in_span.empty()); + const auto & tk = sr.get_token(); + + if (tk.is_valid()) { + REHEARSE(rh, tk.tk_type() == testcase.expected_tk_v_[i_tk].tk_type()); + } + if (tk.tk_type() == tokentype::tk_i64) + { + REHEARSE(rh, !tk.text().empty()); + REHEARSE(rh, tk.i64_value() == testcase.expected_tk_v_[i_tk].i64_value()); + } else if (tk.tk_type() == tokentype::tk_f64) + { + REHEARSE(rh, !tk.text().empty()); + REHEARSE(rh, tk.f64_value() == testcase.expected_tk_v_[i_tk].f64_value()); + } else if(tk.tk_type() == tokentype::tk_string) + { + /* tk.text() can be empty, consider input "" */ + REHEARSE(rh, tk.text() == testcase.expected_tk_v_[i_tk].text()); + } else if(tk.tk_type() == tokentype::tk_symbol) + { + REHEARSE(rh, !tk.text().empty()); + REHEARSE(rh, tk.text() == testcase.expected_tk_v_[i_tk].text()); + } else { + REHEARSE(rh, tk.text().empty()); + } + + in_span = in_span.after_prefix(sr.consumed()); + } + } + } + } /*TEST_CASE(tokenizer2)*/ + + namespace { + using tkz_error_type = xo::scm::tokenizer_error; + using input_state_type = xo::scm::input_state; + using span_type = xo::scm::span; + + struct testcase_error { + std::string input_; + tkz_error_type expect_error_; + }; + + testcase_error + make_testcase(const char * input, const char * src_function, const char * error_descr, + size_t tk_start, size_t 
whitespace, size_t error_pos) + { + size_t line_no = 1; + + testcase_error retval; + retval.input_ = input; + retval.expect_error_ = tkz_error_type(src_function, error_descr, + input_state_type(span_type::from_string(retval.input_), + tk_start, whitespace), + error_pos); + return retval; + } + + std::vector + s_testcase3_v = { + // 012345678 + // --------v + make_testcase("123.456ez", + "assemble_token", + "unexpected character in numeric constant", + 0, 0, 8), + // 01 + // -v + make_testcase("1-3", + "assemble_token", + "improperly placed sign indicator", + 0, 0, 1), + // 012 + // --v + make_testcase("1..2", + "assemble_token", + "duplicate decimal point in numeric literal", + 0, 0, 2), + // o 0123456 + // ------v + make_testcase("1.23e4e", + "assemble_token", + "duplicate exponent marker in numeric literal", + 0, 0, 6), + // tokenizer sees string ["\"] + // 0 1 2 3 + // - - - v + make_testcase("\"\\\"", + "assemble_token", + "missing terminating '\"' to complete literal string", + //"expect \\ to escape one of n|t|r|\"|\\ in string literal", + 0, 0, 3), + // tokenizer sees literal with embedded newline + // 1 2 3 + // 01234567890123456789012345678901 2 + // -------------------------------- v + make_testcase("\"everything was going fine until\n\"", + "scan", + "must use \\n or \\r to encode newline/cr in string literal", + 0, 0, 32), + // tokenizer sees string ["\] + // 0 1 2 + // - - v + make_testcase("\"\\", + "assemble_token", + "expecting key following escape character \\", + 0, 0, 2), + // tokenizer sees string ["\q"] + // 0 12 + // - -v + make_testcase("\"\\q\"", + "assemble_token", + "expecting one of n|r|\"|\\ following escape \\", + 0, 0, 2), + // + make_testcase("#", + "assemble_token", + "illegal input character", + 0, 0, 0), + }; + + TEST_CASE("tokenizer3", "[tokenizer]") { + /* testing error handling */ + + using tokenizer = xo::scm::tokenizer; + + constexpr bool c_force_debug = false; + + for (std::size_t i_tc = 0, n_tc = s_testcase3_v.size(); i_tc < 
n_tc; ++i_tc) { + const testcase_error & testcase = s_testcase3_v[i_tc]; + + rehearser rh(0); + + for (auto _ : rh) { + scope log(XO_DEBUG2(c_force_debug || rh.enable_debug(), "tokenizer3")); + + log && log(xtag("pass", _), xtag("ok(-)", rh.ok_flag_)); + log && log(xtag("i_tc", i_tc), xtag("input", testcase.input_)); + + tokenizer tkz(c_force_debug || rh.enable_debug()); + + auto in_span = tokenizer::span_type::from_string(testcase.input_); + + auto sr = tkz.scan(in_span, true /*eof*/); + + REHEARSE(rh, sr.is_error()); + + if (sr.error().src_function()) { + REHEARSE(rh, std::string(sr.error().src_function()) == std::string(testcase.expect_error_.src_function())); + } + if (!sr.error().error_description().empty()) { + REHEARSE(rh, std::string(sr.error().error_description()) == std::string(testcase.expect_error_.error_description())); + } + REHEARSE(rh, sr.error().whitespace() == testcase.expect_error_.whitespace()); + REHEARSE(rh, sr.error().tk_start() == testcase.expect_error_.tk_start()); + REHEARSE(rh, sr.error().error_pos() == testcase.expect_error_.error_pos()); + + log && log(xtag("ok(+)", rh.ok_flag_)); + } + } + } + } + + } /*namespace ut*/ +} /*namespace xo*/ + +/* end tokenizer.test.cpp */ diff --git a/xo-tokenizer/utest/tokenizer_utest_main.cpp b/xo-tokenizer/utest/tokenizer_utest_main.cpp new file mode 100644 index 00000000..c5e273c4 --- /dev/null +++ b/xo-tokenizer/utest/tokenizer_utest_main.cpp @@ -0,0 +1,6 @@ +/* file tokenizer_utest_main.cpp */ + +#define CATCH_CONFIG_MAIN +#include "catch2/catch.hpp" + +/* end tokenizer_utest_main.cpp */