From 5d31ac7a439c2790d9e23fdc3816067be9f19506 Mon Sep 17 00:00:00 2001 From: Roland Conybeare Date: Tue, 6 Aug 2024 11:37:41 -0400 Subject: [PATCH] xo-tokenizer: mvp: recognize keywords --- include/xo/tokenizer/token.hpp | 1 + include/xo/tokenizer/tokenizer.hpp | 28 ++++++++++++++++++++++++++++ utest/tokenizer.test.cpp | 8 ++++++++ 3 files changed, 37 insertions(+) diff --git a/include/xo/tokenizer/token.hpp b/include/xo/tokenizer/token.hpp index 3883e15a..84b1a1b8 100644 --- a/include/xo/tokenizer/token.hpp +++ b/include/xo/tokenizer/token.hpp @@ -86,6 +86,7 @@ namespace xo { static token if_token() { return token(tokentype::tk_if); } static token let() { return token(tokentype::tk_let); } static token in() { return token(tokentype::tk_in); } + static token end() { return token(tokentype::tk_end); } /* keyword token for "end", built from its tokentype only */ tokentype tk_type() const { return tk_type_; } const std::string & text() const { return text_; } diff --git a/include/xo/tokenizer/tokenizer.hpp b/include/xo/tokenizer/tokenizer.hpp index 7f34b2ad..4fad37ba 100644 --- a/include/xo/tokenizer/tokenizer.hpp +++ b/include/xo/tokenizer/tokenizer.hpp @@ -510,6 +510,34 @@ namespace xo { ; /* nothing to do here -- desired tk_text already constructed */ } + if (tk_type == tokentype::tk_symbol) { + /* check for keywords: a symbol whose text exactly equals one of the reserved words below is re-typed as that keyword's tokentype */ + + bool keep_text = false; /* set only when no keyword matches, i.e. token remains a tk_symbol */ + + if (tk_text == "type") { + tk_type = tokentype::tk_type; + } else if (tk_text == "def") { + tk_type = tokentype::tk_def; + } else if (tk_text == "lambda") { + tk_type = tokentype::tk_lambda; + } else if (tk_text == "if") { + tk_type = tokentype::tk_if; + } else if (tk_text == "let") { + tk_type = tokentype::tk_let; + } else if (tk_text == "in") { + tk_type = tokentype::tk_in; + } else if (tk_text == "end") { + tk_type = tokentype::tk_end; + } else { + /* keep as symbol: no keyword matched, so tk_text (the symbol's name) must survive */ + keep_text = true; + } + + if (!keep_text) + tk_text.clear(); /* keyword is identified by tk_type alone; NOTE(review): presumably matches the text-less tokens built by token::type(), token::end(), etc. -- confirm */ + } + return token_type(tk_type, std::move(tk_text)); } /*assemble_token*/ diff --git a/utest/tokenizer.test.cpp b/utest/tokenizer.test.cpp index
03cd71ad..b5d8303a 100644 --- a/utest/tokenizer.test.cpp +++ b/utest/tokenizer.test.cpp @@ -100,6 +100,14 @@ namespace xo { token::string_token("tab to the right [\t], to the right [\t]"), true}, {"symbol", false, token::symbol_token("symbol"), true}, + /* keywords: each reserved word is expected to yield its keyword token rather than symbol_token(...) */ + {"type", false, token::type(), true}, + {"def", false, token::def(), true}, + {"lambda", false, token::lambda(), true}, + {"if", false, token::if_token(), true}, + {"let", false, token::let(), true}, + {"in", false, token::in(), true}, + {"end", false, token::end(), true}, }; }