llvm-journey

LLVM Journey
git clone git://0xff.ir/g/llvm-journey.git
Log | Files | Refs | README | LICENSE

commit 2652885b9a33b4d5f003c16fbeadedb1845a409f
parent 317426df950ebc5ed4a3b40ee6e5e510bc94d38b
Author: Mohammad-Reza Nabipoor <m.nabipoor@yahoo.com>
Date:   Fri, 28 Aug 2020 17:48:10 +0430

Add missing parts of the parser + tests

Now parser can parse function definitions, call expressions and
binary operations.

Diffstat:
Mkaleidoscope_parser.hpp | 241+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Mkaleidoscope_tokens.hpp | 24++++++++++++++++++++++++
Mtests/kaleidoscope_parser.test.cpp | 157+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
3 files changed, 408 insertions(+), 14 deletions(-)

diff --git a/kaleidoscope_parser.hpp b/kaleidoscope_parser.hpp @@ -2,7 +2,7 @@ #pragma once #include "kaleidoscope_ast.hpp" -#include "kaleidoscope_lexer.hpp" +#include "kaleidoscope_tokens.hpp" #include <string> @@ -75,7 +75,89 @@ parse_prototype(FwdIt f, FwdIt l) template<typename FwdIt> ParseResult<FwdIt> -parse_expr(FwdIt f, FwdIt l) +parse_expr(FwdIt f, FwdIt l); + +template<typename FwdIt> +ParseResult<FwdIt> +parse_expr_idexpr(FwdIt f, FwdIt l) +{ + ParseResult<FwdIt> r{ + f, + }; + auto err = [&](auto&& msg) { + r.err.pos = f; + r.err.msg = std::move(msg); + }; + + assert(f != l); + assert(token_type(*f) == TkType::Id); + + auto fid = f++; + + if (f == l || token_str(*f) != "(") { + r.node = kal::Variable{ token_str(*fid) }; + r.parsed = f; + return r; + } + + kal::Call call{ token_str(*fid), {} }; + + ++f; // drop "(" + + while (true) { + if (f == l) { + err("early termination of call expression"); + return r; + } + + if (token_str(*f) == ")") { + r.node = std::move(call); + r.parsed = ++f; + return r; + } + + { + auto rpe = parse_expr(f, l); + + if (rpe.parsed == f) { + // FIXME error + err("error in parsing call arguments: "); + r.err.msg += rpe.err.msg + " (at relative pos " + + std::to_string(std::distance(f, rpe.err.pos)) + ")"; + return r; + } + + call.args.emplace_back(std::move(rpe.node)); + f = rpe.parsed; + } + + auto s = token_str(*f); + + switch (s[0]) { + case ')': + break; + + case ',': + ++f; + break; + + default: + err(std::string{ "expects ',' but got " } + kal::to_string(*f)); + return r; + } + } + + r.parsed = f; + return r; +} + +template<typename FwdIt> +ParseResult<FwdIt> +parse_expr_primary(FwdIt f, FwdIt l); + +template<typename FwdIt> +ParseResult<FwdIt> +parse_expr_binop_rhs(FwdIt f, FwdIt l, int precedence, ASTNode lhs) { ParseResult<FwdIt> r{ f, @@ -84,16 +166,150 @@ parse_expr(FwdIt f, FwdIt l) r.err.pos = f; r.err.msg = std::move(msg); }; + ASTNode rhs; assert(f != l); - (void)err; + while (f != l) { + int tpred = token_precedence(*f); + + if (tpred < precedence) + break; + + auto op = token_str(*f)[0]; + + ++f; + if (f == l) { + err("early termination: expects primary expression"); + break; + } + + { + auto rpe = parse_expr_primary(f, l); + + if (rpe.parsed == f) { + err("failed at parsing primary expression:"); + r.err.msg += rpe.err.msg + "(at relative pos " + + std::to_string(std::distance(f, rpe.err.pos)) + ")"; + break; + } + + f = rpe.parsed; + rhs = std::move(rpe.node); + + if (f == l) { + lhs = kal::BinaryOp{ op, std::move(lhs), std::move(rhs) }; + break; + } + } + + if (tpred < token_precedence(*f)) { // there's an operator with higher prec + auto rbo = parse_expr_binop_rhs(f, l, tpred + 1, std::move(rhs)); + + if (rbo.parsed == f) { + err("error in parsing rhs: "); + r.err.msg += rbo.err.msg + " (at relative pos " + + std::to_string(std::distance(f, rbo.err.pos)) + ")"; + return r; + } + + f = rbo.parsed; + rhs = std::move(rbo.node); + } + + lhs = kal::BinaryOp{ op, std::move(lhs), std::move(rhs) }; + } r.parsed = f; - err("not implemented yet"); + r.node = std::move(lhs); + return r; +} + +template<typename FwdIt> +ParseResult<FwdIt> +parse_expr(FwdIt f, FwdIt l); + +template<typename FwdIt> +ParseResult<FwdIt> +parse_expr_primary(FwdIt f, FwdIt l) +{ + ParseResult<FwdIt> r; + auto err = [&](auto&& msg) { + r.err.pos = f; + r.err.msg = std::move(msg); + }; + + assert(f != l); + + r.parsed = f; + + switch (token_type(*f)) { + case TkType::Num: + r.node = Number{ token_num(*f) }; + r.parsed = ++f; + return r; + + case TkType::Id: + return parse_expr_idexpr(f, l); + + case TkType::Etc: + if (token_str(*f) == "(") { + ++f; + + auto rpe = parse_expr(f, l); + + if (rpe.parsed == f) { + r.err = std::move(rpe.err); + return r; + } + + r.node = std::move(rpe.node); + + f = rpe.parsed; + if (f == l) { + err("early termination: expects `)`"); + return r; + } + + if (token_str(*f) == ")") + r.parsed = ++f; + else + err("expects `)`"); + + return r; + } + + /* fallthrough */ + + default: + // FIXME performance of concat + err(std::string{ "unexpected token (" } + kal::to_string(*f) + + ") while expecting an expression"); + return r; + } + return r; } +template<typename FwdIt> +ParseResult<FwdIt> +parse_expr(FwdIt f, FwdIt l) +{ + assert(f != l); + + auto r = parse_expr_primary(f, l); + + if (r.parsed == f) + return r; // CHKME std::move(r); + + f = r.parsed; + + if (f == l || token_precedence(*f) < 0) // not a valid operator + return r; + + return parse_expr_binop_rhs(f, l, 0, std::move(r.node)); +} + } // namespace detail // TODO std::move err msg @@ -129,13 +345,26 @@ parse(FwdIt f, FwdIt l, OutIt nodes, ErrOp error) "early termination: " "expects function prototype after `def` keyword"); + kal::Prototype proto; + { auto r = detail::parse_prototype(f, l); if (r.parsed == f) // failure return err(r.err.pos, r.err.msg); + + { + using kal::cast; // FIXME + using kal::node_type; // FIXME FIXME + + assert(node_type(r.node) == kal::NodeType::Prototype); + + auto ok = cast(r.node, proto); + + assert(ok && "unsuccessful cast"); + } + f = r.parsed; - *nodes++ = std::move(r.node); } if (f == l) @@ -147,7 +376,7 @@ parse(FwdIt f, FwdIt l, OutIt nodes, ErrOp error) if (r.parsed == f) // failure return err(r.err.pos, r.err.msg); f = r.parsed; - *nodes++ = std::move(r.node); + *nodes++ = kal::Function{ std::move(proto), std::move(r.node) }; } } break; diff --git a/kaleidoscope_tokens.hpp b/kaleidoscope_tokens.hpp @@ -64,6 +64,30 @@ token_num(const Token& t) return t.dbl; } +inline int +token_precedence(const Token& t) +{ + if (t.type != TkType::Etc) + return -1; + + // assert(!t.str.empty()); + + switch (t.str[0]) { + case '<': + return 10; + + case '+': + case '-': + return 20; + + case '*': + case '/': + return 40; + } + + return -1; +} + template<typename FwdIt, typename OutIt> OutIt tokenize(FwdIt first, FwdIt last, OutIt tokens) diff --git a/tests/kaleidoscope_parser.test.cpp b/tests/kaleidoscope_parser.test.cpp @@ -9,11 +9,11 @@ #include <string> #include <vector> -#include <iostream> // FIXME - #include "kaleidoscope_ast.hpp" #include "kaleidoscope_tokens.hpp" +using kal::to_string; + TEST_CASE("parse simple programs", "[simple]") { auto p = [](const std::string& s) { @@ -45,8 +45,6 @@ TEST_CASE("parse simple programs", "[simple]") { auto nd = p("extern sin(x)"); - using kal::to_string; - REQUIRE(nd.size() == 1); REQUIRE(to_string(nd[0]) == "(prototype sin x)"); } @@ -54,8 +52,6 @@ TEST_CASE("parse simple programs", "[simple]") { auto nd = p("extern tan2 ( arg0 arg1 )"); - using kal::to_string; - REQUIRE(nd.size() == 1); REQUIRE(to_string(nd[0]) == "(prototype tan2 arg0 arg1)"); } @@ -63,11 +59,156 @@ TEST_CASE("parse simple programs", "[simple]") { auto nd = p("extern cos(realInput);extern atan2(arg0 arg1);"); - using kal::to_string; - REQUIRE(nd.size() == 2); REQUIRE(to_string(nd[0]) == "(prototype cos realInput)"); REQUIRE(to_string(nd[1]) == "(prototype atan2 arg0 arg1)"); } } + + SECTION("def") + { + { + auto nd = p(R"( +def one(x) + 1 +)"); + + REQUIRE(nd.size() == 1); + REQUIRE(to_string(nd[0]) == "(function (prototype one x) (number 1))"); + } + + { + auto nd = p("extern sin(x) def pi2() 1.5708"); + + REQUIRE(nd.size() == 2); + REQUIRE(to_string(nd[0]) == "(prototype sin x)"); + REQUIRE(to_string(nd[1]) == "(function (prototype pi2) (number 1.5708))"); + } + + { + auto nd = p("extern sin(x) def sinPi2() sin(1.5708)"); + + REQUIRE(nd.size() == 2); + REQUIRE(to_string(nd[0]) == "(prototype sin x)"); + REQUIRE(to_string(nd[1]) == + "(function (prototype sinPi2) (call sin (number 1.5708)))"); + } + + { + auto nd = + p("extern tan2(x y);extern sin(x);def fun()tan2(sin(1.5708), 1);" + "def gun(x y z)tan2(0.1,gun(1,sin(1.5),1.0));"); + + REQUIRE(nd.size() == 4); + REQUIRE(to_string(nd[0]) == "(prototype tan2 x y)"); + REQUIRE(to_string(nd[1]) == "(prototype sin x)"); + REQUIRE(to_string(nd[2]) == + "(function (prototype fun) (call tan2 (call sin (number 1.5708)) " + "(number 1)))"); + REQUIRE(to_string(nd[3]) == + "(function (prototype gun x y z) (call tan2 (number 0.1) (call " + "gun (number 1) (call sin (number 1.5)) (number 1))))"); + } + } + + SECTION("binary operation") + { + using Num = kal::Number; + using BOp = kal::BinaryOp; + using Var = kal::Variable; + + { + auto nd = p("2 + 3"); + + REQUIRE(nd.size() == 1); + REQUIRE(nd[0] == BOp{ '+', Num{ 2 }, Num{ 3 } }); + REQUIRE(to_string(nd[0]) == "(binop '+' (number 2) (number 3))"); + } + + { + auto nd = p("x * y"); + + REQUIRE(nd.size() == 1); + REQUIRE(nd[0] == BOp{ '*', Var{ "x" }, Var{ "y" } }); + } + + { + auto nd = p("x * y * z"); + + REQUIRE(nd.size() == 1); + REQUIRE(nd[0] == + BOp{ '*', BOp{ '*', Var{ "x" }, Var{ "y" } }, Var{ "z" } }); + } + + { + auto nd = p("4 * 5 + 3"); + + REQUIRE(nd.size() == 1); + REQUIRE(nd[0] == BOp{ '+', BOp{ '*', Num{ 4 }, Num{ 5 } }, Num{ 3 } }); + } + + { + auto nd = p("4+2.15*3"); + + REQUIRE(nd.size() == 1); + REQUIRE(nd[0] == BOp{ + '+', + Num{ 4 }, + BOp{ '*', Num{ 2.15 }, Num{ 3 } }, + }); + } + + { + auto nd = p("4+2.15*3/2-4.4/2/3#expr"); + + REQUIRE(nd.size() == 1); + REQUIRE(nd[0] == + BOp{ '-', + BOp{ + '+', + Num{ 4 }, + BOp{ '/', BOp{ '*', Num{ 2.15 }, Num{ 3 } }, Num{ 2 } }, + }, + BOp{ '/', BOp{ '/', Num{ 4.4 }, Num{ 2 } }, Num{ 3 } } }); + } + + { + auto nd = p("sin(3.14/4)-2.3-3.4-(3*4+1)*3"); + + REQUIRE(nd.size() == 1); + REQUIRE( + to_string(nd[0]) == + "(binop '-' (binop '-' (binop '-' (call sin (binop '/' (number 3.14) " + "(number 4))) (number 2.3)) (number 3.4)) (binop '*' (binop '+' (binop " + "'*' (number 3) (number 4)) (number 1)) (number 3)))"); + } + } + + SECTION("extern, def, call") + { + { + auto nd = p(R"( +extern tan2(x y) + +def formula(x y z) + 3.14 + 2 * (tan2(x, y) + x*y) / z + +extern sin(x) + +sin(2.72) + formula(1, 2, 3) +)"); + + REQUIRE(nd.size() == 4); + REQUIRE(to_string(nd[0]) == "(prototype tan2 x y)"); + REQUIRE(to_string(nd[1]) == + "(function (prototype formula x y z) (binop '+' (number 3.14) " + "(binop '/' (binop '*' (number 2) (binop '+' (call tan2 " + "(variable x) (variable y)) (binop '*' (variable x) (variable " + "y)))) (variable z))))"); + REQUIRE(to_string(nd[2]) == "(prototype sin x)"); + REQUIRE(to_string(nd[3]) == + "(binop '+' (call sin (number 2.72)) (call formula (number 1) " + "(number 2) (number 3)))"); + } + } }