commit 2652885b9a33b4d5f003c16fbeadedb1845a409f
parent 317426df950ebc5ed4a3b40ee6e5e510bc94d38b
Author: Mohammad-Reza Nabipoor <m.nabipoor@yahoo.com>
Date: Fri, 28 Aug 2020 17:48:10 +0430
Add missing parts of the parser + tests
Now parser can parse function definitions, call expressions and
binary operations.
Diffstat:
3 files changed, 408 insertions(+), 14 deletions(-)
diff --git a/kaleidoscope_parser.hpp b/kaleidoscope_parser.hpp
@@ -2,7 +2,7 @@
#pragma once
#include "kaleidoscope_ast.hpp"
-#include "kaleidoscope_lexer.hpp"
+#include "kaleidoscope_tokens.hpp"
#include <string>
@@ -75,7 +75,89 @@ parse_prototype(FwdIt f, FwdIt l)
template<typename FwdIt>
ParseResult<FwdIt>
-parse_expr(FwdIt f, FwdIt l)
+parse_expr(FwdIt f, FwdIt l);
+
+template<typename FwdIt>
+ParseResult<FwdIt>
+parse_expr_idexpr(FwdIt f, FwdIt l)
+{
+ ParseResult<FwdIt> r{
+ f,
+ };
+ auto err = [&](auto&& msg) {
+ r.err.pos = f;
+ r.err.msg = std::move(msg);
+ };
+
+ assert(f != l);
+ assert(token_type(*f) == TkType::Id);
+
+ auto fid = f++;
+
+ if (f == l || token_str(*f) != "(") {
+ r.node = kal::Variable{ token_str(*fid) };
+ r.parsed = f;
+ return r;
+ }
+
+ kal::Call call{ token_str(*fid), {} };
+
+ ++f; // drop "("
+
+ while (true) {
+ if (f == l) {
+ err("early termination of call expression");
+ return r;
+ }
+
+ if (token_str(*f) == ")") {
+ r.node = std::move(call);
+ r.parsed = ++f;
+ return r;
+ }
+
+ {
+ auto rpe = parse_expr(f, l);
+
+ if (rpe.parsed == f) {
+ // FIXME error
+ err("error in parsing call arguments: ");
+ r.err.msg += rpe.err.msg + " (at relative pos " +
+ std::to_string(std::distance(f, rpe.err.pos)) + ")";
+ return r;
+ }
+
+ call.args.emplace_back(std::move(rpe.node));
+ f = rpe.parsed;
+ }
+
+ auto s = token_str(*f);
+
+ switch (s[0]) {
+ case ')':
+ break;
+
+ case ',':
+ ++f;
+ break;
+
+ default:
+ err(std::string{ "expects ',' but got " } + kal::to_string(*f));
+ return r;
+ }
+ }
+
+ r.parsed = f;
+ return r;
+}
+
+template<typename FwdIt>
+ParseResult<FwdIt>
+parse_expr_primary(FwdIt f, FwdIt l);
+
+template<typename FwdIt>
+ParseResult<FwdIt>
+parse_expr_binop_rhs(FwdIt f, FwdIt l, int precedence, ASTNode lhs)
{
ParseResult<FwdIt> r{
f,
@@ -84,16 +166,150 @@ parse_expr(FwdIt f, FwdIt l)
r.err.pos = f;
r.err.msg = std::move(msg);
};
+ ASTNode rhs;
assert(f != l);
- (void)err;
+ while (f != l) {
+ int tpred = token_precedence(*f);
+
+ if (tpred < precedence)
+ break;
+
+ auto op = token_str(*f)[0];
+
+ ++f;
+ if (f == l) {
+ err("early termination: expects primary expression");
+ break;
+ }
+
+ {
+ auto rpe = parse_expr_primary(f, l);
+
+ if (rpe.parsed == f) {
+ err("failed at parsing primary expression:");
+ r.err.msg += rpe.err.msg + "(at relative pos " +
+ std::to_string(std::distance(f, rpe.err.pos)) + ")";
+ break;
+ }
+
+ f = rpe.parsed;
+ rhs = std::move(rpe.node);
+
+ if (f == l) {
+ lhs = kal::BinaryOp{ op, std::move(lhs), std::move(rhs) };
+ break;
+ }
+ }
+
+ if (tpred < token_precedence(*f)) { // there's an operator with higher prec
+ auto rbo = parse_expr_binop_rhs(f, l, tpred + 1, std::move(rhs));
+
+ if (rbo.parsed == f) {
+ err("error in parsing rhs: ");
+ r.err.msg += rbo.err.msg + " (at relative pos " +
+ std::to_string(std::distance(f, rbo.err.pos)) + ")";
+ return r;
+ }
+
+ f = rbo.parsed;
+ rhs = std::move(rbo.node);
+ }
+
+ lhs = kal::BinaryOp{ op, std::move(lhs), std::move(rhs) };
+ }
r.parsed = f;
- err("not implemented yet");
+ r.node = std::move(lhs);
+ return r;
+}
+
+template<typename FwdIt>
+ParseResult<FwdIt>
+parse_expr(FwdIt f, FwdIt l);
+
+template<typename FwdIt>
+ParseResult<FwdIt>
+parse_expr_primary(FwdIt f, FwdIt l)
+{
+ ParseResult<FwdIt> r;
+ auto err = [&](auto&& msg) {
+ r.err.pos = f;
+ r.err.msg = std::move(msg);
+ };
+
+ assert(f != l);
+
+ r.parsed = f;
+
+ switch (token_type(*f)) {
+ case TkType::Num:
+ r.node = Number{ token_num(*f) };
+ r.parsed = ++f;
+ return r;
+
+ case TkType::Id:
+ return parse_expr_idexpr(f, l);
+
+ case TkType::Etc:
+ if (token_str(*f) == "(") {
+ ++f;
+
+ auto rpe = parse_expr(f, l);
+
+ if (rpe.parsed == f) {
+ r.err = std::move(rpe.err);
+ return r;
+ }
+
+ r.node = std::move(rpe.node);
+
+ f = rpe.parsed;
+ if (f == l) {
+ err("early termination: expects `)`");
+ return r;
+ }
+
+ if (token_str(*f) == ")")
+ r.parsed = ++f;
+ else
+ err("expects `)`");
+
+ return r;
+ }
+
+ /* fallthrough */
+
+ default:
+ // FIXME performance of concat
+ err(std::string{ "unexpected token (" } + kal::to_string(*f) +
+ ") while expecting an expression");
+ return r;
+ }
+
return r;
}
+template<typename FwdIt>
+ParseResult<FwdIt>
+parse_expr(FwdIt f, FwdIt l)
+{
+ assert(f != l);
+
+ auto r = parse_expr_primary(f, l);
+
+ if (r.parsed == f)
+ return r; // CHKME std::move(r);
+
+ f = r.parsed;
+
+ if (f == l || token_precedence(*f) < 0) // not a valid operator
+ return r;
+
+ return parse_expr_binop_rhs(f, l, 0, std::move(r.node));
+}
+
} // namespace detail
// TODO std::move err msg
@@ -129,13 +345,26 @@ parse(FwdIt f, FwdIt l, OutIt nodes, ErrOp error)
"early termination: "
"expects function prototype after `def` keyword");
+ kal::Prototype proto;
+
{
auto r = detail::parse_prototype(f, l);
if (r.parsed == f) // failure
return err(r.err.pos, r.err.msg);
+
+ {
+ using kal::cast; // FIXME
+ using kal::node_type; // FIXME FIXME
+
+ assert(node_type(r.node) == kal::NodeType::Prototype);
+
+ auto ok = cast(r.node, proto);
+
+ assert(ok && "unsuccessful cast");
+ }
+
f = r.parsed;
- *nodes++ = std::move(r.node);
}
if (f == l)
@@ -147,7 +376,7 @@ parse(FwdIt f, FwdIt l, OutIt nodes, ErrOp error)
if (r.parsed == f) // failure
return err(r.err.pos, r.err.msg);
f = r.parsed;
- *nodes++ = std::move(r.node);
+ *nodes++ = kal::Function{ std::move(proto), std::move(r.node) };
}
} break;
diff --git a/kaleidoscope_tokens.hpp b/kaleidoscope_tokens.hpp
@@ -64,6 +64,30 @@ token_num(const Token& t)
return t.dbl;
}
+inline int
+token_precedence(const Token& t)
+{
+ if (t.type != TkType::Etc)
+ return -1;
+
+ // assert(!t.str.empty());
+
+ switch (t.str[0]) {
+ case '<':
+ return 10;
+
+ case '+':
+ case '-':
+ return 20;
+
+ case '*':
+ case '/':
+ return 40;
+ }
+
+ return -1;
+}
+
template<typename FwdIt, typename OutIt>
OutIt
tokenize(FwdIt first, FwdIt last, OutIt tokens)
diff --git a/tests/kaleidoscope_parser.test.cpp b/tests/kaleidoscope_parser.test.cpp
@@ -9,11 +9,11 @@
#include <string>
#include <vector>
-#include <iostream> // FIXME
-
#include "kaleidoscope_ast.hpp"
#include "kaleidoscope_tokens.hpp"
+using kal::to_string;
+
TEST_CASE("parse simple programs", "[simple]")
{
auto p = [](const std::string& s) {
@@ -45,8 +45,6 @@ TEST_CASE("parse simple programs", "[simple]")
{
auto nd = p("extern sin(x)");
- using kal::to_string;
-
REQUIRE(nd.size() == 1);
REQUIRE(to_string(nd[0]) == "(prototype sin x)");
}
@@ -54,8 +52,6 @@ TEST_CASE("parse simple programs", "[simple]")
{
auto nd = p("extern tan2 ( arg0 arg1 )");
- using kal::to_string;
-
REQUIRE(nd.size() == 1);
REQUIRE(to_string(nd[0]) == "(prototype tan2 arg0 arg1)");
}
@@ -63,11 +59,156 @@ TEST_CASE("parse simple programs", "[simple]")
{
auto nd = p("extern cos(realInput);extern atan2(arg0 arg1);");
- using kal::to_string;
-
REQUIRE(nd.size() == 2);
REQUIRE(to_string(nd[0]) == "(prototype cos realInput)");
REQUIRE(to_string(nd[1]) == "(prototype atan2 arg0 arg1)");
}
}
+
+ SECTION("def")
+ {
+ {
+ auto nd = p(R"(
+def one(x)
+ 1
+)");
+
+ REQUIRE(nd.size() == 1);
+ REQUIRE(to_string(nd[0]) == "(function (prototype one x) (number 1))");
+ }
+
+ {
+ auto nd = p("extern sin(x) def pi2() 1.5708");
+
+ REQUIRE(nd.size() == 2);
+ REQUIRE(to_string(nd[0]) == "(prototype sin x)");
+ REQUIRE(to_string(nd[1]) == "(function (prototype pi2) (number 1.5708))");
+ }
+
+ {
+ auto nd = p("extern sin(x) def sinPi2() sin(1.5708)");
+
+ REQUIRE(nd.size() == 2);
+ REQUIRE(to_string(nd[0]) == "(prototype sin x)");
+ REQUIRE(to_string(nd[1]) ==
+ "(function (prototype sinPi2) (call sin (number 1.5708)))");
+ }
+
+ {
+ auto nd =
+ p("extern tan2(x y);extern sin(x);def fun()tan2(sin(1.5708), 1);"
+ "def gun(x y z)tan2(0.1,gun(1,sin(1.5),1.0));");
+
+ REQUIRE(nd.size() == 4);
+ REQUIRE(to_string(nd[0]) == "(prototype tan2 x y)");
+ REQUIRE(to_string(nd[1]) == "(prototype sin x)");
+ REQUIRE(to_string(nd[2]) ==
+ "(function (prototype fun) (call tan2 (call sin (number 1.5708)) "
+ "(number 1)))");
+ REQUIRE(to_string(nd[3]) ==
+ "(function (prototype gun x y z) (call tan2 (number 0.1) (call "
+ "gun (number 1) (call sin (number 1.5)) (number 1))))");
+ }
+ }
+
+ SECTION("binary operation")
+ {
+ using Num = kal::Number;
+ using BOp = kal::BinaryOp;
+ using Var = kal::Variable;
+
+ {
+ auto nd = p("2 + 3");
+
+ REQUIRE(nd.size() == 1);
+ REQUIRE(nd[0] == BOp{ '+', Num{ 2 }, Num{ 3 } });
+ REQUIRE(to_string(nd[0]) == "(binop '+' (number 2) (number 3))");
+ }
+
+ {
+ auto nd = p("x * y");
+
+ REQUIRE(nd.size() == 1);
+ REQUIRE(nd[0] == BOp{ '*', Var{ "x" }, Var{ "y" } });
+ }
+
+ {
+ auto nd = p("x * y * z");
+
+ REQUIRE(nd.size() == 1);
+ REQUIRE(nd[0] ==
+ BOp{ '*', BOp{ '*', Var{ "x" }, Var{ "y" } }, Var{ "z" } });
+ }
+
+ {
+ auto nd = p("4 * 5 + 3");
+
+ REQUIRE(nd.size() == 1);
+ REQUIRE(nd[0] == BOp{ '+', BOp{ '*', Num{ 4 }, Num{ 5 } }, Num{ 3 } });
+ }
+
+ {
+ auto nd = p("4+2.15*3");
+
+ REQUIRE(nd.size() == 1);
+ REQUIRE(nd[0] == BOp{
+ '+',
+ Num{ 4 },
+ BOp{ '*', Num{ 2.15 }, Num{ 3 } },
+ });
+ }
+
+ {
+ auto nd = p("4+2.15*3/2-4.4/2/3#expr");
+
+ REQUIRE(nd.size() == 1);
+ REQUIRE(nd[0] ==
+ BOp{ '-',
+ BOp{
+ '+',
+ Num{ 4 },
+ BOp{ '/', BOp{ '*', Num{ 2.15 }, Num{ 3 } }, Num{ 2 } },
+ },
+ BOp{ '/', BOp{ '/', Num{ 4.4 }, Num{ 2 } }, Num{ 3 } } });
+ }
+
+ {
+ auto nd = p("sin(3.14/4)-2.3-3.4-(3*4+1)*3");
+
+ REQUIRE(nd.size() == 1);
+ REQUIRE(
+ to_string(nd[0]) ==
+ "(binop '-' (binop '-' (binop '-' (call sin (binop '/' (number 3.14) "
+ "(number 4))) (number 2.3)) (number 3.4)) (binop '*' (binop '+' (binop "
+ "'*' (number 3) (number 4)) (number 1)) (number 3)))");
+ }
+ }
+
+ SECTION("extern, def, call")
+ {
+ {
+ auto nd = p(R"(
+extern tan2(x y)
+
+def formula(x y z)
+ 3.14 + 2 * (tan2(x, y) + x*y) / z
+
+extern sin(x)
+
+sin(2.72) + formula(1, 2, 3)
+)");
+
+ REQUIRE(nd.size() == 4);
+ REQUIRE(to_string(nd[0]) == "(prototype tan2 x y)");
+ REQUIRE(to_string(nd[1]) ==
+ "(function (prototype formula x y z) (binop '+' (number 3.14) "
+ "(binop '/' (binop '*' (number 2) (binop '+' (call tan2 "
+ "(variable x) (variable y)) (binop '*' (variable x) (variable "
+ "y)))) (variable z))))");
+ REQUIRE(to_string(nd[2]) == "(prototype sin x)");
+ REQUIRE(to_string(nd[3]) ==
+ "(binop '+' (call sin (number 2.72)) (call formula (number 1) "
+ "(number 2) (number 3)))");
+ }
+ }
}