llvm-journey

LLVM Journey
git clone git://0xff.ir/g/llvm-journey.git
Log | Files | Refs | README | LICENSE

commit f4d34a86af992ecea9b4ea519c0891c696b46118
parent d49d9e9e2b49650abcd8318fe665965b2d7fef22
Author: Mohammad-Reza Nabipoor <m.nabipoor@yahoo.com>
Date:   Sun, 30 Aug 2020 15:59:25 +0430

kaleidoscope_parser: Changed error reporting

Diffstat:
M kaleidoscope_parser.hpp | 202 ++++++++++++++++++++++++++++++++++---------------------------------------------
M tests/kaleidoscope_parser.test.cpp | 21 +++++++++++++++------
2 files changed, 103 insertions(+), 120 deletions(-)

diff --git a/kaleidoscope_parser.hpp b/kaleidoscope_parser.hpp @@ -8,6 +8,13 @@ namespace kal { +template<typename FwdIt> +struct ParseError +{ + FwdIt pos; + std::string msg; +}; + namespace detail { template<typename FwdIt> @@ -15,57 +22,52 @@ struct ParseResult { FwdIt parsed; // position of successfully parsed token(s) ASTNode node; - struct - { - FwdIt pos; // position of the error - std::string msg; - } err; }; -template<typename FwdIt> +template<typename FwdIt, typename OutErrIt> +ParseResult<FwdIt> +parse_expr(FwdIt f, FwdIt l, OutErrIt error); + +template<typename FwdIt, typename OutErrIt> ParseResult<FwdIt> -parse_prototype(FwdIt f, FwdIt l) +parse_expr_primary(FwdIt f, FwdIt l, OutErrIt error); + +template<typename FwdIt, typename OutErrIt> +ParseResult<FwdIt> +parse_prototype(FwdIt f, FwdIt l, OutErrIt error) { ParseResult<FwdIt> r{ f, }; Prototype p; auto err = [&](auto&& msg) { - r.err.pos = f; - r.err.msg = std::move(msg); + *error++ = ParseError<FwdIt>{ f, std::move(msg) }; + return r; }; assert(f != l); - if (token_type(*f) != TkType::Id) { - err("expects identifier"); - return r; - } + if (token_type(*f) != TkType::Id) + return err("expects identifier"); p.name = token_str(*f); ++f; - if (token_str(*f) != "(") { - err("expects `(`"); - return r; - } + if (token_str(*f) != "(") + return err("expects `(`"); ++f; // args while (f != l && token_str(*f) != ")") { - if (token_type(*f) != TkType::Id) { - err("early termination: expects identifier"); - return r; - } + if (token_type(*f) != TkType::Id) + return err("early termination: expects identifier"); p.params.emplace_back(token_str(*f)); ++f; } - if (f == l) { - err("early termination: expects `)`"); - return r; - } + if (f == l) + return err("early termination: expects `)`"); ++f; r.parsed = f; @@ -73,20 +75,16 @@ parse_prototype(FwdIt f, FwdIt l) return r; } -template<typename FwdIt> -ParseResult<FwdIt> -parse_expr(FwdIt f, FwdIt l); - -template<typename FwdIt> +template<typename FwdIt, typename 
OutErrIt> ParseResult<FwdIt> -parse_expr_idexpr(FwdIt f, FwdIt l) +parse_expr_idexpr(FwdIt f, FwdIt l, OutErrIt error) { ParseResult<FwdIt> r{ f, }; auto err = [&](auto&& msg) { - r.err.pos = f; - r.err.msg = std::move(msg); + *error++ = ParseError<FwdIt>{ f, std::move(msg) }; + return r; }; assert(f != l); @@ -105,10 +103,8 @@ parse_expr_idexpr(FwdIt f, FwdIt l) ++f; // drop "(" while (true) { - if (f == l) { - err("early termination of call expression"); - return r; - } + if (f == l) + return err("early termination of call expression"); if (token_str(*f) == ")") { r.node = std::move(call); @@ -117,15 +113,10 @@ parse_expr_idexpr(FwdIt f, FwdIt l) } { - auto rpe = parse_expr(f, l); + auto rpe = parse_expr(f, l, error); - if (rpe.parsed == f) { - // FIXME error - err("error in parsing call arguments: "); - r.err.msg += rpe.err.msg + " (at relative pos " + - std::to_string(std::distance(f, rpe.err.pos)) + ")"; - return r; - } + if (rpe.parsed == f) + return err("error in parsing call arguments"); call.args.emplace_back(std::move(rpe.node)); f = rpe.parsed; @@ -142,8 +133,7 @@ parse_expr_idexpr(FwdIt f, FwdIt l) break; default: - err(std::string{ "expects ',' but got " } + kal::to_string(*f)); - return r; + return err("expects ','"); } } @@ -151,20 +141,20 @@ parse_expr_idexpr(FwdIt f, FwdIt l) return r; } -template<typename FwdIt> -ParseResult<FwdIt> -parse_expr_primary(FwdIt f, FwdIt l); - -template<typename FwdIt> +template<typename FwdIt, typename ErrOutIt> ParseResult<FwdIt> -parse_expr_binop_rhs(FwdIt f, FwdIt l, int precedence, ASTNode lhs) +parse_expr_binop_rhs(FwdIt f, + FwdIt l, + int precedence, + ASTNode lhs, + ErrOutIt error) { ParseResult<FwdIt> r{ f, }; auto err = [&](auto&& msg) { - r.err.pos = f; - r.err.msg = std::move(msg); + *error++ = ParseError<FwdIt>{ f, std::move(msg) }; + return r; }; ASTNode rhs; @@ -185,12 +175,10 @@ parse_expr_binop_rhs(FwdIt f, FwdIt l, int precedence, ASTNode lhs) } { - auto rpe = parse_expr_primary(f, l); + auto rpe = 
parse_expr_primary(f, l, error); if (rpe.parsed == f) { - err("failed at parsing primary expression:"); - r.err.msg += rpe.err.msg + "(at relative pos " + - std::to_string(std::distance(f, rpe.err.pos)) + ")"; + err("failed at parsing primary expression"); break; } @@ -204,14 +192,10 @@ parse_expr_binop_rhs(FwdIt f, FwdIt l, int precedence, ASTNode lhs) } if (tpred < token_precedence(*f)) { // there's an operator with higher prec - auto rbo = parse_expr_binop_rhs(f, l, tpred + 1, std::move(rhs)); + auto rbo = parse_expr_binop_rhs(f, l, tpred + 1, std::move(rhs), error); - if (rbo.parsed == f) { - err("error in parsing rhs: "); - r.err.msg += rbo.err.msg + " (at relative pos " + - std::to_string(std::distance(f, rbo.err.pos)) + ")"; - return r; - } + if (rbo.parsed == f) + return err("error in parsing rhs"); f = rbo.parsed; rhs = std::move(rbo.node); @@ -225,18 +209,14 @@ parse_expr_binop_rhs(FwdIt f, FwdIt l, int precedence, ASTNode lhs) return r; } -template<typename FwdIt> -ParseResult<FwdIt> -parse_expr(FwdIt f, FwdIt l); - -template<typename FwdIt> +template<typename FwdIt, typename ErrOutIt> ParseResult<FwdIt> -parse_expr_primary(FwdIt f, FwdIt l) +parse_expr_primary(FwdIt f, FwdIt l, ErrOutIt error) { ParseResult<FwdIt> r; auto err = [&](auto&& msg) { - r.err.pos = f; - r.err.msg = std::move(msg); + *error = ParseError<FwdIt>{ f, std::move(msg) }; + return r; }; assert(f != l); @@ -250,26 +230,22 @@ parse_expr_primary(FwdIt f, FwdIt l) return r; case TkType::Id: - return parse_expr_idexpr(f, l); + return parse_expr_idexpr(f, l, error); case TkType::Etc: if (token_str(*f) == "(") { ++f; - auto rpe = parse_expr(f, l); + auto rpe = parse_expr(f, l, error); - if (rpe.parsed == f) { - r.err = std::move(rpe.err); - return r; - } + if (rpe.parsed == f) + return err("failed parsing expression"); r.node = std::move(rpe.node); f = rpe.parsed; - if (f == l) { - err("early termination: expects `)`"); - return r; - } + if (f == l) + return err("early termination: expects 
`)`"); if (token_str(*f) == ")") r.parsed = ++f; @@ -282,22 +258,19 @@ parse_expr_primary(FwdIt f, FwdIt l) /* fallthrough */ default: - // FIXME performance of concat - err(std::string{ "unexpected token (" } + kal::to_string(*f) + - ") while expecting an expression"); - return r; + return err("unexpected token while expecting an expression"); } return r; } -template<typename FwdIt> +template<typename FwdIt, typename ErrOutIt> ParseResult<FwdIt> -parse_expr(FwdIt f, FwdIt l) +parse_expr(FwdIt f, FwdIt l, ErrOutIt error) { assert(f != l); - auto r = parse_expr_primary(f, l); + auto r = parse_expr_primary(f, l, error); if (r.parsed == f) return r; // CHKME std::move(r); @@ -307,51 +280,52 @@ parse_expr(FwdIt f, FwdIt l) if (f == l || token_precedence(*f) < 0) // not a valid operator return r; - return parse_expr_binop_rhs(f, l, 0, std::move(r.node)); + return parse_expr_binop_rhs(f, l, 0, std::move(r.node), error); } } // namespace detail -// TODO std::move err msg - -template<typename FwdIt, typename OutIt, typename ErrOp> +template<typename FwdIt, typename OutIt, typename ErrOutIt> FwdIt -parse(FwdIt f, FwdIt l, OutIt nodes, ErrOp error) +parse(FwdIt f, FwdIt l, OutIt nodes, ErrOutIt error) { - auto err = [&](FwdIt pos, const auto& msg) { - error(pos, msg); - return pos; + static_assert(std::is_assignable<decltype(*error), ParseError<FwdIt>>::value, + ""); + + auto err = [&](std::string&& msg) { + *error++ = ParseError<FwdIt>{ f, std::move(msg) }; + return f; }; while (f != l) { switch (token_type(*f)) { case TkType::Extern: { if (++f == l) - return err(f, - "early termination: " - "expects function prototype after `extern` keyword"); + return err( + "early termination: " + "expects function prototype after `extern` keyword"); - auto r = detail::parse_prototype(f, l); + auto r = detail::parse_prototype(f, l, error); if (r.parsed == f) // failure - return err(r.err.pos, r.err.msg); + return err("parsing prototype failed"); f = r.parsed; *nodes++ = 
std::move(r.node); } break; case TkType::Def: { if (++f == l) - return err(f, - "early termination: " - "expects function prototype after `def` keyword"); + return err( + "early termination: " + "expects function prototype after `def` keyword"); kal::Prototype proto; { - auto r = detail::parse_prototype(f, l); + auto r = detail::parse_prototype(f, l, error); if (r.parsed == f) // failure - return err(r.err.pos, r.err.msg); + return err("parsing prototype failed"); { using kal::cast; // FIXME @@ -368,13 +342,13 @@ parse(FwdIt f, FwdIt l, OutIt nodes, ErrOp error) } if (f == l) - return err(f, "early termination: expects function body"); + return err("early termination: expects function body"); { - auto r = detail::parse_expr(f, l); + auto r = detail::parse_expr(f, l, error); if (r.parsed == f) // failure - return err(r.err.pos, r.err.msg); + return err("parsing expression failed"); f = r.parsed; *nodes++ = kal::Function{ std::move(proto), std::move(r.node) }; } @@ -388,10 +362,10 @@ parse(FwdIt f, FwdIt l, OutIt nodes, ErrOp error) /* fallthrough */ default: { // top-level expr - auto r = detail::parse_expr(f, l); + auto r = detail::parse_expr(f, l, error); if (r.parsed == f) // failure - return err(r.err.pos, r.err.msg); + return err("parsing expression failed"); f = r.parsed; *nodes++ = std::move(r.node); } break; diff --git a/tests/kaleidoscope_parser.test.cpp b/tests/kaleidoscope_parser.test.cpp @@ -29,15 +29,24 @@ TEST_CASE("parse simple programs", "[simple]") auto tkb = tk.cbegin(); auto tke = tk.cend(); + std::vector<kal::ParseError<decltype(tkb)>> errs; - kal::parse( - tkb, tke, std::back_inserter(nd), [&](const auto& it, const auto& err) { - auto pos = std::distance(tkb, it); + kal::parse(tkb, tke, std::back_inserter(nd), std::back_inserter(errs)); - FAIL("PARSER ERROR pos:" << pos << " err:\"" << err << '"'); - }); + if (errs.empty()) + return nd; - return nd; + std::ostringstream oss; + + for (auto& e : errs) { + oss << " Error@" << std::distance(tkb, 
e.pos) << ' ' << e.msg; + + if (e.pos != tke) + oss << '\n' << kal::to_string(*e.pos) << "\n"; + } + + FAIL("PARSER ERROR\n" << oss.str()); + return nd; // dummy }; SECTION("extern")