kaleidoscope_parser.hpp (7939B)
1 2 #pragma once 3 4 #include "kaleidoscope_ast.hpp" 5 #include "kaleidoscope_tokens.hpp" 6 7 #include <string> 8 9 namespace kal { 10 11 template<typename FwdIt> 12 struct ParseError 13 { 14 FwdIt pos; 15 std::string msg; 16 }; 17 18 namespace detail { 19 20 template<typename FwdIt> 21 struct ParseResult 22 { 23 FwdIt parsed; // position of successfully parsed token(s) 24 ASTNode node; 25 }; 26 27 template<typename FwdIt, typename OutErrIt> 28 ParseResult<FwdIt> 29 parse_expr(FwdIt f, FwdIt l, OutErrIt error); 30 31 template<typename FwdIt, typename OutErrIt> 32 ParseResult<FwdIt> 33 parse_expr_primary(FwdIt f, FwdIt l, OutErrIt error); 34 35 template<typename FwdIt, typename OutErrIt> 36 ParseResult<FwdIt> 37 parse_prototype(FwdIt f, FwdIt l, OutErrIt error) 38 { 39 ParseResult<FwdIt> r{ 40 f, 41 }; 42 Prototype p; 43 auto err = [&](auto&& msg) { 44 *error++ = ParseError<FwdIt>{ f, std::move(msg) }; 45 return r; 46 }; 47 48 assert(f != l); 49 50 if (token_type(*f) != TkType::Id) 51 return err("expects identifier"); 52 53 p.name = token_str(*f); 54 ++f; 55 56 if (token_str(*f) != "(") 57 return err("expects `(`"); 58 ++f; 59 60 // args 61 while (f != l && token_str(*f) != ")") { 62 if (token_type(*f) != TkType::Id) 63 return err("early termination: expects identifier"); 64 65 p.params.emplace_back(token_str(*f)); 66 ++f; 67 } 68 69 if (f == l) 70 return err("early termination: expects `)`"); 71 ++f; 72 73 r.parsed = f; 74 r.node = std::move(p); 75 return r; 76 } 77 78 template<typename FwdIt, typename OutErrIt> 79 ParseResult<FwdIt> 80 parse_expr_idexpr(FwdIt f, FwdIt l, OutErrIt error) 81 { 82 ParseResult<FwdIt> r{ 83 f, 84 }; 85 auto err = [&](auto&& msg) { 86 *error++ = ParseError<FwdIt>{ f, std::move(msg) }; 87 return r; 88 }; 89 90 assert(f != l); 91 assert(token_type(*f) == TkType::Id); 92 93 auto fid = f++; 94 95 if (f == l || token_str(*f) != "(") { 96 r.node = kal::Variable{ token_str(*fid) }; 97 r.parsed = f; 98 return r; 99 } 100 101 kal::Call call{ token_str(*fid), {} }; 102 103 ++f; // drop "(" 104 105 while (true) { 106 if (f == l) 107 return err("early termination of call expression"); 108 109 if (token_str(*f) == ")") { 110 r.node = std::move(call); 111 r.parsed = ++f; 112 return r; 113 } 114 115 { 116 auto rpe = parse_expr(f, l, error); 117 118 if (rpe.parsed == f) 119 return err("error in parsing call arguments"); 120 121 call.args.emplace_back(std::move(rpe.node)); 122 f = rpe.parsed; 123 } 124 125 auto s = token_str(*f); 126 127 switch (s[0]) { 128 case ')': 129 break; 130 131 case ',': 132 ++f; 133 break; 134 135 default: 136 return err("expects ','"); 137 } 138 } 139 140 r.parsed = f; 141 return r; 142 } 143 144 template<typename FwdIt, typename ErrOutIt> 145 ParseResult<FwdIt> 146 parse_expr_binop_rhs(FwdIt f, 147 FwdIt l, 148 int precedence, 149 ASTNode lhs, 150 ErrOutIt error) 151 { 152 ParseResult<FwdIt> r{ 153 f, 154 }; 155 auto err = [&](auto&& msg) { 156 *error++ = ParseError<FwdIt>{ f, std::move(msg) }; 157 return r; 158 }; 159 ASTNode rhs; 160 161 assert(f != l); 162 163 while (f != l) { 164 int tpred = token_precedence(*f); 165 166 if (tpred < precedence) 167 break; 168 169 auto op = token_str(*f)[0]; 170 171 ++f; 172 if (f == l) { 173 err("early termination: expects primary expression"); 174 break; 175 } 176 177 { 178 auto rpe = parse_expr_primary(f, l, error); 179 180 if (rpe.parsed == f) { 181 err("failed at parsing primary expression"); 182 break; 183 } 184 185 f = rpe.parsed; 186 rhs = std::move(rpe.node); 187 188 if (f == l) { 189 lhs = kal::BinaryOp{ op, std::move(lhs), std::move(rhs) }; 190 break; 191 } 192 } 193 194 if (tpred < token_precedence(*f)) { // there's an operator with higher prec 195 auto rbo = parse_expr_binop_rhs(f, l, tpred + 1, std::move(rhs), error); 196 197 if (rbo.parsed == f) 198 return err("error in parsing rhs"); 199 200 f = rbo.parsed; 201 rhs = std::move(rbo.node); 202 } 203 204 lhs = kal::BinaryOp{ op, std::move(lhs), std::move(rhs) }; 205 } 206 207 r.parsed = f; 208 r.node = std::move(lhs); 209 return r; 210 } 211 212 template<typename FwdIt, typename ErrOutIt> 213 ParseResult<FwdIt> 214 parse_expr_primary(FwdIt f, FwdIt l, ErrOutIt error) 215 { 216 ParseResult<FwdIt> r; 217 auto err = [&](auto&& msg) { 218 *error = ParseError<FwdIt>{ f, std::move(msg) }; 219 return r; 220 }; 221 222 assert(f != l); 223 224 r.parsed = f; 225 226 switch (token_type(*f)) { 227 case TkType::Num: 228 r.node = Number{ token_num(*f) }; 229 r.parsed = ++f; 230 return r; 231 232 case TkType::Id: 233 return parse_expr_idexpr(f, l, error); 234 235 case TkType::Etc: 236 if (token_str(*f) == "(") { 237 ++f; 238 239 auto rpe = parse_expr(f, l, error); 240 241 if (rpe.parsed == f) 242 return err("failed parsing expression"); 243 244 r.node = std::move(rpe.node); 245 246 f = rpe.parsed; 247 if (f == l) 248 return err("early termination: expects `)`"); 249 250 if (token_str(*f) == ")") 251 r.parsed = ++f; 252 else 253 err("expects `)`"); 254 255 return r; 256 } 257 258 /* fallthrough */ 259 260 default: 261 return err("unexpected token while expecting an expression"); 262 } 263 264 return r; 265 } 266 267 template<typename FwdIt, typename ErrOutIt> 268 ParseResult<FwdIt> 269 parse_expr(FwdIt f, FwdIt l, ErrOutIt error) 270 { 271 assert(f != l); 272 273 auto r = parse_expr_primary(f, l, error); 274 275 if (r.parsed == f) 276 return r; // CHKME std::move(r); 277 278 f = r.parsed; 279 280 if (f == l || token_precedence(*f) < 0) // not a valid operator 281 return r; 282 283 return parse_expr_binop_rhs(f, l, 0, std::move(r.node), error); 284 } 285 286 } // namespace detail 287 288 enum class ParsedEntityType 289 { 290 FuncDecl, // function declaration 291 FuncDef, // function definition 292 Stmt, // statement 293 }; 294 295 template<typename FwdIt, typename ParsedOp, typename ErrOutIt> 296 FwdIt 297 parse(FwdIt f, FwdIt l, ParsedOp parsed, ErrOutIt error) 298 { 299 static_assert(std::is_assignable<decltype(*error), ParseError<FwdIt>>::value, 300 ""); 301 302 auto err = [&](std::string&& msg) { 303 *error++ = ParseError<FwdIt>{ f, std::move(msg) }; 304 return f; 305 }; 306 307 while (f != l) { 308 switch (token_type(*f)) { 309 case TkType::Extern: { 310 if (++f == l) 311 return err( 312 "early termination: " 313 "expects function prototype after `extern` keyword"); 314 315 auto r = detail::parse_prototype(f, l, error); 316 317 if (r.parsed == f) // failure 318 return err("parsing prototype failed"); 319 f = r.parsed; 320 parsed(ParsedEntityType::FuncDecl, std::move(r.node)); 321 } break; 322 323 case TkType::Def: { 324 if (++f == l) 325 return err( 326 "early termination: " 327 "expects function prototype after `def` keyword"); 328 329 kal::Prototype proto; 330 331 { 332 auto r = detail::parse_prototype(f, l, error); 333 334 if (r.parsed == f) // failure 335 return err("parsing prototype failed"); 336 337 { 338 using kal::cast; // FIXME 339 using kal::node_type; // FIXME FIXME 340 341 assert(node_type(r.node) == kal::NodeType::Prototype); 342 343 auto ok = cast(r.node, proto); 344 345 assert(ok && "unsuccessful cast"); 346 } 347 348 f = r.parsed; 349 } 350 351 if (f == l) 352 return err("early termination: expects function body"); 353 354 { 355 auto r = detail::parse_expr(f, l, error); 356 357 if (r.parsed == f) // failure 358 return err("parsing expression failed"); 359 f = r.parsed; 360 parsed(ParsedEntityType::FuncDef, 361 kal::Function{ std::move(proto), std::move(r.node) }); 362 } 363 } break; 364 365 case TkType::Etc: 366 if (token_str(*f) == ";") { 367 ++f; 368 break; 369 } 370 /* fallthrough */ 371 372 default: { // top-level expr 373 auto r = detail::parse_expr(f, l, error); 374 375 if (r.parsed == f) // failure 376 return err("parsing expression failed"); 377 f = r.parsed; 378 parsed(ParsedEntityType::Stmt, std::move(r.node)); 379 } break; 380 381 case TkType::END: 382 return f; 383 } 384 } 385 386 return f; 387 } 388 389 } // namespace kal