c++ - Spirit 仅在似乎从 Lexer 获得第一个符号后无法解析
问题描述
最近在这里问了一个问题: Boost Spirit Segfault In Parser
在这篇文章中,有人指出我正在使用的语法绝对是左递归的,并且精神是一个 PEG 解析器生成器,这意味着左递归是不可能的。
我使用 Dragon Book 中处理左递归部分的规则将语法转换为非左递归语法。
给定一个左递归文法
A -> A >> alpha | beta
可以通过执行以下操作将其转换为等效的右递归文法:
A -> beta >> A'
A' -> alpha >> A' | epsilon
这是生成的解析器,我认为是非左递归产生:
namespace interpreter {
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{
template <typename TokenDef>
InterpreterGrammar(TokenDef const& tok)
: InterpreterGrammar::base_type(start)
{
using boost::phoenix::ref;
start %= functionList >> endList >> qi::eoi;
// different expressions
exp %=
qi::token(k_alphaTerminal) >> qi::token(k_equalTo) >> qi::token(k_alphaTerminal) >> expPrime
|
qi::token(k_numericTerminal) >> expPrime
|
qi::token(k_trueTok) >> expPrime
|
qi::token(k_falseTok) >> expPrime;
expPrime %=
qi::token(k_equalTo) >> exp >> expPrime
|
qi::token(k_notEq) >> exp >> expPrime
|
qi::token(k_less) >> exp >> expPrime
|
qi::token(k_lessEq) >> exp >> expPrime
|
qi::token(k_greater) >> exp >> expPrime
|
qi::token(k_greaterEq) >> exp >> expPrime
|
qi::token(k_andTok) >> exp >> expPrime
|
qi::token(k_orTok) >> exp >> expPrime
|
qi::token(k_notTok) >> exp
|
qi::token(k_plues) >> exp >> expPrime
|
qi::token(k_minus) >> exp >> expPrime
|
qi::token(k_mult) >> exp >> expPrime
|
qi::token(k_minus) >> exp
|
qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
|
qi::eps;
// parameter list
paramList %= exp >> paramListPrime;
paramListPrime %= qi::token(k_comma) >> exp >> paramListPrime
|
qi::eps;
// return statements
returnStatement %= qi::token(k_returnTok) >> exp
|
qi::token(k_returnTok);
// function call statements
callStatement %= qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen);
// variable assignment
assignmentStatement %= qi::token(k_alphaTerminal) >> qi::token(k_assign) >> exp
|
qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp
>> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp;
// list of integers
intList %= qi::token(k_numericTerminal) >> intListPrime;
intListPrime %=
qi::token(k_comma) >> qi::token(k_numericTerminal) >> intListPrime
|
qi::eps;
// print out a variable
printStatement %= qi::token(k_print) >> exp;
// take input
inputStatement %= qi::token(k_alphaTerminal) >> qi::token(k_input);
// conditional statement
conditionStatement %= qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse;
// consitions have optional else
optionalElse %= qi::token(k_elseTok) >> qi::token(k_colon) >> statements
|
qi::eps;
// while loop
whileStatement %= qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw);
// actual program statements
endList %= end >> endListPrime;
endListPrime %= end >> endListPrime
|
qi::eps;
// end possibilities of program in global space
end %= callStatement
|
printStatement
|
qi::token(k_alphaTerminal) >> qi::token(k_assign) >> qi::token(k_input)
|
qi::token(k_alphaTerminal) >> qi::token(k_assign) >> exp
|
qi::token(k_alphaTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
>> qi::token(k_rightBracket)
|
qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
>> qi::token(k_assign) >> exp;
// function parameters
param %=
qi::token(k_alphaTerminal) >> paramPrime
|
qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket)
>> paramPrime;
// for handling left recursion in paramlist
paramPrime %=
qi::token(k_comma) >> qi::token(k_alphaTerminal) >> paramPrime
|
qi::eps;
// define a statement as assignment print input condition while or call
statement %=
assignmentStatement
|
printStatement
|
inputStatement
|
conditionStatement
|
whileStatement
|
callStatement
|
returnStatement;
// general statement list
statements %= statement >> statementsPrime;
// for handling left recursion in statements
statementsPrime %= statement >> statementsPrime
|
qi::eps;
// functions
functionList %= qi::token(k_def) >> qi::token(k_alphaTerminal) >> qi::token(k_leftParen)
>> param >> qi::token(k_rightParen) >> qi::token(k_colon)
>> statements >> qi::token(k_fed)
|
qi::token(k_def) >> qi::token(k_alphaTerminal) >> qi::token(k_leftParen)
>> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed)
| qi::eps;
BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
debug(start);
}
qi::rule<Iterator, Skipper> start;
qi::rule<Iterator, Skipper> functionList;
qi::rule<Iterator, Skipper> endList;
qi::rule<Iterator, Skipper> endListPrime;
qi::rule<Iterator, Skipper> param;
qi::rule<Iterator, Skipper> paramPrime;
qi::rule<Iterator, Skipper> paramList;
qi::rule<Iterator, Skipper> paramListPrime;
qi::rule<Iterator, Skipper> statements;
qi::rule<Iterator, Skipper> statementsPrime;
qi::rule<Iterator, Skipper> statement;
qi::rule<Iterator, Skipper> assignmentStatement;
qi::rule<Iterator, Skipper> printStatement;
qi::rule<Iterator, Skipper> inputStatement;
qi::rule<Iterator, Skipper> conditionStatement;
qi::rule<Iterator, Skipper> whileStatement;
qi::rule<Iterator, Skipper> callStatement;
qi::rule<Iterator, Skipper> returnStatement;
qi::rule<Iterator, Skipper> exp;
qi::rule<Iterator, Skipper> expPrime;
qi::rule<Iterator, Skipper> intList;
qi::rule<Iterator, Skipper> intListPrime;
qi::rule<Iterator, Skipper> optionalElse;
qi::rule<Iterator, Skipper> end;
};
}
这是词法分析器
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <iostream>
#include <fstream>
#include <streambuf>
#include <boost/bind.hpp>
#include <boost/ref.hpp>
namespace interpreter
{
namespace lex = boost::spirit::lex;
enum Tokens
{
k_andTok = 1,
k_def = 2,
k_elihw = 3,
k_elseTok = 4,
k_falseTok = 5,
k_fed = 6,
k_fi = 7,
k_ifTok = 8,
k_input = 9,
k_notTok = 10,
k_orTok = 11,
k_print = 12,
k_returnTok = 13,
k_trueTok = 14,
k_whileTok = 15,
k_plues = 16,
k_minus = 17,
k_mult = 18,
k_div = 19,
k_bang = 20,
k_equalTo = 21,
k_greaterEq = 22,
k_lessEq = 23,
k_notEq = 24,
k_less = 25,
k_greater = 26,
k_assign = 27,
k_comma = 28,
k_colon = 29,
k_leftParen = 30,
k_rightParen = 31,
k_leftBracket = 32,
k_rightBracket = 33,
k_alphaTerminal = 34,
k_numericTerminal = 35
};
template <typename Lexer>
struct LexerTokens : lex::lexer<Lexer>
{
LexerTokens() :
whiteSpace("[ \\t\\n]"),
andTok("and"),
def("def"),
elihw("elihw"),
elseTok("else"),
falseTok("false"),
fed("fed"),
fi("fi"),
ifTok("if"),
input("input"),
notTok("not"),
orTok("or"),
print("print"),
returnTok("return"),
trueTok("true"),
whileTok("while"),
plus("\\+"),
minus("\\-"),
mult("\\*"),
div("\\/"),
bang("\\!"),
equalTo("=="),
greaterEq(">="),
lessEq("<="),
notEq("!="),
less("<"),
greater(">"),
assign("="),
comma(","),
colon(":"),
leftParen("\\("),
rightParen("\\)"),
leftBracket("\\["),
rightBracket("\\["),
alphaTerminal("[a-z][a-zA-Z0-9]*"),
numericTerminal("[0-9]*")
{
this->self("WHITESPACE") = whiteSpace;
this->self.add
(andTok, k_andTok)
(def, k_def)
(elihw, k_elihw)
(elseTok, k_elseTok)
(falseTok, k_falseTok)
(fed, k_fed)
(fi, k_fi)
(ifTok, k_ifTok)
(andTok, k_andTok)
(input, k_input)
(notTok, k_notTok)
(orTok, k_orTok)
(print, k_print)
(returnTok, k_returnTok)
(trueTok, k_trueTok)
(whileTok, k_whileTok)
(plus, k_plues)
(minus, k_minus)
(mult, k_mult)
(div, k_div)
(bang, k_bang)
(equalTo, k_equalTo)
(greaterEq, k_greaterEq)
(lessEq, k_lessEq)
(notEq, k_notEq)
(less, k_less)
(greater, k_greater)
(assign, k_assign)
(comma, k_comma)
(colon, k_colon)
(leftParen, k_leftParen)
(rightParen, k_rightParen)
(leftBracket, k_leftBracket)
(rightBracket, k_rightBracket)
(alphaTerminal, k_alphaTerminal)
(numericTerminal, k_numericTerminal);
}
lex::token_def<lex::omit> whiteSpace;
lex::token_def<std::string> andTok;
lex::token_def<std::string> def;
lex::token_def<std::string> elihw;
lex::token_def<std::string> elseTok;
lex::token_def<std::string> falseTok;
lex::token_def<std::string> fed;
lex::token_def<std::string> fi;
lex::token_def<std::string> ifTok;
lex::token_def<std::string> input;
lex::token_def<std::string> notTok;
lex::token_def<std::string> orTok;
lex::token_def<std::string> print;
lex::token_def<std::string> returnTok;
lex::token_def<std::string> trueTok;
lex::token_def<std::string> whileTok;
lex::token_def<std::string> plus;
lex::token_def<std::string> minus;
lex::token_def<std::string> mult;
lex::token_def<std::string> div;
lex::token_def<std::string> bang;
lex::token_def<std::string> equalTo;
lex::token_def<std::string> greaterEq;
lex::token_def<std::string> lessEq;
lex::token_def<std::string> notEq;
lex::token_def<std::string> less;
lex::token_def<std::string> greater;
lex::token_def<std::string> assign;
lex::token_def<std::string> comma;
lex::token_def<std::string> colon;
lex::token_def<std::string> leftParen;
lex::token_def<std::string> rightParen;
lex::token_def<std::string> leftBracket;
lex::token_def<std::string> rightBracket;
lex::token_def<std::string> alphaTerminal;
lex::token_def<std::string> numericTerminal;
};
这是我的示例测试程序
int main(int argc, char** argv)
{
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
typedef lex::lexertl::lexer<token_type> lexer_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;
interpreter::LexerTokens< lexer_type > lexer;
interpreter::InterpreterGrammar< iterator_type, skipper_type > parser(lexer);
std::string sourceCode("def print_it(x, y): print 3*x + y return fed print_it(8,1) x = 3 print_it(x, x)");
char const* first = sourceCode.c_str();
char const* last = &first[sourceCode.size()];
bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);
std::cout << "Remaining " << std::string(first,last) << std::endl;
std::cout << "R is " << r << std::endl;
}
这些修改引起了一些问题,首先,通过非正式地查看语法,没有构建完整的 LL(1) 解析表,我不相信这个语法是 LL(1)。我仍然需要验证这一点,但我想知道精神,能够解析这个语法吗?我知道 PEG 通常使用 / 运算符来进行前瞻,spirit 目前是否这样做?我在另一篇文章中看到精神可能没有?
其次,这个语法失败了,我注意到,在开始生产时,我简化了开始生产并使其成为:
start %= functionList;
然后将输入更改为:
def print_it(x, y): print 3*x + y return fed
语法调试语句表明解析成功。但是,我看到剩下的字符串是:
print_it(x, y): print 3*x + y return fed
所以实际上只解析了第一个令牌。经过一些调试后,我不确定为什么解析成功以及为什么只使用一个符号,这可能是词法分析器的问题吗?
此外,当我将开始生产更改为:
start %= endList;
并使用输入
y = x
然而,这无法解析并且只消耗字符 y。
最后,我的调试语句的输出不是很有帮助,当使用调试语句运行时,产生的输出是:
<start>
<try>[][][][][][][][][][][][][][][][][][][][]</try>
<fail/>
</start>
Remaining print_it(x, y): print 3*x + y return fed print_it(8,1) x = 3 print_it(x, x)
R is 0
我假设这意味着在语法中尝试了 20 个产生式,从 20 个空 [],这是一个正确的假设吗?还有为什么 [] 是空的?我通常会看到它们带有一些对调试有用的文本。是不是因为正则表达式匹配,语法就自动成功了?如果是这种情况,有没有办法让调试语句在使用令牌枚举令牌而不是使用宏添加表达式时打印有用的输出?
感谢您提供正确方向的任何帮助或点,谢谢。
解决方案
我假设这意味着在语法中尝试了 20 个产生式,从 20 个空 [],这是一个正确的假设吗?
否。[]
指示输入令牌。
还有为什么 [] 是空的?
可能没有有用的方法来打印它们,因此它们显示为空。
如果是这种情况,有没有办法让调试语句在使用令牌枚举令牌而不是使用宏添加表达式时打印有用的输出?
我会这么认为。但我从不使用 Lex。因此,可能需要一段时间才能弄清楚。
首先引起我注意的是:
typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
你的 AttributeTypes 说omit
。确实,改为
typedef lex::lexertl::token< char const*, boost::mpl::vector<lex::omit, std::string>, boost::mpl::true_ > token_type;
确实显示出生命迹象。对于输入x=y
(没有空格!)它打印:
<start>
<try>[y][=][x][][][][][][][][][][][][][][][][][]</try>
<fail/>
</start>
Remaining
R is false
现在,对于def print_it(x, y): print 3*x + y return fed
输入,输出仍然是:
<start>
<try>[def][][][][][][][][][][][][][][][][][][][]</try>
<fail/>
</start>
Remaining print_it(x, y): print 3*x + y return fed
R is false
信息量稍大一些。有趣的是,它似乎在第一个空格上也失败了。正whiteSpace
则表达式看起来不错,所以我在答案中搜索lex in_state
以记住我曾经学到的关于跳过和 Lex 的知识。
我玩弄了一些建议。在第二篇文章之后,我看到了这条评论:
好的!您可以添加到您的答案中,在词法分析器中有一个用于跳过空格的单独状态有另一个相当大的缺点:缺乏调试支持。如果您使用单独的状态进行跳过,然后使用 BOOST_SPIRIT_DEBUG_NODE,您将无法获得令牌流的完整输出。这是因为默认调试处理程序推进词法分析器迭代器以获取令牌,词法分析器当然处于初始状态。一旦遇到空格,迭代就会停止,因为词法分析器找不到匹配项。令牌流将在规则跟踪中的第一个空白处被剪切。– noxmetus 2017 年 9 月 20 日 18:28
让我们,只是为了好玩,尝试不使用单独的状态,而是使用Troubles 中的带有 boost::spirit::lex & whitespace的状态。
现在,表达式不会解析,因为
print_it
不是有效的标识符。让我们添加_
到alphaTerminal
alphaTerminal
需要=
遵循。稍微修正一下表达式:exp = ( qi::token(k_alphaTerminal) | qi::token(k_numericTerminal) | qi::token(k_trueTok) | qi::token(k_falseTok)) >> expPrime ;
事实上,这些令牌 ID 无效。您需要从
min_token_id
(即 0x10000)开始:enum Tokens { k_andTok = lex::tokenids::min_token_id + 1, k_def, k_elihw, k_elseTok, k_falseTok, k_fed, k_fi, k_ifTok, k_input, k_notTok, k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok, k_plues, k_minus, k_mult, k_div, k_bang, k_equalTo, k_greaterEq, k_lessEq, k_notEq, k_less, k_greater, k_assign, k_comma, k_colon, k_leftParen, k_rightParen, k_leftBracket, k_rightBracket, k_alphaTerminal, k_numericTerminal, };
为什么还要指定令牌 ID?从您将令牌定义的引用传递给解析器这一事实猜测,我认为您会使用它们吗?
exp = (tok.alphaTerminal | tok.numericTerminal | tok.trueTok | tok.falseTok) >> expPrime ;
另外,让我们为所有规则启用调试:
BOOST_SPIRIT_DEBUG_NODES( (start) (functionList) (endList) (endListPrime) (param) (paramPrime) (paramList) (paramListPrime) (statements) (statementsPrime) (statement) (assignmentStatement) (printStatement) (inputStatement) (conditionStatement) (whileStatement) (callStatement) (returnStatement) (exp) (expPrime) (intList) (intListPrime) (optionalElse) (end) )
而且,在花了太多时间试图理解为什么状态切换的船长似乎不起作用之后,我放弃了。这是我修修补补的结果:
//#define USE_STATES
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/lex.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/phoenix.hpp>
namespace lex = boost::spirit::lex;
namespace interpreter {
enum Tokens
{
k_andTok = lex::tokenids::min_token_id + 1,
k_def, k_elihw, k_elseTok, k_falseTok, k_fed, k_fi, k_ifTok, k_input,
k_notTok, k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok,
k_plues, k_minus, k_mult, k_div, k_bang, k_equalTo, k_greaterEq,
k_lessEq, k_notEq, k_less, k_greater, k_assign, k_comma, k_colon,
k_leftParen, k_rightParen, k_leftBracket, k_rightBracket,
k_alphaTerminal, k_numericTerminal,
};
template <typename Lexer>
struct LexerTokens : lex::lexer<Lexer>
{
LexerTokens() :
whiteSpace("[ \\t\\n]"),
andTok("and"),
def("def"),
elihw("elihw"),
elseTok("else"),
falseTok("false"),
fed("fed"),
fi("fi"),
ifTok("if"),
input("input"),
notTok("not"),
orTok("or"),
print("print"),
returnTok("return"),
trueTok("true"),
whileTok("while"),
plus("\\+"),
minus("\\-"),
mult("\\*"),
div("\\/"),
bang("\\!"),
equalTo("=="),
greaterEq(">="),
lessEq("<="),
notEq("!="),
less("<"),
greater(">"),
assign("="),
comma(","),
colon(":"),
leftParen("\\("),
rightParen("\\)"),
leftBracket("\\["),
rightBracket("\\["),
alphaTerminal("[a-z][a-zA-Z0-9_]*"),
numericTerminal("[0-9]*")
{
this->self.add
(andTok, k_andTok)
(def, k_def)
(elihw, k_elihw)
(elseTok, k_elseTok)
(falseTok, k_falseTok)
(fed, k_fed)
(fi, k_fi)
(ifTok, k_ifTok)
(andTok, k_andTok)
(input, k_input)
(notTok, k_notTok)
(orTok, k_orTok)
(print, k_print)
(returnTok, k_returnTok)
(trueTok, k_trueTok)
(whileTok, k_whileTok)
(plus, k_plues)
(minus, k_minus)
(mult, k_mult)
(div, k_div)
(bang, k_bang)
(equalTo, k_equalTo)
(greaterEq, k_greaterEq)
(lessEq, k_lessEq)
(notEq, k_notEq)
(less, k_less)
(greater, k_greater)
(assign, k_assign)
(comma, k_comma)
(colon, k_colon)
(leftParen, k_leftParen)
(rightParen, k_rightParen)
(leftBracket, k_leftBracket)
(rightBracket, k_rightBracket)
(alphaTerminal, k_alphaTerminal)
(numericTerminal, k_numericTerminal);
#ifdef USE_STATES
this->self("WHITESPACE") = whiteSpace;
#else
this->self += whiteSpace [ lex::_pass = lex::pass_flags::pass_ignore ];
#endif
}
lex::token_def<lex::omit> whiteSpace;
lex::token_def<std::string> andTok;
lex::token_def<std::string> def;
lex::token_def<std::string> elihw;
lex::token_def<std::string> elseTok;
lex::token_def<std::string> falseTok;
lex::token_def<std::string> fed;
lex::token_def<std::string> fi;
lex::token_def<std::string> ifTok;
lex::token_def<std::string> input;
lex::token_def<std::string> notTok;
lex::token_def<std::string> orTok;
lex::token_def<std::string> print;
lex::token_def<std::string> returnTok;
lex::token_def<std::string> trueTok;
lex::token_def<std::string> whileTok;
lex::token_def<std::string> plus;
lex::token_def<std::string> minus;
lex::token_def<std::string> mult;
lex::token_def<std::string> div;
lex::token_def<std::string> bang;
lex::token_def<std::string> equalTo;
lex::token_def<std::string> greaterEq;
lex::token_def<std::string> lessEq;
lex::token_def<std::string> notEq;
lex::token_def<std::string> less;
lex::token_def<std::string> greater;
lex::token_def<std::string> assign;
lex::token_def<std::string> comma;
lex::token_def<std::string> colon;
lex::token_def<std::string> leftParen;
lex::token_def<std::string> rightParen;
lex::token_def<std::string> leftBracket;
lex::token_def<std::string> rightBracket;
lex::token_def<std::string> alphaTerminal;
lex::token_def<std::string> numericTerminal;
};
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{
template <typename TokenDef>
InterpreterGrammar(TokenDef const& tok)
: InterpreterGrammar::base_type(start)
{
start
= functionList >> endList >> qi::eoi
;
// different expressions
exp
= (tok.alphaTerminal | tok.numericTerminal | tok.trueTok | tok.falseTok) >> expPrime
;
expPrime
= tok.equalTo >> exp >> expPrime
| tok.notEq >> exp >> expPrime
| tok.less >> exp >> expPrime
| tok.lessEq >> exp >> expPrime
| tok.greater >> exp >> expPrime
| tok.greaterEq >> exp >> expPrime
| tok.andTok >> exp >> expPrime
| tok.orTok >> exp >> expPrime
| tok.notTok >> exp
| tok.plus >> exp >> expPrime
| tok.minus >> exp >> expPrime
| tok.mult >> exp >> expPrime
| tok.minus >> exp
| tok.leftParen >> exp >> tok.rightParen
| tok.alphaTerminal >> tok.leftBracket >> exp >> tok.rightBracket
| tok.alphaTerminal >> tok.leftParen >> tok.rightParen
| tok.alphaTerminal >> tok.leftParen >> exp >> tok.rightParen
| qi::eps
;
// parameter list
paramList
= exp >> paramListPrime
;
paramListPrime
= tok.comma >> exp >> paramListPrime
| qi::eps
;
// return statements
returnStatement
= tok.returnTok >> exp
| tok.returnTok
;
// function call statements
callStatement
= tok.alphaTerminal >> tok.leftParen >> tok.rightParen
| tok.alphaTerminal >> tok.leftParen >> paramList >> tok.rightParen
;
// variable assignment
assignmentStatement
= tok.alphaTerminal >> tok.assign >> exp
| tok.alphaTerminal >> tok.leftBracket >> exp
>> tok.rightBracket >> tok.assign >> exp
;
// list of integers
intList
= tok.numericTerminal >> intListPrime
;
intListPrime
= tok.comma >> tok.numericTerminal >> intListPrime
| qi::eps
;
// print out a variable
printStatement
= tok.print >> exp
;
// take input
inputStatement
= tok.alphaTerminal >> tok.input
;
// conditional statement
conditionStatement
= tok.ifTok >> exp >> tok.colon >> statements >> optionalElse
;
// consitions have optional else
optionalElse
= tok.elseTok >> tok.colon >> statements
| qi::eps
;
// while loop
whileStatement
= tok.whileTok >> exp >> tok.colon >> statements >> tok.elihw
;
// actual program statements
endList
= end >> endListPrime
;
endListPrime
= end >> endListPrime
| qi::eps
;
// end possibilities of program in global space
end
= callStatement
| printStatement
| tok.alphaTerminal >> tok.assign >> tok.input
| tok.alphaTerminal >> tok.assign >> exp
| tok.alphaTerminal >> tok.assign >> tok.leftBracket >> intList
>> tok.rightBracket
| tok.alphaTerminal >> tok.leftBracket >> exp >> tok.rightBracket
>> tok.assign >> exp
;
// function parameters
param
= tok.alphaTerminal >> paramPrime
| tok.alphaTerminal >> tok.leftBracket >> tok.rightBracket
>> paramPrime
;
// for handling left recursion in paramlist
paramPrime
= tok.comma >> tok.alphaTerminal >> paramPrime
| qi::eps
;
// define a statement as assignment print input condition while or call
statement
= assignmentStatement
| printStatement
| inputStatement
| conditionStatement
| whileStatement
| callStatement
| returnStatement
;
// general statement list
statements
= statement >> statementsPrime
;
// for handling left recursion in statements
statementsPrime
= statement >> statementsPrime
| qi::eps
;
// functions
functionList
= tok.def >> tok.alphaTerminal >> tok.leftParen
>> param >> tok.rightParen >> tok.colon
>> statements >> tok.fed
| tok.def >> tok.alphaTerminal >> tok.leftParen
>> tok.rightParen >> tok.colon >> statements >> tok.fed
| qi::eps
;
BOOST_SPIRIT_DEBUG_NODES(
(start) (functionList) (endList) (endListPrime) (param)
(paramPrime) (paramList) (paramListPrime) (statements)
(statementsPrime) (statement) (assignmentStatement)
(printStatement) (inputStatement) (conditionStatement)
(whileStatement) (callStatement) (returnStatement) (exp)
(expPrime) (intList) (intListPrime) (optionalElse) (end)
)
}
private:
qi::rule<Iterator, Skipper> start;
qi::rule<Iterator, Skipper> functionList;
qi::rule<Iterator, Skipper> endList;
qi::rule<Iterator, Skipper> endListPrime;
qi::rule<Iterator, Skipper> param;
qi::rule<Iterator, Skipper> paramPrime;
qi::rule<Iterator, Skipper> paramList;
qi::rule<Iterator, Skipper> paramListPrime;
qi::rule<Iterator, Skipper> statements;
qi::rule<Iterator, Skipper> statementsPrime;
qi::rule<Iterator, Skipper> statement;
qi::rule<Iterator, Skipper> assignmentStatement;
qi::rule<Iterator, Skipper> printStatement;
qi::rule<Iterator, Skipper> inputStatement;
qi::rule<Iterator, Skipper> conditionStatement;
qi::rule<Iterator, Skipper> whileStatement;
qi::rule<Iterator, Skipper> callStatement;
qi::rule<Iterator, Skipper> returnStatement;
qi::rule<Iterator, Skipper> exp;
qi::rule<Iterator, Skipper> expPrime;
qi::rule<Iterator, Skipper> intList;
qi::rule<Iterator, Skipper> intListPrime;
qi::rule<Iterator, Skipper> optionalElse;
qi::rule<Iterator, Skipper> end;
};
}
#include <fstream>
#include <iterator>
int main(int argc, char** argv) {
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
typedef lex::lexertl::token<char const*, boost::mpl::vector<lex::omit, std::string>, boost::mpl::true_> token_type;
#ifdef USE_STATES
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
#else
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
typedef qi::unused_type skipper_type;
#endif
interpreter::LexerTokens<lexer_type> lexer;
interpreter::InterpreterGrammar<iterator_type, skipper_type> parser(lexer);
// read the file
if (argc != 2)
{
std::cout << "File required" << std::endl;
return 1;
}
std::ifstream t(argv[1]);
std::string const sourceCode { std::istreambuf_iterator<char>(t), {} };
char const* first = sourceCode.data();
char const* last = first + sourceCode.size();
#ifdef USE_STATES
bool ok = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);
#else
bool ok = lex::tokenize_and_parse(first, last, lexer, parser);
#endif
std::cout << "Remaining '" << std::string(first,last) << "'" << std::endl;
std::cout << "R is " << std::boolalpha << ok << std::endl;
}
印刷
<start>
<try>[def][print_it][(][x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
<functionList>
<try>[def][print_it][(][x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
<param>
<try>[x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
<paramPrime>
<try>[,][y][)][:][print][3][*][x][+][y][return][fed]</try>
<paramPrime>
<try>[)][:][print][3][*][x][+][y][return][fed]</try>
<success>[)][:][print][3][*][x][+][y][return][fed]</success>
<attributes>[]</attributes>
</paramPrime>
<success>[)][:][print][3][*][x][+][y][return][fed]</success>
<attributes>[]</attributes>
</paramPrime>
<success>[)][:][print][3][*][x][+][y][return][fed]</success>
<attributes>[]</attributes>
</param>
<statements>
<try>[print][3][*][x][+][y][return][fed]</try>
<statement>
<try>[print][3][*][x][+][y][return][fed]</try>
<assignmentStatement>
<try>[print][3][*][x][+][y][return][fed]</try>
<fail/>
</assignmentStatement>
<printStatement>
<try>[print][3][*][x][+][y][return][fed]</try>
<exp>
<try>[3][*][x][+][y][return][fed]</try>
<expPrime>
<try>[*][x][+][y][return][fed]</try>
<exp>
<try>[x][+][y][return][fed]</try>
<expPrime>
<try>[+][y][return][fed]</try>
<exp>
<try>[y][return][fed]</try>
<expPrime>
<try>[return][fed]</try>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</exp>
<expPrime>
<try>[return][fed]</try>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</exp>
<expPrime>
<try>[return][fed]</try>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</expPrime>
<success>[return][fed]</success>
<attributes>[]</attributes>
</exp>
<success>[return][fed]</success>
<attributes>[]</attributes>
</printStatement>
<success>[return][fed]</success>
<attributes>[]</attributes>
</statement>
<statementsPrime>
<try>[return][fed]</try>
<statement>
<try>[return][fed]</try>
<assignmentStatement>
<try>[return][fed]</try>
<fail/>
</assignmentStatement>
<printStatement>
<try>[return][fed]</try>
<fail/>
</printStatement>
<inputStatement>
<try>[return][fed]</try>
<fail/>
</inputStatement>
<conditionStatement>
<try>[return][fed]</try>
<fail/>
</conditionStatement>
<whileStatement>
<try>[return][fed]</try>
<fail/>
</whileStatement>
<callStatement>
<try>[return][fed]</try>
<fail/>
</callStatement>
<returnStatement>
<try>[return][fed]</try>
<exp>
<try>[fed]</try>
<fail/>
</exp>
<success>[fed]</success>
<attributes>[]</attributes>
</returnStatement>
<success>[fed]</success>
<attributes>[]</attributes>
</statement>
<statementsPrime>
<try>[fed]</try>
<statement>
<try>[fed]</try>
<assignmentStatement>
<try>[fed]</try>
<fail/>
</assignmentStatement>
<printStatement>
<try>[fed]</try>
<fail/>
</printStatement>
<inputStatement>
<try>[fed]</try>
<fail/>
</inputStatement>
<conditionStatement>
<try>[fed]</try>
<fail/>
</conditionStatement>
<whileStatement>
<try>[fed]</try>
<fail/>
</whileStatement>
<callStatement>
<try>[fed]</try>
<fail/>
</callStatement>
<returnStatement>
<try>[fed]</try>
<fail/>
</returnStatement>
<fail/>
</statement>
<success>[fed]</success>
<attributes>[]</attributes>
</statementsPrime>
<success>[fed]</success>
<attributes>[]</attributes>
</statementsPrime>
<success>[fed]</success>
<attributes>[]</attributes>
</statements>
<success></success>
<attributes>[]</attributes>
</functionList>
<endList>
<try></try>
<end>
<try></try>
<callStatement>
<try></try>
<fail/>
</callStatement>
<printStatement>
<try></try>
<fail/>
</printStatement>
<fail/>
</end>
<fail/>
</endList>
<fail/>
</start>
Remaining ''
R is false
推荐阅读
- clojurescript - shadow-cljs - 套接字连接失败,服务器进程死了?
- python - Dockerfile 使用 python 和 mysql 创建图像
- ios - 从字符串中同时获取前 3 个索引值
- c# - Bson 不使用“新”覆盖的属性进行序列化,并报告重复使用属性名
- paypal - 金额输入,如 Flutter 中的 PayPal
- python - 调用 Python 对象时超出 /form/movie/ 最大递归深度的 RecursionError
- python - 如何在 Python 中更改对象析构函数内的异常?
- java - 是否可以更改(或有多个)containerDelegate?
- bash - 纯 bash:通过在一个命令中组合 ## 和 %% 来提取子字符串?
- java - 为什么迭代地图比迭代列表慢?