码迷,mamicode.com
首页 > 其他好文 > 详细

boost spirit parser for XQuery regexp

时间:2014-10-15 14:20:00      阅读:243      评论:0      收藏:0      [点我收藏+]

标签:des   http   io   os   ar   for   sp   on   as   

语法列表:

/* Grammar rules follow http://www.w3.org/TR/xmlschema-2/#regexs */

// [1] regExp ::= branch ( '|' branch )*
// A '|'-separated list of branches; every parsed branch is appended to
// element 0 of this rule's attribute.
re_reg_exp = re_branch [push_back( at_c<0>(_val), _1 )] % '|';

// [2] branch ::= piece*
// The semantic action is attached to each piece (not to the whole
// repetition), so every parsed piece is appended to element 0 of the attribute.
re_branch = *( re_piece [push_back( at_c<0>(_val), _1 )] );

// [3] piece ::= atom quantifier?
// Element 0 of the attribute receives the atom, element 1 the optional quantifier.
re_piece = re_atom [at_c<0>(_val) = _1] >> -( re_quantifier [at_c<1>(_val) = _1] );

// [4] quantifier ::= [?*+] | ( '{' quantity '}' )
re_quantifier = qi::char_("?*+") | ( qi::char_('{') >> re_quantity >> qi::char_('}') );

// [5] quantity ::= quantRange | quantMin | QuantExact
// Plain ordered alternation: the range form is tried first because it starts
// with the same QuantExact prefix as the exact form. The Qi sequential-or
// operator `||` ("a, b, or a followed by b") is not what rule [5] describes.
re_quantity = re_quant_range | re_quant_exact;

// [6] quantRange ::= QuantExact ',' QuantExact
// [7] quantMin ::= QuantExact ','
// NOTE: rule [7] is merged into rule [6] by making the upper bound optional.
re_quant_range = re_quant_exact >> qi::char_(',') >> -re_quant_exact;
//re_quant_min = re_quant_exact >> qi::char_(',');

// [8] QuantExact ::= [0-9]+
re_quant_exact = +( qi::char_("0-9") );

// [9] atom ::= Char | charClass | ( '(' regExp ')' )
// Alternatives are tried in order; whichever matches supplies the attribute.
re_atom = re_char [_val = _1]
    | re_char_class [_val = _1]
    | ( qi::char_('(') >> re_reg_exp [_val = _1] >> qi::char_(')') );

// [10] Char ::= [^.\?*+{}()|^$#x5B#x5D]
// NOTE: '|' is excluded as well because it separates branches.
// The excluded characters must be given as a qi::char_ set: a bare string
// literal on the right of `-` is a literal *sequence* parser, which would only
// reject input starting with that exact run of characters.
// NOTE(review): rule [10] also lists '{', '}' and '^', which are not excluded
// here (same set as before) — confirm whether they should be added.
re_char = qi::char_ - qi::char_(".\\?*+$()[]|");

// [11] charClass ::= charClassEsc | charClassExpr | WildcardEsc
re_char_class = re_char_class_esc [_val = _1]
    | re_char_class_expr [_val = _1]
    | re_wildcard_esc [_val = _1];

// [12] charClassExpr ::= '[' charGroup ']'
// NOTE: no skip in 'x' mode
// The bracketed group is stored in element 0 of the attribute.
re_char_class_expr = qi::char_('[') >> re_char_group [at_c<0>(_val) = _1] >> qi::char_(']');

// [13] charGroup ::= posCharGroup | negCharGroup | charClassSub
// NOTE(review): Qi alternatives are tried in order and the first match wins,
// so listing re_pos_char_group first may keep re_char_class_sub from ever
// being tried — confirm with a subtraction input such as [a-z-[aeiou]].
re_char_group = re_pos_char_group [_val = _1]
    | re_neg_char_group [_val = _1]
    | re_char_class_sub [_val = _1];

// [14] posCharGroup ::= ( charRange | charClassEsc )+
// Each matched range or escape is appended to element 0 of the attribute.
re_pos_char_group = +( ( re_char_range | re_char_class_esc ) [push_back( at_c<0>(_val), _1 )] );

// [15] negCharGroup ::= '^' posCharGroup
re_neg_char_group = qi::char_('^') >> re_pos_char_group;

// [16] charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
// Element 0 holds a positive group, element 1 a negated group (only one of the
// two is set), element 2 the subtracted class expression.
re_char_class_sub = ( re_pos_char_group [at_c<0>(_val) = _1]
                    | re_neg_char_group [at_c<1>(_val) = _1] )
    >> qi::char_('-')
    >> re_char_class_expr [at_c<2>(_val) = _1];

// [17] charRange ::= seRange | XmlCharIncDash
// The two-ended range must be tried first: XmlCharIncDash matches any single
// ordinary character, so if it came first an input like "a-z" would be consumed
// one character at a time and re_se_range would never get a chance.
re_char_range = re_se_range | re_xml_char_inc_dash;

// [18] seRange ::= charOrEsc '-' charOrEsc
re_se_range = re_char_or_esc >> qi::char_('-') >> re_char_or_esc;

// there's no 19th rule

// [20] charOrEsc ::= XmlChar | SingleCharEsc
// Plain alternation: exactly one of the two. The Qi sequential-or operator
// `||` would also accept an XmlChar immediately followed by an escape.
re_char_or_esc = re_xml_char | re_single_char_esc;

// [21] XmlChar ::= [^\#x2D#x5B#x5D]
// Any character except backslash, '[', ']' and '-'.
re_xml_char = qi::char_ - '\\' - '[' - ']' - '-';

// [22] XmlCharIncDash ::= [^\#x5B#x5D]
// Same as XmlChar but '-' is allowed.
re_xml_char_inc_dash = qi::char_ - '\\' - '[' - ']';

// [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
// The single- and multi-character escapes are handled by one inline set:
// a backslash followed by any character from rule [24] or rule [37].
// Inside a qi::char_ set, '-' between two characters denotes a range, so the
// literal '-' is placed last; written as "+-^" it would accept every character
// from '+' through '^' (digits, upper-case letters, ...).
re_char_class_esc = re_cat_esc | re_compl_esc |
    ( qi::char_('\\') >> qi::char_("nrt\\|.?*+^[](){}sSiIcCdDwW-") );

// [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
// Includes '|', '{' and '}' as listed in the rule; literal '-' is last so it is
// not read as a range operator.
re_single_char_esc = qi::char_('\\') >> qi::char_("nrt\\|.?*+^[](){}-");

// [25] catEsc ::= '\p{' charProp '}'
re_cat_esc = qi::string("\\p{") >> re_char_prop >> '}';

// [26] complEsc ::= '\P{' charProp '}'
re_compl_esc = qi::string("\\P{") >> re_char_prop >> '}';

// [27] charProp ::= IsCategory | IsBlock
re_char_prop = re_is_category | re_is_block;

// [28] IsCategory ::= Letters | Marks | Numbers | Punctuation | Separators | Symbols | Others
re_is_category = re_letters
    | re_marks
    | re_numbers
    | re_punctuation
    | re_separators
    | re_symbols
    | re_others;

// Rules [29]-[35]: one upper-case category letter optionally followed by a
// single lower-case sub-category letter.

// [29] Letters ::= 'L' [ultmo]?
re_letters = qi::char_('L') >> -qi::char_("ultmo");
// [30] Marks ::= 'M' [nce]?
re_marks = qi::char_('M') >> -qi::char_("nce");
// [31] Numbers ::= 'N' [dlo]?
re_numbers = qi::char_('N') >> -qi::char_("dlo");
// [32] Punctuation ::= 'P' [cdseifo]?
re_punctuation = qi::char_('P') >> -qi::char_("cdseifo");
// [33] Separators ::= 'Z' [slp]?
re_separators = qi::char_('Z') >> -qi::char_("slp");
// [34] Symbols ::= 'S' [mcko]?
re_symbols = qi::char_('S') >> -qi::char_("mcko");
// [35] Others ::= 'C' [cfon]?
re_others = qi::char_('C') >> -qi::char_("cfon");

// [36] IsBlock ::= 'Is' [a-zA-Z0-9#x2D]+
// The '-' (#x2D) is matched by its own parser so it cannot be read as a range
// operator inside the character set.
re_is_block = qi::string("Is") >> +( qi::char_("a-zA-Z0-9") | qi::char_('-') );

// [37] MultiCharEsc ::= '\' [sSiIcCdDwW]
// A backslash followed by one of the multi-character class letters.
re_multi_char_esc = qi::char_('\\') >> qi::char_("sSiIcCdDwW");

// [37a] WildcardEsc ::= '.'
re_wildcard_esc = qi::char_('.');

boost spirit parser for XQuery regexp

标签:des   http   io   os   ar   for   sp   on   as   

原文地址:http://www.cnblogs.com/ezhang/p/4026058.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!