码迷,mamicode.com
首页 > 其他好文 > 详细

解析xml

时间:2015-08-01 20:42:47      阅读:88      评论:0      收藏:0      [点我收藏+]

标签:

#include <cstring>
#include <vector>
// True when character x is NOT one of the markup delimiters '/', '<' or '>'.
// The argument is evaluated up to three times, so pass a side-effect-free expression.
#define ch_not_spec(x) ((x) != '/' && (x) != '<' && (x) != '>')


// One node of the tree built by visit_xml.
//
// The node copies its name into a fixed buffer but only *borrows* the
// attribute range: property_start / property_end point into the buffer that
// was handed to the parser, so that buffer must outlive the node.
// Child pointers are stored as-is; this struct does not delete them.
struct XML_NODE
{
    char name[64];                      // NUL-terminated tag name (or text for text nodes), at most 63 chars
    char* property_start;               // start of the raw attribute text inside the parsed buffer
    char* property_end;                 // end of the raw attribute text inside the parsed buffer
    int over_type;                      // visit_xml stores 0 for self-closed tags (<x/>), 1 otherwise
    int type;                           // visit_xml stores 0 for element nodes, 1 for text nodes
    std::vector<XML_NODE*> child_list;  // children in document order

    // Builds a node whose name is the first `len` characters of `name_cpy`.
    //
    // name_cpy   : source characters for the name (need not be NUL-terminated)
    // len        : number of characters to copy; clamped to 0..63 so the copy
    //              can never run past the end of `name`
    // cpy_type   : stored in `type`
    // prop_start : stored in `property_start`
    // prop_end   : stored in `property_end`
    // ovr_type   : stored in `over_type`
    XML_NODE(char* name_cpy, int len, int cpy_type, char* prop_start, char* prop_end, int ovr_type)
        : property_start(prop_start), property_end(prop_end), over_type(ovr_type), type(cpy_type)
    {
        // Initializers above follow declaration order, which is the order
        // the compiler actually uses.
        const int capacity = static_cast<int>(sizeof(name)) - 1;
        if (!name_cpy || len < 0)
            len = 0;
        if (len > capacity)
            len = capacity;     // truncate instead of overflowing the buffer
        if (len > 0)
            memcpy(name, name_cpy, len);
        name[len] = 0;
    }
};

//p为输入的待解析xml字符串;root为返回的xml数结构; last_node是最后一个成功被解析的xml节点,主要用于定位错误位置,解析相当于构建了一个 根 -先-后 的树,另外 假设< / >这三个字符是特殊标记字符 不能出现在属性或者其他地方,只能作为标记的一部分
bool visit_xml(char* p, XML_NODE* & root, XML_NODE* & last_node)
{
char* q, *s, *t;
std::vector<XML_NODE*> node_stack;
if(*p != ‘<‘)
return false;
while(*p != ‘\0‘)
{
if(*p == ‘<‘ && *(p + 1) != ‘/‘)//起始节点 <form  xxxx>     <form     xxx />
{
q = p + 1;
while(*q != ‘\0‘ && *q != ‘ ‘ && ch_not_spec(*q))
++q;
if(q == p + 1)
return false;
s = q;
//跳过空格
while(*s == ‘ ‘)
++s;
t = s;
//获取属性字符串
while(*t != ‘\0‘ && ch_not_spec(*t))
++t;


if(*t == ‘/‘ && *(t+1) == ‘>‘)
{
if(node_stack.size() == 0 && *(t + 2) != ‘\0‘)
return false;
if(node_stack.size() == 0)
{
root = new XML_NODE(p + 1, q - p - 1, 0, s, t, 0);
last_node = root;
return true;
}else
{
last_node = new XML_NODE(p + 1, q - p - 1, 0, s, t - 1, 0);
(*(node_stack.end() - 1))->child_list.push_back(last_node);
}
p = t + 2;
}else if(*t == ‘>‘)
{
if(node_stack.size() == 0)
{
root = new XML_NODE(p + 1, q - p - 1, 0, s, t, 1);
node_stack.push_back(root);
}else
{
(*(node_stack.end() - 1))->child_list.push_back(new XML_NODE(p + 1, q - p - 1, 0, s, t, 1));
node_stack.push_back( *((*(node_stack.end() - 1))->child_list.end() - 1) );
}
p = t + 1;
}else
{
return false;
}
}else if(*p == ‘<‘ && *(p + 1) == ‘/‘)//结束节点 </form>
{
q = p + 2;
while(*q != ‘\0‘ && *q != ‘ ‘ && ch_not_spec(*q))
++q;
if(q == p + 2)
return false;
if(*q != ‘>‘)
return false;
if(strncmp(p+2, (*(node_stack.end() - 1))->name, q - p - 2) != 0)
return false;
if(node_stack.size() > 1)
{
last_node = *(node_stack.end() - 1);
node_stack.erase((node_stack.end() - 1));
}
else
{
root = *(node_stack.end() - 1);
last_node = root;
if(*(q + 1) != ‘\0‘)
return false;
}
p = q + 1;
}else if(*p != ‘>‘ && *p != ‘/‘)//字符串节点
{
q = p;
while(*q != ‘\0‘ && ch_not_spec(*q))
++q;
(*(node_stack.end() - 1))->child_list.push_back(last_node = new XML_NODE(p, q - p, 1, p, p, 1));
p = q;
}else //非法节点
{
return false;
}
}
if(node_stack.size() > 1)
return false;


return true;
}

版权声明:本文为博主原创文章,未经博主允许不得转载。

解析xml

标签:

原文地址:http://blog.csdn.net/wangxugangzy05/article/details/47188931

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!