使用的是Philip Hazel的Perl-Compatible Regular Expression库(注意:下面的代码包含的是 <regex.h>,调用的是 POSIX 风格的 regcomp/regexec 接口),参考:
http://see.xidian.edu.cn/cpp/html/1428.html
执行匹配时:
gcc myreg.c
ip.pat 内容:
ip
.*[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+
ip.txt 内容:
192.168.1.1
测试:
./a.out ip.pat ip.txt
下面是myreg.c源代码
/* myreg.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <regex.h>
#include <unistd.h>
/* Capacity of the fixed-size arrays below; reg() also uses it to size its
 * local buffers. */
#define MAX 256

/* Positions of the matches found in the scanned text. */
struct placemsg_t
{
    int start[MAX]; /* start offset of each match */
    int end[MAX];   /* end offset of each match */
    int count;      /* number of matches recorded */
};
typedef struct placemsg_t placemsg;

/* Not referenced anywhere in the visible part of this file. */
#define TIMES 100
#define MAX_PATTERN_LEN 8192
/**
 * @brief Convert a single hexadecimal digit character to its numeric value.
 *
 * @param c  character to convert ('0'-'9', 'a'-'f' or 'A'-'F')
 *
 * @return the digit's value (0..15); -1 if c is not a hexadecimal digit,
 *         in which case a diagnostic is also written to stderr
 */
static int
hex2dec(char c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }

    fprintf(stderr, "hex2dec: bad value!\n");
    return -1;
}
/*
 * @brief Expand \xHH escapes in a pattern and lower-case the result.
 *
 * Every "\xHH" sequence (backslash, 'x', two hexadecimal digits) is replaced
 * by the single character with that value; all other characters are copied.
 * Each output character, escaped or not, is passed through tolower().
 * NOTE(review): the lower-casing is presumably meant to make matching
 * case-insensitive against lower-cased input - confirm with the caller.
 *
 * @param s  NUL-terminated pattern, possibly containing \xHH escapes
 *
 * @return a newly malloc()ed string that the caller must free(), or NULL
 *         if s is NULL or the allocation fails
 */
static char *pre_process(char *s)
{
    if (s == NULL) {
        return NULL;
    }

    /* The output is never longer than the input, so one buffer of the
     * input's size (plus terminator) is always enough. */
    size_t len = strlen(s);
    char *result = malloc(len + 1);
    if (result == NULL) {
        return NULL;
    }

    size_t sindex = 0;
    size_t rindex = 0;
    while (sindex < len) {
        if (sindex + 3 < len
            && s[sindex] == '\\' && s[sindex + 1] == 'x'
            && isxdigit((unsigned char)s[sindex + 2])
            && isxdigit((unsigned char)s[sindex + 3])) {
            /* Both digits were validated above, so hex2dec cannot fail. */
            int value = hex2dec(s[sindex + 2]) * 16 + hex2dec(s[sindex + 3]);
            result[rindex] = (char)tolower(value);
            sindex += 4; /* backslash, 'x' and two digits */
        } else {
            /* <ctype.h> functions require an unsigned char value. */
            result[rindex] = (char)tolower((unsigned char)s[sindex]);
            sindex++;
        }
        rindex++;
    }
    result[rindex] = '\0';
    return result;
}
/*
 * Decide whether a line from a pattern file is a comment.
 *
 * A line counts as a comment when it is empty, when its first character is
 * '#', or when it consists only of whitespace.
 *
 * @param line  NUL-terminated line (must not be NULL)
 *
 * @return 1 if the line is a comment, 0 otherwise
 */
static int is_comment(char *line)
{
    /* lines starting with # are comments */
    if (line[0] == '#') {
        return 1;
    }

    /* Any non-whitespace character makes this a real line; an empty line
     * never enters the loop and is therefore reported as a comment. */
    for (const char *p = line; *p != '\0'; p++) {
        if (!isspace((unsigned char)*p)) {
            return 0;
        }
    }
    return 1;
}
/*
 * Extract the protocol name from a line of a .pat file.
 *
 * Copies the characters of line up to (not including) the first whitespace
 * character into the buffer *patname and NUL-terminates the copy.
 *
 * @param line     NUL-terminated input line
 * @param patname  address of the destination buffer pointer; the buffer must
 *                 have room for the copied name plus the terminator (at most
 *                 strlen(line) + 1 bytes)
 *
 * @return the destination buffer (*patname)
 */
static char *get_protocol_name (char *line, char **patname)
{
    char *name = *patname;
    size_t n = 0;

    while (line[n] != '\0' && !isspace((unsigned char)line[n])) {
        name[n] = line[n];
        n++;
    }
    /* Terminate the copy so callers get a valid C string. */
    name[n] = '\0';
    return name;
}
/*
 * Locate the last occurrence of mark in str (used to find the final '/').
 *
 * Returns the index just past that occurrence, or 0 when mark does not
 * occur. The string terminator takes part in the search, so asking for
 * '\0' yields strlen(str) + 1.
 */
int last_mark (char *str, char mark)
{
    const char *hit = strrchr(str, mark);

    if (hit == NULL)
    {
        return 0;
    }
    return (int)(hit - str) + 1;
}
/*
 * Locate the first occurrence of mark in str at or after index num
 * (used to find the first '.').
 *
 * @param str   NUL-terminated string to search
 * @param mark  character to look for
 * @param num   index at which the search starts; must be within the string
 *
 * @return the index just past the first occurrence of mark. If mark does not
 *         occur, the search stops at the terminator and strlen(str) + 1 is
 *         returned, i.e. the end of the string is treated as the mark, so the
 *         scan never runs past the end of the string.
 */
int first_mark (char *str, char mark, int num)
{
    int pos = num;

    while (str[pos] != '\0' && str[pos] != mark)
    {
        pos++;
    }
    return pos + 1;
}
/*
 * Extract a file name without its suffix from a full file name.
 *
 * Copies the (firstdot - lastslash - 1) characters of srcstr that start at
 * index lastslash into the buffer *decstr and NUL-terminates the copy.
 *
 * @param srcstr     source string
 * @param decstr     address of the destination buffer pointer; the buffer
 *                   must have room for firstdot - lastslash bytes
 * @param lastslash  index of the first character to copy
 * @param firstdot   index just past the character that ends the name
 *
 * @return the number of characters copied (0 when the range is empty or
 *         inverted)
 */
int substr (char *srcstr, char **decstr, int lastslash, int firstdot)
{
    char *str = *decstr;
    int size = firstdot - lastslash - 1;

    if (size < 0)
    {
        size = 0;
    }
    memcpy(str, srcstr + lastslash, (size_t)size);
    /* Terminate the copy so callers get a valid C string. */
    str[size] = '\0';
    return size;
}
/*
 * Extract the bare file name from a path: the text after the last '/' and
 * before the first '.' that follows it (so "dir/ip.v2.pat" yields "ip").
 * When no '.' follows, everything after the last '/' is used.
 *
 * @param file  path to analyse
 * @param name  address of the destination buffer pointer; the buffer must
 *              have room for the name plus a terminator (strlen(file) + 1
 *              bytes is always enough)
 *
 * @return 0 on success, -1 if an argument is NULL
 */
int basename(char *file, char **name)
{
    if (file == NULL || name == NULL || *name == NULL)
    {
        return -1;
    }

    int lastnum = last_mark (file, '/');
    int firstnum;

    /* Only search for the dot when one exists after the last slash;
     * otherwise treat the end of the string as the end of the name. */
    if (strchr(file + lastnum, '.') != NULL)
    {
        firstnum = first_mark (file, '.', lastnum);
    }
    else
    {
        firstnum = (int)strlen(file) + 1;
    }

    substr (file, name, lastnum, firstnum);
    /* Make sure the extracted name is a terminated C string. */
    (*name)[firstnum - lastnum - 1] = '\0';
    return 0;
}
/**
* @brief 以下是pcre匹配的相关函数原型:
* int
regcomp(regex_t *preg, const char *regex, int
cflags);
* cflags: REG_EXTENDED | REG_NEWLINE;
* REG_EXTENDED
支持扩展的正则
* REG_NEWLINE;
包括换行
* int regexec(const regex_t *preg,
const char *string, size_t nmatch, regmatch_t pmatch[], int
eflags);
* eflags = REG_NOTBOL | REG_NOTEOL;
* REG_NOTBOL
行结尾
* REG_NOTEOL 文件结尾
* size_t
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size);
* void
regfree(regex_t *preg);
* @param regexp 正则表达式模式文件
* @param
file_path 要进行匹配的文件路径
*
* @return
匹配信息结构体
*/
struct placemsg_t reg(char *regexpfile, char
*file_path)
{
regex_t preg;
struct placemsg_t
placeinfo;
regmatch_t pmatch[MAX] = {};
size_t nmatch = MAX;
size_t length;
char errbuf[MAX] = {};
size_t
errbuf_size;
int res;
int errcode;
FILE *fp;
int fd;
long sitestart;
long
siteend;
long filesize;
char *string;
int patternlen, i;
int flag = 1;
FILE *
fp2;
char * line = NULL;
size_t len = 0;
ssize_t
size;
char *filename = (char*)malloc(256);
//提取.pat文件名
basename(regexpfile, &filename);
//提取.pat中的正则表达式
//printf ("1regexpfile=%s\n",
regexpfile);
fp2 = fopen (regexpfile, "r");
if (fp2 ==
NULL)
{
perror("fopen");
}
flag =
1;
while ((size= getline(&line, &len, fp2)) != -1)
{
line[strlen(line)-1] = ‘\0‘;
//printf
("line=%s\n",
line);
//判断是否是注释
if(is_comment(line))
{
printf("is_comment\n");
continue;
}
else
if((strstr(line, filename) == 0) &&
flag)
{
//printf("..........strcpy...........\n");
flag
= 0;
continue;
}
else
if(flag)
{
continue;
}
else
if (flag == 0)
{
printf("正则表达式是:%s\n",
line);
break;
}
}
fclose(fp2);
fp2
= NULL;
free(filename);
printf ("2line=%s\n", line);
//转换正则表达式为字符
char *regexpstr = pre_process(line); /* do \xHH
escapes */
printf("regexpstr=%s\n",
regexpstr);
if(line)
{
free(line);
}
printf("file_path=%s\n", file_path);
fp = fopen(file_path,
"r");
if (fp == NULL)
{
perror("fopen");
}
fseek(fp, 0, SEEK_SET);
sitestart = ftell(fp);
fseek(fp, 0, SEEK_END);
siteend = ftell(fp);
filesize =
siteend - sitestart;
printf("filezize=%d\n", filesize);
string = (char*) malloc(sizeof(char)*filesize+4);
memset(string, 0,
filesize+4);
fclose(fp);
fp = NULL;
fd = open(file_path, O_RDONLY);
if (fd == -1)
{
perror ("open");
}
res = read(fd, string, filesize);
if (res == -1)
{
perror ("read");
}
close(fd);
printf("res=%d,string=%s\n", res, string);
printf("begin regcomp.........\n");
int cflags =
REG_EXTENDED | REG_NEWLINE;
//int cflags = REG_EXTENDED;
//int cflags = 0;
//编译正则表达式
res = regcomp (&preg,
regexpstr, cflags);
printf("begin regexec.........\n");
//int eflags =
REG_NOTBOL | REG_NOTEOL;
int eflags = REG_NOTEOL;
//int eflags = 0;
int place[5];
int
start_front=0;
int start_now=0;
int end_front=0;
int end_now=0;
int temp = 0;
char *p =
string;
i = 0;
while (1)
{
res = regexec (&preg, p, nmatch, pmatch, eflags);
if
(res == 0)
{
printf("...........match..........\n");
start_now =
pmatch[0].rm_so;
end_now =
pmatch[0].rm_eo;
temp = end_now -
start_now;
//printf("temp=%d\n", temp);
start_now = end_front + start_now;
//printf("start_now=%d\n",
start_now);
end_now = start_now +
temp;
//printf("end_now=%d\n", end_now);
start_front = start_now;
end_front =
end_now;
//printf("start place=%d\n", pmatch[0].rm_so);
//printf("end place=%d\n", pmatch[0].rm_eo);
//printf("start
place=%d\n", start_front);
//printf("end place=%d\n",
end_front);
placeinfo.start[i] = start_front;
placeinfo.end[i] = end_front;
p += pmatch[0].rm_eo;
if(!*p)
{
i++;
break;
}
}
else
{
printf("no match\n");
break;
}
i++;
}
placeinfo.count = i;
//printf(".....i=%d\n", i);
length = regerror (res, &preg, errbuf,
errbuf_size);
regfree(&preg);
free(string);
free(regexpstr);
//printf("over\n");
return placeinfo;
}
int main (int argc, char **argv)
{
//char *regexp =
"r.t";
char *regexpfile =
argv[1];
printf("regexpfile=%s\n",
regexpfile);
//char *file_path = "t.txt";
char *file_path =
argv[2];
struct placemsg_t placeinfo;
placeinfo = reg(regexpfile, file_path);
int size =
placeinfo.count;
printf(".....show....\n");
int i =
0;
for (i=0; i<size; i++)
{
printf
("start[%d]=%d\n", i, placeinfo.start[i]);
printf
("end[%d]=%d\n", i, placeinfo.end[i]);
}
return 0;
}
原文地址:http://www.cnblogs.com/etangyushan/p/3759543.html