字符串处理是目前工程项目中最常遇到的问题之一,尤其是在自然语言处理、文本处理和分析等领域。C++ 目前只提供比较简单的字符串处理函数,不像 Python、Java 那样对字符串操作有强大的内置支持。下面是自己封装的几个常用字符串处理函数。这些功能的实现方式其实有很多种,但由于字符串处理属于基础函数,需要适当考虑算法实现的性能。下面是自己实现的一些功能,性能感觉还不错。
/*********************************************
 function: string common functions.
 author:   liuyi
 date:     2015.08.29
 version:  1.0

 Notes:
  - Every function is `inline` so the header can be included from several
    translation units without multiple-definition link errors.
  - An empty separator / substring never matches: the functions return the
    input unchanged (or a count of 0) instead of looping forever.
  - `using namespace std;` is kept only because existing includers rely on it.
**********************************************/
#ifndef STR_FUN_H
#define STR_FUN_H

#include <iostream>
#include <string.h>
#include <string>
#include <vector>

using namespace std;

// Splits the C string `src` on every occurrence of `sep`.
//
// `split_vec` is cleared first and then receives the pieces in order.
// Leading, trailing and adjacent separators produce empty pieces
// ("#a#" split on "#" gives "", "a", "").
// A NULL `src` leaves `split_vec` empty. An empty separator yields a single
// piece containing all of `src`.
// The separator is searched with strstr, so only the part of `sep` before
// its first '\0' is used.
inline void split_string(const char* src, const string& sep, vector<string>& split_vec)
{
    split_vec.clear();
    split_vec.reserve(16);
    if (src == NULL)
    {
        return;
    }

    const char* separator = sep.c_str();
    const size_t len = strlen(separator);
    if (len == 0)
    {
        // strstr matches an empty needle at every position; treat as "no split".
        split_vec.push_back(src);
        return;
    }

    const char* pre = src;
    for (const char* p = strstr(pre, separator); p != NULL; p = strstr(pre, separator))
    {
        split_vec.push_back(string(pre, p - pre));
        pre = p + len;
    }
    // Whatever follows the last separator (possibly the empty string).
    split_vec.push_back(string(pre));
}

// Splits `src` on every occurrence of `sep`.
//
// `split_vec` is cleared first (same contract as the `const char*` overload)
// and then receives the pieces in order. Leading, trailing and adjacent
// separators produce empty pieces. An empty separator yields a single piece
// containing all of `src`.
inline void split_string(const string& src, const string& sep, vector<string>& split_vec)
{
    split_vec.clear();

    const string::size_type len = sep.size();
    if (len == 0)
    {
        // find("") succeeds at every position; treat as "no split".
        split_vec.push_back(src);
        return;
    }

    string::size_type pre = 0;
    string::size_type i = 0;
    while (string::npos != (i = src.find(sep, pre)))
    {
        split_vec.push_back(src.substr(pre, i - pre));
        pre = i + len;
    }
    // `pre` is at most src.size() here, so substr cannot throw.
    split_vec.push_back(src.substr(pre));
}

// Concatenates the elements of `str_vec`, inserting `sep` between
// neighbours. Returns an empty string for an empty vector.
inline string join_string(const string& sep, const vector<string>& str_vec)
{
    string str;
    const size_t count = str_vec.size();
    if (count == 0)
    {
        return str;
    }

    // Compute the exact result length so the buffer is allocated once.
    size_t total = sep.size() * (count - 1);
    for (size_t i = 0; i < count; i++)
    {
        total += str_vec[i].size();
    }
    str.reserve(total);

    str.append(str_vec[0]);
    for (size_t i = 1; i < count; i++)
    {
        str.append(sep);
        str.append(str_vec[i]);
    }
    return str;
}

// Returns a copy of `raw_str` in which every non-overlapping occurrence of
// `src_str` (scanning left to right) is replaced by `replace_str`.
// If `src_str` is empty or does not occur, `raw_str` is returned unchanged.
inline string replace_string(const string& raw_str, const string& src_str, const string& replace_str)
{
    const string::size_type len = src_str.size();
    if (len == 0)
    {
        return raw_str;
    }

    string::size_type i = raw_str.find(src_str);
    if (string::npos == i)
    {
        return raw_str;
    }

    string new_str;
    new_str.reserve(raw_str.size());
    string::size_type begin = 0;
    do
    {
        // Copy the text between the previous match and this one, then the replacement.
        new_str.append(raw_str, begin, i - begin);
        new_str.append(replace_str);
        begin = i + len;
    } while (string::npos != (i = raw_str.find(src_str, begin)));

    new_str.append(raw_str, begin, string::npos);
    return new_str;
}

// Removes every consecutive copy of `sub_str` from the front of `src`
// (lstrip("ababx", "ab") == "x"). `sub_str` is treated as a whole string,
// not as a set of characters. An empty `sub_str` returns `src` unchanged.
inline string lstrip(const string& src, const string& sub_str)
{
    const string::size_type len = sub_str.size();
    if (len == 0)
    {
        return src;
    }

    string::size_type start = 0;
    // compare() only looks at the next `len` characters, so the scan stops
    // as soon as the prefix run ends. `start` never exceeds src.size().
    while (src.compare(start, len, sub_str) == 0)
    {
        start += len;
    }
    return src.substr(start);
}

// Removes every consecutive copy of `sub_str` from the end of `str`
// (rstrip("xabab", "ab") == "x"). `sub_str` is treated as a whole string,
// not as a set of characters. An empty `sub_str` returns `str` unchanged.
inline string rstrip(const string& str, const string& sub_str)
{
    const string::size_type len = sub_str.size();
    if (len == 0)
    {
        return str;
    }

    string::size_type end = str.size();
    // `end >= len` guarantees the compared window lies inside `str`.
    while (end >= len && str.compare(end - len, len, sub_str) == 0)
    {
        end -= len;
    }
    return str.substr(0, end);
}

// Strips consecutive copies of `sub_str` from the end and then from the
// front of `src`.
inline string strip(const string& src, const string& sub_str)
{
    return lstrip(rstrip(src, sub_str), sub_str);
}

// Counts the non-overlapping occurrences of `sub_str` in `str`, scanning
// left to right. Returns 0 when `sub_str` is empty.
inline int count_of_substr(const string& str, const string& sub_str)
{
    const string::size_type len = sub_str.size();
    if (len == 0)
    {
        return 0;
    }

    int count = 0;
    string::size_type i = 0;
    while ((i = str.find(sub_str, i)) != string::npos)
    {
        count++;
        i += len;
    }
    return count;
}

// Counts the non-overlapping occurrences of `sub_str` in the C string
// `src`, scanning left to right. Returns 0 when `src` is NULL or the
// separator is empty. The needle is searched with strstr, so only the part
// of `sub_str` before its first '\0' is used.
inline int count_of_substr(const char *src, const string& sub_str)
{
    if (src == NULL)
    {
        return 0;
    }

    const char *needle = sub_str.c_str();
    const size_t len = strlen(needle);
    if (len == 0)
    {
        return 0;
    }

    int count = 0;
    const char *index = src;
    while (NULL != (index = strstr(index, needle)))
    {
        count++;
        index += len;
    }
    return count;
}

// Returns a copy of `src` with the ASCII letters 'A'..'Z' converted to
// lower case. All other bytes are left untouched.
inline string lower_string(const string& src)
{
    string lower_str = src;
    const size_t len = lower_str.size();
    for (size_t i = 0; i < len; i++)
    {
        const char c = lower_str[i];
        if (c >= 'A' && c <= 'Z')
        {
            lower_str[i] = static_cast<char>(c - 'A' + 'a');
        }
    }
    return lower_str;
}

// Returns a copy of `src` with the ASCII letters 'a'..'z' converted to
// upper case. All other bytes are left untouched.
inline string upper_string(const string& src)
{
    string upper_str = src;
    const size_t len = upper_str.size();
    for (size_t i = 0; i < len; i++)
    {
        const char c = upper_str[i];
        if (c >= 'a' && c <= 'z')
        {
            upper_str[i] = static_cast<char>(c - 'a' + 'A');
        }
    }
    return upper_str;
}

#endif
#include <iostream> #include <stdlib.h> #include "str_fun.h" using namespace std; int main(int agrc, char *argv[]) { char s[16] = {"12#34#56"}; vector<string> v; split_string(string(s), "#", v); for(int i = 0; i < v.size(); i++) cout<<v[i]<<endl; cout<<join_string("#", v)<<endl; cout<<replace_string(s, "34", "")<<endl; cout<<count_of_substr("##", "#")<<endl; cout<<lower_string("AbC")<<endl; cout<<upper_string("AbC")<<endl; cout<<lstrip("aaaabc", "aab")<<endl; cout<<rstrip("abcabcabc", "abcd")<<endl; cout<<strip("abcabcaba", "a")<<endl; }
版权声明:本文为博主原创文章,未经博主允许不得转载。
原文地址:http://blog.csdn.net/coder_yi_liu/article/details/48143261