标签:截取 val 包含 中文字符串 c++ signed sub 字符串 invalid
/// @brief Extracts a substring of a UTF-8 encoded string, addressed in code
///        points (characters) rather than bytes.
///
/// The whole input is scanned, so a malformed lead byte anywhere in @p str
/// (even outside the requested range) makes the function return "".
/// Only lead bytes and sequence lengths are checked; continuation bytes are
/// not validated.
///
/// @param str   UTF-8 encoded input.
/// @param start Zero-based index of the first code point to include.
/// @param len   Maximum number of code points to include. A value larger than
///              the number of remaining code points (e.g. the truncated value
///              of std::string::npos) selects everything up to the end.
/// @return The selected code points as a UTF-8 string, or "" when @p len is 0,
///         @p start is at or past the end, or @p str is not valid UTF-8
///         (bad lead byte or a multi-byte sequence cut off by the end).
std::string utf8_substr(const std::string& str, unsigned int start, unsigned int len)
{
    typedef std::string::size_type size_type;

    if (len == 0) { return ""; }

    const size_type size = str.length();
    size_type first = std::string::npos;  // byte offset of code point `start`
    size_type last = std::string::npos;   // byte offset one past the range
    size_type pos = 0;                    // current byte offset
    size_type index = 0;                  // current code-point index

    while (pos < size)
    {
        if (index == start) { first = pos; }
        // Written as a subtraction so that start + len can never overflow.
        if (index >= start && index - start == len) { last = pos; }

        const unsigned char lead = static_cast<unsigned char>(str[pos]);
        size_type width;
        if (lead <= 0x7F) { width = 1; }
        else if ((lead & 0xE0) == 0xC0) { width = 2; }
        else if ((lead & 0xF0) == 0xE0) { width = 3; }
        else if ((lead & 0xF8) == 0xF0) { width = 4; }
        else { return ""; }  // invalid utf8: stray continuation or bad lead byte

        // A sequence that would run past the end of the buffer is truncated.
        if (width > size - pos) { return ""; }

        pos += width;
        ++index;
    }

    if (first == std::string::npos) { return ""; }  // start is at/past the end
    if (last == std::string::npos) { last = size; } // range runs to the end

    // substr takes (offset, count), not (offset, end offset).
    return str.substr(first, last - first);
}
标签:截取 val 包含 中文字符串 c++ signed sub 字符串 invalid
原文地址:http://www.cnblogs.com/allanchan/p/utf8_substr.html