for (std::string::const_iterator i = src.begin(); i != src.end(); i++)
{
if (sep.find((*i)) != std::string::npos)
{
if (s.length())
{
r.push_back(s);
}
s = "";
}
else
{
s += (*i);
}
}
if (s.length())
{
r.push_back(s);
}
return r;
};
int CWordsFilter::GetFirstBytes(const std::string& str)
{
for (int i = 0; i < (int)str.size(); ++i)
{
/**
 * Returns a copy of strContent in which every matched sensitive word is
 * replaced by '*' (one asterisk per character of the word).
 *
 * The input is consumed one character at a time; the byte width of each
 * character is obtained from GetFirstBytes. Each character is looked up with
 * FindNode starting from the current tree node:
 *   - lookup fails: the partial match is abandoned, scanning rewinds to the
 *     first character of that partial match, emits it unchanged and resumes
 *     from the root with the following character;
 *   - lookup reaches a node whose m_nEndTag is 1: the matched characters are
 *     replaced by asterisks and matching restarts from the root;
 *   - otherwise the character is held back as part of a match in progress.
 *
 * @param strContent Text to filter.
 * @return The filtered text, or an empty string when the filter has not been
 *         initialised (a message is printed to std::cout in that case).
 */
std::string CWordsFilter::FilterSensitiveWords(const std::string& strContent)
{
    if (!this->m_bIsInit || NULL == this->m_rootWordNode)
    {
        std::cout << "the sensitive words is not init" << std::endl;
        return "";
    }

    CWordNode* pNode = this->m_rootWordNode;
    std::string strBuffer;
    // Characters of the match currently in progress, one list entry per character.
    std::list<std::string> lsBad;
    const std::size_t nTotal = strContent.size();
    std::size_t nPos = 0;

    while (nPos < nTotal)
    {
        // strRemain is never empty here because nPos < nTotal.
        std::string strRemain = strContent.substr(nPos);
        nStep = GetFirstBytes(strRemain);
        // Keep the step inside [1, remaining bytes]. A non-positive step would
        // stop the scan from advancing, and a step larger than the remainder
        // (e.g. a truncated trailing multi-byte character) used to skip the
        // tail of the input without emitting it.
        if (nStep <= 0)
        {
            nStep = 1;
        }
        else if (static_cast<std::size_t>(nStep) > strRemain.size())
        {
            nStep = static_cast<int>(strRemain.size());
        }
        const std::string strTmp = strRemain.substr(0, static_cast<std::size_t>(nStep));

        pNode = FindNode(pNode, strTmp);
        if (pNode == NULL)
        {
            // Mismatch: drop the partial match and restart from the root.
            pNode = this->m_rootWordNode;

            // Number of bytes held back so far; rewind over exactly those bytes.
            std::size_t nPending = 0;
            for (std::list<std::string>::const_iterator it = lsBad.begin(); it != lsBad.end(); ++it)
            {
                nPending += it->size();
            }
            lsBad.clear();
            nPos = (nPending > nPos) ? 0 : nPos - nPending;

            // Emit the first character at the rewound position unchanged; the
            // next iteration retries matching from the character after it.
            strRemain = strContent.substr(nPos);
            nStep = GetFirstBytes(strRemain);
            if (nStep <= 0)
            {
                nStep = 1;
            }
            else if (static_cast<std::size_t>(nStep) > strRemain.size())
            {
                nStep = static_cast<int>(strRemain.size());
            }
            strBuffer.append(strRemain, 0, static_cast<std::size_t>(nStep));
        }
        else if (pNode->m_nEndTag == 1)
        {
            // Complete word: one '*' per held-back character plus the current one.
            strBuffer.append(lsBad.size() + 1, '*');
            lsBad.clear();
            pNode = this->m_rootWordNode;
        }
        else
        {
            // Still inside a possible word: hold the character back.
            lsBad.push_back(strTmp);
            if (nPos + static_cast<std::size_t>(nStep) == nTotal)
            {
                // Input ended in the middle of a candidate word, so the held
                // characters are written out as they are.
                // NOTE(review): these characters are not rescanned, so a shorter
                // sensitive word starting inside this tail would presumably be
                // missed - confirm against FindNode / tree semantics.
                for (std::list<std::string>::const_iterator it = lsBad.begin(); it != lsBad.end(); ++it)
                {
                    strBuffer.append(*it);
                }
            }
        }

        nPos += static_cast<std::size_t>(nStep);
    }

    return strBuffer;
}
void CWordsFilter::BuildWordTree()
{
if ( this->m_rootWordNode == NULL )
{
this->m_rootWordNode = new CWordNode("R");
if (NULL == this->m_rootWordNode)
{
return;
}
}
this->m_rootWordNode->Reset("R");