码迷,mamicode.com
首页 > 其他好文 > 详细

UTF-8 转换为 GB 编码,GB2312 转换为 UTF-8

时间:2014-06-21 13:25:12      阅读:254      评论:0      收藏:0      [点我收藏+]

标签:code   get   string   set   问题      

这个方法使用 Windows 的字符集转换函数实现,与 Sybase 的 Unicode 码表相比,
可能在某些符号上有差别;对于大部分字符,尤其是汉字,应该不会有问题。
如果要求比较高,可以购买 Sybase 的 Unicode 开发包。:P
[code]
#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <locale.h>

#include <ctype.h>

#include <mbstring.h>



/*
 * Encode one Unicode code point as UTF-8.
 *
 * dst may be NULL, in which case nothing is written and only the encoded
 * length (1..4 bytes) is returned.  cp must be <= 0x10FFFF.
 */
static size_t utf8_put(unsigned char *dst, unsigned long cp)
{
    if (cp < 0x80UL) {
        if (dst)
            dst[0] = (unsigned char)cp;
        return 1;
    }
    if (cp < 0x800UL) {
        if (dst) {
            dst[0] = (unsigned char)(0xc0 | (cp >> 6));
            dst[1] = (unsigned char)(0x80 | (cp & 0x3f));
        }
        return 2;
    }
    if (cp < 0x10000UL) {
        if (dst) {
            dst[0] = (unsigned char)(0xe0 | (cp >> 12));
            dst[1] = (unsigned char)(0x80 | ((cp >> 6) & 0x3f));
            dst[2] = (unsigned char)(0x80 | (cp & 0x3f));
        }
        return 3;
    }
    if (dst) {
        dst[0] = (unsigned char)(0xf0 | (cp >> 18));
        dst[1] = (unsigned char)(0x80 | ((cp >> 12) & 0x3f));
        dst[2] = (unsigned char)(0x80 | ((cp >> 6) & 0x3f));
        dst[3] = (unsigned char)(0x80 | (cp & 0x3f));
    }
    return 4;
}

/*
 * Convert a NUL-terminated multibyte string (in the encoding of the current
 * LC_CTYPE locale) to UTF-8.
 *
 * utf  Destination buffer, or NULL to only measure.  When non-NULL it must
 *      have room for the returned length plus one byte for the terminating
 *      NUL, which is always written on success.
 * mbs  Source multibyte string.
 *
 * Returns the number of UTF-8 bytes (excluding the terminating NUL), or
 * (size_t)-1 if mbs is NULL, the input is not valid in the current locale,
 * memory cannot be allocated, or a wide character is outside the Unicode
 * range.  On failure the contents of utf are unspecified.
 */
size_t mbstoutf8(unsigned char* utf, unsigned char* mbs)
{
    size_t wcneed;
    size_t u8need = 0;
    wchar_t *wc;
    const wchar_t *wp;

    if (!mbs)
        return (size_t)-1;

    /* With a NULL destination mbstowcs reports the wide length, or
     * (size_t)-1 when the input contains an invalid sequence. */
    wcneed = mbstowcs(NULL, (const char *)mbs, 0);
    if (wcneed == (size_t)-1)
        return (size_t)-1;

    wc = calloc(wcneed + 1, sizeof *wc);
    if (!wc)
        return (size_t)-1;

    if (mbstowcs(wc, (const char *)mbs, wcneed + 1) == (size_t)-1) {
        free(wc);
        return (size_t)-1;
    }
    wc[wcneed] = L'\0';

    for (wp = wc; *wp; wp++) {
        unsigned long cp = (unsigned long)*wp;
        size_t len;

        /* Where wchar_t is 16 bits wide, characters above U+FFFF arrive as
         * a UTF-16 surrogate pair; merge the pair into one code point so it
         * becomes a single 4-byte sequence. */
        if (cp >= 0xd800UL && cp <= 0xdbffUL) {
            unsigned long lo = (unsigned long)wp[1];

            if (lo >= 0xdc00UL && lo <= 0xdfffUL) {
                cp = 0x10000UL + ((cp - 0xd800UL) << 10) + (lo - 0xdc00UL);
                wp++;
            }
        }

        if (cp > 0x10ffffUL) {
            free(wc);
            return (size_t)-1;
        }

        len = utf8_put(utf, cp);
        if (utf)
            utf += len;
        u8need += len;
    }

    if (utf)
        *utf = 0;

    free(wc);
    return u8need;
}



/*
 * Read lines from stdin, convert each from the code page 936 locale
 * encoding to UTF-8 with mbstoutf8(), and print the result.
 *
 * The loop ends on end-of-file, on a read error, or on the first empty
 * line.  Lines that cannot be converted are skipped.
 */
int main(void)
{
    char mbs[81];
    size_t sz = 0;
    unsigned char *u = NULL;

    /* ".936" is a Windows-style locale name; if it is unavailable the
     * conversion silently falls back to the default "C" locale, so say so. */
    if (!setlocale(LC_CTYPE, ".936"))
        fputs("warning: locale \".936\" is not available\n", stderr);

    /* Checking fgets avoids spinning forever on a stale buffer at EOF. */
    while (fgets(mbs, sizeof mbs, stdin))
    {
        /* Strip the newline only if present; strcspn is also safe when the
         * buffer is empty or the line was truncated. */
        mbs[strcspn(mbs, "\n")] = '\0';
        if (!*mbs)
            break;

        /* First pass measures, second pass converts. */
        sz = mbstoutf8(NULL, (unsigned char *)mbs);
        if (sz == (size_t)-1)
            continue;

        u = malloc(sz + 1);
        if (!u)
            continue;

        if (mbstoutf8(u, (unsigned char *)mbs) != (size_t)-1)
            puts((const char *)u);
        free(u);
    }

    return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#include <ctype.h>
#include <mbstring.h>
#include <wchar.h>
/*
 * Convert a NUL-terminated wide-character string to a multibyte string in
 * the encoding of the current LC_CTYPE locale.
 *
 * Despite the name, the input is a wide-character string (as the parameter
 * type says), not raw UTF-8 bytes; a caller holding UTF-8 must decode it to
 * wchar_t first.
 *
 * mbs  Destination buffer, or NULL to only measure.  When non-NULL it must
 *      have room for the returned length plus one byte for the terminating
 *      NUL, which is always written on success.
 * utf  Source wide-character string.
 *
 * Returns the number of bytes (excluding the terminating NUL), or
 * (size_t)-1 if utf is NULL or contains a character that cannot be
 * represented in the current locale.
 */
size_t utf8tombs(unsigned char* mbs, wchar_t* utf)
{
    size_t need;

    if (!utf)
        return (size_t)-1;

    /* With a NULL destination wcstombs reports the byte length, or
     * (size_t)-1 when a character has no multibyte representation. */
    need = wcstombs(NULL, utf, 0);
    if (need == (size_t)-1)
        return (size_t)-1;

    if (mbs) {
        /* wcstombs already yields the final bytes; they must not be
         * post-processed. */
        if (wcstombs((char *)mbs, utf, need + 1) == (size_t)-1)
            return (size_t)-1;
        mbs[need] = '\0';
    }

    return need;
}
/*
 * Decode a NUL-terminated UTF-8 string into a NUL-terminated wide string.
 *
 * dst/cap  Destination buffer and its capacity in wchar_t units.
 * src      UTF-8 input.
 *
 * Returns the number of wchar_t units written (excluding the terminator),
 * or (size_t)-1 if the input is malformed (bad lead or continuation byte,
 * overlong form, surrogate, value above U+10FFFF) or does not fit in dst.
 */
static size_t utf8_decode(wchar_t *dst, size_t cap, const char *src)
{
    const unsigned char *s = (const unsigned char *)src;
    size_t n = 0;

    while (*s) {
        unsigned long cp;
        unsigned long min;
        int extra;

        if (*s < 0x80) {
            cp = *s;
            min = 0;
            extra = 0;
        } else if ((*s & 0xe0) == 0xc0) {
            cp = *s & 0x1fUL;
            min = 0x80UL;
            extra = 1;
        } else if ((*s & 0xf0) == 0xe0) {
            cp = *s & 0x0fUL;
            min = 0x800UL;
            extra = 2;
        } else if ((*s & 0xf8) == 0xf0) {
            cp = *s & 0x07UL;
            min = 0x10000UL;
            extra = 3;
        } else {
            return (size_t)-1;
        }
        s++;

        /* A NUL fails the continuation test, so a truncated sequence is
         * rejected without reading past the end of the string. */
        while (extra-- > 0) {
            if ((*s & 0xc0) != 0x80)
                return (size_t)-1;
            cp = (cp << 6) | (*s++ & 0x3fUL);
        }

        if (cp < min || cp > 0x10ffffUL || (cp >= 0xd800UL && cp <= 0xdfffUL))
            return (size_t)-1;

        if (cp > 0xffffUL && WCHAR_MAX < 0x10ffff) {
            /* 16-bit wchar_t: store as a UTF-16 surrogate pair. */
            if (n + 2 >= cap)
                return (size_t)-1;
            cp -= 0x10000UL;
            dst[n++] = (wchar_t)(0xd800UL + (cp >> 10));
            dst[n++] = (wchar_t)(0xdc00UL + (cp & 0x3ffUL));
        } else {
            if (n + 1 >= cap)
                return (size_t)-1;
            dst[n++] = (wchar_t)cp;
        }
    }

    if (n >= cap)
        return (size_t)-1;
    dst[n] = L'\0';
    return n;
}

/*
 * Read UTF-8 lines from stdin, convert each to the code page 936 locale
 * encoding with utf8tombs(), and print the result.
 *
 * The loop ends on end-of-file, on a read error, or on the first empty
 * line.  Lines that are not valid UTF-8 or cannot be represented in the
 * locale are skipped.
 */
int main(void)
{
    char utf[81];
    /* Each UTF-8 sequence yields at most as many wchar_t units as it has
     * bytes, so a buffer of the same length always suffices. */
    wchar_t wide[81];
    size_t sz = 0;
    unsigned char *u = NULL;

    /* ".936" is a Windows-style locale name; if it is unavailable the
     * conversion silently falls back to the default "C" locale, so say so. */
    if (!setlocale(LC_CTYPE, ".936"))
        fputs("warning: locale \".936\" is not available\n", stderr);

    /* Checking fgets avoids spinning forever on a stale buffer at EOF. */
    while (fgets(utf, sizeof utf, stdin))
    {
        /* Strip the newline only if present; strcspn is also safe when the
         * buffer is empty or the line was truncated. */
        utf[strcspn(utf, "\n")] = '\0';
        if (!*utf)
            break;

        /* utf8tombs takes a wide string, so decode the UTF-8 bytes first. */
        if (utf8_decode(wide, sizeof wide / sizeof wide[0], utf) == (size_t)-1)
            continue;

        /* First pass measures, second pass converts. */
        sz = utf8tombs(NULL, wide);
        if (sz == (size_t)-1)
            continue;

        u = malloc(sz + 1);
        if (!u)
            continue;

        if (utf8tombs(u, wide) != (size_t)-1)
            puts((const char *)u);
        free(u);
    }

    return 0;
} [/code]

UTF8转换为GB编码gb2312转换为utf-8,布布扣,bubuko.com

UTF-8 转换为 GB 编码,GB2312 转换为 UTF-8

标签:code   get   string   set   问题      

原文地址:http://www.cnblogs.com/pengkunfan/p/3794663.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!