【本文谢绝转载,原文来自http://990487026.blog.51cto.com】
chunli魂斗罗~$ cat main.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <curl/curl.h>
#include <string.h>
#include <ctype.h>
#include <iconv.h>
#define TMP_FILE "tmp.html"
#define HTML_BUFFER_SIZE 1024*800
void split(char **arr, char *str, const char *del) {
char *s = strtok(str, del);
while (s != NULL) {
*arr++ = s;
s = strtok(NULL, del);
}
}
/*将str1字符串中第一次出现的str2字符串替换成str3*/
void replaceFirst(char *str1, char *str2, char *str3) {
char str4[strlen(str1) + 1];
char *p;
strcpy(str4, str1);
if ((p = strstr(str1, str2)) != NULL)/*p指向str2在str1中第一次出现的位置*/ {
while (str1 != p && str1 != NULL)/*将str1指针移动到p的位置*/ {
str1++;
}
str1[0] = ‘\0‘; /*将str1指针指向的值变成/0,以此来截断str1,舍弃str2及以后的内容,只保留str2以前的内容*/
strcat(str1, str3); /*在str1后拼接上str3,组成新str1*/
strcat(str1, strstr(str4, str2) + strlen(str2)); /*strstr(str4,str2)是指向str2及以后的内容(包括str2),strstr(str4,str2)+strlen(str2)就是将指针向前移动strlen(str2)位,跳过str2*/
}
}
/*将str1出现的所有的str2都替换为str3*/
void replace(char *str1, char *str2, char *str3) {
while (strstr(str1, str2) != NULL) {
replaceFirst(str1, str2, str3);
}
}
/*截取src字符串中,从下标为start开始到end-1(end前面)的字符串保存在dest中(下标从0开始)*/
void substring(char *dest, char *src, int start, int end) {
int i = start;
if (start > strlen(src))return;
if (end > strlen(src))
end = strlen(src);
while (i < end) {
dest[i - start] = src[i];
i++;
}
dest[i - start] = ‘\0‘;
return;
}
/*返回src中下标为index的字符*/
char charAt(char *src, int index) {
char *p = src;
int i = 0;
if (index < 0 || index > strlen(src))
return 0;
while (i < index)i++;
return p[i];
}
/*返回str2第一次出现在str1中的位置(下表索引),不存在返回-1*/
int indexOf(char *str1, char *str2) {
char *p = str1;
int i = 0;
p = strstr(str1, str2);
if (p == NULL)
return -1;
else {
while (str1 != p) {
str1++;
i++;
}
}
return i;
}
/*返回str1中最后一次出现str2的位置(下标),不存在返回-1*/
int lastIndexOf(char *str1, char *str2) {
char *p = str1;
int i = 0, len = strlen(str2);
p = strstr(str1, str2);
if (p == NULL)return -1;
while (p != NULL) {
for (; str1 != p; str1++)i++;
p = p + len;
p = strstr(p, str2);
}
return i;
}
/*删除str左边第一个非空白字符前面的空白字符(空格符和横向制表符)*/
void ltrim(char *str) {
int i = 0, j, len = strlen(str);
while (str[i] != ‘\0‘) {
if (str[i] != 32 && str[i] != 9)break; /*32:空格,9:横向制表符*/
i++;
}
if (i != 0)
for (j = 0; j <= len - i; j++) {
str[j] = str[j + i]; /*将后面的字符顺势前移,补充删掉的空白位置*/
}
}
/*删除str最后一个非空白字符后面的所有空白字符(空格符和横向制表符)*/
void rtrim(char *str) {
char *p = str;
int i = strlen(str) - 1;
while (i >= 0) {
if (p[i] != 32 && p[i] != 9)break;
i--;
}
str[++i] = ‘\0‘;
}
/*删除str两端的空白字符*/
void trim(char *str) {
ltrim(str);
rtrim(str);
}
//这是libcurl接收数据的回调函数,相当于recv的死循环
//其中stream可以自定义数据类型,这里我传入的是文件保存路径
static size_t write_callback(void *ptr, size_t size, size_t nmemb, void *stream) {
int len = size * nmemb;
int written = len;
FILE *fp = NULL;
const char *pathfile = (const char*)stream;
if (access(pathfile,0) == -1) {
fp = fopen((char*) stream, "wb");
} else {
fp = fopen((char*) stream, "ab");
}
if (fp) {
fwrite(ptr, size, nmemb, fp);
}
// printf("%s\n",ptr);
fclose(fp);
return written;
}
//加上-lcurl库
void test_post(char* url,char* data) {
CURL *curl;
curl = curl_easy_init();
if (curl) {
//www.baidu.com/#wd=java
curl_easy_setopt(curl, CURLOPT_URL, url);
curl_easy_setopt(curl, CURLOPT_POST, 1L);
curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data);
curl_easy_perform(curl);
curl_easy_cleanup(curl);
}
fclose(curl);
}
int file_exists(char *filename) {
return (access(filename, 0) == 0);
}
int GetCharset(char *src_html,char *charCode) {
char tmp_html[HTML_BUFFER_SIZE]={0};
int pos = indexOf(src_html, "text/html; charset=");
if (pos > 0) {
strncpy(tmp_html, src_html + pos + strlen("text/html; charset="), strlen(src_html) - pos);
pos = indexOf(tmp_html, "\"");
if (pos > 0) {
strncpy(charCode, tmp_html, pos);
}
}
return 0;
}
void test_get(char* url) {
CURL *curl;
CURLcode res;
curl = curl_easy_init();
if (curl) {
if (file_exists(TMP_FILE))
remove(TMP_FILE);
curl_easy_setopt(curl, CURLOPT_URL, url);
//指定回调函数
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback);
//这个变量可作为接收或传递数据的作用
curl_easy_setopt(curl, CURLOPT_WRITEDATA, TMP_FILE);
res = curl_easy_perform(curl);
char tocode[64] = "UTF-8";
if (CURLE_OK == res) {
char *ct;
res = curl_easy_getinfo(curl, CURLINFO_CONTENT_TYPE, &ct);
if ((CURLE_OK == res) && ct)
printf("We received Content-Type: %s\n", ct);
//printf("====\n");
//int pos=strcspn(ct,"UTF-8");
int index = indexOf(ct, "=");
char* arr[3];
if (index > 0) {
split(arr, ct, "=");
//printf("%s\n", arr[1]);
strcpy(tocode, arr[1]);
}
FILE *fp = NULL;
fp = fopen(TMP_FILE, "r");
char src_html[HTML_BUFFER_SIZE]={0};
char *src_html_p = src_html;
size_t srclen = strlen(src_html);
char output_html[HTML_BUFFER_SIZE]={0};
char *output_html_p = output_html;
size_t outlen = 0;
char tmp_html[HTML_BUFFER_SIZE]={0};
if (fp) {
fread(src_html, HTML_BUFFER_SIZE, 1, fp);
strcpy(tmp_html,src_html);
if(index <0) {
GetCharset(tmp_html,tocode);
printf("%s\n",tocode);
}
//打开字符集转换
iconv_t hIconv = iconv_open(tocode,"iso-8859-1");
if (-1 == (ssize_t)hIconv) {
return ; //打开失败,可能不支持的字符集
}
printf("%s\n",src_html);
//开始转换
//size_t iconv(iconv_t cd,char **inbuf, size_t *inbytesleft,char **outbuf, size_t *outbytesleft);
size_t ret = iconv(hIconv,&src_html_p,&srclen,&output_html_p,&outlen);
if(ret == -1)
{
printf("转换失败\n");
exit(2);
}
//printf("%s\n", output_html);
//printf("ok");
if(strcmp(output_html,"")==0)
{
printf("%s\n",src_html);
}
//关闭字符集转换
iconv_close(hIconv);
}
}
curl_easy_cleanup(curl);
}
}
int main(int argc, char** argv) {
//test_post("https://www.baidu.com/","");
test_get("http://www.baidu.com/");
return 0;
}
chunli魂斗罗~$编译运行:
chunli魂斗罗~$ gcc main.c -Wall -l curl && ./a.out
We received Content-Type: text/html
UTF-8
<!DOCTYPE html><!--STATUS OK-->
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=Edge">
<link rel="dns-prefetch" href="//s1.bdstatic.com"/>
<link rel="dns-prefetch" href="//t1.baidu.com"/>
<link rel="dns-prefetch" href="//t2.baidu.com"/>
<link rel="dns-prefetch" href="//t3.baidu.com"/>
<link rel="dns-prefetch" href="//t10.baidu.com"/>
<link rel="dns-prefetch" href="//t11.baidu.com"/>
<link rel="dns-prefetch" href="//t12.baidu.com"/>
<link rel="dns-prefetch" href="//b1.bdstatic.com"/>
<title>百度一下,你就知道</title>
<link href="http://s1.bdstatic.com/r/www/cache/static/home/css/index.css" rel="stylesheet" type="text/css" />
<!--[if lte IE 8]><style index="index" >#content{height:480px\9}#m{top:260px\9}</style><![endif]-->
<!--[if IE 8]><style index="index" >#u1 a.mnav,#u1 a.mnav:visited{font-family:simsun}</style><![endif]-->
<script>var hashMatch = document.location.href.match(/#+(.*wd=[^&].+)/);if (hashMatch && hashMatch[0] && hashMatch[1]) {document.location.replace("http://"+location.host+"/s?"+hashMatch[1]);}var ns_c = function(){};</script>
<script>function h(obj){obj.style.behavior=‘url(#default#homepage)‘;var a = obj.setHomePage(‘//www.baidu.com/‘);}</script>
<noscript><meta http-equiv="refresh" content="0; url=/baidu.html?from=noscript"/></noscript>
【本文谢绝转载,原文来自http://990487026.blog.51cto.com】
本文出自 “魂斗罗” 博客,谢绝转载!
原文地址:http://990487026.blog.51cto.com/10133282/1855952