标签:code bfs 习惯 http 进度 注释 failed 写入文件 变量
现在有点晚了,先把源码和一些东西说一下,明天对思路和知识点进行整理。(代码有注释,比较清晰)
1 #include <iostream> 2 #include <stdio.h> 3 #include <string> 4 #include <cstdlib> 5 #include <fstream> 6 #include <WinSock2.h> 7 8 using namespace std; 9 10 #pragma warning(disable:4996) 11 //忽略VS特有警告 12 #pragma comment(lib, "ws2_32.lib") 13 //加载ws2_32.dll 14 #define BUFF_SIZE 1024 15 16 int ncount = 0; 17 string host, pos; 18 19 SOCKET ConnectFunc(string host, string pos) { 20 WSADATA wsaData; 21 SOCKET serv; 22 //创建套接字 23 if (WSAStartup(MAKEWORD(2, 2), &wsaData) != 0) {//初始化DLL 24 cout << "WSAStartup() Failed:" << WSAGetLastError() << endl; 25 system("PAUSE"); 26 return -1; 27 } 28 29 serv = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 30 //初始化套接字 31 if (serv == INVALID_SOCKET) { 32 cout << "socket() Failed:" << WSAGetLastError() << endl; 33 system("PAUSE"); 34 return -1; 35 } 36 37 struct hostent *pt = gethostbyname(host.c_str());//解析域名IP 38 if (!pt) { 39 cerr << "Get IP Error!!!" << endl; 40 system("PAUSE"); 41 return -1; 42 } 43 struct sockaddr_in serv_addr; 44 //创建结构体sockaddr_in结构体变量,绑定套接字 45 memcpy(&serv_addr.sin_addr, pt->h_addr, 4); 46 //自动响应服务器IP 47 serv_addr.sin_family = AF_INET; 48 //IPv4 49 serv_addr.sin_port = htons(80); 50 //端口 51 52 /*输出服务器IP 53 for (int i = 0; pt->h_addr_list[i]; i++) { 54 printf("IP addr %d: %s\n", i + 1, inet_ntoa(*(struct in_addr*)pt->h_addr_list[i])); 55 } 56 */ 57 58 if (connect(serv, (LPSOCKADDR)&serv_addr, sizeof(SOCKADDR)) == SOCKET_ERROR) {//连接服务器 59 cout << "connect() Failed:" << WSAGetLastError() << endl; 60 system("PAUSE"); 61 return -1; 62 }//与服务器建立连接 63 64 string request = "GET " + pos + " HTTP/1.1\r\nHost:" + host + "\r\nConnection:Close\r\n\r\n"; 65 //向服务器请求图片资源(发送到服务器的命令) 66 if (send(serv, request.c_str(), request.size(), 0) == SOCKET_ERROR) { 67 cout << "send() Failed:" << WSAGetLastError() << endl; 68 closesocket(serv); 69 system("PAUSE"); 70 return -1; 71 }//发送指令消息 72 73 return serv; 74 //返回套接字 75 } 76 77 void DownloadPicture() { 78 SOCKET serv_in = ConnectFunc(host, pos); 79 //连接服务器 80 81 char buffer[BUFF_SIZE] 
= { 0 }; 82 //数据缓存文件 83 84 string a = "G:\\Pictures\\", name; 85 cout << "picture name:"; 86 cin >> name; 87 a = a + name + ".png"; 88 //文件命名 89 90 FILE *fp = fopen(a.c_str(), "wb+"); 91 //创建文件 92 93 if (NULL == fp) { 94 cerr << "Open File" << endl; 95 system("PAUSE"); 96 exit(-1); 97 } 98 99 ncount = recv(serv_in, buffer, BUFF_SIZE, 0); 100 //跳过不需要信息(状态行和消息报头) 101 102 char *infor = strstr(buffer, "\r\n\r\n"); 103 //区分条件 104 fwrite(infor + strlen("\r\n\r\n"), sizeof(char), ncount - (infor - buffer) - strlen("\r\n\r\n"), fp); 105 //丢弃不需要数据 106 for (; (ncount = recv(serv_in, buffer, BUFF_SIZE, 0)) > 0;) { 107 fwrite(buffer, sizeof(char), BUFF_SIZE, fp); 108 Sleep(2); 109 }//循环写入数据 110 111 fclose(fp); 112 //关闭文件流指针 113 closesocket(serv_in); 114 //断开连接,清除套接字 115 } 116 117 int main() 118 { 119 CreateDirectory(L"G:\Pictures", NULL); 120 //创建文件夹 121 122 host = "images.cnblogs.com"; 123 //cin >> host; 124 //输入要爬的网站地址 125 pos = "/cnblogs_com/Mayfly-nymph/1233628/o_images.png"; 126 //cin >> pos; 127 //图片在服务器中的位置 128 DownloadPicture(); 129 //下载图片 130 system("PAUSE"); 131 return 0; 132 }
由于时间和学习进度的原因,本文没有涉及需要用到 BFS 和网页分析的爬虫;会的大牛可以去试试:直接遍历网页,提取 URL,再下载并保存图片。晚安。= - =!
希望前辈,大牛不吝赐教!
标签:code bfs 习惯 http 进度 注释 failed 写入文件 变量
原文地址:https://www.cnblogs.com/Mayfly-nymph/p/9743996.html