标签:struct host ptr set html locking style malloc str
网络通信需要用到 socket,其使用遵循“打开—读/写—关闭”的模式。
1.连接
/*
 * build_connect - open a TCP connection to an IPv4 address.
 *
 * fd:   out-parameter that receives the connected socket descriptor.
 * ip:   IPv4 address in dotted-decimal form ("a.b.c.d").
 * port: TCP port in host byte order, 0..65535.
 *
 * Returns 0 on success and -1 on any failure (bad argument, unparsable
 * address, socket() or connect() error).  When socket() or connect()
 * fails, *fd is set to -1 and no descriptor is left open; on argument or
 * address errors *fd is not touched.
 */
int build_connect(int *fd, char *ip, int port)
{
    struct sockaddr_in server_addr;

    /* htons() takes a 16-bit value; reject ports that would silently wrap. */
    if (fd == NULL || ip == NULL || port < 0 || port > 65535) {
        return -1;
    }

    memset(&server_addr, 0, sizeof server_addr);
    server_addr.sin_family = AF_INET;
    server_addr.sin_port = htons((unsigned short)port);

    /* inet_pton() returns 1 only for a well-formed dotted-decimal address. */
    if (inet_pton(AF_INET, ip, &server_addr.sin_addr) != 1) {
        return -1;
    }

    *fd = socket(PF_INET, SOCK_STREAM, 0);
    if (*fd < 0) {
        *fd = -1;
        return -1;
    }

    if (connect(*fd, (struct sockaddr *)&server_addr, sizeof server_addr) < 0) {
        close(*fd);
        *fd = -1;   /* do not hand a closed descriptor back to the caller */
        return -1;
    }

    return 0;
}
2.设置为非阻塞方式
/*
 * set_nonblocking - put a descriptor into non-blocking mode.
 *
 * fd: an open file descriptor.
 *
 * Reads the current file status flags with F_GETFL and writes them back
 * with O_NONBLOCK added, so the other flags are preserved.  Failures are
 * logged; the function returns nothing.  If the flags cannot be read, the
 * descriptor is left unchanged rather than being set from a bogus value.
 */
void set_nonblocking(int fd)
{
    int flags = fcntl(fd, F_GETFL);

    if (flags < 0) {
        LOG(LOG_LEVEL_ERROR, "fcntl getfl fail");
        return;
    }

    if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0) {
        LOG(LOG_LEVEL_ERROR, "fcntl setfl fail");
    }
}
3.发送请求
int send_request(int fd, void *arg) { int need, begin, n; char request[1024] = {0}; Url *url = (Url*)arg; sprintf(request, "GET /%s HTTP/1.0\r\n" "Host: %s\r\n" "Accept: */*\r\n" "Connection: Keep-Alive\r\n" "User-Agent: Mozilla/5.0(compatible; Qteqpidspider/1.0;)\r\n" "Referer: %s\r\n\r\n" url->path, url->domain, url->domain); need = strlen(request); begin = 0; while(need){ n = write(fd, request+begin, need) if(n<=0){ if(errno == EAGAIN){ sleep(1000); continue; } LOG(LOG_LEVEL_WARN, "Thread %lu send ERROR: %d", pthread_self(), n); free_url(url); close(fd); return -1; } begin += n; need -= n; } return 0; }
4.接收
#define HTML_MAXLEN 1024*1024 void* recv_response(void *arg) { evso_arg *narg = (evso_arg *)arg; Response *resp = (Response *)malloc(sizeof(Response)); resp->header = NULL; resp->body = (char*)malloc(HTML_MAXLEN); resp->body_len = 0; resp->url = narg->url; LOG(LOG_LEVEL_INFO, "Crawling url: %s/%s", narg->url->domain, narg->url->path); int len=0; int trunc_head = 0; char *body_ptr = NULL; while(1) { n = read(narg->fd, resp->body+len, 1024); if(n<0) { if(errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR){ usleep(100000); continue; } LOG(LOG_LEVEL_WARN, "Read socket fail: %s", strerror(errno)); break; }else if(n == 0){ resp->body_len = len; if(resp->body_len>0){ //执行 } for(int i=0; i<(int)modules_post_html.size(); i++) modules_post_html[i]->handle(resp); } break; }else{ len+= n; resp->body[len]=‘\0‘; if(!trunc_head){ if((body_ptr = strstr(resp->body, "\r\n\r\n")) != NULL){ *(body_ptr+2) = ‘\0‘; resp->header = parse_header(resp->body); if(!header_postcheck(resp->header)){ goto leave; } trunc_head = 1; body_ptr += 4; for(i=0; *body_ptr;i++){ resp->body[i] = *body_ptr; body_ptr++; } resp->body[i]=‘\0‘; len=i; } continue; } } }
标签:struct host ptr set html locking style malloc str
原文地址:http://www.cnblogs.com/canyudeguang/p/6925270.html