通过curl_setopt()函数可以方便快捷的抓取网页(采集很方便),curl_setopt 是php的一个扩展库
使用条件:需要在php.ini 中配置开启。(PHP 4 >= 4.0.2)
//取消下面的注释
extension=php_curl.dll
在Linux下面,需要重新编译PHP了,编译时,你需要打开编译参数——在configure命令上加上“–with-curl” 参数。
1、 一个抓取网页的简单案例:
- $ch = curl_init();
-
- curl_setopt($ch, CURLOPT_URL, "http://www.baidu.com/");
- curl_setopt($ch, CURLOPT_HEADER, false);
-
- curl_exec($ch);
-
- curl_close($ch);
2、POST数据案例:
- $ch = curl_init();
- $data = ‘phone=‘. urlencode($phone);
- curl_setopt($ch, CURLOPT_URL, "http://www.post.com/");
- curl_setopt($ch, CURLOPT_POST, 1);
- curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
- curl_exec($ch);
-
- curl_close($ch);
3、关于SSL和Cookie
关于SSL也就是HTTPS协议,你只需要把CURLOPT_URL连接中的http://变成https://就可以了。当然,还有一个参数叫CURLOPT_SSL_VERIFYHOST可以设置为验证站点。
关于Cookie,你需要了解下面三个参数:
CURLOPT_COOKIE,在当面的会话中设置一个cookie
CURLOPT_COOKIEJAR,当会话结束的时候保存一个Cookie
CURLOPT_COOKIEFILE,Cookie的文件。
PS:新浪微博登陆API部分截取(部分我增加了点注释,全当参数翻译下。哈哈) 有兴趣的自己研究,自己挪为己用。嘿嘿
- function http($url, $method, $postfields = NULL, $headers = array()) {
- $this->http_info = array();
- $ci = curl_init();
-
- curl_setopt($ci, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_0);
- curl_setopt($ci, CURLOPT_USERAGENT, $this->useragent);
- curl_setopt($ci, CURLOPT_CONNECTTIMEOUT, $this->connecttimeout);
- curl_setopt($ci, CURLOPT_TIMEOUT, $this->timeout);
- curl_setopt($ci, CURLOPT_RETURNTRANSFER, TRUE);
- curl_setopt($ci, CURLOPT_ENCODING, "");
- curl_setopt($ci, CURLOPT_SSL_VERIFYPEER, $this->ssl_verifypeer);
- curl_setopt($ci, CURLOPT_HEADERFUNCTION, array($this, ‘getHeader‘));
- curl_setopt($ci, CURLOPT_HEADER, FALSE);
-
- switch ($method) {
- case ‘POST‘:
- curl_setopt($ci, CURLOPT_POST, TRUE);
- if (!empty($postfields)) {
- curl_setopt($ci, CURLOPT_POSTFIELDS, $postfields);
- $this->postdata = $postfields;
- }
- break;
- case ‘DELETE‘:
- curl_setopt($ci, CURLOPT_CUSTOMREQUEST, ‘DELETE‘);
- if (!empty($postfields)) {
- $url = "{$url}?{$postfields}";
- }
- }
-
- if ( isset($this->access_token) && $this->access_token )
- $headers[] = "Authorization: OAuth2 ".$this->access_token;
-
- $headers[] = "API-RemoteIP: " . $_SERVER[‘REMOTE_ADDR‘];
- curl_setopt($ci, CURLOPT_URL, $url );
- curl_setopt($ci, CURLOPT_HTTPHEADER, $headers );
- curl_setopt($ci, CURLINFO_HEADER_OUT, TRUE );
-
- $response = curl_exec($ci);
- $this->http_code = curl_getinfo($ci, CURLINFO_HTTP_CODE);
- $this->http_info = array_merge($this->http_info, curl_getinfo($ci));
- $this->url = $url;
-
- if ($this->debug) {
- echo "=====post data======\r\n";
- var_dump($postfields);
-
- echo ‘=====info=====‘."\r\n";
- print_r( curl_getinfo($ci) );
-
- echo ‘=====$response=====‘."\r\n";
- print_r( $response );
- }
- curl_close ($ci);
- return $response;
- }