码迷,mamicode.com
首页 > Web开发 > 详细

php curl 正则获取网页标题

时间:2016-05-13 09:26:26      阅读:126      评论:0      收藏:0      [点我收藏+]

标签:

<?php
/****/
//Gary xu
//1122557724@qq.com
/****/
namespace Xuyaoxiang;

	/**
	 * Minimal page scraper: downloads a URL with cURL, normalises the body to
	 * the target encoding and extracts one fragment with a named regular
	 * expression (page title, meta description, declared charset, ...).
	 *
	 * Typical use: construct with a URL, then call get_content() with one of
	 * the keys of $pattern_array.
	 */
	class Snoopy {

		/**
		 * Named extraction patterns. Capture group 1 of each pattern is the
		 * value that get_content() returns.
		 */
		public $pattern_array=array(
			// "s" lets the title span several lines; [^>]* tolerates attributes on the tag.
			'title'=>'/<title[^>]*>(.*?)<\/title>/is',
			// Case-insensitive instead of the character class [d|D], which also accepted "|".
			'description'=>'/<meta +name="description" +content="(.*?)" *\/?>/i',
			'charset'=>'/charset=\"?([\w-]+)\"?/i',
		);

		/** User-Agent header sent with the request so the server sees a regular browser. */
		public $user_agent='Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36';

		/** Encoding the downloaded page is converted to before matching. */
		public $target_code="utf-8";

		/** Maximum number of seconds the whole transfer may take. */
		public $timeout=30;

		/** URL of the page to fetch. */
		public $url;

		/** Raw response body, or false when the request failed. */
		public $data;

		/** Key of the pattern used by the most recent get_content() call. */
		public $pattern_key;

		/**
		 * @param string $url URL of the page to fetch.
		 */
		public function __construct($url)
		{
			$this->url=$url;
		}

		/**
		 * Registers a new extraction pattern or replaces an existing one.
		 *
		 * @param string $key Name under which the pattern is stored.
		 * @param string $val PCRE pattern; capture group 1 must hold the wanted value.
		 */
		public function set_pattern($key,$val)
		{
			$this->pattern_array[$key]=$val;
		}

		/**
		 * Fetches the page and returns the text matched by the named pattern.
		 *
		 * @param string $pattern_key Key of an entry in $pattern_array.
		 * @return string|false Trimmed content of capture group 1, or false when
		 *                      the key is empty/unknown, the download failed or
		 *                      the pattern did not match.
		 */
		public function get_content($pattern_key)
		{
			$this->pattern_key=$pattern_key;

			if($pattern_key===null || $pattern_key==='' || !isset($this->pattern_array[$pattern_key]))
			{
				return false;
			}

			$this->curl_get_data();

			// Strict checks: only a failed or empty transfer aborts here.
			if($this->data===false || $this->data==='')
			{
				return false;
			}

			$this->check_charset($this->get_charset());

			$content=$this->get_key_content();

			// No match leaves group 1 undefined; report it instead of raising a notice.
			if(!isset($content[1]))
			{
				return false;
			}

			return trim($content[1]);
		}

		/**
		 * Downloads $this->url into $this->data (false on failure).
		 */
		public function curl_get_data()
		{
			$curl=curl_init();

			if($curl===false)
			{
				$this->data=false;
				return;
			}

			// URL to fetch.
			curl_setopt($curl, CURLOPT_URL, $this->url);

			// Do not include the response headers in the returned body.
			curl_setopt($curl, CURLOPT_HEADER, 0);

			// Return the body as a string instead of printing it.
			curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

			curl_setopt($curl, CURLOPT_USERAGENT, $this->user_agent);

			// Follow HTTP redirects (e.g. http -> https) so the final page is parsed.
			curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
			curl_setopt($curl, CURLOPT_MAXREDIRS, 5);

			// Never hang forever on an unresponsive host.
			curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
			curl_setopt($curl, CURLOPT_TIMEOUT, $this->timeout);

			// Run the request.
			$this->data=curl_exec($curl);

			curl_close($curl);
		}

		/**
		 * Converts $this->data from the page's charset to $this->target_code.
		 * The data is left untouched when the charset is empty, already equals
		 * the target, or is not known to mbstring.
		 *
		 * @param string $page_charset Charset declared by the page.
		 */
		public function check_charset($page_charset)
		{
			if(!is_string($page_charset) || $page_charset==='' || strcasecmp($page_charset,$this->target_code)===0)
			{
				return;
			}

			try
			{
				$converted=mb_convert_encoding($this->data,$this->target_code,$page_charset);
			}
			catch(\ValueError $e)
			{
				// PHP 8 throws for an unknown encoding name; older versions return false.
				$converted=false;
			}

			if($converted!==false)
			{
				$this->data=$converted;
			}
		}

		/**
		 * Runs the currently selected pattern against the page data.
		 *
		 * @return array preg_match() groups; empty when nothing matched or the
		 *               selected pattern key is not registered.
		 */
		public function get_key_content()
		{
			if(!isset($this->pattern_array[$this->pattern_key]))
			{
				return array();
			}

			$content=array();
			preg_match($this->pattern_array[$this->pattern_key],(string)$this->data,$content);

			return $content;
		}

		/**
		 * Reads the charset declared inside the page data.
		 *
		 * @return string Lower-cased charset name, or '' when none is declared.
		 */
		public function get_charset()
		{
			$reg_charset=array();
			preg_match($this->pattern_array['charset'],(string)$this->data,$reg_charset);

			return isset($reg_charset[1]) ? strtolower($reg_charset[1]) : '';
		}
	}


// Demo: fetch the QQ portal home page and print its <title> as UTF-8 text.
header("Content-type:text/html;charset=utf-8");

$snoopy=new Snoopy("http://www.qq.com");

// get_content() yields false when the download or the match fails;
// print_r() then prints nothing.
$title=$snoopy->get_content('title');

print_r($title);
?>

  

php curl 正则获取网页标题

标签:

原文地址:http://www.cnblogs.com/xuyaoxiang/p/5485373.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!