标签:头信息 eth api except raw request webkit body lower
存放个自己写的获取网页源码,掌握了:
1.利用CreateOLEObject方式获取源码
2.自动判断网页格式编码
需要使用到的单元:Winapi.ActiveX,System.Win.ComObj,System.WideStrUtils
需要创建结构体:TResultWebHtml (用于存放返回的源码和Cookies)
Uses Winapi.ActiveX,System.Win.ComObj,System.WideStrUtils; type TResultWebHtml = record Html : String; Cookie : String; end; function GetWebHtml(Url,Method,Code,ReferText,ReferCookies:String;Overtime:Integer;Referer,Accept,Language,Charset,Agent,ContentType:String;Redirect,Encoding,XRequestedWith:BooLean):TResultWebHtml;stdcall;//访问网页 Var I:Integer; XMLHTTP:Olevariant; ResultWebHtml:TResultWebHtml; POvertime:Integer; PUrl,PMethod,PCode,PReferText,PReferCookies:String; PReferer,PAccept,PLanguage,PCharset,PAgent,PContentType:String; PHtml: RawByteString; Temp,PGetCookies:String; TempList:TStringList; HTML:TBytes; Begin try CoInitialize(nil); //添加 CoInitialize 支持多线程调用 XMLHTTP:= CreateOLEObject(‘WinHttp.WinHttpRequest.5.1‘); Try //避免超时报错、防止出错等 //初始化默认值 if Url = ‘‘ then Exit else PUrl:=Url; //网址初始化 if Method = ‘‘ then PMethod:= ‘get‘ else PMethod:= LowerCase(Method); //初始化访问方式 if Code <> ‘‘ then PCode:= LowerCase(Code); //初始化网站编码,为空自动选择 if ReferText <> ‘‘ then PReferText:=ReferText;//设置提交信息,用于Post操作 if ReferCookies <> ‘‘ then PReferCookies:=ReferCookies;//设置网站Cookies if Overtime = 0 then POvertime:=15000 Else POvertime:=Overtime * 1000; //设置超时时间 if Referer = ‘‘ then PReferer:= PUrl else PReferer:= Referer; //设置来路,如果为空则设置为访问网站地址 if Accept = ‘‘ then PAccept:=‘text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8‘ else PAccept:=Accept; if Language = ‘‘ then PLanguage:=‘zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4‘ else PLanguage:=Language; if Charset = ‘‘ then PCharset:=‘GBK,utf-8;q=0.7,*;q=0.3‘ else PCharset:=Charset; if Agent = ‘‘ then PAgent:=‘Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2107.3 Safari/537.36‘ else PAgent:=Agent; if ContentType = ‘‘ then PContentType:=‘application/x-www-form-urlencoded‘ else PContentType:=ContentType; XMLHTTP.open(PMethod, PUrl, False); //设置头信息 XMLHTTP.setRequestHeader(‘Referer‘, PReferer); // 仅服务器可知 来路 XMLHTTP.setRequestHeader(‘Accept‘, PAccept); XMLHTTP.setRequestHeader(‘Accept-Language‘, PLanguage); //XMLHTTP.setRequestHeader(‘Accept-Charset‘, PCharset); XMLHTTP.setRequestHeader(‘User-Agent‘, PAgent); XMLHTTP.setRequestHeader(‘Content-Type‘, PContentType); XMLHTTP.setRequestHeader(‘Connection‘, ‘keep-alive‘); if PReferCookies <> ‘‘ then XMLHTTP.setRequestHeader(‘Cookie‘, PReferCookies); if Encoding then XMLHTTP.setRequestHeader(‘Accept-Encoding‘, ‘gzip‘); //设置压缩 ‘gzip‘ if Redirect then XMLHTTP.Option(6) :=True else XMLHTTP.Option(6) :=False;//设置是否支持转跳 if XRequestedWith then XMLHTTP.setRequestHeader(‘X-Requested-With‘, ‘XMLHttpRequest‘); XMLHTTP.setTimeouts(POvertime, POvertime, POvertime, POvertime); XMLHTTP.send(PReferText); HTML:=XMLHTTP.responseBody; if PCode = ‘‘ then begin //开启自动选择 //自动判断网页格式后 SetString(PHtml, PAnsiChar(Pointer(WideString(XMLHTTP.ResponseBody))), SysStringByteLen(PWideChar(WideString(XMLHTTP.ResponseBody)))); if IsUTF8String(PHtml) then Temp := TEncoding.Default.GetEncoding(65001).GetString(HTML) else Temp :=TEncoding.Default.GetString(HTML); end else begin if PCode = ‘utf-8‘ then Temp := TEncoding.Default.GetEncoding(65001).GetString(HTML) else Temp :=TEncoding.Default.GetString(HTML); end; ResultWebHtml.Html:=Temp; //获取返回COOKIES Temp:=XMLHTTP.GetallResponseHeaders; TempList:=TStringList.Create; TempList.Text :=Temp; for I := 0 to TempList.Count -1 do begin if Pos(‘Set-Cookie:‘,TempList[I]) <> 0 then begin Temp:=Copy(TempList[I],12,Length(TempList[I])); Temp:=Copy(Temp,1,Pos(‘;‘,Temp)); if Temp <> ‘‘ then PGetCookies:=PGetCookies+Temp; end; end; TempList.Free; ResultWebHtml.Cookie :=PGetCookies; Result:=ResultWebHtml; except End; finally XMLHTTP := Unassigned; CoUnInitialize; end; End; procedure TForm1.Button1Click(Sender: TObject); begin Memo1.Text:=GetWebHtml(‘http://www.cnblogs.com/sishen‘,‘Get‘,‘‘,‘‘,‘‘,30,‘‘,‘‘,‘‘,‘‘,‘‘,‘‘,True,False,False).Html; end;
标签:头信息 eth api except raw request webkit body lower
原文地址:http://www.cnblogs.com/sishen/p/6671421.html