标签:text nil net back print prot lock package xxxx
1.获取初始页面内容
package main
import (
"net/http"
"fmt"
"io/ioutil"
)
// main fetches a Baidu Tieba thread page and prints its raw HTML to stdout.
func main() {
	// http.Get returns two values: the response and an error.
	res, err := http.Get("https://tieba.baidu.com/p/5524106374?red_tag=0000236673")
	if err != nil {
		panic(err)
	}
	defer res.Body.Close()

	// Any status code other than 200 means something went wrong.
	if res.StatusCode != http.StatusOK {
		fmt.Println("err statuscode:", res.StatusCode)
		return
	}

	// Read the whole body into all. A failed or truncated read is reported
	// instead of silently printing a partial page.
	all, err := ioutil.ReadAll(res.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(all))

	// Alternatively, httputil.DumpResponse reads the response body into all
	// for us and additionally emits the status line and the headers:
	//
	//	all, _ = httputil.DumpResponse(res, true)
	//	fmt.Println(string(all))
	//
	// Sample of the header part of that output:
	//
	//	HTTP/1.1 200 OK
	//	Transfer-Encoding: chunked
	//	Connection: keep-alive
	//	Content-Type: text/html; charset=UTF-8
	//	Date: Sat, 07 Jul 2018 05:20:48 GMT
	//	P3p: CP=" OTI DSP COR IVA OUR IND COM "
	//	Server: Apache
	//	Set-Cookie: TIEBA_USERTYPE=d8c56c898382fa778148475e; expires=Thu, 31-Dec-2020 15:59:59 GMT; path=/; domain=tieba.baidu.com
	//	Set-Cookie: wise_device=0; path=/
	//	Set-Cookie: BAIDUID=3826C6F501EC8C114AC77215BBE0DA64:FG=1; expires=Sun, 07-Jul-19 05:20:48 GMT; max-age=31536000; path=/; domain=.baidu.com; version=1
	//	Tracecode: 12484498910460795914070713
	//	Tracecode: 12484498910470965258070713
	//	Vary: Accept-Encoding
	//	X-Xss-Protection: 1; mode=block
}
2.使用正则表达式解析,并提取url
package main
import (
"net/http"
"fmt"
"io/ioutil"
"regexp"
)
// main downloads a Baidu Tieba thread page, extracts the image URLs found in
// src="..." attributes with a regular expression, and prints the ones whose
// length matches that of a known post-picture URL.
func main() {
	res, err := http.Get("https://tieba.baidu.com/p/5524106374?red_tag=0000236673")
	if err != nil {
		panic(err)
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		fmt.Println("err statuscode:", res.StatusCode)
		return
	}

	// Read the whole body into all. A failed or truncated read is reported
	// instead of silently scanning a partial page.
	all, err := ioutil.ReadAll(res.Body)
	if err != nil {
		panic(err)
	}

	// Find every image link of the form src="http://xxxxxx.jpg" (or .png).
	re := regexp.MustCompile(`src="(http[^"]+?(?:jpg|png))"`)

	// FindAllString would return only the whole match (including the
	// src="..." wrapper). FindAllStringSubmatch returns, for each match, a
	// slice holding the whole match followed by the text of each capture
	// group, so the bare URL is picked out below as element 1.
	match := re.FindAllStringSubmatch(string(all), -1)

	// Some images on the page also satisfy the pattern but are not the
	// pictures we are after. The wanted URLs all have the same length as this
	// sample, so anything with a different length is skipped.
	const sample = "https://imgsa.baidu.com/forum/w%3D580/sign=605377bf04b30f24359aec0bf897d192/eb55981bb051f8199dc2df94d1b44aed2c73e7d5.jpg"

	urls := make([]string, 0, len(match))
	for _, m := range match {
		if len(m[1]) != len(sample) {
			continue
		}
		urls = append(urls, m[1])
	}
	fmt.Println(urls)
}
//[https://imgsa.baidu.com/forum/w%3D580/sign=51dfed7aafc27d1ea5263bcc2bd7adaf/29aa8064034f78f072d6c52d72310a55b1191cd2.jpg
// https://imgsa.baidu.com/forum/w%3D580/sign=b76619654c10b912bfc1f6f6f3fcfcb5/0b74e395d143ad4bab64181e89025aafa50f0669.jpg
// https://imgsa.baidu.com/forum/w%3D580/sign=dc2cfe017dc6a7efb926a82ecdfbafe9/6e799a3533fa828b9d23c63df61f4134960a5ab7.jpg
// https://imgsa.baidu.com/forum/w%3D580/sign=605377bf04b30f24359aec0bf897d192/eb55981bb051f8199dc2df94d1b44aed2c73e7d5.jpg
// https://imgsa.baidu.com/forum/w%3D580/sign=2f3f86cc444a20a4311e3ccfa0539847/644711f33a87e95059032db21b385343faf2b4a4.jpg
// https://imgsa.baidu.com/forum/w%3D580/sign=e30f414125dda3cc0be4b82831e83905/4b6297315c6034a8f6e6605fc0134954082376b4.jpg
// https://imgsa.baidu.com/forum/w%3D580/sign=50b8de14898ba61edfeec827713597cc/af59f91b0ef41bd54ce3302a5ada81cb38db3d00.jpg
// https://imgsa.baidu.com/forum/w%3D580/sign=3f9442bff8deb48ffb69a1d6c01e3aef/2f586a34970a304ee7717824dac8a786c8175c61.jpg
// https://imgsa.baidu.com/forum/w%3D580/sign=17f88df316950a7b75354ecc3ad3625c/a98c2146f21fbe09309315d160600c338544adea.jpg
// https://imgsa.baidu.com/forum/w%3D580/sign=4bae33e30224ab18e016e13f05f8e69a/1dbb35178a82b901535815f6788da9773b12efc2.jpg]
package main
import (
"net/http"
"fmt"
"io/ioutil"
"regexp"
)
// get_pic_url wraps the fetch-and-extract steps into a reusable function: it
// downloads the page at link and returns the picture URLs found in it.
//
// Parameters:
//   - link: URL of the page to fetch.
//   - rule: regular expression applied to the page body; its first capture
//     group must capture the URL itself.
//   - target: a sample of the kind of URL wanted. Only captured URLs whose
//     byte length equals len(target) are kept.
//
// It returns the kept URLs (each element is a string) in page order; the
// slice is non-nil even when nothing matched.
//
// It panics if the request fails, if the status code is not 200, if the body
// cannot be read completely, or if rule does not compile.
func get_pic_url(link, rule, target string) []interface{} {
	res, err := http.Get(link)
	if err != nil {
		panic(err)
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		fmt.Println("err statuscode:", res.StatusCode)
		panic("出错了")
	}

	// A failed or truncated read must not be mistaken for a complete page,
	// otherwise the caller would get a silently incomplete URL list.
	all, err := ioutil.ReadAll(res.Body)
	if err != nil {
		panic(err)
	}

	re := regexp.MustCompile(rule)
	match := re.FindAllStringSubmatch(string(all), -1)

	urls := make([]interface{}, 0, len(match))
	for _, m := range match {
		// m[0] is the whole match, m[1] the first capture group (the URL).
		if len(m[1]) != len(target) {
			continue
		}
		urls = append(urls, m[1])
	}
	return urls
}
// main runs get_pic_url against one Tieba thread and prints each picture URL
// it returns on its own line.
func main() {
	const (
		// Thread page to scan.
		threadURL = "https://tieba.baidu.com/p/4244799788?red_tag=2313275030"
		// Captures the value of any src="..." attribute ending in .jpg.
		srcPattern = `src="(.+?\.jpg)"`
		// Sample picture URL; get_pic_url keeps only URLs of this length.
		sampleURL = "https://imgsa.baidu.com/forum/w%3D580/sign=a80a7ab75eee3d6d22c687c373166d41/862df7246b600c337e73b7d81d4c510fd9f9a163.jpg"
	)

	found := get_pic_url(threadURL, srcPattern, sampleURL)
	for i := 0; i < len(found); i++ {
		fmt.Println(found[i])
	}
}
//https://imgsa.baidu.com/forum/w%3D580/sign=4e328e1f8094a4c20a23e7233ef51bac/0b837a899e510fb3979e1887de33c895d0430ced.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=4b14d9aba11ea8d38a22740ca70b30cf/57efcc11728b4710c1fc93dcc4cec3fdfd032399.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=dc1c05ce3a6d55fbc5c6762e5d234f40/1b6a6a600c3387441173466c560fd9f9d72aa03f.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=72361f898526cffc692abfba89004a7d/5e528601a18b87d6e20cc20f000828381f30fd27.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=9627ab23968fa0ec7fc764051696594a/3a847acb0a46f21f4afe3743f1246b600d33aea3.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=9f46baed7c899e51788e3a1c72a6d990/1a78aec379310a5519a5b7e4b04543a9832610ef.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=0dceca0bd30735fa91f04eb1ae500f9f/462a024f78f0f736091524110d55b319eac41381.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=4da8783f92cad1c8d0bbfc2f4f3f67c4/2eb3fd039245d688a9d9dab4a3c27d1ed31b2491.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=6459fdcc0ffa513d51aa6cd60d6c554c/e708f31fbe096b6303f236400b338744eaf8ac82.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=5cd8795f992f07085f052a08d924b865/675d622762d0f7034a81fecc0ffa513d2697c5ba.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=77e1f18f29dda3cc0be4b82831e83905/7da177c6a7efce1ba1c1a0daa851f3deb58f6582.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=bddeeefe9b82d158bb8259b9b00b19d5/4ac8c8177f3e6709a5ea2def3cc79f3df9dc5582.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=ab1df271f4deb48ffb69a1d6c01e3aef/7f16d009b3de9c8287bf919f6b81800a18d84392.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=c527aa16a8c379317d688621dbc5b784/15578718367adab4f73a73538cd4b31c8601e483.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=0bce68f3d239b6004dce0fbfd9503526/407a5882b2b7d0a212f46f8dccef76094b369aab.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=64b851138044ebf86d716437e9f8d736/d7c9e850352ac65c1fe00e63fcf2b21192138a83.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=2e1d5d1b5cb5c9ea62f303ebe538b622/e199902397dda144b7b207a2b5b7d0a20cf48631.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=727db5e4b04543a9f51bfac42e178a7b/c33d8ad4b31c870160efd9f6207f9e2f0708ff10.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=77dd52d3d0ca7bcb7d7bc7278e086b3f/d9d5023b5bb5c9ea101d68f3d239b6003bf3b386.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=76dc96b5d643ad4ba62e46c8b2035a89/aee78326cffc1e17256de7214d90f603728de99b.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=09f988ea7ccb0a4685228b315b63f63e/ee755ab5c9ea15ce33d98a19b1003af33a87b2bd.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=d41bcbcef71f3a295ac8d5c6a925bce3/3c16cdbf6c81800a4197e31eb63533fa828b477a.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=00cd89a8d33f8794d3ff4826e21a0ead/d8b4e7cd7b899e519d3b500445a7d933c8950d33.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=0ebf913fb9096b6381195e583c328733/f88037d3d539b6003e0a59a7ee50352ac75cb7ac.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=b8db0843f1246b607b0eb27cdbf91a35/3877b7003af33a87ef82560ac15c10385243b585.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=e4baea7493eef01f4d1418cdd0fe99e0/71ccd058ccbf6c81d03f7ff8bb3eb13533fa4060.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=6ff190046c600c33f079dec02a4d5134/5b4e3bf33a87e95031b8a07c17385343faf2b4c5.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=099bb22732fae6cd0cb4ab693fb20f9e/ee0179f0f736afc3e7816c75b419ebc4b64512db.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=4b966f04ac18972ba33a00c2d6cc7b9d/a6b5faedab64034fe3709116a8c379310a551d2b.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=44dc433f92cad1c8d0bbfc2f4f3f67c4/2eb3fd039245d688a0ade1b4a3c27d1ed31b24c5.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=4bff61282f381f309e198da199004c67/8f9ef603918fa0ecdd2a371d219759ee3c6ddb84.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=0447538859df8db1bc2e7c6c3923dddb/6fc5a71ea8d3fd1f1913f02a374e251f95ca5f52.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=1d2eae1d77cf3bc7e800cde4e100babd/9050d31b0ef41bd5d749bae456da81cb39db3d52.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=1d1af6afa7ec08fa260013af69ee3d4d/96850b46f21fbe09817a91046c600c338744ad4d.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=57bfc6ab80d6277fe912323018391f63/fc91a8ec8a136327b26e9023968fa0ec09fac7ea.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=a745d5db44166d223877159c76220945/6a0aeaf81a4c510f2b47b00a6759252dd52aa5ea.jpg
//https://imgsa.baidu.com/forum/w%3D580/sign=db4dd5319413b07ebdbd50003cd69113/4e827dd98d1001e919be461fbf0e7bec55e797ea.jpg
标签:text nil net back print prot lock package xxxx
原文地址:https://www.cnblogs.com/traditional/p/9277459.html