码迷,mamicode.com
首页 > 编程语言 > 详细

go语言爬取椎名真白

时间:2018-07-07 23:05:23      阅读:213      评论:0      收藏:0      [点我收藏+]

标签:pre   util   exp   func   red   ash   nload   ocs   size   

单任务版:

package main

import (
	"net/http"
	"regexp"
	"io/ioutil"
	"os"
	"strconv"
	"time"
	"fmt"
	"runtime"
)

// get_mashiro downloads the page at link and returns the image URLs found in it.
//
// rule is a regular expression whose first capture group holds the URL.
// target is a sample URL: only captures whose byte length equals len(target)
// are kept, which is how the caller separates the wanted images from other
// matches on the page.
//
// On any failure (invalid rule, request error, non-200 status, read error)
// the problem is reported on stderr and an empty, non-nil slice is returned,
// so callers can range over the result unconditionally.
func get_mashiro(link, rule, target string) []string {
	urls := make([]string, 0)

	// Compile before touching the network so a bad rule fails fast.
	re, err := regexp.Compile(rule)
	if err != nil {
		fmt.Fprintf(os.Stderr, "get_mashiro: compiling rule %q: %v\n", rule, err)
		return urls
	}

	res, err := http.Get(link)
	if err != nil {
		// res is nil here; touching res.Body would panic.
		fmt.Fprintf(os.Stderr, "get_mashiro: %v\n", err)
		return urls
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		fmt.Fprintf(os.Stderr, "get_mashiro: fetching %s: unexpected status %s\n", link, res.Status)
		return urls
	}

	page_content, err := ioutil.ReadAll(res.Body)
	if err != nil {
		fmt.Fprintf(os.Stderr, "get_mashiro: reading %s: %v\n", link, err)
		return urls
	}

	for _, match := range re.FindAllStringSubmatch(string(page_content), -1) {
		// match[0] is the whole match; match[1] exists only when rule
		// declares at least one capture group.
		if len(match) < 2 || len(match[1]) != len(target) {
			continue
		}
		urls = append(urls, match[1])
	}
	return urls
}

// download_mashiro fetches every URL in urls, one after another, and stores
// the i-th response body as <i>.jpg under the hard-coded output directory.
// A download that fails is reported on stderr and skipped; the remaining
// URLs are still processed.
func download_mashiro(urls []string) {
	for i, v := range urls {
		if err := save_mashiro(i, v); err != nil {
			fmt.Fprintln(os.Stderr, "download_mashiro:", err)
		}
	}
}

// save_mashiro downloads link and writes its body to <i>.jpg.
// It lives in its own function so the response body is closed as soon as
// each download finishes instead of piling up until the whole loop ends.
func save_mashiro(i int, link string) error {
	res, err := http.Get(link)
	if err != nil {
		return err
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return fmt.Errorf("fetching %s: unexpected status %s", link, res.Status)
	}

	data, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return fmt.Errorf("reading %s: %v", link, err)
	}

	// The file is only created once the body has been read successfully,
	// so a failed download no longer leaves an empty .jpg behind.
	name := `F:\project\黑马go\mashiro\` + strconv.Itoa(i) + ".jpg"
	return ioutil.WriteFile(name, data, 0666)
}

func main(){
	start_time := time.Now().Unix()
	link := "https://tieba.baidu.com/p/5290405550?red_tag=0872096237"
	rule := `src="(http.+?(?:jpg|png))"`
	target := "https://imgsa.baidu.com/forum/w%3D580/sign=5a28bf191fce36d3a20483380af13a24/5f572ae93901213fb9930d1f5ee736d12e2e951c.jpg"
	pic_urls := get_mashiro(link,rule,target)
	download_mashiro(pic_urls)
	end_time := time.Now().Unix()
	fmt.Println("总用时:",end_time-start_time)

}//2

  

多任务版:

package main

import (
	"net/http"
	"regexp"
	"io/ioutil"
	"os"
	"strconv"
	"time"
	"fmt"
	"runtime"
)

// get_mashiro downloads the page at link and returns the image URLs found in it.
//
// rule is a regular expression whose first capture group holds the URL.
// target is a sample URL: only captures whose byte length equals len(target)
// are kept, which is how the caller separates the wanted images from other
// matches on the page.
//
// On any failure (invalid rule, request error, non-200 status, read error)
// the problem is reported on stderr and an empty, non-nil slice is returned,
// so callers can range over the result unconditionally.
func get_mashiro(link, rule, target string) []string {
	urls := make([]string, 0)

	// Compile before touching the network so a bad rule fails fast.
	re, err := regexp.Compile(rule)
	if err != nil {
		fmt.Fprintf(os.Stderr, "get_mashiro: compiling rule %q: %v\n", rule, err)
		return urls
	}

	res, err := http.Get(link)
	if err != nil {
		// res is nil here; touching res.Body would panic.
		fmt.Fprintf(os.Stderr, "get_mashiro: %v\n", err)
		return urls
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		fmt.Fprintf(os.Stderr, "get_mashiro: fetching %s: unexpected status %s\n", link, res.Status)
		return urls
	}

	page_content, err := ioutil.ReadAll(res.Body)
	if err != nil {
		fmt.Fprintf(os.Stderr, "get_mashiro: reading %s: %v\n", link, err)
		return urls
	}

	for _, match := range re.FindAllStringSubmatch(string(page_content), -1) {
		// match[0] is the whole match; match[1] exists only when rule
		// declares at least one capture group.
		if len(match) < 2 || len(match[1]) != len(target) {
			continue
		}
		urls = append(urls, match[1])
	}
	return urls
}

// download_mashiro fetches every URL in urls concurrently, one goroutine per
// URL, and stores the i-th response body as <i>.jpg under the hard-coded
// output directory. It returns only after every goroutine has reported back.
// A download that fails is reported on stderr; the others are unaffected.
func download_mashiro(urls []string) {
	// Kept from the original: caps the number of OS threads running Go code.
	runtime.GOMAXPROCS(4)

	// Buffered so a finished worker never blocks waiting for the collector.
	done := make(chan error, len(urls))
	for i, v := range urls {
		go func(i int, v string) {
			fmt.Println(i)
			// Always send exactly one result, success or failure, so the
			// collector loop below cannot hang or miss a worker.
			done <- save_mashiro(i, v)
		}(i, v)
	}

	// Receive one result per URL; this is what waits for the goroutines.
	for range urls {
		if err := <-done; err != nil {
			fmt.Fprintln(os.Stderr, "download_mashiro:", err)
		}
	}
}

// save_mashiro downloads link and writes its body to <i>.jpg.
// Errors are returned instead of ignored so that a failed request cannot
// panic inside a goroutine and take the whole process down.
func save_mashiro(i int, link string) error {
	res, err := http.Get(link)
	if err != nil {
		return err
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return fmt.Errorf("fetching %s: unexpected status %s", link, res.Status)
	}

	data, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return fmt.Errorf("reading %s: %v", link, err)
	}

	// The file is only created once the body has been read successfully,
	// so a failed download no longer leaves an empty .jpg behind.
	name := `F:\project\黑马go\mashiro\` + strconv.Itoa(i) + ".jpg"
	return ioutil.WriteFile(name, data, 0666)
}

func main(){
	start_time := time.Now().Unix()
	link := "https://tieba.baidu.com/p/5290405550?red_tag=0872096237"
	rule := `src="(http.+?(?:jpg|png))"`
	target := "https://imgsa.baidu.com/forum/w%3D580/sign=5a28bf191fce36d3a20483380af13a24/5f572ae93901213fb9930d1f5ee736d12e2e951c.jpg"
	pic_urls := get_mashiro(link,rule,target)
	download_mashiro(pic_urls)
	end_time := time.Now().Unix()
	fmt.Println("总用时:",end_time-start_time)
}//1

  

go语言爬取椎名真白

标签:pre   util   exp   func   red   ash   nload   ocs   size   

原文地址:https://www.cnblogs.com/traditional/p/9278710.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!