标签:安装 ica content down video webp iter div encoding
目前遍历循环尚未完成,因此只会下载搜索结果中的第一个视频;后续会完善为可以单独调用的接口。脚本还包含音频与视频的合并步骤,运行前需要先安装 ffmpeg 环境。
# -*- coding: utf-8 -*-
"""Search Bilibili for a keyword and download the first matching video.

The script fetches a search-result page, picks the first result, reads the
``window.__playinfo__`` JSON embedded in the video page to obtain one video
stream URL and one audio stream URL, saves both to disk and finally muxes
them into a single file by invoking the ``ffmpeg`` executable.
"""
import io
import json
import os
import re
import shutil
import subprocess
import sys
import time
import urllib.request
from urllib import parse, request

# Third-party modules are imported defensively so that the pure parsing
# helpers stay importable on a machine that lacks them; the methods that
# really need a module raise a clear ImportError when they are called.
try:
    import requests
except ImportError:
    requests = None

try:
    from bs4 import BeautifulSoup
except ImportError:
    BeautifulSoup = None

try:
    import ffmpeg  # noqa: F401 -- unused below; retained from the original import list
except ImportError:
    ffmpeg = None


# Headers sent when fetching HTML pages (search results and video pages).
_PAGE_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.74 Safari/537.36 Edg/79.0.309.43',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
}

# Matches the JSON blob the video page assigns to ``window.__playinfo__``.
_PLAYINFO_RE = re.compile('<script>window.__playinfo__=(.*?)</script>', re.S)

# Characters that are rejected in file names on Windows (plus control chars).
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

# Location the original script hard-coded; still honoured when it exists.
_LEGACY_FFMPEG = r"D:\sofware\ffmpeg-4.3.1-2021-01-01-full_build\bin\ffmpeg.exe"

_TIMEOUT = 30            # seconds; without a timeout a stalled server hangs the script
_CHUNK_SIZE = 64 * 1024  # bytes written per iteration while streaming a download


def _force_utf8_stdout():
    """Make ``print`` emit UTF-8 so non-ASCII titles do not crash on legacy consoles."""
    reconfigure = getattr(sys.stdout, 'reconfigure', None)
    if reconfigure is not None:
        # In-place change: unlike re-wrapping, no second wrapper owns the buffer.
        reconfigure(encoding='utf-8')
    elif hasattr(sys.stdout, 'buffer'):
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')


_force_utf8_stdout()


def _require(module, package):
    """Return *module*, or raise ImportError naming *package* if it failed to import."""
    if module is None:
        raise ImportError(
            '{0} is required for this operation; install it with "pip install {0}"'.format(package))
    return module


def _fetch_text(url):
    """GET *url* with browser-like headers and return the decoded body."""
    http = _require(requests, 'requests')
    response = http.get(url, headers=_PAGE_HEADERS, timeout=_TIMEOUT)
    # Fail here rather than later with a confusing parse error on an error page.
    response.raise_for_status()
    return response.text


def _dash_stream_url(html, kind):
    """Return the download URL of the first DASH stream of *kind* ('video' or 'audio')."""
    match = _PLAYINFO_RE.search(html)
    if match is None:
        raise ValueError('window.__playinfo__ was not found in the page')
    stream = json.loads(match.group(1))['data']['dash'][kind][0]
    backups = stream.get('backup_url')
    if backups:
        return backups[0]
    # NOTE(review): assumes a stream entry also exposes its primary address
    # under 'base_url' -- confirm against a live playinfo payload.
    primary = stream.get('base_url')
    if primary:
        return primary
    raise ValueError('no download URL in the first {} stream'.format(kind))


def _strip_scheme(href):
    """Drop a leading ``http(s):`` scheme and/or ``//`` prefix from *href*."""
    return re.sub(r'^(?:https?:)?//', '', href.strip())


def _safe_filename(title):
    """Turn *title* into a file-name stem that is valid on common file systems."""
    cleaned = _UNSAFE_FILENAME_CHARS.sub('_', title).strip()
    return cleaned or 'video'


def _download(url, headers, path):
    """Stream *url* into the file *path* and return the number of bytes written."""
    http = _require(requests, 'requests')
    response = http.get(url, stream=True, headers=headers, timeout=_TIMEOUT)
    try:
        response.raise_for_status()
        # The header is optional (e.g. chunked transfer), so never index it blindly.
        size = response.headers.get('content-length')
        if size is not None:
            print('[文件大小]:%0.2f MB' % (int(size) / 1024 / 1024))
        written = 0
        with open(path, mode='wb') as out:
            for chunk in response.iter_content(chunk_size=_CHUNK_SIZE):
                if chunk:
                    out.write(chunk)
                    written += len(chunk)
    finally:
        response.close()
    return written


def _find_ffmpeg(explicit=None):
    """Locate the ffmpeg executable: explicit path, legacy path, then ``PATH``."""
    if explicit:
        return explicit
    if os.path.isfile(_LEGACY_FFMPEG):
        return _LEGACY_FFMPEG
    found = shutil.which('ffmpeg')
    if found is None:
        raise FileNotFoundError(
            'ffmpeg executable not found; install ffmpeg and add it to PATH, '
            'or set BiliBili.ffmpeg_path')
    return found


class BiliBili(object):
    """Download the first video of a Bilibili search-result page.

    Attributes:
        url: The search URL handed to the constructor.
        ffmpeg_path: Optional explicit path to the ffmpeg executable; when
            left as ``None`` the executable is auto-detected.
    """

    ffmpeg_path = None

    def __init__(self, url):
        self.url = url

    def html(self, url):
        """Fetch *url* and return its body as text.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        return _fetch_text(url)

    def get_video_html(self, url):
        """Fetch the video page at *url* and return its body as text.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        return _fetch_text(url)

    def get_video_info(self, html):
        """Return the download URL of the first video stream found in *html*.

        Raises:
            ValueError: If the page carries no play info or no usable URL.
        """
        return _dash_stream_url(html, 'video')

    def get_audio_info(self, html):
        """Return the download URL of the first audio stream found in *html*.

        Raises:
            ValueError: If the page carries no play info or no usable URL.
        """
        return _dash_stream_url(html, 'audio')

    def search_video_info(self, html):
        """Return ``(title, video_url)`` of the first result in a search page.

        ``video_url`` has no scheme; callers prepend ``https://``.

        Raises:
            ValueError: If the page contains no usable search result.
        """
        soup = _require(BeautifulSoup, 'beautifulsoup4')(html, "html.parser")
        for tag in soup.find_all('div', class_='info'):
            title_link = tag.find('a', class_='title')
            first_link = tag.find('a')
            href = first_link.get('href') if first_link is not None else None
            if title_link is None or not href:
                # Not a regular result card; keep looking instead of crashing.
                continue
            return title_link.get_text(), _strip_scheme(href)
        raise ValueError('no video results found in the search page')

    def search_video(self, html):
        """Pick the first result from the search page *html* and download it."""
        title, video_url = self.search_video_info(html)
        print(title)
        print(video_url)
        print(self.url)
        self.run_video(title, video_url, self.url)

    def run_search(self):
        """Fetch the search page at ``self.url`` and download its first result."""
        self.search_video(self.html(self.url))

    def run_video(self, title, video_url, url):
        """Download the video and audio streams of one video and merge them.

        Args:
            title: Video title; used (sanitised) as the output file-name stem.
            video_url: Video page address without the ``https://`` scheme.
            url: Search URL the video was found through; only printed.

        Raises:
            requests.HTTPError: If a page or stream request fails.
            ValueError: If the video page carries no usable stream URL.
        """
        page_url = 'https://' + video_url
        print("视频名称:" + title)
        print(url)
        print(page_url)
        page = self.get_video_html(page_url)
        download_video_url = self.get_video_info(page)
        download_audio_url = self.get_audio_info(page)
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            # The original script sends the video page as Referer on stream requests.
            'Referer': page_url,
            'Accept-Encoding': "gzip, deflate, br",
            'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
            'Connection': 'keep-alive',
        }
        # Titles may contain characters such as '/' or '?' that cannot be file names.
        file_stem = _safe_filename(title)
        _download(download_video_url, headers, file_stem + '.mp4')
        _download(download_audio_url, headers, file_stem + '.mp3')
        print('正在保存:', title)
        self.video_audio_merge_single(file_stem)

    def video_audio_merge_single(self, video_name):
        """Mux ``<video_name>.mp4`` and ``<video_name>.mp3`` into one file.

        The streams are copied without re-encoding into
        ``<video_name>(合).mp4``; an existing output file is overwritten.

        Raises:
            FileNotFoundError: If no ffmpeg executable can be located.
            subprocess.CalledProcessError: If ffmpeg exits with an error.
        """
        print('视频合成开始:', video_name)
        command = [
            _find_ffmpeg(self.ffmpeg_path),
            '-y',  # never block on ffmpeg's interactive "overwrite?" prompt
            '-i', '{}.mp4'.format(video_name),
            '-i', '{}.mp3'.format(video_name),
            '-vcodec', 'copy',
            '-acodec', 'copy',
            '{}.mp4'.format(video_name + '(合)'),
        ]
        print(' '.join(command))
        # Argument list + no shell: the title cannot inject shell syntax, and
        # run() waits for ffmpeg to finish instead of guessing with a sleep.
        subprocess.run(command, check=True)
        print("视频合成结束:", video_name)


def main():
    """Search for a fixed keyword and download the first result."""
    url = 'https://search.bilibili.com/all?'
    keyword = '哈哈哈哈哈'  # title of the video to search for
    keyword = parse.quote(keyword)
    param = 'keyword=' + keyword + '&from_source=nav_searchs&pm_id_from=333.851.b_696e7465726e6174696f6e616c486561646572.15'
    BB = BiliBili(url + param)
    BB.run_search()


if __name__ == '__main__':
    main()
标签:安装 ica content down video webp iter div encoding
原文地址:https://www.cnblogs.com/duanminkid/p/14300350.html