# Tags: step, path, www, arp, parse, content, xpl, res, for
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
__author__ = 'jiangwenwen'
import pdfkit
import requests
from bs4 import BeautifulSoup
# Index page that lists every grammar step, and the URL pattern of the
# per-step explanation page that gets rendered to PDF.
STEP_LIST_URL = "http://www.coelang.tufs.ac.jp/mt/ja/gmod/steplist.html"
EXPLANATION_URL_TEMPLATE = (
    "http://www.coelang.tufs.ac.jp/mt/ja/gmod/contents/explanation/%s.html"
)
# Directory the generated PDF files are written to.
OUTPUT_DIR = "/home/grammar"
# Seconds to wait for the step-list request before giving up.
REQUEST_TIMEOUT = 30


def safe_title(title):
    """Return *title* in a form usable as a single file-name component.

    An ASCII slash would be interpreted as a directory separator when the
    title is embedded in the output path, so it is replaced by the
    full-width slash (U+FF0F), which looks the same but is a legal
    file-name character. Surrounding whitespace is removed.
    """
    return title.strip().replace("/", "\uff0f")


def explanation_url(href):
    """Build the explanation-page URL for one step-list link.

    The page id is taken as the three characters immediately before the
    last five characters of *href*.
    NOTE(review): this presumes every href ends in "<3-char id>.html";
    confirm against the live step list.
    """
    return EXPLANATION_URL_TEMPLATE % href[-8:-5]


def output_path(title):
    """Return the PDF path inside OUTPUT_DIR for a grammar *title*."""
    return "%s/%s.pdf" % (OUTPUT_DIR, safe_title(title))


def main():
    """Render every grammar explanation page on the step list to a PDF.

    Raises:
        requests.RequestException: if the step list cannot be downloaded
            or the server answers with an error status.
    """
    import os

    # Fetch the complete list of grammar entries.
    response = requests.get(STEP_LIST_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    for item in soup.find_all("li", class_="list"):
        link = item.a
        href = link.get("href") if link is not None else None
        if not href:
            # List entry without a usable link: nothing to convert.
            continue

        # get_text() also works when the anchor wraps nested tags, where
        # ``.string`` would be None.
        title = link.get_text(strip=True)
        if not title:
            continue

        # from_url() expects a URL, so hand it the page address rather
        # than downloaded HTML; the page is fetched during conversion, so
        # no separate download of its content is needed here.
        pdfkit.from_url(explanation_url(href), output_path(title))


if __name__ == "__main__":
    main()
# Tags: step, path, www, arp, parse, content, xpl, res, for
# Original article: https://www.cnblogs.com/jiangwenwen1/p/11076546.html