module.exports = {
options: {
hostname: 'www.ozon.ru',
port: 443,
path: '/context/detail/id/144054492/',
method: 'GET',
headers: {
'accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'if-modified-since':'Fri, 08 Jun 2018 03:42:08 GMT',
'referer':'https://www.ozon.ru/catalog/1133763/?type=48856',
'upgrade-insecure-requests':1,
'user-agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'
}
},
baseURL: 'https://www.ozon.ru',
timeout: 3000
}
var fs = require('fs')
var datas = require('../tmp/mid_output.json')
// Registry of extra CSV column names, used as a set: keys are column names.
var culs = {}
/**
 * Formats one value as a single CSV field.
 *
 * - `undefined` / `null` yield `null` (string concatenation turns that into
 *   the text "null").
 * - Line breaks are replaced by ';' so a record never spans several lines.
 * - A field containing a comma or a double quote is wrapped in double quotes
 *   with embedded quotes doubled (RFC 4180), so it cannot shift columns.
 *
 * @param {*} str - Raw value; non-strings are converted with String().
 * @returns {string|null} The CSV-safe field, or null when there is no value.
 */
var prepare = str => {
  if (str === undefined || str === null) {
    return null
  }
  // The previous pattern /n/g matched the letter "n", not a line break.
  var field = String(str).replace(/\r\n|\r|\n/g, ';')
  if (/[",]/.test(field)) {
    field = '"' + field.replace(/"/g, '""') + '"'
  }
  return field
}
// Remove, in place, every entry of `datas` that has no `params`.
//
// The array is walked from the end so that splice() never shifts an element
// that has not been examined yet. The earlier forward walk skipped the entry
// right after each removal, and its follow-up pass then cut that many entries
// off the END of the array, whether or not those were the ones lacking `params`.
var num = 0; // not read by the code visible here; declaration kept as-is
var num1 = 0; // number of entries removed
for (var i = datas.length - 1; i >= 0; i--) {
  // `== null` also covers an explicit JSON null, not only a missing key.
  if (datas[i] == null || datas[i].params == null) {
    datas.splice(i, 1);
    num1++;
  }
}
// Flatten each entry: copy every { key, value } pair from `params` onto the
// entry itself, record the key in `culs` as a CSV column, then drop `params`.
for (var data of datas) {
  // An entry without `params` contributes no extra columns instead of throwing.
  for (var param of data.params || []) {
    // Own-property test: comparing `culs[key]` with undefined would treat
    // inherited names such as "constructor" as already registered, and the
    // column would then never be emitted.
    if (!Object.prototype.hasOwnProperty.call(culs, param.key)) culs[param.key] = true
    data[param.key] = param.value
  }
  delete data.params
}
// Header row: the six fixed columns followed by one column per key in `culs`.
var columnsName = 'number,href,img,name,price,cnum'
for (var key in culs) {
  columnsName += ',' + prepare(key)
}
// Terminate the record with a real line break (this used to be the letter 'n').
columnsName += '\n'
// flag 'a' appends, so an existing output.csv is extended rather than replaced.
// writeFileSync takes no callback; failures are thrown.
fs.writeFileSync(__dirname + '/../output/output.csv', columnsName, {flag: 'a'})
// Data rows: a running number, the five fixed fields, then one field per key
// in `culs`, in the same for...in order the header row uses.
var cnt = 1
var rows = ''
for (var data of datas) {
  // Plain concatenation on purpose: a null from prepare() becomes the text
  // "null", whereas Array.prototype.join would emit an empty field.
  var str = ''
  str += cnt++
  str += ',' + prepare(data.href)
  str += ',' + prepare(data.img)
  str += ',' + prepare(data.name)
  str += ',' + prepare(data.price)
  str += ',' + prepare(data.cnum)
  for (var key in culs) {
    str += ',' + prepare(data[key])
  }
  // Real line break; this used to append the letter 'n'.
  str += '\n'
  rows += str
}
// One append for all rows instead of one synchronous write per row.
// writeFileSync takes no callback; failures are thrown.
fs.writeFileSync(__dirname + '/../output/output.csv', rows, {flag: 'a'})
// Prints the next row number, i.e. rows written + 1.
console.log(cnt)
const https = require('https')
const fs = require('fs')
const iconv = require('iconv-lite')
const jsdom = require('jsdom')
const { JSDOM } = jsdom
var config = require('../config')
// Product records shared by the functions below.
var items = []
/**
 * Reads ../input/input.txt (relative to this script), parses it as HTML and
 * appends one record per element with classes `bOneTile inline` to `items`.
 *
 * Each record holds:
 *   href  - `href` of the first `eOneTile_link` element
 *   img   - `data-image-src` of the first `eOneTile_image_link` element
 *   name  - the tile's `data-name` attribute
 *   price - the tile's `data-price` attribute, or the text 'null' when absent
 *   cnum  - innerHTML of the first `eOneTile_ReviewsCount` element, or '0'
 *
 * Throws if a tile lacks the link or image element.
 */
var getInput = () => {
  var html = fs.readFileSync(__dirname + '/../input/input.txt')
  const dom = new JSDOM(html.toString())
  var tiles = dom.window.document.getElementsByClassName('bOneTile inline')
  for (var tile of tiles) {
    var href = tile.getElementsByClassName('eOneTile_link')[0].href
    var img = tile.getElementsByClassName('eOneTile_image_link')[0].getAttribute('data-image-src')
    var name = tile.getAttribute('data-name')
    // getAttribute() reports a missing attribute as null, never undefined, so
    // the earlier `!== undefined` test could not trigger the fallback.
    var rawPrice = tile.getAttribute('data-price')
    var price = (rawPrice === null || rawPrice === undefined) ? 'null' : rawPrice
    // Look the review counter up once instead of twice.
    var reviews = tile.getElementsByClassName('eOneTile_ReviewsCount')[0]
    var cnum = (reviews === undefined ? '0' : reviews.innerHTML)
    items.push({
      href: href,
      img: img,
      name: name,
      price: price,
      cnum: cnum
    })
  }
}
/**
 * Fetches the detail page of `items[idx]` and, once the response has ended,
 * stores its property table on the item as `params` ([{ key, value }, ...]).
 *
 * The request reuses the shared `config.options`: `path` is set to the item's
 * href before the request is issued, and `headers.referer` is set to this
 * item's URL after it, so it only takes effect on the next request.
 *
 * Request and response errors are logged and leave `item.params` unset; they
 * no longer surface as an unhandled 'error' event.
 *
 * @param {number} idx - Index into `items`.
 */
var getDetail = idx => {
  var item = items[idx]
  config.options.path = item.href
  const req = https.get(config.options, res => {
    var chunks = []
    var size = 0
    res.on('data', chunk => {
      chunks.push(chunk)
      size += chunk.length
    })
    res.on('error', err => console.error('response failed for ' + item.href, err))
    res.on('end', () => {
      // The body is decoded as windows-1251 before parsing.
      var html = iconv.decode(Buffer.concat(chunks, size), 'win1251')
      const dom = new JSDOM(html)
      var lines = dom.window.document.getElementsByClassName('eItemProperties_line')
      item.params = []
      for (var line of lines) {
        // Child nodes 1 and 3 carry the property name and value.
        var keyNode = line.childNodes[1]
        var valueNode = line.childNodes[3]
        // Skip rows without both nodes instead of throwing inside the handler.
        if (!keyNode || !valueNode) continue
        item.params.push({
          key: keyNode.innerHTML,
          value: valueNode.innerHTML
        })
      }
    })
  })
  // Without a listener, a network failure is an unhandled 'error' event.
  req.on('error', err => console.error('request failed for ' + item.href, err))
  // https.get() already ends the request, so no explicit req.end() is needed.
  config.options.headers.referer = config.baseURL + item.href
}
/**
 * Builds a step function for the crawl over `items[idx .. end-1]`.
 *
 * Calling the step logs `idx`, starts getDetail(idx) and schedules the step
 * for `idx + 1` after `config.timeout` milliseconds. Once `idx < end` no
 * longer holds, the step calls setOutput() instead.
 *
 * NOTE: steps are driven by the timer only, so setOutput() runs regardless of
 * whether earlier responses have arrived.
 *
 * @param {number} idx - Index of the item this step handles.
 * @param {number} end - Exclusive upper bound of the crawl.
 * @returns {Function} The step function (takes no arguments).
 */
var getAllDetail = (idx, end) => () => {
  // Written as a negation so non-numeric input takes the same branch as before.
  if (!(idx < end)) {
    setOutput()
    return
  }
  console.log(idx)
  getDetail(idx)
  var nextStep = getAllDetail(idx + 1, end)
  setTimeout(nextStep, config.timeout)
}
/**
 * Serialises `items` as JSON and writes it asynchronously to
 * ../tmp/mid_output.json (relative to this script). The callback logs its
 * `err` argument whatever the outcome, so a successful write prints `null`.
 */
var setOutput = () => {
  var target = __dirname + '/../tmp/mid_output.json'
  var payload = JSON.stringify(items)
  fs.writeFile(target, payload, err => console.log(err))
}
// Entry point: load the product list, write it out once straight away, then
// crawl every item's detail page starting at index 0.
getInput()
setOutput()
const crawlFromStart = getAllDetail(0, items.length)
crawlFromStart()