标签:
2016-05-16
最近公司有个项目,要爬取7个视频网站、3个图片网站,关于5个关键词的视频、图片数据,并将爬取到的视频、图片做成展示墙,放到企业客户的iPad上展示用。以下是相关的etl过程,写了一整天,纪念一下。哈哈~
1、SQL语句:ddl_social_scan
-- CREATE TABLE social_scan AS -- 1_in_Maybelline SELECT * FROM ( SELECT source AS website, ( CASE brand WHEN ‘Maybelline‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, tag_name AS title, ROUND( IFNULL( ( POWER(IFNULL(pic_like_num, 0), 2) + POWER( IFNULL(pic_comments_num, 0), 2 ) + POWER(IFNULL(browse_num, 0), 2) ) / ( IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0) ), 0 ), 0 ) AS volume, IFNULL(pic_like_num, 0) AS dianzan_num, IFNULL(pic_comments_num, 0) AS comment_num, IFNULL(browse_num, 0) AS browse_num, 1 / 0 AS stay_time, REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, ‘.info/in/‘, - 1 ), 10 ), ‘/‘, ‘‘ ) AS upload_date, pic_url AS download_url, REPLACE (date_time, ‘-‘, ‘‘) AS sys_date, ‘picture‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM picture.nice WHERE source = ‘in‘ AND brand = ‘Maybelline‘ ORDER BY volume DESC LIMIT 20 ) 1_in_Maybelline UNION ALL -- 2_in_Loreal SELECT * FROM ( SELECT source AS website, ( CASE brand WHEN ‘Maybelline‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, tag_name AS title, ROUND( IFNULL( ( POWER(IFNULL(pic_like_num, 0), 2) + POWER( IFNULL(pic_comments_num, 0), 2 ) + POWER(IFNULL(browse_num, 0), 2) ) / ( IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0) ), 0 ), 0 ) AS volume, IFNULL(pic_like_num, 0) AS dianzan_num, IFNULL(pic_comments_num, 0) AS comment_num, IFNULL(browse_num, 0) AS browse_num, 1 / 0 AS stay_time, REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, ‘.info/in/‘, - 1 ), 10 ), ‘/‘, ‘‘ ) AS upload_date, pic_url AS download_url, REPLACE (date_time, ‘-‘, ‘‘) AS sys_date, ‘picture‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM picture.nice WHERE source = ‘in‘ AND brand = ‘Loreal‘ ORDER BY volume DESC LIMIT 20 ) 2_in_Loreal UNION ALL -- 3_in_Meiji SELECT * FROM ( SELECT source AS website, ( CASE brand WHEN ‘Maybelline‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, tag_name AS title, ROUND( IFNULL( ( POWER(IFNULL(pic_like_num, 0), 2) + POWER( IFNULL(pic_comments_num, 0), 2 ) + POWER(IFNULL(browse_num, 0), 2) ) / ( IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0) ), 0 ), 0 ) AS volume, IFNULL(pic_like_num, 0) AS dianzan_num, IFNULL(pic_comments_num, 0) AS comment_num, IFNULL(browse_num, 0) AS browse_num, 1 / 0 AS stay_time, REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, ‘.info/in/‘, - 1 ), 10 ), ‘/‘, ‘‘ ) AS upload_date, pic_url AS download_url, REPLACE (date_time, ‘-‘, ‘‘) AS sys_date, ‘picture‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM picture.nice WHERE source = ‘in‘ AND brand = ‘Meiji‘ ORDER BY volume DESC LIMIT 20 ) 3_in_Meiji UNION ALL -- 4_in_NYX SELECT * FROM ( SELECT source AS website, ( CASE brand WHEN ‘Maybelline‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, tag_name AS title, ROUND( IFNULL( ( POWER(IFNULL(pic_like_num, 0), 2) + POWER( IFNULL(pic_comments_num, 0), 2 ) + POWER(IFNULL(browse_num, 0), 2) ) / ( IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0) ), 0 ), 0 ) AS volume, IFNULL(pic_like_num, 0) AS dianzan_num, IFNULL(pic_comments_num, 0) AS comment_num, IFNULL(browse_num, 0) AS browse_num, 1 / 0 AS stay_time, REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, ‘.info/in/‘, - 1 ), 10 ), ‘/‘, ‘‘ ) AS upload_date, pic_url AS download_url, REPLACE (date_time, ‘-‘, ‘‘) AS sys_date, ‘picture‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM picture.nice WHERE source = ‘in‘ AND brand = ‘NYX‘ ORDER BY volume DESC LIMIT 20 ) 4_in_NYX UNION ALL -- 5_nice_Maybelline SELECT * FROM ( SELECT source AS website, ( CASE brand WHEN ‘Maybelline‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, tag_name AS title, ROUND( IFNULL( ( POWER(IFNULL(pic_like_num, 0), 2) + POWER( IFNULL(pic_comments_num, 0), 2 ) + POWER(IFNULL(browse_num, 0), 2) ) / ( IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0) ), 0 ), 0 ) AS volume, IFNULL(pic_like_num, 0) AS dianzan_num, IFNULL(pic_comments_num, 0) AS comment_num, IFNULL(browse_num, 0) AS browse_num, 1 / 0 AS stay_time, REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, ‘upload/show/‘, - 1 ), 10 ), ‘/‘, ‘‘ ) AS upload_date, pic_url AS download_url, REPLACE (date_time, ‘-‘, ‘‘) AS sys_date, ‘picture‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM picture.nice WHERE source = ‘nice‘ AND brand = ‘Maybelline‘ ORDER BY volume DESC LIMIT 20 ) 5_nice_Maybelline UNION ALL -- 6_nice_Loreal SELECT * FROM ( SELECT source AS website, ( CASE brand WHEN ‘Maybelline‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, tag_name AS title, ROUND( IFNULL( ( POWER(IFNULL(pic_like_num, 0), 2) + POWER( IFNULL(pic_comments_num, 0), 2 ) + POWER(IFNULL(browse_num, 0), 2) ) / ( IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0) ), 0 ), 0 ) AS volume, IFNULL(pic_like_num, 0) AS dianzan_num, IFNULL(pic_comments_num, 0) AS comment_num, IFNULL(browse_num, 0) AS browse_num, 1 / 0 AS stay_time, REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, ‘upload/show/‘, - 1 ), 10 ), ‘/‘, ‘‘ ) AS upload_date, pic_url AS download_url, REPLACE (date_time, ‘-‘, ‘‘) AS sys_date, ‘picture‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM picture.nice WHERE source = ‘nice‘ AND brand = ‘Loreal‘ ORDER BY volume DESC LIMIT 20 ) 6_nice_Loreal UNION ALL -- 7_nice_Meiji SELECT * FROM ( SELECT source AS website, ( CASE brand WHEN ‘Maybelline‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, tag_name AS title, ROUND( IFNULL( ( POWER(IFNULL(pic_like_num, 0), 2) + POWER( IFNULL(pic_comments_num, 0), 2 ) + POWER(IFNULL(browse_num, 0), 2) ) / ( IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0) ), 0 ), 0 ) AS volume, IFNULL(pic_like_num, 0) AS dianzan_num, IFNULL(pic_comments_num, 0) AS comment_num, IFNULL(browse_num, 0) AS browse_num, 1 / 0 AS stay_time, REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, ‘upload/show/‘, - 1 ), 10 ), ‘/‘, ‘‘ ) AS upload_date, pic_url AS download_url, REPLACE (date_time, ‘-‘, ‘‘) AS sys_date, ‘picture‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM picture.nice WHERE source = ‘nice‘ AND brand = ‘Meiji‘ ORDER BY volume DESC LIMIT 20 ) 7_nice_Meiji UNION ALL -- 8_nice_NYX SELECT * FROM ( SELECT source AS website, ( CASE brand WHEN ‘Maybelline‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, tag_name AS title, ROUND( IFNULL( ( POWER(IFNULL(pic_like_num, 0), 2) + POWER( IFNULL(pic_comments_num, 0), 2 ) + POWER(IFNULL(browse_num, 0), 2) ) / ( IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0) ), 0 ), 0 ) AS volume, IFNULL(pic_like_num, 0) AS dianzan_num, IFNULL(pic_comments_num, 0) AS comment_num, IFNULL(browse_num, 0) AS browse_num, 1 / 0 AS stay_time, REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, ‘upload/show/‘, - 1 ), 10 ), ‘/‘, ‘‘ ) AS upload_date, pic_url AS download_url, REPLACE (date_time, ‘-‘, ‘‘) AS sys_date, ‘picture‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM picture.nice WHERE source = ‘nice‘ AND brand = ‘Meiji‘ ORDER BY volume DESC LIMIT 20 ) 8_nice_NYX UNION ALL -- 9_美拍_美宝莲 SELECT * FROM ( SELECT DISTINCT source AS website, ( CASE keyword WHEN ‘美宝莲‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, title AS title, ROUND( IFNULL( ( POWER(IFNULL(zanCount, 0), 2) + POWER(IFNULL(commentCount, 0), 2) + POWER(IFNULL(watchCount, 0), 2) + POWER(IFNULL(netCount, 0), 2) + POWER(IFNULL(favCount, 0), 2) + POWER(IFNULL(tanmuCount, 0), 2) + POWER(IFNULL(shareCount, 0), 2) ) / ( IFNULL(zanCount, 0) + IFNULL(commentCount, 0) + IFNULL(watchCount, 0) + IFNULL(netCount, 0) + IFNULL(favCount, 0) + IFNULL(tanmuCount, 0) + IFNULL(shareCount, 0) ), 0 ), 0 ) AS volume, IFNULL(zanCount, 0) AS dianzan_num, IFNULL(commentCount, 0) AS comment_num, IFNULL(watchCount, 0) AS browse_num, length AS stay_time, REPLACE (uploadTime, ‘-‘, ‘‘) AS upload_date, downUrl AS download_url, REPLACE (LEFT(cralwTime, 10), ‘-‘, ‘‘) AS sys_date, ‘media‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM media.7videos WHERE source = ‘美拍‘ AND keyword = ‘美宝莲‘ AND downUrl <> ‘‘ ORDER BY volume DESC LIMIT 20 ) 9_美拍_美宝莲
2、SP过程:ddl_p_social_scan
drop procedure IF EXISTS p_social_scan; delimiter // CREATE DEFINER = `root`@`%` PROCEDURE `p_social_scan` ( IN v_dayid_first INT, IN v_dayid_end INT ) BEGIN DECLARE v_website VARCHAR (50) DEFAULT NULL; DECLARE v_keyword VARCHAR (4) DEFAULT NULL; DECLARE v_title VARCHAR (250) DEFAULT NULL; DECLARE v_volume DOUBLE (17, 0) DEFAULT 0; DECLARE v_dianzan_num BIGINT (20) DEFAULT 0; DECLARE v_comment_num BIGINT (20) DEFAULT 0; DECLARE v_browse_num BIGINT (20) DEFAULT 0; DECLARE v_stay_time VARCHAR (50) DEFAULT NULL; DECLARE v_upload_date INT (11) DEFAULT NULL; DECLARE v_download_url VARCHAR (250) DEFAULT NULL; DECLARE v_sys_date INT (10) DEFAULT NULL; DECLARE v_file_type VARCHAR (7) DEFAULT NULL; DECLARE v_media_file_name VARCHAR (250) DEFAULT NULL; DECLARE v_save_dir VARCHAR (250) DEFAULT NULL; DECLARE v_video_not_found INT (11) DEFAULT 0; DECLARE var INT DEFAULT 0; DECLARE cur CURSOR FOR -- 1_in_Maybelline SELECT * FROM ( SELECT source AS website, ( CASE brand WHEN ‘Maybelline‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, tag_name AS title, ROUND( IFNULL( ( POWER(IFNULL(pic_like_num, 0), 2) + POWER( IFNULL(pic_comments_num, 0), 2 ) + POWER(IFNULL(browse_num, 0), 2) ) / ( IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0) ), 0 ), 0 ) AS volume, IFNULL(pic_like_num, 0) AS dianzan_num, IFNULL(pic_comments_num, 0) AS comment_num, IFNULL(browse_num, 0) AS browse_num, 1 / 0 AS stay_time, REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, ‘.info/in/‘, - 1 ), 10 ), ‘/‘, ‘‘ ) AS upload_date, pic_url AS download_url, REPLACE (date_time, ‘-‘, ‘‘) AS sys_date, ‘picture‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM picture.nice WHERE source = ‘in‘ AND brand = ‘Maybelline‘ AND REPLACE (date_time, ‘-‘, ‘‘) >= v_dayid_first AND REPLACE (date_time, ‘-‘, ‘‘) < v_dayid_end ORDER BY volume DESC LIMIT 20 ) 1_in_Maybelline UNION ALL -- 2_in_Loreal SELECT * FROM ( SELECT source AS website, ( CASE brand WHEN ‘Maybelline‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, tag_name AS title, ROUND( IFNULL( ( POWER(IFNULL(pic_like_num, 0), 2) + POWER( IFNULL(pic_comments_num, 0), 2 ) + POWER(IFNULL(browse_num, 0), 2) ) / ( IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0) ), 0 ), 0 ) AS volume, IFNULL(pic_like_num, 0) AS dianzan_num, IFNULL(pic_comments_num, 0) AS comment_num, IFNULL(browse_num, 0) AS browse_num, 1 / 0 AS stay_time, REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, ‘.info/in/‘, - 1 ), 10 ), ‘/‘, ‘‘ ) AS upload_date, pic_url AS download_url, REPLACE (date_time, ‘-‘, ‘‘) AS sys_date, ‘picture‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM picture.nice WHERE source = ‘in‘ AND brand = ‘Loreal‘ AND REPLACE (date_time, ‘-‘, ‘‘) >= v_dayid_first AND REPLACE (date_time, ‘-‘, ‘‘) < v_dayid_end ORDER BY volume DESC LIMIT 20 ) 2_in_Loreal UNION ALL -- 3_in_Meiji SELECT * FROM ( SELECT source AS website, ( CASE brand WHEN ‘Maybelline‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, tag_name AS title, ROUND( IFNULL( ( POWER(IFNULL(pic_like_num, 0), 2) + POWER( IFNULL(pic_comments_num, 0), 2 ) + POWER(IFNULL(browse_num, 0), 2) ) / ( IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0) ), 0 ), 0 ) AS volume, IFNULL(pic_like_num, 0) AS dianzan_num, IFNULL(pic_comments_num, 0) AS comment_num, IFNULL(browse_num, 0) AS browse_num, 1 / 0 AS stay_time, REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, ‘.info/in/‘, - 1 ), 10 ), ‘/‘, ‘‘ ) AS upload_date, pic_url AS download_url, REPLACE (date_time, ‘-‘, ‘‘) AS sys_date, ‘picture‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM picture.nice WHERE source = ‘in‘ AND brand = ‘Meiji‘ AND REPLACE (date_time, ‘-‘, ‘‘) >= v_dayid_first AND REPLACE (date_time, ‘-‘, ‘‘) < v_dayid_end ORDER BY volume DESC LIMIT 20 ) 3_in_Meiji UNION ALL -- 4_in_NYX SELECT * FROM ( SELECT source AS website, ( CASE brand WHEN ‘Maybelline‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, tag_name AS title, ROUND( IFNULL( ( POWER(IFNULL(pic_like_num, 0), 2) + POWER( IFNULL(pic_comments_num, 0), 2 ) + POWER(IFNULL(browse_num, 0), 2) ) / ( IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0) ), 0 ), 0 ) AS volume, IFNULL(pic_like_num, 0) AS dianzan_num, IFNULL(pic_comments_num, 0) AS comment_num, IFNULL(browse_num, 0) AS browse_num, 1 / 0 AS stay_time, REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, ‘.info/in/‘, - 1 ), 10 ), ‘/‘, ‘‘ ) AS upload_date, pic_url AS download_url, REPLACE (date_time, ‘-‘, ‘‘) AS sys_date, ‘picture‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM picture.nice WHERE source = ‘in‘ AND brand = ‘NYX‘ AND REPLACE (date_time, ‘-‘, ‘‘) >= v_dayid_first AND REPLACE (date_time, ‘-‘, ‘‘) < v_dayid_end ORDER BY volume DESC LIMIT 20 ) 4_in_NYX UNION ALL -- 5_nice_Maybelline SELECT * FROM ( SELECT source AS website, ( CASE brand WHEN ‘Maybelline‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, tag_name AS title, ROUND( IFNULL( ( POWER(IFNULL(pic_like_num, 0), 2) + POWER( IFNULL(pic_comments_num, 0), 2 ) + POWER(IFNULL(browse_num, 0), 2) ) / ( IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0) ), 0 ), 0 ) AS volume, IFNULL(pic_like_num, 0) AS dianzan_num, IFNULL(pic_comments_num, 0) AS comment_num, IFNULL(browse_num, 0) AS browse_num, 1 / 0 AS stay_time, REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, ‘upload/show/‘, - 1 ), 10 ), ‘/‘, ‘‘ ) AS upload_date, pic_url AS download_url, REPLACE (date_time, ‘-‘, ‘‘) AS sys_date, ‘picture‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM picture.nice WHERE source = ‘nice‘ AND brand = ‘Maybelline‘ AND REPLACE (date_time, ‘-‘, ‘‘) >= v_dayid_first AND REPLACE (date_time, ‘-‘, ‘‘) < v_dayid_end ORDER BY volume DESC LIMIT 20 ) 5_nice_Maybelline UNION ALL -- 6_nice_Loreal SELECT * FROM ( SELECT source AS website, ( CASE brand WHEN ‘Maybelline‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, tag_name AS title, ROUND( IFNULL( ( POWER(IFNULL(pic_like_num, 0), 2) + POWER( IFNULL(pic_comments_num, 0), 2 ) + POWER(IFNULL(browse_num, 0), 2) ) / ( IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0) ), 0 ), 0 ) AS volume, IFNULL(pic_like_num, 0) AS dianzan_num, IFNULL(pic_comments_num, 0) AS comment_num, IFNULL(browse_num, 0) AS browse_num, 1 / 0 AS stay_time, REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, ‘upload/show/‘, - 1 ), 10 ), ‘/‘, ‘‘ ) AS upload_date, pic_url AS download_url, REPLACE (date_time, ‘-‘, ‘‘) AS sys_date, ‘picture‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM picture.nice WHERE source = ‘nice‘ AND brand = ‘Loreal‘ AND REPLACE (date_time, ‘-‘, ‘‘) >= v_dayid_first AND REPLACE (date_time, ‘-‘, ‘‘) < v_dayid_end ORDER BY volume DESC LIMIT 20 ) 6_nice_Loreal UNION ALL -- 7_nice_Meiji SELECT * FROM ( SELECT source AS website, ( CASE brand WHEN ‘Maybelline‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, tag_name AS title, ROUND( IFNULL( ( POWER(IFNULL(pic_like_num, 0), 2) + POWER( IFNULL(pic_comments_num, 0), 2 ) + POWER(IFNULL(browse_num, 0), 2) ) / ( IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0) ), 0 ), 0 ) AS volume, IFNULL(pic_like_num, 0) AS dianzan_num, IFNULL(pic_comments_num, 0) AS comment_num, IFNULL(browse_num, 0) AS browse_num, 1 / 0 AS stay_time, REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, ‘upload/show/‘, - 1 ), 10 ), ‘/‘, ‘‘ ) AS upload_date, pic_url AS download_url, REPLACE (date_time, ‘-‘, ‘‘) AS sys_date, ‘picture‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM picture.nice WHERE source = ‘nice‘ AND brand = ‘Meiji‘ AND REPLACE (date_time, ‘-‘, ‘‘) >= v_dayid_first AND REPLACE (date_time, ‘-‘, ‘‘) < v_dayid_end ORDER BY volume DESC LIMIT 20 ) 7_nice_Meiji UNION ALL -- 8_nice_NYX SELECT * FROM ( SELECT source AS website, ( CASE brand WHEN ‘Maybelline‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, tag_name AS title, ROUND( IFNULL( ( POWER(IFNULL(pic_like_num, 0), 2) + POWER( IFNULL(pic_comments_num, 0), 2 ) + POWER(IFNULL(browse_num, 0), 2) ) / ( IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0) ), 0 ), 0 ) AS volume, IFNULL(pic_like_num, 0) AS dianzan_num, IFNULL(pic_comments_num, 0) AS comment_num, IFNULL(browse_num, 0) AS browse_num, 1 / 0 AS stay_time, REPLACE ( LEFT ( SUBSTRING_INDEX( picture.nice.pic_url, ‘upload/show/‘, - 1 ), 10 ), ‘/‘, ‘‘ ) AS upload_date, pic_url AS download_url, REPLACE (date_time, ‘-‘, ‘‘) AS sys_date, ‘picture‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM picture.nice WHERE source = ‘nice‘ AND brand = ‘Meiji‘ AND REPLACE (date_time, ‘-‘, ‘‘) >= v_dayid_first AND REPLACE (date_time, ‘-‘, ‘‘) < v_dayid_end ORDER BY volume DESC LIMIT 20 ) 8_nice_NYX UNION ALL -- 9_美拍_美宝莲 SELECT * FROM ( SELECT DISTINCT source AS website, ( CASE keyword WHEN ‘美宝莲‘ THEN ‘美宝莲‘ WHEN ‘Loreal‘ THEN ‘欧莱雅‘ WHEN ‘Meiji‘ THEN ‘美即‘ WHEN ‘NYX‘ THEN ‘NYX‘ ELSE ‘none‘ END ) AS keyword, title AS title, ROUND( IFNULL( ( POWER(IFNULL(zanCount, 0), 2) + POWER(IFNULL(commentCount, 0), 2) + POWER(IFNULL(watchCount, 0), 2) + POWER(IFNULL(netCount, 0), 2) + POWER(IFNULL(favCount, 0), 2) + POWER(IFNULL(tanmuCount, 0), 2) + POWER(IFNULL(shareCount, 0), 2) ) / ( IFNULL(zanCount, 0) + IFNULL(commentCount, 0) + IFNULL(watchCount, 0) + IFNULL(netCount, 0) + IFNULL(favCount, 0) + IFNULL(tanmuCount, 0) + IFNULL(shareCount, 0) ), 0 ), 0 ) AS volume, IFNULL(zanCount, 0) AS dianzan_num, IFNULL(commentCount, 0) AS comment_num, IFNULL(watchCount, 0) AS browse_num, length AS stay_time, REPLACE (uploadTime, ‘-‘, ‘‘) AS upload_date, downUrl AS download_url, REPLACE (LEFT(cralwTime, 10), ‘-‘, ‘‘) AS sys_date, ‘media‘ AS file_type, 1 / 0 AS media_file_name, 1 / 0 AS save_dir, 0 AS video_not_found FROM media.7videos WHERE source = ‘美拍‘ AND keyword = ‘美宝莲‘ AND downUrl <> ‘‘ AND REPLACE (LEFT(cralwTime, 10), ‘-‘, ‘‘) >= v_dayid_first AND REPLACE (LEFT(cralwTime, 10), ‘-‘, ‘‘) < v_dayid_end ORDER BY volume DESC LIMIT 20 ) 9_美拍_美宝莲; DECLARE CONTINUE HANDLER FOR NOT FOUND SET var = 1; /*是否达到记录的末尾控制变量*/ OPEN cur; FETCH cur INTO v_website, v_keyword, v_title, v_volume, v_dianzan_num, v_comment_num, v_browse_num, v_stay_time, v_upload_date, v_download_url, v_sys_date, v_file_type, v_media_file_name, v_save_dir, v_video_not_found; /*获取第一条记录*/ WHILE var <> 1 DO INSERT INTO loreal.`social_scan` ( website, keyword, title, volume, dianzan_num, comment_num, browse_num, stay_time, upload_date, download_url, sys_date, file_type, media_file_name, save_dir, video_not_found ) VALUES ( v_website, v_keyword, v_title, v_volume, v_dianzan_num, v_comment_num, v_browse_num, v_stay_time, v_upload_date, v_download_url, v_sys_date, v_file_type, v_media_file_name, v_save_dir, v_video_not_found ); FETCH cur INTO v_website, v_keyword, v_title, v_volume, v_dianzan_num, v_comment_num, v_browse_num, v_stay_time, v_upload_date, v_download_url, v_sys_date, v_file_type, v_media_file_name, v_save_dir, v_video_not_found; /*取下一条记录*/ END WHILE; CLOSE cur; END // delimiter ;
3、JOB事件:ddl_p_social_scan_job
DROP EVENT IF EXISTS p_social_scan_job; delimiter // CREATE DEFINER = `root`@`%` EVENT `p_social_scan_job` ON SCHEDULE EVERY 1 DAY STARTS ‘2016-05-16 02:00:00‘ ON COMPLETION PRESERVE ENABLE DO BEGIN CALL loreal.p_social_scan ( cast( ( date_sub(curdate(), INTERVAL 1 DAY) ) AS SIGNED INTEGER ), cast(CURDATE() AS SIGNED INTEGER) ) ; END// delimiter ;
标签:
原文地址:http://www.cnblogs.com/cenliang/p/5499392.html