码迷,mamicode.com
首页 > 数据库 > 详细

mysql etl过程 p_social_scan

时间:2016-05-16 21:44:07      阅读:277      评论:0      收藏:0      [点我收藏+]

标签:

2016-05-16

最近公司有个项目,要爬取7个视频网站、3个图片网站,关于5个关键词的视频、图片数据,并将爬取到的视频、图片做成展示墙,放到企业客户的iPad上展示用。以下是相关的etl过程,写了一整天,纪念一下。哈哈~

1、SQL语句:ddl_social_scan  

技术分享
-- CREATE TABLE social_scan AS
-- 1_in_Maybelline
SELECT
    *
FROM
    (
        SELECT
            source AS website,
            (
                CASE brand
                WHEN Maybelline THEN
                    美宝莲
                WHEN Loreal THEN
                    欧莱雅
                WHEN Meiji THEN
                    美即
                WHEN NYX THEN
                    NYX
                ELSE
                    none
                END
            ) AS keyword,
            tag_name AS title,
            ROUND(
                IFNULL(
                    (
                        POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                            IFNULL(pic_comments_num, 0),
                            2
                        ) + POWER(IFNULL(browse_num, 0), 2)
                    ) / (
                        IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                    ),
                    0
                ),
                0
            ) AS volume,
            IFNULL(pic_like_num, 0) AS dianzan_num,
            IFNULL(pic_comments_num, 0) AS comment_num,
            IFNULL(browse_num, 0) AS browse_num,
            1 / 0 AS stay_time,
            REPLACE (
                LEFT (
                    SUBSTRING_INDEX(
                        picture.nice.pic_url,
                        .info/in/,
                        - 1
                    ),
                    10
                ),
                /,
                ‘‘
            ) AS upload_date,
            pic_url AS download_url,
            REPLACE (date_time, -, ‘‘) AS sys_date,
            picture AS file_type,
            1 / 0 AS media_file_name,
            1 / 0 AS save_dir,
            0 AS video_not_found
        FROM
            picture.nice
        WHERE
            source = in
        AND brand = Maybelline
        ORDER BY
            volume DESC
        LIMIT 20
    ) 1_in_Maybelline
UNION ALL
    -- 2_in_Loreal
    SELECT
        *
    FROM
        (
            SELECT
                source AS website,
                (
                    CASE brand
                    WHEN Maybelline THEN
                        美宝莲
                    WHEN Loreal THEN
                        欧莱雅
                    WHEN Meiji THEN
                        美即
                    WHEN NYX THEN
                        NYX
                    ELSE
                        none
                    END
                ) AS keyword,
                tag_name AS title,
                ROUND(
                    IFNULL(
                        (
                            POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                IFNULL(pic_comments_num, 0),
                                2
                            ) + POWER(IFNULL(browse_num, 0), 2)
                        ) / (
                            IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                        ),
                        0
                    ),
                    0
                ) AS volume,
                IFNULL(pic_like_num, 0) AS dianzan_num,
                IFNULL(pic_comments_num, 0) AS comment_num,
                IFNULL(browse_num, 0) AS browse_num,
                1 / 0 AS stay_time,
                REPLACE (
                    LEFT (
                        SUBSTRING_INDEX(
                            picture.nice.pic_url,
                            .info/in/,
                            - 1
                        ),
                        10
                    ),
                    /,
                    ‘‘
                ) AS upload_date,
                pic_url AS download_url,
                REPLACE (date_time, -, ‘‘) AS sys_date,
                picture AS file_type,
                1 / 0 AS media_file_name,
                1 / 0 AS save_dir,
                0 AS video_not_found
            FROM
                picture.nice
            WHERE
                source = in
            AND brand = Loreal
            ORDER BY
                volume DESC
            LIMIT 20
        ) 2_in_Loreal
    UNION ALL
        -- 3_in_Meiji
        SELECT
            *
        FROM
            (
                SELECT
                    source AS website,
                    (
                        CASE brand
                        WHEN Maybelline THEN
                            美宝莲
                        WHEN Loreal THEN
                            欧莱雅
                        WHEN Meiji THEN
                            美即
                        WHEN NYX THEN
                            NYX
                        ELSE
                            none
                        END
                    ) AS keyword,
                    tag_name AS title,
                    ROUND(
                        IFNULL(
                            (
                                POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                    IFNULL(pic_comments_num, 0),
                                    2
                                ) + POWER(IFNULL(browse_num, 0), 2)
                            ) / (
                                IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                            ),
                            0
                        ),
                        0
                    ) AS volume,
                    IFNULL(pic_like_num, 0) AS dianzan_num,
                    IFNULL(pic_comments_num, 0) AS comment_num,
                    IFNULL(browse_num, 0) AS browse_num,
                    1 / 0 AS stay_time,
                    REPLACE (
                        LEFT (
                            SUBSTRING_INDEX(
                                picture.nice.pic_url,
                                .info/in/,
                                - 1
                            ),
                            10
                        ),
                        /,
                        ‘‘
                    ) AS upload_date,
                    pic_url AS download_url,
                    REPLACE (date_time, -, ‘‘) AS sys_date,
                    picture AS file_type,
                    1 / 0 AS media_file_name,
                    1 / 0 AS save_dir,
                    0 AS video_not_found
                FROM
                    picture.nice
                WHERE
                    source = in
                AND brand = Meiji
                ORDER BY
                    volume DESC
                LIMIT 20
            ) 3_in_Meiji
        UNION ALL
            -- 4_in_NYX
            SELECT
                *
            FROM
                (
                    SELECT
                        source AS website,
                        (
                            CASE brand
                            WHEN Maybelline THEN
                                美宝莲
                            WHEN Loreal THEN
                                欧莱雅
                            WHEN Meiji THEN
                                美即
                            WHEN NYX THEN
                                NYX
                            ELSE
                                none
                            END
                        ) AS keyword,
                        tag_name AS title,
                        ROUND(
                            IFNULL(
                                (
                                    POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                        IFNULL(pic_comments_num, 0),
                                        2
                                    ) + POWER(IFNULL(browse_num, 0), 2)
                                ) / (
                                    IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                ),
                                0
                            ),
                            0
                        ) AS volume,
                        IFNULL(pic_like_num, 0) AS dianzan_num,
                        IFNULL(pic_comments_num, 0) AS comment_num,
                        IFNULL(browse_num, 0) AS browse_num,
                        1 / 0 AS stay_time,
                        REPLACE (
                            LEFT (
                                SUBSTRING_INDEX(
                                    picture.nice.pic_url,
                                    .info/in/,
                                    - 1
                                ),
                                10
                            ),
                            /,
                            ‘‘
                        ) AS upload_date,
                        pic_url AS download_url,
                        REPLACE (date_time, -, ‘‘) AS sys_date,
                        picture AS file_type,
                        1 / 0 AS media_file_name,
                        1 / 0 AS save_dir,
                        0 AS video_not_found
                    FROM
                        picture.nice
                    WHERE
                        source = in
                    AND brand = NYX
                    ORDER BY
                        volume DESC
                    LIMIT 20
                ) 4_in_NYX
            UNION ALL
                -- 5_nice_Maybelline
                SELECT
                    *
                FROM
                    (
                        SELECT
                            source AS website,
                            (
                                CASE brand
                                WHEN Maybelline THEN
                                    美宝莲
                                WHEN Loreal THEN
                                    欧莱雅
                                WHEN Meiji THEN
                                    美即
                                WHEN NYX THEN
                                    NYX
                                ELSE
                                    none
                                END
                            ) AS keyword,
                            tag_name AS title,
                            ROUND(
                                IFNULL(
                                    (
                                        POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                            IFNULL(pic_comments_num, 0),
                                            2
                                        ) + POWER(IFNULL(browse_num, 0), 2)
                                    ) / (
                                        IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                    ),
                                    0
                                ),
                                0
                            ) AS volume,
                            IFNULL(pic_like_num, 0) AS dianzan_num,
                            IFNULL(pic_comments_num, 0) AS comment_num,
                            IFNULL(browse_num, 0) AS browse_num,
                            1 / 0 AS stay_time,
                            REPLACE (
                                LEFT (
                                    SUBSTRING_INDEX(
                                        picture.nice.pic_url,
                                        upload/show/,
                                        - 1
                                    ),
                                    10
                                ),
                                /,
                                ‘‘
                            ) AS upload_date,
                            pic_url AS download_url,
                            REPLACE (date_time, -, ‘‘) AS sys_date,
                            picture AS file_type,
                            1 / 0 AS media_file_name,
                            1 / 0 AS save_dir,
                            0 AS video_not_found
                        FROM
                            picture.nice
                        WHERE
                            source = nice
                        AND brand = Maybelline
                        ORDER BY
                            volume DESC
                        LIMIT 20
                    ) 5_nice_Maybelline
                UNION ALL
                    -- 6_nice_Loreal
                    SELECT
                        *
                    FROM
                        (
                            SELECT
                                source AS website,
                                (
                                    CASE brand
                                    WHEN Maybelline THEN
                                        美宝莲
                                    WHEN Loreal THEN
                                        欧莱雅
                                    WHEN Meiji THEN
                                        美即
                                    WHEN NYX THEN
                                        NYX
                                    ELSE
                                        none
                                    END
                                ) AS keyword,
                                tag_name AS title,
                                ROUND(
                                    IFNULL(
                                        (
                                            POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                                IFNULL(pic_comments_num, 0),
                                                2
                                            ) + POWER(IFNULL(browse_num, 0), 2)
                                        ) / (
                                            IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                        ),
                                        0
                                    ),
                                    0
                                ) AS volume,
                                IFNULL(pic_like_num, 0) AS dianzan_num,
                                IFNULL(pic_comments_num, 0) AS comment_num,
                                IFNULL(browse_num, 0) AS browse_num,
                                1 / 0 AS stay_time,
                                REPLACE (
                                    LEFT (
                                        SUBSTRING_INDEX(
                                            picture.nice.pic_url,
                                            upload/show/,
                                            - 1
                                        ),
                                        10
                                    ),
                                    /,
                                    ‘‘
                                ) AS upload_date,
                                pic_url AS download_url,
                                REPLACE (date_time, -, ‘‘) AS sys_date,
                                picture AS file_type,
                                1 / 0 AS media_file_name,
                                1 / 0 AS save_dir,
                                0 AS video_not_found
                            FROM
                                picture.nice
                            WHERE
                                source = nice
                            AND brand = Loreal
                            ORDER BY
                                volume DESC
                            LIMIT 20
                        ) 6_nice_Loreal
                    UNION ALL
                        -- 7_nice_Meiji
                        SELECT
                            *
                        FROM
                            (
                                SELECT
                                    source AS website,
                                    (
                                        CASE brand
                                        WHEN Maybelline THEN
                                            美宝莲
                                        WHEN Loreal THEN
                                            欧莱雅
                                        WHEN Meiji THEN
                                            美即
                                        WHEN NYX THEN
                                            NYX
                                        ELSE
                                            none
                                        END
                                    ) AS keyword,
                                    tag_name AS title,
                                    ROUND(
                                        IFNULL(
                                            (
                                                POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                                    IFNULL(pic_comments_num, 0),
                                                    2
                                                ) + POWER(IFNULL(browse_num, 0), 2)
                                            ) / (
                                                IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                            ),
                                            0
                                        ),
                                        0
                                    ) AS volume,
                                    IFNULL(pic_like_num, 0) AS dianzan_num,
                                    IFNULL(pic_comments_num, 0) AS comment_num,
                                    IFNULL(browse_num, 0) AS browse_num,
                                    1 / 0 AS stay_time,
                                    REPLACE (
                                        LEFT (
                                            SUBSTRING_INDEX(
                                                picture.nice.pic_url,
                                                upload/show/,
                                                - 1
                                            ),
                                            10
                                        ),
                                        /,
                                        ‘‘
                                    ) AS upload_date,
                                    pic_url AS download_url,
                                    REPLACE (date_time, -, ‘‘) AS sys_date,
                                    picture AS file_type,
                                    1 / 0 AS media_file_name,
                                    1 / 0 AS save_dir,
                                    0 AS video_not_found
                                FROM
                                    picture.nice
                                WHERE
                                    source = nice
                                AND brand = Meiji
                                ORDER BY
                                    volume DESC
                                LIMIT 20
                            ) 7_nice_Meiji
                        UNION ALL
                            -- 8_nice_NYX
                            SELECT
                                *
                            FROM
                                (
                                    SELECT
                                        source AS website,
                                        (
                                            CASE brand
                                            WHEN Maybelline THEN
                                                美宝莲
                                            WHEN Loreal THEN
                                                欧莱雅
                                            WHEN Meiji THEN
                                                美即
                                            WHEN NYX THEN
                                                NYX
                                            ELSE
                                                none
                                            END
                                        ) AS keyword,
                                        tag_name AS title,
                                        ROUND(
                                            IFNULL(
                                                (
                                                    POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                                        IFNULL(pic_comments_num, 0),
                                                        2
                                                    ) + POWER(IFNULL(browse_num, 0), 2)
                                                ) / (
                                                    IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                                ),
                                                0
                                            ),
                                            0
                                        ) AS volume,
                                        IFNULL(pic_like_num, 0) AS dianzan_num,
                                        IFNULL(pic_comments_num, 0) AS comment_num,
                                        IFNULL(browse_num, 0) AS browse_num,
                                        1 / 0 AS stay_time,
                                        REPLACE (
                                            LEFT (
                                                SUBSTRING_INDEX(
                                                    picture.nice.pic_url,
                                                    upload/show/,
                                                    - 1
                                                ),
                                                10
                                            ),
                                            /,
                                            ‘‘
                                        ) AS upload_date,
                                        pic_url AS download_url,
                                        REPLACE (date_time, -, ‘‘) AS sys_date,
                                        picture AS file_type,
                                        1 / 0 AS media_file_name,
                                        1 / 0 AS save_dir,
                                        0 AS video_not_found
                                    FROM
                                        picture.nice
                                    WHERE
                                        source = nice
                                    AND brand = Meiji
                                    ORDER BY
                                        volume DESC
                                    LIMIT 20
                                ) 8_nice_NYX
                            UNION ALL
                                -- 9_美拍_美宝莲 
                                SELECT
                                    *
                                FROM
                                    (
                                        SELECT DISTINCT
                                            source AS website,
                                            (
                                                CASE keyword
                                                WHEN 美宝莲 THEN
                                                    美宝莲
                                                WHEN Loreal THEN
                                                    欧莱雅
                                                WHEN Meiji THEN
                                                    美即
                                                WHEN NYX THEN
                                                    NYX
                                                ELSE
                                                    none
                                                END
                                            ) AS keyword,
                                            title AS title,
                                            ROUND(
                                                IFNULL(
                                                    (
                                                        POWER(IFNULL(zanCount, 0), 2) + POWER(IFNULL(commentCount, 0), 2) + POWER(IFNULL(watchCount, 0), 2) + POWER(IFNULL(netCount, 0), 2) + POWER(IFNULL(favCount, 0), 2) + POWER(IFNULL(tanmuCount, 0), 2) + POWER(IFNULL(shareCount, 0), 2)
                                                    ) / (
                                                        IFNULL(zanCount, 0) + IFNULL(commentCount, 0) + IFNULL(watchCount, 0) + IFNULL(netCount, 0) + IFNULL(favCount, 0) + IFNULL(tanmuCount, 0) + IFNULL(shareCount, 0)
                                                    ),
                                                    0
                                                ),
                                                0
                                            ) AS volume,
                                            IFNULL(zanCount, 0) AS dianzan_num,
                                            IFNULL(commentCount, 0) AS comment_num,
                                            IFNULL(watchCount, 0) AS browse_num,
                                            length AS stay_time,
                                            REPLACE (uploadTime, -, ‘‘) AS upload_date,
                                            downUrl AS download_url,
                                            REPLACE (LEFT(cralwTime, 10), -, ‘‘) AS sys_date,
                                            media AS file_type,
                                            1 / 0 AS media_file_name,
                                            1 / 0 AS save_dir,
                                            0 AS video_not_found
                                        FROM
                                            media.7videos
                                        WHERE
                                            source = 美拍
                                        AND keyword = 美宝莲
                                        AND downUrl <> ‘‘
                                        ORDER BY
                                            volume DESC
                                        LIMIT 20
                                    ) 9_美拍_美宝莲
View Code

2、SP过程:ddl_p_social_scan

技术分享
drop procedure IF EXISTS p_social_scan;
delimiter //

CREATE DEFINER = `root`@`%` PROCEDURE `p_social_scan` (
    IN v_dayid_first INT,
    IN v_dayid_end INT
)
BEGIN
    DECLARE
        v_website VARCHAR (50) DEFAULT NULL;

DECLARE
    v_keyword VARCHAR (4) DEFAULT NULL;

DECLARE
    v_title VARCHAR (250) DEFAULT NULL;

DECLARE
    v_volume DOUBLE (17, 0) DEFAULT 0;

DECLARE
    v_dianzan_num BIGINT (20) DEFAULT 0;

DECLARE
    v_comment_num BIGINT (20) DEFAULT 0;

DECLARE
    v_browse_num BIGINT (20) DEFAULT 0;

DECLARE
    v_stay_time VARCHAR (50) DEFAULT NULL;

DECLARE
    v_upload_date INT (11) DEFAULT NULL;

DECLARE
    v_download_url VARCHAR (250) DEFAULT NULL;

DECLARE
    v_sys_date INT (10) DEFAULT NULL;

DECLARE
    v_file_type VARCHAR (7) DEFAULT NULL;

DECLARE
    v_media_file_name VARCHAR (250) DEFAULT NULL;

DECLARE
    v_save_dir VARCHAR (250) DEFAULT NULL;

DECLARE
    v_video_not_found INT (11) DEFAULT 0;

DECLARE
    var INT DEFAULT 0;

DECLARE
    cur CURSOR FOR -- 1_in_Maybelline
    SELECT
        *
    FROM
        (
            SELECT
                source AS website,
                (
                    CASE brand
                    WHEN Maybelline THEN
                        美宝莲
                    WHEN Loreal THEN
                        欧莱雅
                    WHEN Meiji THEN
                        美即
                    WHEN NYX THEN
                        NYX
                    ELSE
                        none
                    END
                ) AS keyword,
                tag_name AS title,
                ROUND(
                    IFNULL(
                        (
                            POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                IFNULL(pic_comments_num, 0),
                                2
                            ) + POWER(IFNULL(browse_num, 0), 2)
                        ) / (
                            IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                        ),
                        0
                    ),
                    0
                ) AS volume,
                IFNULL(pic_like_num, 0) AS dianzan_num,
                IFNULL(pic_comments_num, 0) AS comment_num,
                IFNULL(browse_num, 0) AS browse_num,
                1 / 0 AS stay_time,
                REPLACE (
                    LEFT (
                        SUBSTRING_INDEX(
                            picture.nice.pic_url,
                            .info/in/,
                            - 1
                        ),
                        10
                    ),
                    /,
                    ‘‘
                ) AS upload_date,
                pic_url AS download_url,
                REPLACE (date_time, -, ‘‘) AS sys_date,
                picture AS file_type,
                1 / 0 AS media_file_name,
                1 / 0 AS save_dir,
                0 AS video_not_found
            FROM
                picture.nice
            WHERE
                source = in
            AND brand = Maybelline
            AND REPLACE (date_time, -, ‘‘) >= v_dayid_first
            AND REPLACE (date_time, -, ‘‘) < v_dayid_end
            ORDER BY
                volume DESC
            LIMIT 20
        ) 1_in_Maybelline
    UNION ALL
        -- 2_in_Loreal
        SELECT
            *
        FROM
            (
                SELECT
                    source AS website,
                    (
                        CASE brand
                        WHEN Maybelline THEN
                            美宝莲
                        WHEN Loreal THEN
                            欧莱雅
                        WHEN Meiji THEN
                            美即
                        WHEN NYX THEN
                            NYX
                        ELSE
                            none
                        END
                    ) AS keyword,
                    tag_name AS title,
                    ROUND(
                        IFNULL(
                            (
                                POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                    IFNULL(pic_comments_num, 0),
                                    2
                                ) + POWER(IFNULL(browse_num, 0), 2)
                            ) / (
                                IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                            ),
                            0
                        ),
                        0
                    ) AS volume,
                    IFNULL(pic_like_num, 0) AS dianzan_num,
                    IFNULL(pic_comments_num, 0) AS comment_num,
                    IFNULL(browse_num, 0) AS browse_num,
                    1 / 0 AS stay_time,
                    REPLACE (
                        LEFT (
                            SUBSTRING_INDEX(
                                picture.nice.pic_url,
                                .info/in/,
                                - 1
                            ),
                            10
                        ),
                        /,
                        ‘‘
                    ) AS upload_date,
                    pic_url AS download_url,
                    REPLACE (date_time, -, ‘‘) AS sys_date,
                    picture AS file_type,
                    1 / 0 AS media_file_name,
                    1 / 0 AS save_dir,
                    0 AS video_not_found
                FROM
                    picture.nice
                WHERE
                    source = in
                AND brand = Loreal
                AND REPLACE (date_time, -, ‘‘) >= v_dayid_first
                AND REPLACE (date_time, -, ‘‘) < v_dayid_end
                ORDER BY
                    volume DESC
                LIMIT 20
            ) 2_in_Loreal
        UNION ALL
            -- 3_in_Meiji
            SELECT
                *
            FROM
                (
                    SELECT
                        source AS website,
                        (
                            CASE brand
                            WHEN Maybelline THEN
                                美宝莲
                            WHEN Loreal THEN
                                欧莱雅
                            WHEN Meiji THEN
                                美即
                            WHEN NYX THEN
                                NYX
                            ELSE
                                none
                            END
                        ) AS keyword,
                        tag_name AS title,
                        ROUND(
                            IFNULL(
                                (
                                    POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                        IFNULL(pic_comments_num, 0),
                                        2
                                    ) + POWER(IFNULL(browse_num, 0), 2)
                                ) / (
                                    IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                ),
                                0
                            ),
                            0
                        ) AS volume,
                        IFNULL(pic_like_num, 0) AS dianzan_num,
                        IFNULL(pic_comments_num, 0) AS comment_num,
                        IFNULL(browse_num, 0) AS browse_num,
                        1 / 0 AS stay_time,
                        REPLACE (
                            LEFT (
                                SUBSTRING_INDEX(
                                    picture.nice.pic_url,
                                    .info/in/,
                                    - 1
                                ),
                                10
                            ),
                            /,
                            ‘‘
                        ) AS upload_date,
                        pic_url AS download_url,
                        REPLACE (date_time, -, ‘‘) AS sys_date,
                        picture AS file_type,
                        1 / 0 AS media_file_name,
                        1 / 0 AS save_dir,
                        0 AS video_not_found
                    FROM
                        picture.nice
                    WHERE
                        source = in
                    AND brand = Meiji
                    AND REPLACE (date_time, -, ‘‘) >= v_dayid_first
                    AND REPLACE (date_time, -, ‘‘) < v_dayid_end
                    ORDER BY
                        volume DESC
                    LIMIT 20
                ) 3_in_Meiji
            UNION ALL
                -- 4_in_NYX
                SELECT
                    *
                FROM
                    (
                        SELECT
                            source AS website,
                            (
                                CASE brand
                                WHEN Maybelline THEN
                                    美宝莲
                                WHEN Loreal THEN
                                    欧莱雅
                                WHEN Meiji THEN
                                    美即
                                WHEN NYX THEN
                                    NYX
                                ELSE
                                    none
                                END
                            ) AS keyword,
                            tag_name AS title,
                            ROUND(
                                IFNULL(
                                    (
                                        POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                            IFNULL(pic_comments_num, 0),
                                            2
                                        ) + POWER(IFNULL(browse_num, 0), 2)
                                    ) / (
                                        IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                    ),
                                    0
                                ),
                                0
                            ) AS volume,
                            IFNULL(pic_like_num, 0) AS dianzan_num,
                            IFNULL(pic_comments_num, 0) AS comment_num,
                            IFNULL(browse_num, 0) AS browse_num,
                            1 / 0 AS stay_time,
                            REPLACE (
                                LEFT (
                                    SUBSTRING_INDEX(
                                        picture.nice.pic_url,
                                        .info/in/,
                                        - 1
                                    ),
                                    10
                                ),
                                /,
                                ‘‘
                            ) AS upload_date,
                            pic_url AS download_url,
                            REPLACE (date_time, -, ‘‘) AS sys_date,
                            picture AS file_type,
                            1 / 0 AS media_file_name,
                            1 / 0 AS save_dir,
                            0 AS video_not_found
                        FROM
                            picture.nice
                        WHERE
                            source = in
                        AND brand = NYX
                        AND REPLACE (date_time, -, ‘‘) >= v_dayid_first
                        AND REPLACE (date_time, -, ‘‘) < v_dayid_end
                        ORDER BY
                            volume DESC
                        LIMIT 20
                    ) 4_in_NYX
                UNION ALL
                    -- 5_nice_Maybelline
                    SELECT
                        *
                    FROM
                        (
                            SELECT
                                source AS website,
                                (
                                    CASE brand
                                    WHEN Maybelline THEN
                                        美宝莲
                                    WHEN Loreal THEN
                                        欧莱雅
                                    WHEN Meiji THEN
                                        美即
                                    WHEN NYX THEN
                                        NYX
                                    ELSE
                                        none
                                    END
                                ) AS keyword,
                                tag_name AS title,
                                ROUND(
                                    IFNULL(
                                        (
                                            POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                                IFNULL(pic_comments_num, 0),
                                                2
                                            ) + POWER(IFNULL(browse_num, 0), 2)
                                        ) / (
                                            IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                        ),
                                        0
                                    ),
                                    0
                                ) AS volume,
                                IFNULL(pic_like_num, 0) AS dianzan_num,
                                IFNULL(pic_comments_num, 0) AS comment_num,
                                IFNULL(browse_num, 0) AS browse_num,
                                1 / 0 AS stay_time,
                                REPLACE (
                                    LEFT (
                                        SUBSTRING_INDEX(
                                            picture.nice.pic_url,
                                            upload/show/,
                                            - 1
                                        ),
                                        10
                                    ),
                                    /,
                                    ‘‘
                                ) AS upload_date,
                                pic_url AS download_url,
                                REPLACE (date_time, -, ‘‘) AS sys_date,
                                picture AS file_type,
                                1 / 0 AS media_file_name,
                                1 / 0 AS save_dir,
                                0 AS video_not_found
                            FROM
                                picture.nice
                            WHERE
                                source = nice
                            AND brand = Maybelline
                            AND REPLACE (date_time, -, ‘‘) >= v_dayid_first
                            AND REPLACE (date_time, -, ‘‘) < v_dayid_end
                            ORDER BY
                                volume DESC
                            LIMIT 20
                        ) 5_nice_Maybelline
                    UNION ALL
                        -- 6_nice_Loreal
                        SELECT
                            *
                        FROM
                            (
                                SELECT
                                    source AS website,
                                    (
                                        CASE brand
                                        WHEN Maybelline THEN
                                            美宝莲
                                        WHEN Loreal THEN
                                            欧莱雅
                                        WHEN Meiji THEN
                                            美即
                                        WHEN NYX THEN
                                            NYX
                                        ELSE
                                            none
                                        END
                                    ) AS keyword,
                                    tag_name AS title,
                                    ROUND(
                                        IFNULL(
                                            (
                                                POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                                    IFNULL(pic_comments_num, 0),
                                                    2
                                                ) + POWER(IFNULL(browse_num, 0), 2)
                                            ) / (
                                                IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                            ),
                                            0
                                        ),
                                        0
                                    ) AS volume,
                                    IFNULL(pic_like_num, 0) AS dianzan_num,
                                    IFNULL(pic_comments_num, 0) AS comment_num,
                                    IFNULL(browse_num, 0) AS browse_num,
                                    1 / 0 AS stay_time,
                                    REPLACE (
                                        LEFT (
                                            SUBSTRING_INDEX(
                                                picture.nice.pic_url,
                                                upload/show/,
                                                - 1
                                            ),
                                            10
                                        ),
                                        /,
                                        ‘‘
                                    ) AS upload_date,
                                    pic_url AS download_url,
                                    REPLACE (date_time, -, ‘‘) AS sys_date,
                                    picture AS file_type,
                                    1 / 0 AS media_file_name,
                                    1 / 0 AS save_dir,
                                    0 AS video_not_found
                                FROM
                                    picture.nice
                                WHERE
                                    source = nice
                                AND brand = Loreal
                                AND REPLACE (date_time, -, ‘‘) >= v_dayid_first
                                AND REPLACE (date_time, -, ‘‘) < v_dayid_end
                                ORDER BY
                                    volume DESC
                                LIMIT 20
                            ) 6_nice_Loreal
                        UNION ALL
                            -- 7_nice_Meiji
                            SELECT
                                *
                            FROM
                                (
                                    SELECT
                                        source AS website,
                                        (
                                            CASE brand
                                            WHEN Maybelline THEN
                                                美宝莲
                                            WHEN Loreal THEN
                                                欧莱雅
                                            WHEN Meiji THEN
                                                美即
                                            WHEN NYX THEN
                                                NYX
                                            ELSE
                                                none
                                            END
                                        ) AS keyword,
                                        tag_name AS title,
                                        ROUND(
                                            IFNULL(
                                                (
                                                    POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                                        IFNULL(pic_comments_num, 0),
                                                        2
                                                    ) + POWER(IFNULL(browse_num, 0), 2)
                                                ) / (
                                                    IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                                ),
                                                0
                                            ),
                                            0
                                        ) AS volume,
                                        IFNULL(pic_like_num, 0) AS dianzan_num,
                                        IFNULL(pic_comments_num, 0) AS comment_num,
                                        IFNULL(browse_num, 0) AS browse_num,
                                        1 / 0 AS stay_time,
                                        REPLACE (
                                            LEFT (
                                                SUBSTRING_INDEX(
                                                    picture.nice.pic_url,
                                                    upload/show/,
                                                    - 1
                                                ),
                                                10
                                            ),
                                            /,
                                            ‘‘
                                        ) AS upload_date,
                                        pic_url AS download_url,
                                        REPLACE (date_time, -, ‘‘) AS sys_date,
                                        picture AS file_type,
                                        1 / 0 AS media_file_name,
                                        1 / 0 AS save_dir,
                                        0 AS video_not_found
                                    FROM
                                        picture.nice
                                    WHERE
                                        source = nice
                                    AND brand = Meiji
                                    AND REPLACE (date_time, -, ‘‘) >= v_dayid_first
                                    AND REPLACE (date_time, -, ‘‘) < v_dayid_end
                                    ORDER BY
                                        volume DESC
                                    LIMIT 20
                                ) 7_nice_Meiji
                            UNION ALL
                                -- 8_nice_NYX
                                SELECT
                                    *
                                FROM
                                    (
                                        SELECT
                                            source AS website,
                                            (
                                                CASE brand
                                                WHEN Maybelline THEN
                                                    美宝莲
                                                WHEN Loreal THEN
                                                    欧莱雅
                                                WHEN Meiji THEN
                                                    美即
                                                WHEN NYX THEN
                                                    NYX
                                                ELSE
                                                    none
                                                END
                                            ) AS keyword,
                                            tag_name AS title,
                                            ROUND(
                                                IFNULL(
                                                    (
                                                        POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                                            IFNULL(pic_comments_num, 0),
                                                            2
                                                        ) + POWER(IFNULL(browse_num, 0), 2)
                                                    ) / (
                                                        IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                                    ),
                                                    0
                                                ),
                                                0
                                            ) AS volume,
                                            IFNULL(pic_like_num, 0) AS dianzan_num,
                                            IFNULL(pic_comments_num, 0) AS comment_num,
                                            IFNULL(browse_num, 0) AS browse_num,
                                            1 / 0 AS stay_time,
                                            REPLACE (
                                                LEFT (
                                                    SUBSTRING_INDEX(
                                                        picture.nice.pic_url,
                                                        upload/show/,
                                                        - 1
                                                    ),
                                                    10
                                                ),
                                                /,
                                                ‘‘
                                            ) AS upload_date,
                                            pic_url AS download_url,
                                            REPLACE (date_time, -, ‘‘) AS sys_date,
                                            picture AS file_type,
                                            1 / 0 AS media_file_name,
                                            1 / 0 AS save_dir,
                                            0 AS video_not_found
                                        FROM
                                            picture.nice
                                        WHERE
                                            source = nice
                                        AND brand = Meiji
                                        AND REPLACE (date_time, -, ‘‘) >= v_dayid_first
                                        AND REPLACE (date_time, -, ‘‘) < v_dayid_end
                                        ORDER BY
                                            volume DESC
                                        LIMIT 20
                                    ) 8_nice_NYX
                                UNION ALL
                                    -- 9_美拍_美宝莲 
                                    SELECT
                                        *
                                    FROM
                                        (
                                            SELECT DISTINCT
                                                source AS website,
                                                (
                                                    CASE keyword
                                                    WHEN 美宝莲 THEN
                                                        美宝莲
                                                    WHEN Loreal THEN
                                                        欧莱雅
                                                    WHEN Meiji THEN
                                                        美即
                                                    WHEN NYX THEN
                                                        NYX
                                                    ELSE
                                                        none
                                                    END
                                                ) AS keyword,
                                                title AS title,
                                                ROUND(
                                                    IFNULL(
                                                        (
                                                            POWER(IFNULL(zanCount, 0), 2) + POWER(IFNULL(commentCount, 0), 2) + POWER(IFNULL(watchCount, 0), 2) + POWER(IFNULL(netCount, 0), 2) + POWER(IFNULL(favCount, 0), 2) + POWER(IFNULL(tanmuCount, 0), 2) + POWER(IFNULL(shareCount, 0), 2)
                                                        ) / (
                                                            IFNULL(zanCount, 0) + IFNULL(commentCount, 0) + IFNULL(watchCount, 0) + IFNULL(netCount, 0) + IFNULL(favCount, 0) + IFNULL(tanmuCount, 0) + IFNULL(shareCount, 0)
                                                        ),
                                                        0
                                                    ),
                                                    0
                                                ) AS volume,
                                                IFNULL(zanCount, 0) AS dianzan_num,
                                                IFNULL(commentCount, 0) AS comment_num,
                                                IFNULL(watchCount, 0) AS browse_num,
                                                length AS stay_time,
                                                REPLACE (uploadTime, -, ‘‘) AS upload_date,
                                                downUrl AS download_url,
                                                REPLACE (LEFT(cralwTime, 10), -, ‘‘) AS sys_date,
                                                media AS file_type,
                                                1 / 0 AS media_file_name,
                                                1 / 0 AS save_dir,
                                                0 AS video_not_found
                                            FROM
                                                media.7videos
                                            WHERE
                                                source = 美拍
                                            AND keyword = 美宝莲
                                            AND downUrl <> ‘‘
                                            AND REPLACE (LEFT(cralwTime, 10), -, ‘‘) >= v_dayid_first
                                            AND REPLACE (LEFT(cralwTime, 10), -, ‘‘) < v_dayid_end
                                            ORDER BY
                                                volume DESC
                                            LIMIT 20
                                        ) 9_美拍_美宝莲;

DECLARE
    CONTINUE HANDLER FOR NOT FOUND
SET var = 1;

/*是否达到记录的末尾控制变量*/
OPEN cur;

FETCH cur INTO v_website,
 v_keyword,
 v_title,
 v_volume,
 v_dianzan_num,
 v_comment_num,
 v_browse_num,
 v_stay_time,
 v_upload_date,
 v_download_url,
 v_sys_date,
 v_file_type,
 v_media_file_name,
 v_save_dir,
 v_video_not_found;

/*获取第一条记录*/
WHILE var <> 1 DO
    INSERT INTO loreal.`social_scan` (
        website,
        keyword,
        title,
        volume,
        dianzan_num,
        comment_num,
        browse_num,
        stay_time,
        upload_date,
        download_url,
        sys_date,
        file_type,
        media_file_name,
        save_dir,
        video_not_found
    )
VALUES
    (
        v_website,
        v_keyword,
        v_title,
        v_volume,
        v_dianzan_num,
        v_comment_num,
        v_browse_num,
        v_stay_time,
        v_upload_date,
        v_download_url,
        v_sys_date,
        v_file_type,
        v_media_file_name,
        v_save_dir,
        v_video_not_found
    );

FETCH cur INTO v_website,
 v_keyword,
 v_title,
 v_volume,
 v_dianzan_num,
 v_comment_num,
 v_browse_num,
 v_stay_time,
 v_upload_date,
 v_download_url,
 v_sys_date,
 v_file_type,
 v_media_file_name,
 v_save_dir,
 v_video_not_found;

/*取下一条记录*/
END
WHILE;

CLOSE cur;

END

//
delimiter ;
View Code

3、JOB事件:ddl_p_social_scan_job

技术分享
DROP EVENT
IF EXISTS p_social_scan_job;
delimiter //


CREATE DEFINER = `root`@`%` EVENT `p_social_scan_job` ON SCHEDULE EVERY 1 DAY STARTS 2016-05-16 02:00:00 ON COMPLETION PRESERVE ENABLE DO

BEGIN
    CALL loreal.p_social_scan (
        cast(
            (
                date_sub(curdate(), INTERVAL 1 DAY)
            ) AS SIGNED INTEGER
        ),
        cast(CURDATE() AS SIGNED INTEGER)
    ) ;
END//
delimiter ;
View Code

 

mysql etl过程 p_social_scan

标签:

原文地址:http://www.cnblogs.com/cenliang/p/5499392.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!