标签:结果 status ica alt 基于 attach content distinct 本地
1.数据清洗
步骤:
1.查询charge_record表中业务类型为充值(buss_type=0)、订单状态为成功(charge_status=2)且charge_prod_id不为空的数据
2.将上述数据转移到本地数据库
使用如下脚本:
# coding=utf-8
"""Copy filtered charge_record rows from the source database into a local MySQL table.

Only the two columns used by the retention queries (uid, update_time) are copied.
Both connections are always closed, and the local insert is rolled back if it fails.
"""
import pymysql

# Rows to migrate: buss_type=0 and charge_status=2 (described in the steps above as
# successful top-up orders) that also have a non-null charge_prod_id.
SELECT_SQL = (
    'select uid,update_time from charge_record '
    'where buss_type=0 and charge_status=2 and charge_prod_id is not null'
)

# Parameterized insert; executemany() binds one (uid, update_time) tuple per row.
INSERT_SQL = 'insert into charge_record(uid,update_time) values(%s, %s)'

# Source database connection (credentials are placeholders).
source_db = pymysql.connect(
    host='***',
    port=3306, user='***',
    passwd='***',
    db='***',
    charset='utf8')
try:
    source_cursor = source_db.cursor()
    source_cursor.execute(SELECT_SQL)
    # Load the whole result set; each row is a (uid, update_time) tuple.
    rows = source_cursor.fetchall()
finally:
    # Release the source connection even if the query fails.
    source_db.close()

# Local (target) database connection.
target_db = pymysql.connect(
    host='127.0.0.1',
    port=3306, user='root',
    passwd='123456',
    db='test',
    charset='utf8')
try:
    target_cursor = target_db.cursor()
    # Bulk insert all fetched rows.
    target_cursor.executemany(INSERT_SQL, rows)
    # Make the inserted rows permanent.
    target_db.commit()
except Exception:
    # Do not leave a half-written batch behind.
    target_db.rollback()
    raise
finally:
    target_db.close()
2.计算留存率
使用的MySQL语句如下:
-- Work in the local `test` schema.
USE test;

-- First order time per user: the earliest update_time among each uid's rows.
SELECT
    uid,
    MIN(update_time) AS u_time
FROM charge_record
GROUP BY uid;
-- Attach each user's first-payment time to every one of that user's orders.
--   u_time : the user's earliest update_time (first payment)
--   m_diff : TIMESTAMPDIFF(MONTH, first payment, this order)
--   y_m    : first-payment month label built from YEAR()/MONTH(), e.g. 2020年6月
-- The per-user derived table is deliberately not capped with LIMIT: a LIMIT
-- without ORDER BY keeps an arbitrary set of users, which makes the result
-- non-deterministic and inconsistent with the cohort table built from the same
-- derived table later in this file.
SELECT
    a.uid,
    b.u_time,
    TIMESTAMPDIFF(MONTH, b.u_time, a.update_time) AS m_diff,
    CONCAT(YEAR(b.u_time), '年', MONTH(b.u_time), '月') AS y_m
FROM charge_record AS a
INNER JOIN (
    SELECT
        uid,
        MIN(update_time) AS u_time
    FROM charge_record
    GROUP BY uid
) AS b
    ON a.uid = b.uid
-- Skip users whose rows all have a NULL update_time.
WHERE b.u_time IS NOT NULL;
-- Build the cohort table: number of distinct users per
-- (first-payment month, month offset from the first payment).
-- NOTE(review): TIMESTAMPDIFF(MONTH, ...) counts whole elapsed months between the
-- two timestamps, while the cohort label is a calendar month -- confirm this is
-- the intended definition of the month offset.
CREATE TABLE cohort AS
SELECT
    c.y_m                 AS `首付月份`,
    c.m_diff              AS `月份差`,
    COUNT(DISTINCT c.uid) AS `留存量`
FROM (
    -- Every order, tagged with its user's first-payment time and month label.
    SELECT
        a.uid,
        b.u_time,
        TIMESTAMPDIFF(MONTH, b.u_time, a.update_time) AS m_diff,
        CONCAT(YEAR(b.u_time), '年', MONTH(b.u_time), '月') AS y_m
    FROM charge_record AS a
    INNER JOIN (
        -- Earliest update_time per user.
        SELECT
            uid,
            MIN(update_time) AS u_time
        FROM charge_record
        GROUP BY uid
    ) AS b
        ON a.uid = b.uid
    WHERE b.u_time IS NOT NULL
) AS c
GROUP BY
    c.y_m,
    c.m_diff;
-- Retention rate per cohort and month offset: users counted at that offset
-- divided by the cohort's month-0 count, formatted as a percentage string.
-- The month offset is part of the output so that the several rows belonging to
-- one first-payment month can be told apart.
SELECT
    c.`首付月份`,
    c.`月份差`,
    CONCAT(ROUND((c.`留存量` / m.`留存量`) * 100, 2), '%') AS `留存率`
FROM cohort AS c
LEFT JOIN (
    -- Month-0 row of each cohort = size of the cohort.
    SELECT
        `首付月份`,
        `留存量`
    FROM cohort
    WHERE `月份差` = 0
) AS m
    ON c.`首付月份` = m.`首付月份`
-- The month label is text, so cohorts are ordered lexically, not chronologically.
ORDER BY
    c.`首付月份`,
    c.`月份差`;
-- Advanced retention report: one row per first-payment month, showing the
-- cohort's month-0 size and the retention rate at month offsets +1 .. +5 as
-- separate percentage columns (pivot of the cohort table).
SELECT
    a.`首付月份`,
    -- The month-0 size is the same on every row of a cohort, so AVG returns it.
    AVG(b.`留存量`) AS `本月新增`,
    -- Each cohort has at most one row per offset; SUM picks that row's rate
    -- and yields 0 when the offset is absent.
    CONCAT(SUM(CASE a.`月份差` WHEN 1 THEN ROUND((a.`留存量` / b.`留存量`) * 100, 2) ELSE 0 END), '%') AS `+1月`,
    CONCAT(SUM(CASE a.`月份差` WHEN 2 THEN ROUND((a.`留存量` / b.`留存量`) * 100, 2) ELSE 0 END), '%') AS `+2月`,
    CONCAT(SUM(CASE a.`月份差` WHEN 3 THEN ROUND((a.`留存量` / b.`留存量`) * 100, 2) ELSE 0 END), '%') AS `+3月`,
    CONCAT(SUM(CASE a.`月份差` WHEN 4 THEN ROUND((a.`留存量` / b.`留存量`) * 100, 2) ELSE 0 END), '%') AS `+4月`,
    CONCAT(SUM(CASE a.`月份差` WHEN 5 THEN ROUND((a.`留存量` / b.`留存量`) * 100, 2) ELSE 0 END), '%') AS `+5月`
FROM cohort AS a
LEFT JOIN (
    -- Month-0 row of each cohort = number of first-time payers in that month.
    SELECT
        `首付月份`,
        `留存量`
    FROM cohort
    WHERE `月份差` = 0
) AS b
    ON a.`首付月份` = b.`首付月份`
GROUP BY a.`首付月份`;
最终结果如下:
标签:结果 status ica alt 基于 attach content distinct 本地
原文地址:https://www.cnblogs.com/wanyuan/p/13107172.html