数据库常见面试题 —— 15.七日留存率
SQL 七日留存率
·
1. 基本含义
留存率是用于反映网站、互联网应用或网络游戏的运营情况的统计指标,其含义是在统计周期(周/月)内,每日活跃用户数在第N日仍启动该App的用户数占比的平均值。
留存率常用于反映用户粘性,当N取值越大、留存率越高时,用户粘性越高。此外,不同种类应用的留存率也有各自的基准,如游戏的首月留存率通常比社交类高,而工具类的首月留存率又比游戏高。
① 次日留存:
指在某一时段内的新增用户中,次日仍然活跃或使用的用户所占的比例。
② 三日留存:
指在某一时段内的新增用户中,第三天仍然活跃或使用的用户所占的比例。
③ 七日留存:
指在某一时段内的新增用户中,第七天仍然活跃或使用的用户所占的比例。
2. 实际应用
假设有一张注册表UserInfo, 一张登录表 LogInfo
--建表
if object_id('UserInfo','U') IS NOT NULL DROP TABLE UserInfo
GO
CREATE TABLE UserInfo
(
UserName varchar(20),
RegistrationTime DATETIME
)
GO
insert into UserInfo values
('Alice','2020-01-01 09:15:00'),
('Bob','2020-01-01 00:04:00'),
('Charlie','2020-01-01 22:16:00'),
('David','2020-01-01 20:32:00'),
('Edward','2020-01-01 13:59:00'),
('Frank','2020-01-01 21:28:00'),
('George','2020-01-01 14:03:00'),
('Helen','2020-01-01 11:00:00'),
('Isabel','2020-01-01 23:57:00'),
('Jack','2020-01-01 04:46:00'),
('Katherine','2020-01-02 14:21:00'),
('Lawrence','2020-01-02 11:15:00'),
('Margaret','2020-01-02 07:26:00'),
('Nancy','2020-01-02 10:34:00'),
('Oliver','2020-01-02 08:22:00'),
('Patricia','2020-01-02 14:23:00'),
('Richard','2020-01-03 09:20:00'),
('Sarah','2020-01-03 11:21:00'),
('Thomas','2020-01-03 12:17:00'),
('Victoria','2020-01-03 15:26:00');
-- 登陆日志表
if object_id('LogInfo','u') is not null drop table LogInfo
go
create table LogInfo(
UserName varchar(20)
,LogTime datetime
)
go
insert into LogInfo values
('Bob','2020-01-02 00:14:00'),
('Jack','2020-01-02 08:32:00'),
('Charlie','2020-01-02 09:20:00'),
('Helen','2020-01-02 10:07:00'),
('David','2020-01-02 10:29:00'),
('Isabel','2020-01-02 11:45:00'),
('Edward','2020-01-02 12:19:00'),
('Alice','2020-01-02 14:29:00'),
('Oliver','2020-01-03 00:26:00'),
('Nancy','2020-01-03 11:18:00'),
('Katherine','2020-01-03 13:18:00'),
('Patricia','2020-01-03 14:33:00'),
('Frank','2020-01-04 07:51:00'),
('Sarah','2020-01-04 08:11:00'),
('George','2020-01-04 09:27:00'),
('Jack','2020-01-04 10:59:00'),
('Victoria','2020-01-04 11:51:00'),
('Charlie','2020-01-04 12:37:00'),
('Richard','2020-01-04 15:07:00'),
('Helen','2020-01-04 16:35:00'),
('Alice','2020-01-04 19:29:00'),
('Nancy','2020-01-05 08:03:00'),
('Lawrence','2020-01-05 10:27:00'),
('Oliver','2020-01-05 16:33:00'),
('Thomas','2020-01-06 09:03:00'),
('Victoria','2020-01-06 15:26:00'),
('David','2020-01-08 11:03:00'),
('Edward','2020-01-08 12:54:00'),
('Frank','2020-01-08 19:22:00'),
('Margaret','2020-01-09 10:20:00'),
('Oliver','2020-01-09 16:40:00'),
('Sarah','2020-01-10 21:34:00');
--查询
select
cast(RegistrationTime as date) dt,
count(distinct a.username) 新增用户数,
count(distinct b.username) 次日留存用户数,
count(distinct c.username) 三日留存用户数,
count(distinct b.username) 七日留存用户数,
count(distinct b.username)* 1.0 /count(distinct a.username) 次日留存率,
count(distinct c.username)* 1.0 /count(distinct a.username) 三日留存率,
count(distinct b.username)* 1.0 /count(distinct a.username) 七日留存率
from userinfo a
left join logInfo b
on a.username =b.username and datediff(day,RegistrationTime,b.LogTime)=1
left join logInfo c
on a.username =c.username and datediff(day,RegistrationTime,c.LogTime)=3
left join logInfo d
on a.username = d.username and datediff(day,RegistrationTime,d.LogTime)=7
group by cast(RegistrationTime as date)
更多推荐




所有评论(0)