日次で連続してログインや訪問しているユーザの日数とその連続する期間の最初の日と最後の日を集計するクエリ。連続ログインが途切れて再度ログインした際は、別の期間として新たに集計。急いで書いたから読みにくい。。
◇クエリ例
WITH
stays_table AS (
SELECT
*,
CASE
WHEN visit_date != DATE_ADD(lag_visit_date, INTERVAL 1 DAY) THEN 1
ELSE
0
END
AS not_continuous_flg
FROM (
--ユーザ毎の日付昇順での前回のデータを入れる項目の作成
SELECT
userid,
visit_date,
LAG(userid,1) OVER (PARTITION BY userid ORDER BY visit_date) AS lag_user_id,
LAG(visit_date,1) OVER (PARTITION BY userid ORDER BY visit_date) AS lag_visit_date
FROM (
SELECT
userid,
DATE(TIMESTAMP(jpn_day)) AS visit_date
FROM
`data`
GROUP BY
userid,
visit_date ) ) ),
flg_cum AS(
SELECT
*,
SUM(not_continuous_flg) OVER (PARTITION BY userid ORDER BY visit_date) AS cum_not_cont_date_flg
FROM
stays_table ),
t1 AS (
SELECT
*,
COUNT(cum_not_cont_date_flg) OVER (PARTITION BY userid, cum_not_cont_date_flg ORDER BY cum_not_cont_date_flg) AS cnt
FROM
flg_cum ),
t00 AS (
--00グループ:初回
SELECT
userid,
not_continuous_flg,
cum_not_cont_date_flg,
MAX(cnt) AS stay_days,
MIN(visit_date) AS first_loginday
FROM
t1
WHERE
t1.not_continuous_flg = 0
AND t1.cum_not_cont_date_flg = 0
GROUP BY
userid,
not_continuous_flg,
cum_not_cont_date_flg,
cnt
ORDER BY
userid ),
t11 AS (
--1と1以上グループ:2回目以降
SELECT
userid,
not_continuous_flg,
cum_not_cont_date_flg,
MAX(cnt) AS stay_days,
MIN(visit_date) AS first_loginday
FROM
t1
WHERE
t1.not_continuous_flg = 1
AND t1.cum_not_cont_date_flg > 0
GROUP BY
userid,
not_continuous_flg,
cum_not_cont_date_flg,
cnt
ORDER BY
userid,
not_continuous_flg,
cum_not_cont_date_flg ),
x1 AS (
SELECT
userid,
cum_not_cont_date_flg,
stay_days,
first_loginday
FROM
t00
UNION ALL
SELECT
userid,
cum_not_cont_date_flg,
stay_days,
first_loginday
FROM
t11 ),
x2 AS (
SELECT
userid,
cum_not_cont_date_flg,
stay_days,
first_loginday
FROM
x1
GROUP BY
userid,
cum_not_cont_date_flg,
stay_days,
first_loginday
ORDER BY
userid,
cum_not_cont_date_flg,
stay_days,
first_loginday )
SELECT
userid,
cum_not_cont_date_flg,
stay_days,
first_loginday,
DATE_ADD(first_loginday, INTERVAL stay_days - 1 DAY) AS last_loginday
FROM
x2
◇参考サイト