2015年12月21日月曜日

R セグメント分けと集計

目的:ユーザ別に集計した結果を条件に従ってセグメント分けし、各セグメントの合計金額とユーザ数を求める
補足:dplyr パッケージが必要(以下のコードを実行する前に library(dplyr) を実行しておく)

# Build the sample payment log: one row per purchase (user, item, amount).
# The repeated ids/items are generated with rep() instead of being typed out.
v.x1 <- rep(c("oda", "toyo", "ie"), times = c(3, 3, 4))
v.x2 <- rep(c("a", "b", "c"), times = c(2, 3, 5))
v.x3 <- c(100, 100, 800, 700, 600, 500, 4000, 5000, 2000, 1000)
df.payment_log <- data.frame(
  user_id = v.x1,
  item = v.x2,
  payment = v.x3
)

> df.payment_log
   user_id item payment
1      oda    a     100
2      oda    a     100
3      oda    b     800
4     toyo    b     700
5     toyo    b     600
6     toyo    c     500
7       ie    c    4000
8       ie    c    5000
9       ie    c    2000
10      ie    c    1000

# Sum the payments of each user: one output row per user_id,
# with the total stored in pay_total.
df.user_payment <- summarise(
  group_by(df.payment_log, user_id),
  pay_total = sum(payment)
)

> df.user_payment
Source: local data frame [3 x 2]

  user_id pay_total
   (fctr)     (dbl)
1      ie     12000
2     oda      1000
3    toyo      1800

 #セグメントに分ける
df.user_payment$rank <- "N"
df.user_payment[df.user_payment$pay_total >= 1 & df.user_payment$pay_total <= 1000,]$rank <- "C"
df.user_payment[df.user_payment$pay_total >= 1001 & df.user_payment$pay_total <= 10000,]$rank <- "B"
df.user_payment[df.user_payment$pay_total >= 10001,]$rank <- "A"

> df.user_payment
Source: local data frame [3 x 3]

  user_id pay_total  rank
   (fctr)     (dbl) (chr)
1      ie     12000     A
2     oda      1000     C
3    toyo      1800     B

 #セグメント別に集計する
df.seg <- df.user_payment %>%
  group_by(rank) %>%
  summarise(pay = sum(pay_total), pu = length(user_id))

> df.seg
Source: local data frame [3 x 3]

   rank   pay    pu
  (chr) (dbl) (int)
1     A 12000     1
2     B  1800     1
3     C  1000     1

0 件のコメント :

コメントを投稿