with
association1 as (
-- 合計購入者数
select
count(distinct userId) as allPu
from
dataset.table
)
, association2 as (
-- productId別購入数と合計購入者数のまとめ
select
a.userId,
a.productId,
count(distinct userId) over(partition by a.productId) as product_cnt,
b.allPu
from
dataset.table as a
cross join (select allPu from association1) as b
)
, associationBase as (
-- 買い合わせproductId別でのアソシエーションのベースデータ
select
a.productId as A_productId,
b.productId as B_productId,
a.product_cnt as A_count,
b.product_cnt as B_count,
count(*) as A_B_count,
a.allPu
from
association2 as a
inner join association2 as b
on a.userId = b.userId
where
a <> b
group by
1, 2, 3, 4, 6
)
, reports as (
select
A_productId,
B_productId,
-- 支持度=同時購入者数/購入者全体数
round(1.0 * A_B_count / allPu, 4) as support,
-- 信頼度=同時購入者数/商品A購入者数
round(1.0 * A_B_count / A_count, 4) as confidence,
-- 期待信頼度=商品B購入者数/購入者全体数
round(1.0 * B_count / allPu), 4) as expected_confidence,
-- リフト値=信頼度/期待信頼度
round(1.0 * (A_B_count / A_count) / B_count , 4) as lift,
A_count,
B_count,
A_B_count,
allPu
from
associationBase
)
select
*
from
reports
▼出力結果例
| A_ productId | B_ productId | sup port | confi dence | expected_ confidence | lift | A_ count | B_ count | A_B_ count | allPu |
|---|---|---|---|---|---|---|---|---|---|
| A01 | B01 | 0.02 | 0.2 | 0.2 | 0.002 | 50 | 100 | 10 | 500 |
| A01 | B02 | 0.04 | 0.5 | 0.6 | 0.0017 | 40 | 300 | 20 | 500 |
| B01 | B02 | 0.14 | 0.7 | 0.6 | 0.0023 | 100 | 300 | 70 | 500 |
アソシエーション分析に関しては、以下のblogがすごくわかりやすかったので、詳細知りたい方はリンク先を参照ください。



