with
association1 as ( 
  -- 合計購入者数
  select
    count(distinct userId) as allPu 
  from
    dataset.table
) 
, association2 as ( 
  -- productId別購入数と合計購入者数のまとめ
  select
    a.userId,
    a.productId,
    count(distinct userId) over(partition by a.productId) as product_cnt,
    b.allPu 
  from
    dataset.table as a 
    cross join (select allPu from association1) as b
) 
, associationBase as ( 
  -- 買い合わせproductId別でのアソシエーションのベースデータ   
  select
    a.productId as A_productId,
    b.productId as B_productId,
    a.product_cnt as A_count,
    b.product_cnt as B_count,
    count(*) as A_B_count,
    a.allPu 
  from
    association2 as a 
    inner join association2 as b 
    on a.userId = b.userId 
  where
    a <> b 
  group by
    1, 2, 3, 4, 6
) 
, reports as (
  select
    A_productId,
    B_productId,
    -- 支持度=同時購入者数/購入者全体数
    round(1.0 * A_B_count / allPu, 4) as support,
    -- 信頼度=同時購入者数/商品A購入者数
    round(1.0 * A_B_count / A_count, 4) as confidence,
    -- 期待信頼度=商品B購入者数/購入者全体数
    round(1.0 * B_count / allPu), 4) as expected_confidence,
    -- リフト値=信頼度/期待信頼度
    round(1.0 * (A_B_count / A_count) / B_count , 4) as lift,
    A_count,
    B_count,
    A_B_count,
    allPu
  from
    associationBase 
)
select
  *
from
  reports
▼出力結果例
| A_ productId | B_ productId | sup port | confi dence | expected_ confidence | lift | A_ count | B_ count | A_B_ count | allPu | 
|---|---|---|---|---|---|---|---|---|---|
| A01 | B01 | 0.02 | 0.2 | 0.2 | 0.002 | 50 | 100 | 10 | 500 | 
| A01 | B02 | 0.04 | 0.5 | 0.6 | 0.0017 | 40 | 300 | 20 | 500 | 
| B01 | B02 | 0.14 | 0.7 | 0.6 | 0.0023 | 100 | 300 | 70 | 500 | 
アソシエーション分析に関しては、以下のblogがすごくわかりやすかったので、詳細知りたい方はリンク先を参照ください。
 
							
											

 
                         
                        
 
																	 
																	 
																	


