大數據開發-數倉ads層指標計算

ads層數據每每是最終的結果指標數據,在大屏展現,或者實時流處理時候使用,經過下面兩個例子來練習業務大屏展現sql該怎麼寫。sql

1.會員分析案例

1.1 數據準備

表結構以下,其中此表是dws層以天爲維度的會員表,好比天天的會員信息彙總,app

use dws;
drop table if exists dws.dws_member_start_day;
create table dws.dws_member_start_day(
`device_id` string, -- 設備id,來區分用戶
`uid` string, -- uid
`app_v` string,
`os_type` string,
`language` string,
`channel` string,
`area` string,
`brand` string
) COMMENT '會員日啓動彙總'
partitioned by(dt string)
stored as parquet;

1.2 會員指標計算

沉默會員的定義:只在安裝當天啓動過App,並且安裝時間是在7天前函數

流失會員的定義:最近30天未登陸的會員大數據

1.2.1 如何計算沉默會員數

-- 拿到只啓動一次的會員,後面再過濾安裝時間是再7天前的,使用sum 窗口函數
SELECT count(*)
FROM
  (SELECT device_id,
          sum(device_id) OVER (PARTITION BY device_id) AS sum_num,
                     dt
   FROM dws.dws_member_start_day) tmp
WHERE dt <= date_add(CURRENT_DATE, -7)
  AND sum_num=1

1.2.2 如何計算流失會員數

-- 拿到會員最近一次登陸時間,並用row_number來過濾
SELECT count(*)
FROM
  (SELECT device_id,
          dt,
          row_number() OVER (PARTITION BY device_id
                             ORDER BY dt DESC) ro
   FROM dws.dws_member_start_day) tmp
WHERE ro=1
  AND dt >= date_add(CURRENT_DATE, -30)

2. 核心交易案例

2.1 數據準備

給定一個每日訂單維度表,表結構以下圖:ui

DROP TABLE IF EXISTS dwd.dwd_trade_orders;
create table dwd.dwd_trade_orders(
`orderId`    int,
`orderNo`   string,
`userId`    bigint,
`status`    tinyint,
`productMoney` decimal,
`totalMoney`  decimal,
`payMethod`   tinyint,
`isPay`     tinyint,
`areaId`    int,
`tradeSrc`   tinyint,
`tradeType`   int,
`isRefund`   tinyint,
`dataFlag`   tinyint,
`createTime`  string,
`payTime`   string,
`modifiedTime` string,
`start_date`  string,
`end_date`   string
) COMMENT '訂單事實拉鍊表'
partitioned by (dt string)
STORED AS PARQUET;

其中,訂單狀態 -3 用戶拒收 -2未付款的訂單 -1用戶取消 0 待發貨 1配送中 2用戶確認收貨,訂單有效標誌 -1 刪除 1 有效人工智能

數據預處理,在明細事實拉鍊表處理時不太方便,能夠作一張中間表,dws_trade_orders_day 其表結構和加工以下:code

DROP TABLE IF EXISTS dws.dws_trade_orders_day;

CREATE TABLE IF NOT EXISTS dws.dws_trade_orders_day(day_dt string COMMENT '日期:yyyy-MM-dd',
                                                   day_cnt decimal commnet '日訂單筆數',
                                                   day_sum decimal COMMENT '日訂單總額') COMMENT '日訂單統計表';

SELECT dt,
       count(*) cnt,
       sum(totalMoney) sm
FROM
  (SELECT DISTINCT orderid,
                   dt,
                   totalMoney
   FROM dwd.dwd_trade_orders
   WHERE status >= 0
     AND dataFlag = '1') tmp
GROUP BY dt;


INSERT OVERWRITE TABLE dws.dws_trade_orders_day
SELECT dt,
       count(*) cnt,
       sum(totalMoney) sm
FROM
  (SELECT DISTINCT orderid,
                   dt,
                   totalMoney
   FROM dwd.dwd_trade_orders
   WHERE status >= 0
     AND dataFlag = '1') tmp
GROUP BY dt;


SELECT *
FROM dws.dws_trade_orders_day
WHERE day_dt BETWEEN '2020-01-01' AND '2020-12-31';

2.2 指標1,統計2020年每一個季度的銷售訂單筆數、訂單總額

先建立ads指標表:dws_trade_orders_quarterci

DROP TABLE IF EXISTS dws.dws_trade_orders_quarter;


CREATE TABLE IF NOT EXISTS dws.dws_trade_orders_quarter(YEAR string COMMENT '年份',
                                                        QUARTER string COMMENT '季度',
                                                        cnt decimal COMMENT '訂單總筆數',
                                                        SUM decimal COMMENT '訂單總額') COMMENT '季度訂單統計表';


INSERT OVERWRITE TABLE dws.dws_trade_orders_quarter WITH tmp AS
  (SELECT substr(day_dt, 0, 4) YEAR,
                               CASE WHEN substr(dat_dt, 6, 2)="01"
   OR substr(dat_dt, 6, 2)="02"
   OR substr(day_dt, 6, 2)="03" THEN "1" WHEN substr(dat_dt, 6, 2)="04"
   OR substr(dat_dt, 6, 2)="05"
   OR substr(day_dt, 6, 2)="06" THEN "2" WHEN substr(dat_dt, 6, 2)="07"
   OR substr(dat_dt, 6, 2)="08"
   OR substr(day_dt, 6, 2)="09" THEN "3" WHEN substr(dat_dt, 6, 2)="10"
   OR substr(dat_dt, 6, 2)="11"
   OR substr(day_dt, 6, 2)="12" THEN "4" AS QUARTER day_cnt,
                                     day_sum
   FROM dws.dws_trade_orders_day)
SELECT YEAR,
       QUARTER,
       sum(day_cnt),
       sum(day_sum)
FROM tmp
GROUP BY YEAR QUARTER;

2.3 統計2020年每月的銷售訂單筆數、訂單總額

先建立ads指標表:dws_trade_orders_monthstring

DROP TABLE IF EXISTS dws.dws_trade_orders_month;

CREATE TABLE IF NOT EXISTS dws.dws_trade_orders_month(yearstring COMMENT '年份',
                                                      MONTH string COMMENT '月份',
                                                      month_cnt decimal COMMENT '月訂單總筆數',
                                                      month_sum decimal COMMENT '月訂單總額') COMMENT '月訂單統計表';


INSERT OVERWRITE TABLE dws.dws_trade_orders_month WITH tmp AS
  (SELECT substr(day_dt, 0, 4) YEAR,
                               sunstr(day_dt, 6, 2) MONTH,
                                                    day_cnt,
                                                    day_sum
   FROM dws.dws_trade_orders_day)
SELECT YEAR,
       MONTH,
       sum(day_cnt) month_cnt,
       sum(day_sum) month_sum
FROM tmp
GROUP BY YEAR,
         MONTH;

2.4 統計2020年每週(週一到週日)的銷售訂單筆數、訂單總額

建立ads層指標表:dws_trade_orders_week 利用到日期函數weekofyearit

DROP TABLE IF EXISTS dws.dws_trade_orders_week;
CREATE TABLE IF NOT EXISTS dws.dws_trade_orders_week(YEAR string COMMENT '年份',
                                                     WEEK string COMMENT '一年中的第幾周',
                                                     week_cnt decimal COMMENT '周訂單總筆數',
                                                     week_sum decimal COMMENT '周訂單總額') COMMENT '周訂單統計表';


INSERT OVERWRITE TABLE dws.dws_trade_orders_week
SELECT substr(day_dt, 0, 4) YEAR,
                            weekofyear(day_dt) WEEK,
                                               sum(day_cnt),
                                               sum(day_sum)
FROM dws.dws_trade_orders_day
GROUP BY substr(day_dt, 0, 4) YEAR,
                              weekofyear(day_dt) WEEK;

2.5 統計2020年國家法定節假日、休息日、工做日的訂單筆數、訂單總額

建立日期信息維表:dim_day_info 並錄入節假日信息數據(數據每一年都不同,須要國務院通知的公告,因此按期手動維護)

drop table if exists dim.dim_day_info;
create table if not exists dim.dim_day_info(
  day_dt string comment '日期',
  is_holidays int comment '節假日標識: 0不是 1是',
  is_workday int comment '工做日標識 0不是 1是'
) comment '日期信息表';
-- 統計2020節假日的訂單筆數,訂單總額

SELECT nvl(sum(day_cnt), 0) nvl(sum(day_sum), 0)
FROM dws.dws_trade_orders_day A
LEFT JOIN dim.dim_day_info B ON A.day_dt = B.day_dt
WHERE B.is_holiday = 1;

-- 統計2020年休息日的訂單筆數,訂單總額

SELECT nvl(sum(day_cnt), 0) nvl(sum(day_sum), 0)
FROM dws.dws_trade_orders_day A
LEFT JOIN dim.dim_day_info B ON A.day_dt = B.day_dt
WHERE B.is_workday = 0;

-- 統計2020節工做日的訂單筆數,訂單總額

SELECT nvl(sum(day_cnt), 0) nvl(sum(day_sum), 0)
FROM dws.dws_trade_orders_day A
LEFT JOIN dim.dim_day_info B ON A.day_dt = B.day_dt
WHERE B.is_workday = 1;

吳邪,小三爺,混跡於後臺,大數據,人工智能領域的小菜鳥。
更多請關注
file

相關文章
相關標籤/搜索