添加配置
# Timestamp rounding for the HDFS sink k1 of agent a1.
# round is false here, so roundValue / roundUnit are configured but rounding is switched off.
a1.sinks.k1.hdfs.round = false
a1.sinks.k1.hdfs.roundValue = 30
a1.sinks.k1.hdfs.roundUnit = second
# File rolling: roll on a time interval of 30 only;
# 0 disables rolling by file size and by event count.
a1.sinks.k1.hdfs.rollInterval = 30
a1.sinks.k1.hdfs.rollSize = 0
a1.sinks.k1.hdfs.rollCount = 0
參數 | 說明 |
---|---|
round | 表示是否對時間戳向下舍入(true/false) |
roundUnit | 時間單位(second,minute,hour) |
roundValue | 時間戳向下舍入到該值的最大整數倍(單位由roundUnit指定),結果小於當前時間 |
rollInterval | 每隔N秒截斷一個文件。設置爲0表示不會由於時間間隔截斷文件(整數N,單位爲秒) |
rollSize | 文件字節數超過N截斷一個文件。設置爲0就不由於文件大小截斷文件(字節數N) |
rollCount | 每N個event截斷一個文件。設置爲0就不由於event數量截斷文件 |
ext_startup_logs
表結構:
createdatms bigint appid string tenantid string deviceid string appversion string appchannel string appplatform string ostype string devicestyle string country string province string ipaddress string network string carrier string brand string screensize string ym string day string hm string
hive> select count(distinct(deviceid)) from ext_startup_logs where appid='sdk34734'
DateUtil.java: 獲取某天的0點0分(注意:下方貼出的代碼實際上是 DayEndUDF,DateUtil 的源碼未在此處給出)
package com.applogs.udf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.udf.UDFType; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; @Description(name = "udf_getEndDay", value = "getEndTimeInDay", extended = "udf() ; udf('2020/03/27 02:03:04') ; udf('2020-03-27 02-03-04','yyyy-MM-dd HH-mm-ss')") @UDFType(deterministic = true, stateful = false) public class DayEndUDF extends UDF { public long evaluate(){ return evaluate(new Date()); } // 返回某天的當天開始時間 public long evaluate(Date date){ // date那天0時 Date startDate = DateUtil.getZeroDate(date); // 經過calendar獲取次日0時 Calendar c = Calendar.getInstance(); c.setTime(startDate); // 後推一天即爲次日0時 c.add(Calendar.DAY_OF_MONTH, 1); return c.getTimeInMillis(); } public long evaluate(String dateStr) throws ParseException { return evaluate(dateStr, "yyyy/MM/dd HH:mm:ss"); } public long evaluate(String dateStr, String pattern){ try { SimpleDateFormat sdf = new SimpleDateFormat(pattern); Date date = sdf.parse(dateStr); return evaluate(date); } catch (ParseException e) { e.printStackTrace(); } return 0; } }
DayStartUDF.java: 得到某天0時的UDF函數
package com.applogs.udf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.udf.UDFType; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; @Description(name = "udf_getStartay", value = "getStartInDay", extended = "udf() ; udf('2020/03/27 02:03:04') ; udf('2020-03-27 02-03-04','yyyy-MM-dd HH-mm-ss')") @UDFType(deterministic = true, stateful = false) public class DayStartUDF extends UDF { // 返回當前時刻的當天開始時間 public long evaluate(){ return evaluate(new Date()); } // 返回某天的當天開始時間 public long evaluate(Date date){ return DateUtil.getZeroDate(date).getTime(); } public long evaluate(String dateStr) throws ParseException { return evaluate(dateStr, "yyyy/MM/dd HH:mm:ss"); } public long evaluate(String dateStr, String pattern){ try { SimpleDateFormat sdf = new SimpleDateFormat(pattern); Date date = sdf.parse(dateStr); Date zeroDate = DateUtil.getZeroDate(date); return zeroDate.getTime(); } catch (ParseException e) { e.printStackTrace(); } return 0; } }
DayEndUDF.java: 得到某天結束時間的UDF,即獲取次日的0時
package com.applogs.udf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.udf.UDFType; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; @Description(name = "udf_getEndDay", value = "getEndTimeInDay", extended = "udf() ; udf('2020/03/27 02:03:04') ; udf('2020-03-27 02-03-04','yyyy-MM-dd HH-mm-ss')") @UDFType(deterministic = true, stateful = false) public class DayEndUDF extends UDF { public long evaluate(){ return evaluate(new Date()); } // 返回某天的當天開始時間 public long evaluate(Date date){ // date那天0時 Date startDate = DateUtil.getZeroDate(date); // 經過calendar獲取次日0時 Calendar c = Calendar.getInstance(); c.setTime(startDate); // 後推一天即爲次日0時 c.add(Calendar.DAY_OF_MONTH, 1); return c.getTimeInMillis(); } public long evaluate(String dateStr) throws ParseException { return evaluate(dateStr, "yyyy/MM/dd HH:mm:ss"); } public long evaluate(String dateStr, String pattern){ try { SimpleDateFormat sdf = new SimpleDateFormat(pattern); Date date = sdf.parse(dateStr); return evaluate(date); } catch (ParseException e) { e.printStackTrace(); } return 0; } }
1.將UDF的jar包導出並放至hive/lib下
2.啓動hive,臨時導入jar包
$hive> add jar app-logs-hive-1.0-SNAPSHOT.jar ;
3.註冊臨時函數
$hive>create temporary function getstartday AS 'com.applogs.udf.DayStartUDF'; $hive>create temporary function getendday AS 'com.applogs.udf.DayEndUDF';