參考:http://blog.csdn.net/beckham008/article/details/23741151?utm_source=tuicool&utm_medium=referral
1.設置合理slot數
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| 未壓縮 | 2m9.787s | 14m19.011s | 4m41.635s |
| 壓縮 | 2m22.371s | 13m57.379s | 4m43.945s |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| 未壓縮 | 2m9.787s | 14m19.011s | 4m41.635s |
| 壓縮 | 2m14.084s | 13m48.921s | 4m40.755s |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| 8388608 (8MB) | 1m40.767s | 9m54.701s | 4m54.342s |
| 16777216 (16MB) | 1m44.801s | 10m45.015s | 4m41.974s |
| 33554432 (32MB) | 2m0.222s | 12m43.198s | 4m36.464s |
| 67108864 (64MB) | 2m9.787s | 14m19.011s | 4m41.635s |
| 134217728 (128MB) | 2m51.450s | 16m3.758s | 4m43.410s |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| 128000000 | 2m7.526s | 13m44.007s | 4m42.296s |
| 256000000 | 2m9.787s | 14m19.011s | 4m41.635s |
| 512000000 | 2m7.969s | 13m45.184s | 4m39.975s |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| True | 2m9.787s | 14m19.011s | 4m41.635s |
| False | 9m44.347s | 45m1.006s | 5m23.501s |
set hive.auto.convert.join.noconditionaltask = true; set hive.auto.convert.join.noconditionaltask.size = 10000000;
解釋:hive.auto.convert.join.noconditionaltask.size代表可以轉化爲MapJoin的表的大小總和。例如有A、B兩個表,它們的大小都小於該屬性值,那麼它們都會分別被轉化爲MapJoin;如果兩個表的大小加起來也小於該屬性值,那麼這兩個表會被合併爲一個MapJoin。
測試:
1.
set hive.auto.convert.join=false;
set hive.auto.convert.join.noconditionaltask = true;
| Q22 | Q23 | Q24 |
|---|---|---|
| 9m22.254s | 44m56.032s | 5m26.398s |
| Q22 | Q23 | Q24 |
|---|---|---|
| 9m5.161s | 18m6.333s | 4m45.650s |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| 10000 (10K) | 9m17.021s | 16m8.071s | 4m46.207s |
| 10000000 (10M Default) | 2m11.891s | 13m38.050s | 4m33.742s |
| 100000000 (100M) | 1m34.005s | 10m43.252s | 4m39.885s |
| 1000000000 (1G) | 1m30.704s | 10m49.992s | java.lang.OutOfMemoryError: Java heap space(完整堆棧見下) |
| 1000000000 (1G),export HADOOP_CLIENT_OPTS="-Xmx2g" | 1m35.893s | 10m45.741s | 10m26.837s |

1000000000 (1G) 時 Q24 的錯誤輸出:
Exception in thread "main" java.lang.OutOfMemoryError: Java heap space
at org.apache.log4j.spi.LoggingEvent.<init>(LoggingEvent.java:165)
at org.apache.log4j.Category.forcedLog(Category.java:391)
at org.apache.log4j.Category.log(Category.java:856)
at org.apache.commons.logging.impl.Log4JLogger.error(Log4JLogger.java:229)
at org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask.executeInProcess(MapredLocalTask.java:349)
at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.main(ExecDriver.java:744)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at org.apache.hadoop.util.RunJar.main(RunJar.java:212)
Execution failed with exit status: 1
Obtaining error information
Task failed!
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| Undefined | 2m9.787s | 14m19.011s | 4m41.635s |
| True | Query returned non-zero code: 1, cause: hive configuration hive.optimize.mapjoin.mapreduce does not exists. | | |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| 0 | 2m9.787s | 14m19.011s | 4m41.635s |
| 1024 | 2m11.625s | 13m41.085s | 4m39.853s |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| Origin | 2m9.787s | 14m19.011s | 4m41.635s |
| After | 2m12.891s | 13m50.629s | 4m39.853s |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| Origin | 2m9.787s | 14m19.011s | 4m41.635s |
| After | FAILED: ParseException line 2:4 missing KW_ROLE at 'hive' near 'hive'<br>line 2:8 missing EOF at '.' near 'hive' | | |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| False | 2m9.787s | 14m19.011s | 4m41.635s |
| True | 2m48.965s | 17m2.289s | 5m59.542s |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| False | 2m9.787s | 14m19.011s | 4m41.635s |
| True | 2m16.959s | 9m32.682s | 3m12.682s |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| False | 2m9.787s | 14m19.011s | 4m41.635s |
| True | 2m13.427s | 14m7.106s | 4m50.376s |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| False | 2m9.787s | 14m19.011s | 4m41.635s |
| True | 2m15.132s | 14m10.505s | 4m49.860s |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| 1M | 2m17.173s | 14m11.657s | 4m57.931s |
| 5M | 2m9.787s | 14m19.011s | 4m41.635s |
| 10M | 2m15.576s | 14m20.439s | 5m0.655s |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| Origin | 2m9.787s | 14m19.011s | 4m41.635s |
| After | 1m20.856s (37.7% up) | 7m22.865s (48.4% up) | 3m42.101s (21.1% up) |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| Origin | 20m8.207s | 24m50.490s | 12m47.515s |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| 8388608 (8MB) | 10m57.554s | 40+m | (skip) |
| 16777216 (16MB) | 7m27.275s | 40+m | (skip) |
| 33554432 (32MB) | 6m9.236s | 36m27.416s | (skip) |
| 67108864 (64MB default) | 5m36.872s | 31m55.655s | 遠大於12min |
| 134217728 (128MB) | 4m49.397s | 29m44.575s | 20m17.109s |
| 268435456 (256MB) | 5m43.634s | skip | skip |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| 10M(default) | 40min+ | | |
| 100M | 4m49.397s | 29m44.575s | 20m17.109s |
| 500M | 4m57.060s | 29min左右 | 同上 |
| 1G (export HADOOP_CLIENT_OPTS="-Xmx2g") | 4m56.741s | 29min左右 | 同上 |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| Origin | 44m56.835s | 45m18.217s | 34m43.947s |
| After | 4m49.397s | 29m44.575s | 20m17.109s |
| | Q22 | Q23 | Q24 |
|---|---|---|---|
| Origin | 23m7.372s | 26m49.460s | 15m57.590s |
| After | 2m31.808s (89% up) | 18m18.278s (31.8% up) | 12m55.900s (19.0% up) |
Map Reduce數量相關
set mapreduce.input.fileinputformat.split.maxsize=750000000;
set hive.exec.reducers.bytes.per.reducer=629145600;
set hive.tez.auto.reducer.parallelism = true;
執行計劃相關
set hive.execution.engine=mr;
set hive.cbo.enable=true;
set hive.optimize.reducededuplication=true;
set hive.optimize.reducededuplication.min.reducer=4;
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.smbjoin.cache.rows=10000;
set hive.auto.convert.join.noconditionaltask.size=894435328;
set hive.map.aggr=false;
set hive.map.aggr.hash.percentmemory=0.5;
set hive.fetch.task.conversion=more;
set hive.fetch.task.conversion.threshold=1073741824;
set hive.fetch.task.aggr=false;
set hive.optimize.bucketmapjoin= false;
set hive.optimize.bucketmapjoin.sortedmerge=false;
set hive.vectorized.execution.enabled=false;
set hive.vectorized.execution.reduce.enabled=false;
set hive.vectorized.groupby.checkinterval=4096;
set hive.vectorized.groupby.flush.percent=0.1;
動態分區相關
set hive.optimize.sort.dynamic.partition=false;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
小文件相關
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.merge.tezfiles=true;
set hive.merge.sparkfiles=false;
set hive.merge.size.per.task=536870912;
set hive.merge.smallfiles.avgsize=536870912;
set hive.merge.orcfile.stripe.level=true;
ORC相關
https://orc.apache.org/docs/hive-config.html
set hive.orc.splits.include.file.footer=false;
set hive.exec.orc.default.stripe.size=67108864;
統計相關
set hive.stats.autogather=true;
set hive.compute.query.using.stats=true;
set hive.stats.fetch.column.stats=true;
set hive.stats.fetch.partition.stats=true;
ANALYZE TABLE <table_name> COMPUTE STATISTICS;
ANALYZE TABLE <table_name> COMPUTE STATISTICS for COLUMNS;
ANALYZE TABLE <table_name> partition (coll="x") COMPUTE STATISTICS for COLUMNS;
其餘
set hive.limit.pushdown.memory.usage=0.1;
set hive.optimize.index.filter=true;
set mapreduce.input.fileinputformat.list-status.num-threads=5;