What is Atlas?
Atlas is a scalable and extensible set of core foundational governance services that enables enterprises to effectively meet compliance requirements within Hadoop, and allows integration with the entire enterprise data ecosystem.
Apache Atlas provides organizations with open metadata management and governance capabilities to build a catalog of their data assets, classify and govern those assets, and give data scientists, analysts, and data governance teams collaboration capabilities around them.
Without Atlas:
Table dependency problems in big data are hard to solve, and metadata management has to be built in-house, e.g. Hive lineage/dependency graphs.
There is no queryable tool for table dependencies, which makes locating errors inconvenient during business SQL development.
Official site: http://atlas.apache.org
Table-to-table lineage dependencies
Installation requires the following components: HDFS, YARN, ZooKeeper, Kafka, HBase, Solr, and Hive, plus a Python 2.7 environment.
Building requires Maven 3.5.0 or later, JDK 1.8.0_151 or later, and Python 2.7.
Align the component versions in the source pom.xml with the cluster:

<hadoop.version>3.0.0</hadoop.version>
<hbase.version>2.1.0</hbase.version>
<kafka.version>2.1.0</kafka.version>
<zookeeper.version>3.4.5</zookeeper.version>
apache-atlas-2.0.0-sources\apache-atlas-sources-2.0.0\distro\src\conf\atlas-application.properties
# HBase integration
atlas.graph.storage.hostname=cdh01.cm:2181,cdh02.cm:2181,cdh03.cm:2181

# Solr integration
atlas.graph.index.search.solr.zookeeper-url=cdh01.cm:2181,cdh02.cm:2181,cdh03.cm:2181/solr

# Kafka integration
# false = use the external Kafka
atlas.notification.embedded=false
atlas.kafka.zookeeper.connect=cdh01.cm:2181,cdh02.cm:2181,cdh03.cm:2181
atlas.kafka.bootstrap.servers=cdh01.cm:9092,cdh02.cm:9092,cdh03.cm:9092
atlas.kafka.zookeeper.session.timeout.ms=60000
atlas.kafka.zookeeper.connection.timeout.ms=30000
atlas.kafka.enable.auto.commit=true

# Other settings
# Access address and port; changing this value does not take effect -- the default is local port 21000, which conflicts with Impala
atlas.rest.address=http://cdh01.cm:21000
# If enabled and set to true, the setup steps run at server startup
atlas.server.run.setup.on.start=false
atlas.audit.hbase.zookeeper.quorum=cdh01.cm:2181,cdh02.cm:2181,cdh03.cm:2181

# Hive hook configuration (append at the bottom of the file)
# Any operation in Hive is picked up by the hook, which generates the corresponding events
# and sends them to the Kafka topics Atlas subscribes to; Atlas then builds and stores the metadata.
######### Hive Hook Configs #######
atlas.hook.hive.synchronous=false
atlas.hook.hive.numRetries=3
atlas.hook.hive.queueSize=10000
atlas.cluster.name=primary

# Username/password configuration (optional)
# Enable or disable the three authentication methods
atlas.authentication.method.kerberos=true|false
atlas.authentication.method.ldap=true|false
atlas.authentication.method.file=true

# vim users-credentials.properties (edit this file)
# >>> original
# username=group::sha256-password
admin=ADMIN::8c6976e5b5410415bde908bd4dee15dfb167a9c873fc4bb8a81f6f2ab448a918
rangertagsync=RANGER_TAG_SYNC::e3f67240f5117d1753c940dae9eea772d36ed5fe9bd9c94a300e40413f1afb9d
# <<<
# >>> changed to username bigdata123, password bigdata123
# username=group::sha256-password
bigdata123=ADMIN::aa0336d976ba6db36f33f75a20f68dd9035b1e0e2315c331c95c2dc19b2aac13
rangertagsync=RANGER_TAG_SYNC::e3f67240f5117d1753c940dae9eea772d36ed5fe9bd9c94a300e40413f1afb9d
# <<<
# Compute the sha256: echo -n "bigdata123" | sha256sum
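A small helper sketch for generating a users-credentials.properties entry from the echo -n ... | sha256sum command above (the bigdata123 account is just the example from this section):

# Print a ready-to-paste users-credentials.properties line
USER=bigdata123; GROUP=ADMIN; PASS=bigdata123
HASH=$(echo -n "$PASS" | sha256sum | awk '{print $1}')
echo "${USER}=${GROUP}::${HASH}"
# -> bigdata123=ADMIN::aa0336d976ba6db36f33f75a20f68dd9035b1e0e2315c331c95c2dc19b2aac13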
apache-atlas-2.0.0-sources\apache-atlas-sources-2.0.0\distro\src\conf\atlas-env.sh
# HBase integration -> the directory below is the HBase config directory under Atlas;
# the cluster's HBase configuration must be linked into it later (see the symlink step below)
export HBASE_CONF_DIR=/usr/local/src/atlas/apache-atlas-2.0.0/conf/hbase/conf
#export MANAGE_LOCAL_HBASE=false  (false = external ZK and HBase)
#export MANAGE_LOCAL_SOLR=false   (false = external Solr)

# Memory settings (tune to the production machines)
export ATLAS_SERVER_OPTS="-server -XX:SoftRefLRUPolicyMSPerMB=0 -XX:+CMSClassUnloadingEnabled -XX:+UseConcMarkSweepGC -XX:+CMSParallelRemarkEnabled -XX:+PrintTenuringDistribution -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=dumps/atlas_server.hprof -Xloggc:logs/gc-worker.log -verbose:gc -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=1m -XX:+PrintGCDetails -XX:+PrintHeapAtGC -XX:+PrintGCTimeStamps"

# Tuning for JDK 1.8 (the settings below require 16 GB of memory)
export ATLAS_SERVER_HEAP="-Xms15360m -Xmx15360m -XX:MaxNewSize=5120m -XX:MetaspaceSize=100M -XX:MaxMetaspaceSize=512m"
apache-atlas-2.0.0-sources\apache-atlas-sources-2.0.0\distro\src\conf\atlas-log4j.xml
<!-- Uncomment (enable) the following block -->
<appender name="perf_appender" class="org.apache.log4j.DailyRollingFileAppender">
  <param name="file" value="${atlas.log.dir}/atlas_perf.log" />
  <param name="datePattern" value="'.'yyyy-MM-dd" />
  <param name="append" value="true" />
  <layout class="org.apache.log4j.PatternLayout">
    <param name="ConversionPattern" value="%d|%t|%m%n" />
  </layout>
</appender>

<logger name="org.apache.atlas.perf" additivity="false">
  <level value="debug" />
  <appender-ref ref="perf_appender" />
</logger>
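Once the server is running (later in this guide), the perf log can be followed to confirm the appender works; this assumes atlas.log.dir resolves to logs/ under the install directory:

tail -f /usr/local/src/atlas/apache-atlas-2.0.0/logs/atlas_perf.log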
export MAVEN_OPTS="-Xms2g -Xmx2g"
mvn clean -DskipTests install
mvn clean -DskipTests package -Pdist
If errors occur, rerun with the -X flag to print details and fix them accordingly; they are usually just network or Node proxy issues and the like.
The build creates the following files, which are used to install Apache Atlas.
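For reference, the distribution tarballs typically end up under distro/target/ in the source tree (names assume the 2.0.0 build above):

ls apache-atlas-sources-2.0.0/distro/target/*.tar.gz
# apache-atlas-2.0.0-bin.tar.gz is the one copied to the install directory in the next step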
mkdir /usr/local/src/atlas
cd /usr/local/src/atlas
# Copy apache-atlas-2.0.0-bin.tar.gz into the install directory
tar -zxvf apache-atlas-2.0.0-bin.tar.gz
cd apache-atlas-2.0.0/conf
ln -s /etc/hbase/conf/ /usr/local/src/atlas/apache-atlas-2.0.0/conf/hbase/
#ln -s /opt/cloudera/parcels/CDH-6.2.0-1.cdh6.2.0.p0.967373/lib/hbase/conf/ /usr/local/src/atlas/apache-atlas-2.0.0/conf/hbase/
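A quick sanity check that the link matches the HBASE_CONF_DIR set in atlas-env.sh (a sketch; the exact file names depend on the cluster):

ls -l /usr/local/src/atlas/apache-atlas-2.0.0/conf/hbase/conf/
# expect the cluster's hbase-site.xml, core-site.xml, etc.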
Download Solr 7.5.0: http://archive.apache.org/dist/lucene/solr/7.5.0/
The official site recommends 3 Solr nodes and 32 GB of memory.
mkdir /usr/local/src/solr
cd /usr/local/src/solr
tar -zxvf solr-7.5.0.tgz
cd solr-7.5.0/
# In solr-7.5.0/bin/solr.in.sh, set:
ZK_HOST="cdh01.cm:2181,cdh02.cm:2181,cdh03.cm:2181"
SOLR_HOST="cdh01.cm"
SOLR_PORT=8983
scp -r /usr/local/src/solr/solr-7.5.0 root@cdh02.cm:/usr/local/src/solr
scp -r /usr/local/src/solr/solr-7.5.0 root@cdh03.cm:/usr/local/src/solr
After distributing, change SOLR_HOST to the corresponding machine on each node, e.g. with the one-liner below.
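A hypothetical one-liner for cdh02.cm, assuming the settings live in bin/solr.in.sh as above; repeat with the matching hostname on each node:

sed -i 's/^SOLR_HOST=.*/SOLR_HOST="cdh02.cm"/' /usr/local/src/solr/solr-7.5.0/bin/solr.in.sh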
Copy the apache-atlas-2.0.0/conf/solr directory into /usr/local/src/solr/solr-7.5.0/ and rename it atlas-solr:
scp -r /usr/local/src/atlas/apache-atlas-2.0.0/conf/solr/ root@cdh01.cm:/usr/local/src/solr/solr-7.5.0/
scp -r /usr/local/src/atlas/apache-atlas-2.0.0/conf/solr/ root@cdh02.cm:/usr/local/src/solr/solr-7.5.0/
scp -r /usr/local/src/atlas/apache-atlas-2.0.0/conf/solr/ root@cdh03.cm:/usr/local/src/solr/solr-7.5.0/
cd /usr/local/src/solr/solr-7.5.0/
mv solr/ atlas-solr
# Create the user and grant permissions on all nodes
useradd atlas && echo atlas | passwd --stdin atlas
chown -R atlas:atlas /usr/local/src/solr/
# Start on all nodes (cloud mode)
su atlas
/usr/local/src/solr/solr-7.5.0/bin/solr start -c -z cdh01.cm:2181,cdh02.cm:2181,cdh03.cm:2181 -p 8983
/usr/local/src/solr/solr-7.5.0/bin/solr create -c vertex_index -d /usr/local/src/solr/solr-7.5.0/atlas-solr -shards 3 -replicationFactor 2
/usr/local/src/solr/solr-7.5.0/bin/solr create -c edge_index -d /usr/local/src/solr/solr-7.5.0/atlas-solr -shards 3 -replicationFactor 2
/usr/local/src/solr/solr-7.5.0/bin/solr create -c fulltext_index -d /usr/local/src/solr/solr-7.5.0/atlas-solr -shards 3 -replicationFactor 2
# If a collection was created incorrectly, delete it with:
#   /opt/cloudera/parcels/CDH/lib/solr/bin/solr delete -c ${collection_name}
# Switch back to root for the remaining configuration
su root
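The three collections can be verified through the standard Solr Collections API (a sketch against the node started above):

curl "http://cdh01.cm:8983/solr/admin/collections?action=LIST"
# expect vertex_index, edge_index and fulltext_index in the response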
kafka-topics --zookeeper cdh01.cm:2181,cdh02.cm:2181,cdh03.cm:2181 --create --replication-factor 3 --partitions 3 --topic _HOATLASOK
kafka-topics --zookeeper cdh01.cm:2181,cdh02.cm:2181,cdh03.cm:2181 --create --replication-factor 3 --partitions 3 --topic ATLAS_ENTITIES
kafka-topics --zookeeper cdh01.cm:2181,cdh02.cm:2181,cdh03.cm:2181 --create --replication-factor 3 --partitions 3 --topic ATLAS_HOOK
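To confirm the topics exist, and later to watch hook events arriving from Hive, a sketch using the same CDH command-line tools:

kafka-topics --zookeeper cdh01.cm:2181,cdh02.cm:2181,cdh03.cm:2181 --list
# once the Hive hook is wired up, events can be observed with:
kafka-console-consumer --bootstrap-server cdh01.cm:9092,cdh02.cm:9092,cdh03.cm:9092 --topic ATLAS_HOOK --from-beginning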
Pack atlas-application.properties into the hook's classloader jar:

cd /usr/local/src/atlas/apache-atlas-2.0.0/hook/hive
zip -u /usr/local/src/atlas/apache-atlas-2.0.0/hook/hive/atlas-plugin-classloader-2.0.0.jar /usr/local/src/atlas/apache-atlas-2.0.0/conf/atlas-application.properties
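A quick check that the properties file actually landed inside the jar (note that zip -u with an absolute path stores it under that full directory path):

unzip -l /usr/local/src/atlas/apache-atlas-2.0.0/hook/hive/atlas-plugin-classloader-2.0.0.jar | grep atlas-application.properties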
Add the following to hive-site.xml:

<property>
  <name>hive.exec.post.hooks</name>
  <value>org.apache.atlas.hive.hook.HiveHook</value>
</property>
<property>
  <name>hive.reloadable.aux.jars.path</name>
  <value>/usr/local/src/atlas/apache-atlas-2.0.0/hook/hive</value>
</property>
And in the Hive client environment (hive-env.sh):

LANG=en_US.UTF-8
HIVE_AUX_JARS_PATH=/usr/local/src/atlas/apache-atlas-2.0.0/hook/hive
Distribute Atlas to the other nodes:

scp -r /usr/local/src/atlas/apache-atlas-2.0.0 root@cdh02.cm:/usr/local/src/atlas/
scp -r /usr/local/src/atlas/apache-atlas-2.0.0 root@cdh03.cm:/usr/local/src/atlas/
Restart the cluster.
scp /usr/local/src/atlas/apache-atlas-2.0.0/conf/atlas-application.properties root@cdh01.cm:/etc/hive/conf
scp /usr/local/src/atlas/apache-atlas-2.0.0/conf/atlas-application.properties root@cdh02.cm:/etc/hive/conf
scp /usr/local/src/atlas/apache-atlas-2.0.0/conf/atlas-application.properties root@cdh03.cm:/etc/hive/conf
# Start
./bin/atlas_start.py
# Stop: ./bin/atlas_stop.py
Watch the logs for errors while it starts; the main log is application.log.
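Once startup finishes, a simple liveness check against the REST API (use the modified credentials if you changed them from admin/admin):

curl -u admin:admin http://cdh01.cm:21000/api/atlas/admin/version
# a JSON response with the version string means the server is up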
Set the Hive environment so import-hive.sh can find Hive:

vim /etc/profile
#>>>
#hive
export HIVE_HOME=/opt/cloudera/parcels/CDH-6.2.0-1.cdh6.2.0.p0.967373/lib/hive
export HIVE_CONF_DIR=/etc/hive/conf
export PATH=$HIVE_HOME/bin:$PATH
#<<<
source /etc/profile
./bin/import-hive.sh
# Enter username: admin; enter password: admin (use the modified credentials if you changed them)
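After the import completes, the existing Hive metadata should be searchable through the v2 REST API as well as the web UI (a sketch; hive_db and hive_table are Atlas's built-in Hive types):

curl -u admin:admin "http://cdh01.cm:21000/api/atlas/v2/search/basic?typeName=hive_table&limit=10"
# returns the first ten imported Hive tables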