概述
- 公司集羣崩了,Hive 訪問不上,但又需要各表的建表語句,於是直接從存放 Hive 元數據的 MySQL 庫裏把建表語句還原出來
- 功能是一次性的(不要有下次了),就用python寫了
適用場景
- Hive版本:
Hive release version 1.1.0
其餘自行測試
- CDH版本:
1.1.0-cdh5.12.0
其餘自行測試
- 只支持普通的 Hive 表(不帶分桶);分桶表沒有處理
- 壓縮相關的設置也沒有處理,因此壓縮表生成的建表語句並不完整
前期準備
- 需要對 MySQL 裏的 Hive 元數據庫結構有一定的瞭解,可參考文章:《Hive的元數據表結構詳解》
- MySQL 的連接信息請根據自己的配置更改
代碼
#!/usr/bin/env python
#-*- coding:utf8 -*-
# 從mysql中提取hive建表語句
import os
import fileinput
import datetime
import mysql.connector
# Markers looked for in SDS.INPUT_FORMAT, paired with the "STORED AS" keyword
# to emit.  Checked in order; an unrecognised input format falls back to
# TEXTFILE.
_STORAGE_FORMATS = (
    ("Orc", "ORC"),
    ("RCFile", "RCFILE"),
    ("Parquet", "PARQUET"),
    ("SequenceFile", "SEQUENCEFILE"),
    ("Avro", "AVRO"),
)

# Characters that have to be written as an escape inside a quoted delimiter.
_DELIMITER_ESCAPES = {
    "\\": "\\\\",
    "'": "\\'",
    "\t": "\\t",
    "\n": "\\n",
    "\r": "\\r",
}


def hive_create_table(conn=None, output_path="foo.txt"):
    """Rebuild CREATE TABLE statements from the Hive metastore tables.

    Reads DBS, TBLS, SDS, COLUMNS_V2, PARTITION_KEYS and SERDE_PARAMS and
    writes, for every database, a header line followed by one CREATE TABLE
    statement per table (columns, partition keys, field delimiter and
    storage format).  Bucketing, compression and other table properties are
    not reproduced.

    Args:
        conn: An open DB-API connection to the metastore database.  When
            omitted, a MySQL connection to database ``hive`` on ``localhost``
            as ``root`` is opened and closed again before returning; edit
            those settings to match your installation.  A connection passed
            in by the caller is left open.
        output_path: File the statements are written to (overwritten).

    Raises:
        Whatever the database driver or ``open`` raise; the file, the cursor
        and a connection opened here are released in that case as well.
    """
    owns_connection = conn is None
    if owns_connection:
        conn = mysql.connector.connect(host="localhost", user='root', database='hive')
    try:
        cursor = conn.cursor()
        try:
            with open(output_path, "w", encoding="utf-8") as out:
                # Ordered by name so that two runs produce the same file.
                databases = _fetch_all(cursor, "select DB_ID, NAME from DBS order by NAME")
                for db_id, db_name in databases:
                    out.write("\n===========數據庫:" + _text(db_name) + "===========\n")
                    tables = _fetch_all(
                        cursor,
                        "select TBL_ID, TBL_NAME, SD_ID from TBLS"
                        " where DB_ID=%d order by TBL_NAME" % int(db_id),
                    )
                    for tbl_id, tbl_name, sd_id in tables:
                        out.write(_describe_table(cursor, tbl_id, _text(tbl_name), sd_id))
        finally:
            cursor.close()
    finally:
        if owns_connection:
            conn.close()


def _fetch_all(cursor, sql):
    """Run *sql* and return every row, so no result is left unread."""
    cursor.execute(sql)
    return cursor.fetchall()


def _text(value):
    """Return a metastore cell as ``str``.

    Some driver versions hand back bytes/bytearray for binary-collated
    columns, so those are decoded instead of being passed through ``str``.
    """
    if isinstance(value, (bytes, bytearray)):
        return bytes(value).decode("utf-8")
    return str(value)


def _describe_table(cursor, tbl_id, tbl_name, sd_id):
    """Query the metastore rows of one table and return its DDL text."""
    # Ids are coerced with int() and formatted into the statement rather than
    # bound as parameters: placeholder syntax differs between DB-API drivers,
    # and an int cannot carry SQL.
    storage_rows = []
    if sd_id is not None:
        storage_rows = _fetch_all(
            cursor,
            "select CD_ID, SERDE_ID, INPUT_FORMAT from SDS where SD_ID=%d" % int(sd_id),
        )
    if not storage_rows:
        # Without a storage descriptor nothing below can be looked up; leave
        # a visible note in the output instead of aborting the whole export.
        return "\n-- `" + tbl_name + "`: no storage descriptor found, table skipped\n"
    cd_id, serde_id, input_format = storage_rows[0]

    columns = []
    if cd_id is not None:
        columns = _fetch_all(
            cursor,
            "select COLUMN_NAME,TYPE_NAME from COLUMNS_V2"
            " where CD_ID=%d order by INTEGER_IDX" % int(cd_id),
        )
    partition_keys = _fetch_all(
        cursor,
        "select PKEY_NAME,PKEY_TYPE from PARTITION_KEYS"
        " where TBL_ID=%d order by INTEGER_IDX" % int(tbl_id),
    )

    field_delim = None
    if serde_id is not None:
        delim_rows = _fetch_all(
            cursor,
            "select PARAM_VALUE from SERDE_PARAMS"
            " where SERDE_ID=%d and PARAM_KEY='field.delim'" % int(serde_id),
        )
        if delim_rows and delim_rows[0][0] is not None:
            field_delim = _text(delim_rows[0][0])

    return _render_create_table(tbl_name, columns, partition_keys, field_delim, input_format)


def _render_fields(rows):
    """Format (name, type) rows as the comma-separated body of a field list."""
    return ",\n".join(" `" + _text(name) + "` " + _text(type_name) for name, type_name in rows)


def _escape_delimiter(delim):
    """Return *delim* in a form that is safe inside a single-quoted literal.

    Tab, newline, carriage return, backslash and quote use their backslash
    escapes; any other control character is written in three-digit octal
    (for example ``\\001``).
    """
    escaped = []
    for ch in delim:
        if ch in _DELIMITER_ESCAPES:
            escaped.append(_DELIMITER_ESCAPES[ch])
        elif ord(ch) < 0x20 or ord(ch) == 0x7F:
            escaped.append("\\%03o" % ord(ch))
        else:
            escaped.append(ch)
    return "".join(escaped)


def _storage_keyword(input_format):
    """Map an SDS.INPUT_FORMAT value to the keyword used after STORED AS."""
    class_name = _text(input_format)
    for marker, keyword in _STORAGE_FORMATS:
        if marker in class_name:
            return keyword
    return "TEXTFILE"


def _render_create_table(name, columns, partition_keys, field_delim, input_format):
    """Assemble the CREATE TABLE text for one table from already-fetched rows."""
    # The column list is always closed, even when no column rows were found.
    pieces = ["\nCREATE TABLE `" + name + "`(\n", _render_fields(columns), ")"]
    if partition_keys:
        pieces += ["\nPARTITIONED BY (\n", _render_fields(partition_keys), ")\n"]
    else:
        pieces.append("\n")
    if field_delim is not None:
        pieces.append("ROW FORMAT DELIMITED\n")
        pieces.append("FIELDS TERMINATED BY '" + _escape_delimiter(field_delim) + "'\n")
    pieces.append("STORED AS " + _storage_keyword(input_format) + ";\n")
    return "".join(pieces)
# Run the export only when this file is executed as a script, so that
# importing the module does not connect to the database or write a file.
if __name__ == "__main__":
    hive_create_table()