Prepare the CSV file:
# vim /tmp/cars.csv
year,make,model,comment,blank
"2012","Tesla","S","No comment",
"1997","Ford","E350","Go get one now they are going fast",
"2015","Chevy","Volt"
Scala code:
package com.liupu

import org.apache.spark.{ SparkContext, SparkConf }
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.orc._

/**
 * Loads a CSV file, keeps its `year` and `model` columns, writes them out as ORC
 * and creates an external Hive table on top of the written files.
 *
 * Expected program arguments, in order:
 *   1. sourceCsvPath - path of the CSV file to read (its first line is treated as the header)
 *   2. targetPath    - location the ORC output is saved to
 *   3. hiveTableName - name of the external Hive table created over targetPath
 *
 * Any additional arguments are ignored.
 */
object LoadCsvParams {

  private val Usage = "Usage: LoadCsvParams <sourceCsvPath> <targetPath> <hiveTableName>"

  /**
   * Job entry point.
   *
   * @param args command-line arguments as described on [[LoadCsvParams]]
   * @throws IllegalArgumentException if fewer than three arguments are supplied
   */
  def main(args: Array[String]): Unit = {
    // Fail early with a usage message instead of an ArrayIndexOutOfBoundsException.
    require(args.length >= 3, Usage)
    val sourceCsvPath = args(0)
    val targetPath = args(1)
    val hiveTableName = args(2)

    // No configuration is passed here; master/app name must be supplied externally
    // (e.g. by spark-submit).
    val sc = new SparkContext()
    try {
      val hiveContext = new HiveContext(sc)

      val df = hiveContext.read
        .format("com.databricks.spark.csv")
        .option("header", "true")
        .option("inferSchema", "true")
        .load(sourceCsvPath)

      val selectedData = df.select("year", "model")

      // "header" is a CSV option and has no meaning for ORC output, so it is not set here.
      selectedData.write.format("orc").save(targetPath)

      // NOTE(review): hiveTableName and targetPath are interpolated verbatim into the DDL,
      // so they must come from a trusted caller. The declared column types assume
      // schema inference yields an integer `year` — confirm against the input data.
      hiveContext.sql(
        s"create external table $hiveTableName(year int,model string) stored as orc location '$targetPath'")

      hiveContext.sql("show tables").collect().foreach(println)
    } finally {
      // Stop the context even when the load, write or DDL step throws.
      sc.stop()
    }
  }
}
Submit with spark-submit:
# Run the LoadCsvParams job on a local master.
# Positional arguments: <sourceCsvPath> <targetPath> <hiveTableName>
# NOTE(review): the job reads CSV through "com.databricks.spark.csv"; this presumably
# requires that package to be bundled in the jar or added via --packages — confirm.
# "local[*]" is quoted so the shell does not try to glob-expand it.
./spark-submit \
  --class com.liupu.LoadCsvParams \
  --master "local[*]" \
  /home/pl62716/scalaTest.jar \
  "/tmp/cars.csv" "/tmp/hive_cars2" "hive_cars2"