在 Hive 中建表,指定存儲格式爲 ORC:
-- Creates the target table with ORC as its storage format.
-- `user` is backquoted because USER is a reserved keyword in newer Hive versions.
create table `user`(id int, name string) stored as orc;
用 Spark 讀取 JSON 文件並寫出 ORC 文件:
// Reads line-delimited JSON records from HDFS, converts each record into a
// Row of (id, name) and writes the result as ORC files through a HiveContext.
// Expects `sc` (a SparkContext), the Spark SQL types (StructType, StructField,
// StringType, IntegerType, Row) and JSONObject to already be in scope,
// e.g. inside spark-shell with the matching imports.
val jsons = "hdfs://localhost:9000/test/artist_orc.json"
val people = sc.textFile(jsons)

// Schema is derived from the space-separated field list: "name" is a string
// column, every other field is treated as an integer column. All columns are nullable.
val schemaString = "id name"
val schema = StructType(schemaString.split(" ").map {
  case "name"    => StructField("name", StringType, nullable = true)
  case fieldName => StructField(fieldName, IntegerType, nullable = true)
})

// One JSON object per input line. Parsing and row construction are done in a
// single map so no intermediate RDD of JSONObject is needed.
val rowRDD = people.map { line =>
  val record = JSONObject.fromObject(line)
  // `id` must be present and numeric; a missing/invalid id fails the task, as before.
  val id = record.get("id").toString.toInt
  // The schema declares `name` as StringType, so store a real String; a missing key stays null.
  val name = Option(record.get("name")).map(_.toString).orNull
  Row(id, name)
}

val hiveContext = new org.apache.spark.sql.hive.HiveContext(sc)
val peopleSchemaRDD = hiveContext.createDataFrame(rowRDD, schema)

// NOTE(review): save() uses the default save mode, which fails if the target path
// already exists — confirm this is intended. The output directory is under
// `artist_orc`, not the `user` table created earlier in this note — verify the location.
peopleSchemaRDD.write.format("orc").save("hdfs://localhost:9000/user/xb/warehouse/artist_orc/adf")