1.若是是格式化成Json的話直接 函數
val rdd = df.toJSON.rddorm
2.若是要指定格式須要自定義函數以下:ip
//格式化具體字段條目string
def formatItem(p:(StructField,Any)):String={
p match {
case (sf,a) =>
sf.dataType match {
case StringType => "\"" + sf.name + "\":\"" + a + "\""
case IntegerType => "\"" + sf.name + "\":" + a
case LongType => "\"" + sf.name + "\":" + a
case StructType(s) => "\"" + sf.name + "\":" + formatStruct(s, a.asInstanceOf[Row])
}
}
}
form
//格式化整行數據格式
def formatStruct(schema:Seq[StructField],r:Row)= {
val paired = schema.zip(r.toSeq)
"{" + paired.foldLeft("")((s,p) => (if(s == "") "" else (s + ", ")) + formatItem(p)) + "}"
}
foreach
//格式化整個DF
def formatDataFrame(st:StructType,srdd:DataFrame)={
srdd.rdd.map(formatStruct(st.fields,_))
}map
調用示例:數據
val strings = formatDataFrame(df.schema, df)cas
strings.foreach { println }data