package spark_read;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI;
import com.mongodb.spark.MongoSpark;
import com.mongodb.spark.rdd.api.java.JavaMongoRDD;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;
import org.bson.Document;
public class Read_Mongo {apache
public static void main(String[] args) {
JavaSparkContext jsc = createJavaSparkContext(args);json
JavaMongoRDD<Document> mongoRDD = MongoSpark.load(jsc);api
mongoRDD.foreach(new VoidFunction<Document>() {app
private static final long serialVersionUID = 1L;spa
public void call(Document document) throws Exception {ip
String data = document.toJson();get
JSONObject jsonObject = JSON.parseObject(data);input
JSONArray src = jsonObject.getJSONArray("_src");
JSONObject src_obj = (JSONObject) src.get(0);
System.out.println(src_obj.getString("site"));
}
});
}
/**
建立spark鏈接,並設置mongodb讀寫路徑信息
*/
private static JavaSparkContext createJavaSparkContext(final String[] args) {
String uri = getMongoClientURI(args);
//dropDatabase(uri);
SparkConf conf = new SparkConf()
.setMaster("local")
.setAppName("MongoSparkConnectorTest")
.set("spark.app.id", "MongoSparkConnectorTour")
.set("spark.mongodb.input.uri", uri)
.set("spark.mongodb.output.uri", uri);
return new JavaSparkContext(conf);
}
/**
刪除mongo已存在文件
*/
private static void dropDatabase(final String connectionString) {
MongoClientURI uri = new MongoClientURI(connectionString);
new MongoClient(uri).dropDatabase(uri.getDatabase());
}
/**
獲取mondo讀寫路徑
*/
private static String getMongoClientURI(final String[] args) {
String uri;
if (args.length == 0) {
uri = "mongodb://ip:27017/mongo庫名.表名"; // default
} else {
uri = args[0];
}
return uri;
}
}