發現用 Spark 寫表 join 比 Flink 簡單不少,至少 schema 能夠省了。下面是一個用 Spark 從 MySQL 讀兩張表做 join 的例子:
/**
 * Joins the MySQL {@code user} and {@code house} tables on {@code uid} via
 * Spark SQL and writes the matched (user_name, house_name) pairs as CSV to
 * {@code /home/ziroom/house-user}.
 *
 * <p>NOTE(review): the JDBC URLs ({@code jdbc:mysql://*} / {@code jdbc:mysql://})
 * look like redacted placeholders — fill in real host/port/database before
 * running. Credentials are hardcoded; move them to configuration for real use.
 *
 * @param args unused
 */
public static void main(String[] args) {
    SparkSession s = SparkSession.builder().appName("rec").getOrCreate();
    try {
        // "com.mysql.jdbc.Driver" is the legacy Connector/J class name;
        // on Connector/J 8+ this should be "com.mysql.cj.jdbc.Driver".
        Dataset<Row> user = s.read().format("jdbc")
                .option("driver", "com.mysql.jdbc.Driver")
                .option("url", "jdbc:mysql://*")
                .option("dbtable", "user")
                .option("user", "1")
                .option("password", "1")
                .load();
        Dataset<Row> house = s.read().format("jdbc")
                .option("driver", "com.mysql.jdbc.Driver")
                .option("url", "jdbc:mysql://")
                .option("dbtable", "house")
                .option("user", "1")
                .option("password", "1")
                .load();

        // Cache both sides so each table is fetched from MySQL once and
        // reused by the join instead of being re-read per stage.
        user.cache();
        house.cache();
        user.createOrReplaceTempView("user");
        house.createOrReplaceTempView("house");

        // Explicit ON clause instead of the original implicit cross join
        // filtered by WHERE; the optimizer produces the same inner join,
        // but the intent is clearer and less error-prone to extend.
        Dataset<Row> temp = s.sql(
                "select user.user_name, house.house_name "
                        + "from user inner join house on user.uid = house.uid");
        temp.write().csv("/home/ziroom/house-user");
    } finally {
        // The original never released the session (resource leak on driver
        // and executors); always stop it, even if the job fails.
        s.stop();
    }
}