目錄:
目錄見文章1
本文是基於 Windows 環境來操作的;在 Linux 下,mysql-connector-java-5.1.46.jar 包的放置位置有講究(見文末附錄)。
MR 程序:
import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.util.Iterator; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.lib.db.DBConfiguration; import org.apache.hadoop.mapred.lib.db.DBInputFormat; import org.apache.hadoop.mapred.lib.db.DBWritable; /** * @author DELL_pc * @date 2017年6月27日 */ public class DbaMysql { public static class DBAccessMapper extends MapReduceBase implements Mapper<LongWritable,StudentRecord , IntWritable, Text> { public void map(LongWritable key, StudentRecord value, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { // TODO Auto-generated method stub output.collect(new IntWritable(value.id), new Text(value.toString())); } } public static class DBAccessReduce extends MapReduceBase implements Reducer<IntWritable, Text, IntWritable, Text> { public void reduce(IntWritable key, Iterator<Text> values, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { while (values.hasNext()) { output.collect(key, values.next()); } } } public static void main(String[] args) { System.setProperty("hadoop.home.dir", "D:\\hadoop-2.7.6");//這一行必定要 Configuration configuration=new Configuration(); JobConf jobConf=new JobConf(configuration); jobConf.setOutputKeyClass(IntWritable.class); 
jobConf.setOutputValueClass(Text.class); jobConf.setInputFormat(DBInputFormat.class); // String[] fields={"id,name"}; // DBInputFormat.setInput(jobConf, StudentRecord.class, "bbb", "length(name)>2", "",fields );//bbb是表名,讀取方式1 DBInputFormat.setInput(jobConf, StudentRecord.class,"select id,name from bbb","select 3 from dual");//讀取方式2 DBConfiguration.configureDB(jobConf, "com.mysql.jdbc.Driver", "jdbc:mysql://localhost:3306/test","root","admin"); jobConf.setMapperClass(DBAccessMapper.class); jobConf.setReducerClass(DBAccessReduce.class); FileOutputFormat.setOutputPath(jobConf,new Path("output_mysql")); try { JobClient.runJob(jobConf); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public static class StudentRecord implements Writable, DBWritable { int id; String name; //構造方法 public StudentRecord() { } //Writable接口是對數據流進行操做的,因此輸入是DataInput類對象 public void readFields(DataInput in) throws IOException { this.id = in.readInt(); //輸入流中的讀取下一個整數,並返回 this.name = Text.readString(in); } public String toString() { return new String(this.id + " " + this.name); } //DBWritable負責對數據庫進行操做,因此輸出格式是PreparedStatement //PreparedStatement接口繼承並擴展了Statement接口,用來執行動態的SQL語句,即包含參數的SQL語句 public void write(PreparedStatement stmt) throws SQLException { stmt.setInt(1, this.id); stmt.setString(2, this.name); } //DBWritable負責對數據庫進行操做,輸入格式是ResultSet // ResultSet接口相似於一張數據表,用來暫時存放從數據庫查詢操做所得到的結果集 public void readFields(ResultSet result) throws SQLException { this.id = result.getInt(1); this.name = result.getString(2); } //Writable接口是對數據流進行操做的,因此輸出是DataOutput類對象 public void write(DataOutput out) throws IOException { out.writeInt(this.id); Text.writeString(out, this.name); } } }
pom.xml:
<dependencies>
    <!-- Hadoop common library -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.7.3</version>
    </dependency>

    <!-- Hadoop MapReduce client core -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>2.7.3</version>
    </dependency>

    <!-- MySQL JDBC driver: https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.43</version>
    </dependency>
</dependencies>
————————————————————————————————————————————
附錄:
本程序在 Windows 上運行成功後,把打好的 jar 包 mstest.jar(jar 包內已包含 mysql 驅動類)丟到 Linux 的 hadoop 上跑,會報錯,提示:
18/07/20 00:34:07 WARN mapreduce.JobResourceUploader: No job jar file set. User classes may not be found. See Job or Job#setJar(String). 18/07/20 00:34:07 INFO mapreduce.JobSubmitter: Cleaning up the staging area file:/usr/software/hadoop/tmp/mapred/staging/root1889661768/.staging/job_local1889661768_0001 Exception in thread "main" java.lang.RuntimeException: java.lang.RuntimeException: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver at org.apache.hadoop.mapreduce.lib.db.DBInputFormat.setConf(DBInputFormat.java:171) at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:76) at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
即使手動丟個 mysql-connector-java-5.1.46.jar 到 java_home 下也沒用,丟到 hadoop_home 下也沒用,
最終丟的目錄:
[hadoop_home]/share/hadoop/yarn/
然後,mysql 驅動就能被 hadoop 的 MapReduce 作業用到了