5. Hadoop: reading data from MySQL and writing it to HDFS

Contents:

See article 1 of this series for the table of contents.

This article works on Windows. On Linux, where the mysql-connector-java-5.1.46.jar package is placed matters; see the appendix at the end.

The MapReduce program:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;
import org.apache.hadoop.mapred.lib.db.DBWritable;

/**
 * @author DELL_pc
 *  @date 2017-06-27
 */
public class DbaMysql {
     public static class DBAccessMapper extends MapReduceBase implements Mapper<LongWritable, StudentRecord, IntWritable, Text>
     {
        public void map(LongWritable key, StudentRecord value, OutputCollector<IntWritable, Text> output,
                Reporter reporter) throws IOException {
            // emit the record id as the key and its "id name" text form as the value
            output.collect(new IntWritable(value.id), new Text(value.toString()));
        }
     }
      public static class DBAccessReduce extends MapReduceBase implements Reducer<IntWritable, Text, IntWritable, Text>
      {
        public void reduce(IntWritable key, Iterator<Text> values, OutputCollector<IntWritable, Text> output,
                Reporter reporter) throws IOException {
              while (values.hasNext()) {
                  output.collect(key, values.next());
                }
        }
      }
     public static void main(String[] args) {
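         // on Windows, Hadoop looks for winutils.exe under %hadoop.home.dir%\bin, so point this at a local Hadoop distribution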
         System.setProperty("hadoop.home.dir", "D:\\hadoop-2.7.6"); // this line is required when running on Windows
        Configuration configuration=new Configuration();
        JobConf jobConf=new JobConf(configuration);

        jobConf.setOutputKeyClass(IntWritable.class);
        jobConf.setOutputValueClass(Text.class);
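        // read input records from a JDBC source (the MySQL table) instead of from HDFS files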
        jobConf.setInputFormat(DBInputFormat.class);

//        String[] fields = {"id", "name"};
//        DBInputFormat.setInput(jobConf, StudentRecord.class, "bbb", "length(name)>2", "", fields); // bbb is the table name; input method 1

        DBInputFormat.setInput(jobConf, StudentRecord.class, "select id,name from bbb", "select 3 from dual"); // input method 2: an explicit record query plus a count query
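        // (the count query's result tells DBInputFormat how many records to expect when it computes input splits; "select 3 from dual" hard-codes that count to 3)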

        DBConfiguration.configureDB(jobConf, "com.mysql.jdbc.Driver", "jdbc:mysql://localhost:3306/test","root","admin");
        jobConf.setMapperClass(DBAccessMapper.class);
        jobConf.setReducerClass(DBAccessReduce.class);
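        // reducer output is written as plain text files under the output_mysql directory (TextOutputFormat is the default output format)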
        FileOutputFormat.setOutputPath(jobConf,new Path("output_mysql"));
        try {
            JobClient.runJob(jobConf);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    public static class StudentRecord implements Writable, DBWritable {
        int id;
        String name;
        // no-argument constructor
        public StudentRecord() { }
        // Writable works on byte streams, so reading takes a DataInput object
        public void readFields(DataInput in) throws IOException {
            this.id = in.readInt(); // read the next int from the input stream
            this.name = Text.readString(in);
        }
        public String toString() {
            return this.id + " " + this.name;
        }
        // DBWritable writes to the database, so output goes through a PreparedStatement
        // PreparedStatement extends Statement and executes parameterized (dynamic) SQL statements
        public void write(PreparedStatement stmt) throws SQLException {
            stmt.setInt(1, this.id);
            stmt.setString(2, this.name);
        }
        // when reading, DBWritable is filled from a ResultSet
        // a ResultSet is like a table that temporarily holds the rows returned by a database query
        public void readFields(ResultSet result) throws SQLException {
            this.id = result.getInt(1);
            this.name = result.getString(2);
        }
        // Writable works on byte streams, so writing takes a DataOutput object
        public void write(DataOutput out) throws IOException {
            out.writeInt(this.id);
            Text.writeString(out, this.name);
        }
    }
}
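The job above assumes a MySQL database named test at localhost:3306 (user root, password admin) containing a table bbb with an integer id column and a string name column. The exact DDL is not shown here, so the following is only a sketch with an assumed schema and made-up sample rows, in case you want to reproduce the example with plain JDBC:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class PrepareTestTable {
    public static void main(String[] args) throws Exception {
        // same driver, URL and credentials as in DbaMysql above
        Class.forName("com.mysql.jdbc.Driver");
        try (Connection conn = DriverManager.getConnection(
                "jdbc:mysql://localhost:3306/test", "root", "admin");
             Statement stmt = conn.createStatement()) {
            // assumed schema: the MR job only needs an int id and a string name
            stmt.executeUpdate("CREATE TABLE IF NOT EXISTS bbb (id INT, name VARCHAR(50))");
            // hypothetical sample rows
            stmt.executeUpdate("INSERT INTO bbb (id, name) VALUES (1, 'zhangsan')");
            stmt.executeUpdate("INSERT INTO bbb (id, name) VALUES (2, 'lisi')");
            stmt.executeUpdate("INSERT INTO bbb (id, name) VALUES (3, 'wangwu')");
        }
    }
}

With three rows like these, the hard-coded count query (select 3 from dual) matches the real row count, and the job writes one line per record of the form "<id>\t<id> <name>" into the output_mysql directory.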

 

pom.xml:

<dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.7.3</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.43</version>
        </dependency>
    </dependencies>

 

 

————————————————————————————————————————————

Appendix:

After this program succeeded on Windows, I packaged it as mstest.jar (the MySQL driver classes are already bundled inside the jar) and ran it on Hadoop on Linux, where it failed with the following error:

18/07/20 00:34:07 WARN mapreduce.JobResourceUploader: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
18/07/20 00:34:07 INFO mapreduce.JobSubmitter: Cleaning up the staging area file:/usr/software/hadoop/tmp/mapred/staging/root1889661768/.staging/job_local1889661768_0001
Exception in thread "main" java.lang.RuntimeException: java.lang.RuntimeException: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver
    at org.apache.hadoop.mapreduce.lib.db.DBInputFormat.setConf(DBInputFormat.java:171)
    at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:76)
    at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
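
As an aside (not the fix this appendix ends up using), the first warning in that log is about the missing job jar; with the old mapred API that is normally addressed by telling the JobConf which jar to ship, e.g.:

        jobConf.setJarByClass(DbaMysql.class);

The ClassNotFoundException for com.mysql.jdbc.Driver is a separate classpath problem, which is what the rest of this appendix is about.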

Even manually dropping a mysql-connector-java-5.1.46.jar into JAVA_HOME did not help, and neither did dropping it into HADOOP_HOME.

The directory where it finally had to go:

[hadoop_home]/share/hadoop/yarn/

After that, the MySQL driver is found and the job can read from MySQL and write to HDFS.
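A quick way to confirm the driver is actually visible (a hedged sanity check, not part of the original program) is to try loading the driver class explicitly at the start of main, before the job is configured; if this throws, the classpath problem above is still present:

        // fail fast if mysql-connector-java is not on the classpath
        try {
            Class.forName("com.mysql.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("mysql-connector-java is not on the classpath", e);
        }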

Reference: 《Sqoop安裝與MapReduce讀MySql寫HDFS加載不了JDBC驅動問題》 (Sqoop installation, and the problem of MapReduce reading MySQL / writing HDFS failing to load the JDBC driver).
