springboot + spark + hadoop

能夠在一個Controller裏面實現spark抓取的代碼:

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.servlet.ModelAndView;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.SparkConf;
import org.springframework.web.bind.annotation.ResponseBody;

/**
 * Demo controller that reads a local CSV file through a local-mode Spark
 * session and then renders the "dataimport" view.
 *
 * @author flash勝龍
 */
@RestController
public class DataFigureController {

    /**
     * Loads D:\book.csv with Spark SQL running in local[*] mode, prints the
     * resulting DataFrame to stdout, and returns the "dataimport" view.
     *
     * @return a {@link ModelAndView} pointing at the "dataimport" template
     */
    @RequestMapping("/dataimportlocal.html")
    public ModelAndView dataimportlocal() {
        // On Windows, Spark/Hadoop needs winutils.exe from a local Hadoop
        // install; hadoop.home.dir must point at it before Spark starts.
        System.setProperty("hadoop.home.dir", "D:\\hadoop-2.7.2test");
        System.setProperty("HADOOP_USER_NAME", "hadoop");

        // getOrCreate() reuses any existing session, so repeated requests
        // do not spawn additional Spark contexts.
        SparkSession session = SparkSession.builder()
                .master("local[*]")
                .appName("Word Count")
                .config("spark.sql.warehouse.dir", "file:///d:/tmp")
                .getOrCreate();

        // header=true: the first CSV row supplies the column names.
        Dataset<Row> bookFrame = session.read().option("header", true).csv("D:\\book.csv");
        bookFrame.show();

        return new ModelAndView("dataimport");
    }
}

在spark的rdd進行Row封裝的時候,會涉及到日期類型的轉換問題。

默認org.apache.spark.sql.RowFactory 類型只接受 java.sql.Date

// Convert java.util.Date to java.sql.Date
java.util.Date utilDate = new java.util.Date(); // current time
java.sql.Date sqlDate = new java.sql.Date(utilDate.getTime());

// Convert java.sql.Date back to java.util.Date
java.sql.Date sqlDate1 = new java.sql.Date(new java.util.Date().getTime());
java.util.Date utilDate1 = new java.util.Date(sqlDate1.getTime());

Maven配置以下(英 ['meɪv(ə)n]  美 ['mevn])

其中有很多坑:一個是包衝突問題,hadoop、spark和springboot體系裏面每一個都本身引用了一系列logger實現的包,一塊兒編譯運行會有衝突,對部分包的依賴要exclusions掉;二個是版本問題,對於要使用的版本,必須整個工程先後一致。若是一個引用的是A版本,另外一個引用的是B版本,就會出問題,要麼把A給exclusion掉,只用B版本,要不想其它辦法:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>my.groud.id</groupId>
    <artifactId>sparkuitest</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>sparkuitest</name>
    <url>http://maven.apache.org</url>

    <repositories>
        <repository>
            <id>central</id>
            <!-- use https: Maven Central and most mirrors reject plain http since 2020 -->
            <url>https://maven.aliyun.com/nexus/content/groups/public/</url>
            <!--<url>https://mvnrepository.com/artifact</url>-->
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
        </repository>
        <repository>
            <id>maven2</id>
            <url>https://repo1.maven.org/maven2</url>
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
        </repository>
    </repositories>
    
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <scala.version>2.11.8</scala.version>
        <spark.version>2.0.0</spark.version>
        <hadoop.version>2.6.0</hadoop.version>
        <junit.version>4.12</junit.version>
        <jackson.version>2.6.5</jackson.version>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>

    <dependencies>
        
        <!-- 解決Caused by: java.lang.IllegalArgumentException: LoggerFactory is
        not a Logback LoggerContext but Logback is on the classpath. Either 
       remove Logback or the competing implementation (class org.slf4j.impl.Log4jLoggerFactory 
       loaded from jar:file:/home/liangzl2/run/sparkWeb/sparkuitest-0.0.1-SNAPSHOT.jar!/BOOT-INF/lib/slf4j-log4j12-1.6.1.jar!/). 
       If you are using WebLogic you will need to add 'org.slf4j' to 
       prefer-application-packages in WEB-INF/weblogic.xml Object of class 
       [org.slf4j.impl.Log4jLoggerFactory] must be an instance of class 
       ch.qos.logback.classic.LoggerContext
        -->
        <!-- https://mvnrepository.com/artifact/com.opencsv/opencsv -->
		<dependency>
			<groupId>com.opencsv</groupId>
			<artifactId>opencsv</artifactId>
			<version>4.1</version>
		</dependency>
        
        <!-- JsonMappingException: Incompatible Jackson version: 2.8.4 -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-core</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-annotations</artifactId>
            <version>${jackson.version}</version>
        </dependency>

        
        
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <version>1.4.2.RELEASE</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-jdbc</artifactId>
            <version>1.4.2.RELEASE</version>
            <exclusions><!-- 排除 IllegalArgumentException: LoggerFactory is not
             a Logback LoggerContext but Logback is on the classpath. Either 
             remove Logback or the competing implementation問題 -->
<!--                <exclusion>
                    <groupId>ch.qos.logback</groupId>
                   <artifactId>logback-classic</artifactId>
                </exclusion>-->
           </exclusions>
        </dependency>
        <dependency>
            <groupId>com.h2database</groupId>
            <artifactId>h2</artifactId>
            <version>1.3.156</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.27</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
            <version>1.4.2.RELEASE</version>
            
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>druid</artifactId>
            <version>1.0.11</version>
        </dependency>
    
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>

        <dependency><!-- 用來處理配置文件 -->
            <groupId>com.typesafe</groupId>
            <artifactId>config</artifactId>
            <version>1.2.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
            <exclusions>  
                <exclusion>   
                    <groupId>org.slf4j</groupId>  
                    <artifactId>slf4j-log4j12</artifactId>  
                </exclusion>  
                <exclusion>   
                    <groupId>log4j</groupId>  
                    <artifactId>log4j</artifactId>  
                </exclusion>  
            </exclusions> 
            <!-- <scope>provided</scope>-->
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-yarn 
        解決ClassNotFoundException: org.apache.spark.deploy.yarn.YarnSparkHadoopUtil問題
        -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-yarn_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
            <exclusions>  
                <exclusion>   
                    <groupId>org.slf4j</groupId>  
                    <artifactId>slf4j-log4j12</artifactId>  
                </exclusion>  
                <exclusion>   
                    <groupId>log4j</groupId>  
                    <artifactId>log4j</artifactId>  
                </exclusion>
                <exclusion>
                    <groupId>org.mortbay.jetty</groupId>
                    <artifactId>jetty-util</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>javax.servlet</groupId>
                    <artifactId>servlet-api</artifactId>
                </exclusion>
            </exclusions> 
            <!--<scope>provided</scope>-->
        </dependency>
        
        <dependency>  
            <groupId>javax.servlet</groupId>  
            <artifactId>javax.servlet-api</artifactId>  
            <version>3.1.0</version>  
        </dependency>  



        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
            <!--  <scope>provided</scope>-->
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <!--<artifactId>spark-streaming-kafka_2.11</artifactId>-->
            <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
            <!--<version>1.6.1</version>-->
            <version>${spark.version}</version>
            <scope>provided</scope>

        </dependency>


        <dependency><!-- 數據庫鏈接池 -->
            <groupId>com.mchange</groupId>
            <artifactId>c3p0</artifactId>
            <version>0.9.5.2</version>
        </dependency>

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>${junit.version}</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
	
    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <version>1.4.2.RELEASE</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>repackage</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>2.3.2</version>
                <configuration>
                    <encoding>${project.build.sourceEncoding}</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-resources-plugin</artifactId>
                <version>2.4.3</version>
                <configuration>
                    <encoding>${project.build.sourceEncoding}</encoding>
                </configuration>
            </plugin>
        </plugins>
        <defaultGoal>compile</defaultGoal>
    </build>
	
</project>
相關文章
相關標籤/搜索