能夠在一個 Controller 裏面實現 Spark 抓取的代碼:
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; import org.springframework.web.servlet.ModelAndView; import org.apache.spark.sql.SparkSession; import org.apache.spark.SparkConf; import org.springframework.web.bind.annotation.ResponseBody; /** * @author flash勝龍 */ @RestController public class DataFigureController { @RequestMapping("/dataimportlocal.html") public ModelAndView dataimportlocal() { System.setProperty("hadoop.home.dir", "D:\\hadoop-2.7.2test"); System.setProperty("HADOOP_USER_NAME", "hadoop"); SparkSession spark = SparkSession.builder().master("local[*]").appName("Word Count").config("spark.sql.warehouse.dir", "file:///d:/tmp").getOrCreate(); Dataset<Row> df = spark.read().option("header", true).csv("D:\\book.csv"); df.show(); return new ModelAndView("dataimport"); } }
在 Spark 的 RDD 進行 Row 封裝的時候,會涉及到日期類型的轉換問題。
默認 org.apache.spark.sql.RowFactory 類型只接受 java.sql.Date
// util.date轉換成sql.date java.util.Date utilDate = new java.util.Date(); //獲取當前時間 java.sql.Date sqlDate = new java.sql.Date(utilDate.getTime()); // sql.date轉換成util.date java.sql.Date sqlDate1 = new java.sql.Date(new java.util.Date().getTime()); java.util.Date utilDate1 = new java.util.Date(sqlDate1.getTime());
Maven 配置以下(英 ['meɪv(ə)n] 美 ['mevn]):
其中有很多坑:一個是包衝突問題,hadoop、spark 和 springboot 體系裏面每一個都本身引用了一系列 logger 實現的包,一塊兒編譯運行會有衝突,對部分包的依賴要 exclusions 掉;二個是版本問題,對於要使用的版本,必須整個工程先後一致。若是一個引用的是 A 版本,另外一個引用的是 B 版本,就會出問題,要麼把 A 給 exclusion 掉,只用 B 版本,要麼想其它辦法:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>my.groud.id</groupId>
    <artifactId>sparkuitest</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>
    <name>sparkuitest</name>
    <url>http://maven.apache.org</url>

    <repositories>
        <!-- HTTPS is mandatory here: Maven 3.8.1+ blocks plain-HTTP repositories. -->
        <repository>
            <id>central</id>
            <url>https://maven.aliyun.com/nexus/content/groups/public/</url>
            <!--<url>https://mvnrepository.com/artifact</url>-->
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
        </repository>
        <repository>
            <id>maven2</id>
            <url>https://repo1.maven.org/maven2</url>
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
        </repository>
    </repositories>

    <properties>
        <!-- Declared exactly once (the original POM duplicated this property). -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <scala.version>2.11.8</scala.version>
        <spark.version>2.0.0</spark.version>
        <hadoop.version>2.6.0</hadoop.version>
        <junit.version>4.12</junit.version>
        <jackson.version>2.6.5</jackson.version>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>

    <dependencies>
        <!-- Works around: IllegalArgumentException: LoggerFactory is not a Logback
             LoggerContext but Logback is on the classpath. Either remove Logback or
             the competing implementation (org.slf4j.impl.Log4jLoggerFactory, loaded
             from slf4j-log4j12-1.6.1.jar inside the boot jar). If running on
             WebLogic, add 'org.slf4j' to prefer-application-packages in
             WEB-INF/weblogic.xml. -->
        <!-- https://mvnrepository.com/artifact/com.opencsv/opencsv -->
        <dependency>
            <groupId>com.opencsv</groupId>
            <artifactId>opencsv</artifactId>
            <version>4.1</version>
        </dependency>
        <!-- Pinned to avoid: JsonMappingException: Incompatible Jackson version: 2.8.4 -->
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-core</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-databind</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>com.fasterxml.jackson.core</groupId>
            <artifactId>jackson-annotations</artifactId>
            <version>${jackson.version}</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <version>1.4.2.RELEASE</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-jdbc</artifactId>
            <version>1.4.2.RELEASE</version>
            <!-- Exclusion kept commented out, as in the original, in case the
                 "LoggerFactory is not a Logback LoggerContext but Logback is on the
                 classpath" conflict resurfaces. -->
            <exclusions>
                <!--<exclusion>
                    <groupId>ch.qos.logback</groupId>
                    <artifactId>logback-classic</artifactId>
                </exclusion>-->
            </exclusions>
        </dependency>
        <dependency>
            <groupId>com.h2database</groupId>
            <artifactId>h2</artifactId>
            <version>1.3.156</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.27</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
            <version>1.4.2.RELEASE</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>druid</artifactId>
            <version>1.0.11</version>
        </dependency>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>
        <!-- Used for reading configuration files. -->
        <dependency>
            <groupId>com.typesafe</groupId>
            <artifactId>config</artifactId>
            <version>1.2.1</version>
        </dependency>
        <!-- slf4j-log4j12/log4j excluded to avoid clashing with Spring Boot's
             Logback binding. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
            </exclusions>
            <!--<scope>provided</scope>-->
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-yarn
             Fixes ClassNotFoundException: org.apache.spark.deploy.yarn.YarnSparkHadoopUtil -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-yarn_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.mortbay.jetty</groupId>
                    <artifactId>jetty-util</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>javax.servlet</groupId>
                    <artifactId>servlet-api</artifactId>
                </exclusion>
            </exclusions>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>javax.servlet</groupId>
            <artifactId>javax.servlet-api</artifactId>
            <version>3.1.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <!--<artifactId>spark-streaming-kafka_2.11</artifactId>-->
            <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
            <!--<version>1.6.1</version>-->
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>
        <!-- JDBC connection pool. -->
        <dependency>
            <groupId>com.mchange</groupId>
            <artifactId>c3p0</artifactId>
            <version>0.9.5.2</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>${junit.version}</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <version>1.4.2.RELEASE</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>repackage</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>2.3.2</version>
                <configuration>
                    <encoding>${project.build.sourceEncoding}</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-resources-plugin</artifactId>
                <version>2.4.3</version>
                <configuration>
                    <encoding>${project.build.sourceEncoding}</encoding>
                </configuration>
            </plugin>
        </plugins>
        <defaultGoal>compile</defaultGoal>
    </build>
</project>