export SCALA_HOME=/home/hadoop/cluster/scala-2.10.5
export JAVA_HOME=/home/hadoop/cluster/jdk1.7.0_79
export HADOOP_HOME=/home/hadoop/cluster/hadoop-2.6.0
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
# Note: this must be set to the actual IP address; otherwise the Eclipse client
# below will fail to connect and report: All masters are unresponsive! Giving up.
SPARK_MASTER_IP=10.16.112.121
SPARK_LOCAL_DIRS=/home/hadoop/cluster/spark-1.4.0-bin-hadoop2.6
SPARK_DRIVER_MEMORY=1G
sbin/start-master.sh
sbin/start-slave.sh
At this point you can open http://yourip:8080 in a browser to check the status of the Spark cluster.
The default Spark master URL is now: spark://10.16.112.121:7077
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>spark.test</groupId>
  <artifactId>FirstTrySpark</artifactId>
  <version>0.0.1-SNAPSHOT</version>

  <properties>
    <!-- fill in the versions that match your cluster -->
    <hadoop.version>2.6.0</hadoop.version>
    <spark.version>1.4.0</spark.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop.version}</version>
      <scope>provided</scope>
      <!-- remember to exclude the servlet dependency, otherwise it conflicts -->
      <exclusions>
        <exclusion>
          <groupId>javax.servlet</groupId>
          <artifactId>*</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.10</artifactId>
      <version>${spark.version}</version>
    </dependency>
  </dependencies>

  <build>
    <sourceDirectory>src/main/java</sourceDirectory>
    <plugins>
      <!-- bind the maven-assembly-plugin to the package phase; this creates a single
           jar bundling the dependencies, suitable for deployment to the cluster. -->
      <plugin>
        <groupId>net.alchim31.maven</groupId>
        <artifactId>scala-maven-plugin</artifactId>
        <version>3.2.0</version>
        <executions>
          <execution>
            <goals>
              <goal>compile</goal>
              <goal>testCompile</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <scalaVersion>2.10</scalaVersion>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>2.5.5</version>
        <configuration>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>1.7</source>
          <target>1.7</target>
        </configuration>
      </plugin>
    </plugins>
    <resources>
      <resource>
        <directory>src/main/resources</directory>
      </resource>
    </resources>
  </build>
</project>
src/main/java       # Java source code
src/main/scala      # Scala source code
src/main/resources  # resource files
src/test/java       # Java test code
src/test/scala      # Scala test code
src/test/resources  # test resource files
At this point the environment is fully set up. Time for a first test program:
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

/**
 * @author clebeg
 */
object FirstTry {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf
    conf.setMaster("spark://yourip:7077")
    conf.set("spark.app.name", "first-tryspark")

    val sc = new SparkContext(conf)
    val rawblocks = sc.textFile("hdfs://yourip:9000/user/hadoop/linkage")
    println(rawblocks.first)
  }
}
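One aside before running it: when the driver is launched straight from Eclipse rather than via spark-submit, the workers generally also need the compiled application classes. A minimal sketch, assuming the assembly jar built by mvn package ends up at the path below (the path is an assumption derived from the pom above, not something the original post specifies), is to ship it with SparkConf.setJars:

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

object FirstTryWithJars {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("spark://yourip:7077")
      .setAppName("first-tryspark")
      // Ship the assembled application jar to the executors so they can load
      // the classes defined in this project. The path is an assumption; use
      // whatever jar your mvn package run actually produces under target/.
      .setJars(Seq("target/FirstTrySpark-0.0.1-SNAPSHOT-jar-with-dependencies.jar"))

    val sc = new SparkContext(conf)
    val rawblocks = sc.textFile("hdfs://yourip:9000/user/hadoop/linkage")
    println(rawblocks.first)
    sc.stop()
  }
}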
Running the job against the cluster, however, produced the following executor error log:

15/10/10 08:49:01 INFO executor.CoarseGrainedExecutorBackend: Registered signal handlers for [TERM, HUP, INT]
15/10/10 08:49:01 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
15/10/10 08:49:02 INFO spark.SecurityManager: Changing view acls to: hadoop,Administrator
15/10/10 08:49:02 INFO spark.SecurityManager: Changing modify acls to: hadoop,Administrator
15/10/10 08:49:02 INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(hadoop, Administrator); users with modify permissions: Set(hadoop, Administrator)
15/10/10 08:49:02 INFO slf4j.Slf4jLogger: Slf4jLogger started
15/10/10 08:49:02 INFO Remoting: Starting remoting
15/10/10 08:49:02 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://driverPropsFetcher@10.16.112.121:58708]
15/10/10 08:49:02 INFO util.Utils: Successfully started service 'driverPropsFetcher' on port 58708.
Exception in thread "main" java.lang.reflect.UndeclaredThrowableException
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1643)
    at org.apache.spark.deploy.SparkHadoopUtil.runAsSparkUser(SparkHadoopUtil.scala:65)
    at org.apache.spark.executor.CoarseGrainedExecutorBackend$.run(CoarseGrainedExecutorBackend.scala:146)
    at org.apache.spark.executor.CoarseGrainedExecutorBackend$.main(CoarseGrainedExecutorBackend.scala:245)
    at org.apache.spark.executor.CoarseGrainedExecutorBackend.main(CoarseGrainedExecutorBackend.scala)
Caused by: java.util.concurrent.TimeoutException: Futures timed out after [120 seconds]
    at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:219)
    at scala.concurrent.impl.Promise$DefaultPromise.result(Promise.scala:223)
    at scala.concurrent.Await$$anonfun$result$1.apply(package.scala:107)
    at scala.concurrent.BlockContext$DefaultBlockContext$.blockOn(BlockContext.scala:53)
    at scala.concurrent.Await$.result(package.scala:107)
    at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:97)
    at org.apache.spark.executor.CoarseGrainedExecutorBackend$$anonfun$run$1.apply$mcV$sp(CoarseGrainedExecutorBackend.scala:159)
    at org.apache.spark.deploy.SparkHadoopUtil$$anon$1.run(SparkHadoopUtil.scala:66)
    at org.apache.spark.deploy.SparkHadoopUtil$$anon$1.run(SparkHadoopUtil.scala:65)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
    ... 4 more
15/10/10 08:51:02 INFO util.Utils: Shutdown hook called
A closer look shows this is a permissions problem: shut down Hadoop right away and add the following to etc/hadoop/core-site.xml:
<property>
  <name>hadoop.security.authorization</name>
  <value>false</value>
</property>
This allows anyone to read the data, and the problem is solved immediately.
The linkage data set used above was downloaded and uploaded to HDFS as follows:

mkdir linkage
cd linkage/
curl -o donation.zip http://bit.ly/1Aoywaq
unzip donation.zip
unzip "block_*.zip"
hdfs dfs -mkdir /user/hadoop/linkage
hdfs dfs -put block_*.csv /user/hadoop/linkage
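With the block files in HDFS, a quick sanity check is to count the data rows from the same driver setup as before. This is only a sketch under two assumptions not stated in the original post: the header line of each block_*.csv contains the field name id_1, and the hypothetical helper isHeader is used to drop those header lines.

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

object LinkageCount {
  // Assumption: header rows of the donation CSVs contain the field name "id_1".
  def isHeader(line: String): Boolean = line.contains("id_1")

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("spark://yourip:7077")
      .setAppName("linkage-count")
    val sc = new SparkContext(conf)

    val rawblocks = sc.textFile("hdfs://yourip:9000/user/hadoop/linkage")
    // Drop the repeated header line from each block_*.csv and count data rows.
    val noheader = rawblocks.filter(line => !isHeader(line))
    println(s"data rows: ${noheader.count()}")

    sc.stop()
  }
}

If the upload worked, the printed count should match the total number of non-header lines across all block_*.csv files.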