spark示例

1)java(App.java)

package com.ejiajie.bi.hello;

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.SparkConf;

/**
 * Hello world!
 *
 */
public class App 
{
    /**
     * Spark hello-world entry point: builds a {@link SparkConf}, starts a
     * {@link JavaSparkContext} against the configured cluster, prints a
     * greeting, and shuts the context down.
     *
     * @param args command-line arguments (unused)
     */
    public static void main( String[] args )
    {
        SparkConf conf = new SparkConf();
        // try-with-resources guarantees the context is stopped even if the
        // body throws; the original leaked the context by never calling
        // stop(). (JavaSparkContext implements java.io.Closeable.)
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            System.out.println( "Hello World!" );
        }
    }
}

2)python(PyHelloWorld.py)

# Minimal PySpark job: start a SparkContext, then build the MLlib
# data-type examples (dense/sparse vectors and labeled points). Nothing
# is submitted to the cluster beyond the context itself.
from pyspark import SparkContext, SparkConf

conf = SparkConf()
sc = SparkContext(conf=conf)


import numpy as np
import scipy.sparse as sps
from pyspark.mllib.linalg import Vectors

# Dense vector backed by a NumPy array.
dv1 = np.array([1.0, 0.0, 3.0])
# Dense vector expressed as a plain Python list.
dv2 = [1.0, 0.0, 3.0]
# MLlib SparseVector: size 3, non-zeros 1.0 and 3.0 at indices 0 and 2.
sv1 = Vectors.sparse(3, [0, 2], [1.0, 3.0])
# The same sparse vector as a single-column SciPy CSC matrix,
# assembled from its raw (data, indices, indptr) components.
csc_data = np.array([1.0, 3.0])
csc_row_indices = np.array([0, 2])
csc_indptr = np.array([0, 2])
sv2 = sps.csc_matrix((csc_data, csc_row_indices, csc_indptr), shape=(3, 1))

from pyspark.mllib.linalg import SparseVector
from pyspark.mllib.regression import LabeledPoint

# Positive example: label 1.0 with a dense feature vector.
pos = LabeledPoint(1.0, [1.0, 0.0, 3.0])

# Negative example: label 0.0 with a sparse feature vector.
neg = LabeledPoint(0.0, SparseVector(3, [0, 2], [1.0, 3.0]))

3)執行命令

java

# spark-submit --class com.ejiajie.bi.hello.App --master yarn --deploy-mode client ./hello-1.0-SNAPSHOT-jar-with-dependencies.jar 
# spark-submit --class com.ejiajie.bi.hello.App --master yarn --deploy-mode cluster ./hello-1.0-SNAPSHOT-jar-with-dependencies.jar

python

# spark-submit  --master yarn --deploy-mode client /home/lihanhui/work/spark-example/PyHelloWorld.py 
# spark-submit  --master yarn --deploy-mode cluster /home/lihanhui/work/spark-example/PyHelloWorld.py

4)hadoop查看任務狀態

相關文章
相關標籤/搜索