- Reference: 《Hadoop+Spark大數據巨量分析與機器學習整合開發實戰》, author: 林大貴, publisher: 博碩, published 2015/11
- Download Scala from http://www.scala-lang.org/files/archive/
- wget http://www.scala-lang.org/files/archive/scala-2.11.6.tgz
- tar xvf scala-2.11.6.tgz
- sudo mv scala-2.11.6 /usr/local/scala
- sudo gedit ~/.bashrc
- #SCALA Variable
- export SCALA_HOME=/usr/local/scala
- export PATH=$PATH:$SCALA_HOME/bin
- #SCALA Variable
- source ~/.bashrc
- scala
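- The scala command should drop into the REPL; a minimal sanity check (exit with :quit):
- scala> 1 + 1
- scala> :quit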
- Download Spark from:
- https://spark.apache.org/downloads.html
- tar xzf spark-1.4.0-bin-hadoop2.6.tgz
- sudo mv spark-1.4.0-bin-hadoop2.6 /usr/local/spark
- sudo gedit ~/.bashrc
- #SPARK Variable
- export SPARK_HOME=/usr/local/spark
- export PATH=$PATH:$SPARK_HOME/bin
- #SPARK Variable
- source ~/.bashrc
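- A quick check that the new PATH works (spark-submit ships with Spark and prints the version banner):
- spark-submit --version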
- 8-5 Reduce console log messages
- cd /usr/local/spark/conf
- cp log4j.properties.template log4j.properties
- sudo gedit log4j.properties
- Change log4j.rootCategory=INFO, console => log4j.rootCategory=WARN, console
- spark-shell
- 8-6 Start Hadoop
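- A minimal sketch for starting the cluster, assuming Hadoop's sbin scripts are on the PATH (jps then lists the running daemons such as NameNode and ResourceManager):
- start-all.sh
- jps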
- 8-7 Run spark-shell locally
- spark-shell --master local[4]
- scala> val textFile = sc.textFile("file:/usr/local/spark/README.md")
- scala> textFile.count
- scala> val textFile = sc.textFile("hdfs://master:9000/user/hduser/wordcount/input/test.txt")
- scala> textFile.count
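- Beyond line counts, a short word-count sketch in the same shell (standard RDD operations; splitting on single spaces is an assumption about the input):
- scala> val counts = textFile.flatMap(line => line.split(" ")).map(word => (word, 1)).reduceByKey(_ + _)
- scala> counts.take(5)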
- 8-8 Run spark-shell on Hadoop YARN
- SPARK_JAR=/usr/local/spark/lib/spark-assembly-1.4.0-hadoop2.6.0.jar HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop MASTER=yarn-client /usr/local/spark/bin/spark-shell
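- While the shell is running, the application should show up in the YARN ResourceManager web UI (default port 8088):
- http://master:8088/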
- 8-9 Spark standalone cluster environment
- cp /usr/local/spark/conf/spark-env.sh.template /usr/local/spark/conf/spark-env.sh
- sudo gedit /usr/local/spark/conf/spark-env.sh
- export SPARK_MASTER_IP=master        # hostname of the Spark master node
- export SPARK_WORKER_CORES=1          # CPU cores each worker may use
- export SPARK_WORKER_MEMORY=800m      # memory each worker may use
- export SPARK_WORKER_INSTANCES=2      # worker instances started per node
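- With SPARK_WORKER_INSTANCES=2 on each of the three data nodes, the cluster provides 2 × 3 = 6 workers in total, each with 1 core and 800 MB of memory.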
- Copy the Spark directory from master to data1, data2, and data3
- ssh data1
- data1> sudo mkdir /usr/local/spark
- data1> sudo chown hduser:hduser /usr/local/spark
- data1> exit
- sudo scp -r /usr/local/spark hduser@data1:/usr/local
- ssh data2
- data2> sudo mkdir /usr/local/spark
- data2> sudo chown hduser:hduser /usr/local/spark
- data2> exit
- sudo scp -r /usr/local/spark hduser@data2:/usr/local
- ssh data3
- data3> sudo mkdir /usr/local/spark
- data3> sudo chown hduser:hduser /usr/local/spark
- data3> exit
- sudo scp -r /usr/local/spark hduser@data3:/usr/local
- The slaves file
- sudo gedit /usr/local/spark/conf/slaves
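- The slaves file lists one worker hostname per line; assuming the three data nodes prepared above, it would contain:
- data1
- data2
- data3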
- Run Spark on the standalone cluster
- /usr/local/spark/sbin/start-all.sh
- spark-shell --master spark://master:7077
- http://master:8080/ (Spark master web UI, listing the registered workers)
- scala> val textFile = sc.textFile("file:/usr/local/spark/README.md")
- scala> textFile.count
- scala> val textFile = sc.textFile("hdfs://master:9000/user/hduser/wordcount/input/test.txt")
- scala> textFile.count
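- A job can also be submitted to the standalone cluster without the shell; a sketch using the bundled SparkPi example (the examples jar path is assumed from the 1.4.0 binary distribution):
- spark-submit --master spark://master:7077 --class org.apache.spark.examples.SparkPi /usr/local/spark/lib/spark-examples-1.4.0-hadoop2.6.0.jar 10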
- /usr/local/spark/sbin/stop-all.sh