This application allows you to deploy a multi-node Hadoop cluster with Spark 2.4.1 running on YARN.
- Clone the repo
- cd into the docker-spark-yarn-cluster directory
- Run
docker build -t pierrekieffer/spark-hadoop-cluster .
Alternatively, you can pull the image directly from Docker Hub:
docker pull pierrekieffer/spark-hadoop-cluster
- Run
./startHadoopCluster.sh
- Access the master node:
docker exec -it mycluster-master bash
-
spark-shell :
spark-shell --master yarn --deploy-mode client
-
spark-submit (use --deploy-mode client or --deploy-mode cluster), for example:
spark-submit --master yarn --deploy-mode client --num-executors 2 --executor-memory 4G --executor-cores 4 --class org.apache.spark.examples.SparkPi $SPARK_HOME/examples/jars/spark-examples_2.11-2.4.1.jar
-
Access the Hadoop (YARN) cluster Web UI at <master-host>:8088
-
Access the Spark Web UI at <master-host>:8080
-
Access the HDFS Web UI at <master-host>:50070
docker stop $(docker ps -a -q)
docker container prune