/spark

Primary Language: Python

https://hub.docker.com/r/bitnami/spark#environment-variables

# download the Bitnami Spark docker-compose.yml
curl -LO https://raw.githubusercontent.com/bitnami/containers/main/bitnami/spark/docker-compose.yml
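# add a volume to the Spark service(s) in docker-compose.yml so the current directory is mounted at /opt/spark/ in the container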

    volumes:
      - .:/opt/spark/
docker-compose up -d --build
docker exec -it <container_id> bash

Run Scala code inside the Spark shell (start it with `spark-shell` in the container):

val df = spark.read.format("csv").option("header", "true").load("/opt/spark/titanic.csv")
df.printSchema
df.select("Name", "Age").show()
df.select("Name", "Age").filter("Age > 30").show()

Run a .py script with spark-submit:

spark-submit friends-by-age.py