# Runs XGBoost on Spark (on Kubernetes) and saves the trained model.
# Prerequisites: Minikube, Helm, Docker.
# Stage the training data on the host and mount it into the minikube VM as /data.
mkdir /tmp/spark
cp iris.csv /tmp/spark
# Older Kubernetes version is required for compatibility with Spark 2.4.5.
minikube start --memory 8192 --cpus 5 --mount-string /tmp/spark:/data --mount --kubernetes-version v1.15.3 # older k8s version for working with spark 2.4.5
# Enable the in-cluster image registry so locally built images can be pushed to it.
minikube addons enable registry
# Forward localhost:5000 to the minikube registry.
# NOTE: $(minikube ip) is expanded by the *host* shell before docker run starts — intentional here.
docker run --rm -it --network=host alpine ash -c "apk add socat && socat TCP-LISTEN:5000,reuseaddr,fork TCP:$(minikube ip):5000"
# Build the XGBoost-on-Spark image and push it through the local registry proxy.
docker build . -t xgboost
docker tag xgboost localhost:5000/xgboost
docker push localhost:5000/xgboost
# Install the Spark operator; it watches the default namespace for SparkApplication resources.
kubectl create namespace spark-operator
helm install spark-operator incubator/sparkoperator --namespace spark-operator --set sparkJobNamespace=default,enableWebhook=true,operatorVersion=v1beta2-1.1.2-2.4.5
# Service account and RBAC binding that the Spark driver pods run under.
kubectl create serviceaccount spark
kubectl create clusterrolebinding spark-role --clusterrole=edit --serviceaccount=default:spark
# Submit the SparkApplication definition to the operator.
kubectl apply -f operator.yml
minikube dashboard # use this to monitor pods
# Schedules/orchestrates the XGBoost job with Argo Workflows.
# Grant the default service account cluster-admin so Argo workflow pods can
# create and manage SparkApplication resources.
kubectl create clusterrolebinding spark-admin --clusterrole=cluster-admin --serviceaccount=default:default --namespace=default
# Install Argo Workflows from the official Helm chart repository.
helm repo add argo https://argoproj.github.io/argo-helm
# Fix: 'helm update' is not a valid Helm 3 command; 'helm repo update' refreshes the chart index.
helm repo update
helm install argo-wf argo/argo -f argo.yml
# Expose the Argo server UI locally, then submit the workflow and watch its progress.
kubectl port-forward deployment/argo-wf-server 2746:2746
argo submit --watch argo_wf.yml