/AWS_EMR

Primary Language: Python

AWS_EMR

git clone https://github.com/thejungwon/AWS_EMR
cd AWS_EMR

curl -L -o actors.list https://www.dropbox.com/s/vofyl0uryectfyt/actors.list\?dl\=1
hadoop fs -mkdir /test
hadoop fs -put actors.list /test/actors.list

virtualenv -p python3 venv
. venv/bin/activate
pip install mrjob
cp .mrjob.conf ~/.mrjob.conf
python mr_word_freq_count.py -r hadoop hdfs:///test/actors.list --output-dir hdfs:///test/output

vi ~/.mrjob.conf
{
  "runners": {
    "hadoop": {
      "setup": [
        "set -e"
      ],
      "sh_bin": "/bin/bash -x"
    }
  }
}