/spark-otus

otus-spark

Primary Language: Python

hadoop fs -put crime.csv crime.csv
hadoop fs -put offense_codes.csv offense_codes.csv

spark-submit --master local[*] crimes.py

hive> CREATE EXTERNAL TABLE IF NOT EXISTS crimes_result
    > (
    >     crimes_total INT,
    >     lat DOUBLE,
    >     lng DOUBLE,
    >     crimes_monthly DOUBLE,
    >     frequent_crime_types STRING
    > )
    > STORED AS PARQUET
    > LOCATION 'output';
OK
Time taken: 1.018 seconds
hive> select * from crimes_result;
OK
23460 42.212122584455564 -70.85561011772236 586.5 INVESTIGATE PERSON, TRESPASSING, DRUGS
1765 25.239505193693443 -43.44877438704255 NULL NULL
49945 42.31600367732768 -71.07569930654317 1248.625 DRUGS, WARRANT ARREST
42530 42.29263740900063 -71.05125995734362 1063.25 DRUGS, FRAUD, PROPERTY, WARRANT ARREST, SERVICE TO OTHER PD INSIDE OF MA., VANDALISM, FORGERY OR UTTERING
17536 42.30980365570989 -71.0980047887838 438.4 DRUGS, WARRANT ARREST
35442 42.28305944520106 -71.07894914185484 886.05 INVESTIGATE PERSON, DRUGS, WARRANT ARREST, SEARCH WARRANT
13239 42.19796999447011 -71.00440862434752 330.975 WARRANT ARREST
6505 42.17915525091085 -70.74472508958512 162.625
13544 42.36070260499386 -71.00394833039849 338.6
20127 42.34350724510931 -71.13125461726487 503.175 DRUGS, SEARCH WARRANT
41915 42.34124251790864 -71.07725024946998 1047.875 DRUGS, TRESPASSING
17348 42.262680611225974 -71.118919987577 433.7 WEAPON, WARRANT ARREST, INVESTIGATE PROPERTY, DRUGS, FIREARM/WEAPON
35717 42.33123077259839 -71.01991881362001 892.925 WARRANT ARREST, DRUGS, INVESTIGATE PERSON, TRESPASSING, PROSTITUTION
Time taken: 1.843 seconds, Fetched: 13 row(s)