/matplotlib

Mock analysis of ride sharing data

Primary Language: Jupyter Notebook

Pyber Ride Sharing

  • Looking at the bubble plot, we can see that as we move from rural to suburban to urban areas, there are more riders and drivers.
  • Also, the average fare price goes down as we move into urban cities.
  • Comparing the pie charts of total rides and total drivers, we see that there are more riders in urban areas than drivers, while there are more drivers than riders in urban and suburban areas.
# Import Dependencies
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Read the city and ride tables from the raw_data directory
city = pd.read_csv('raw_data/city_data.csv')
ride = pd.read_csv('raw_data/ride_data.csv')

# Merge datasets on the shared 'city' column
city_ride = pd.merge(city, ride, on='city')

# Group the merged rows by city; every aggregate below is per city
grouped = city_ride.groupby('city')

# Average fare and number of rides per city
avg_fare = grouped['fare'].mean()
total_rides = grouped['ride_id'].count()

# NOTE(review): driver_count and type look like city-level attributes that the
# merge repeats on every ride row; mean()/max() collapse them back to a single
# value per city -- confirm against the raw data.
total_drivers = grouped['driver_count'].mean()
city_type = grouped['type'].max()

# Assemble the per-city summary table, indexed by city name
city_ride_df = pd.DataFrame({'Average Fare ($) Per City': avg_fare,
                             'Total Rides Per City': total_rides,
                             'Total Drivers Per City': total_drivers,
                             'City Type': city_type})
city_ride_df.index.name = 'City'

# Round to cents instead of string-formatting: the column keeps its float
# dtype, so it can be plotted and aggregated without being parsed back from
# text, while still displaying two decimal places.
city_ride_df['Average Fare ($) Per City'] = city_ride_df['Average Fare ($) Per City'].round(2)

city_ride_df.head()
Average Fare ($) Per City City Type Total Drivers Per City Total Rides Per City
City
Alvarezhaven 23.93 Urban 21 31
Alyssaberg 20.61 Urban 67 26
Anitamouth 37.32 Suburban 16 9
Antoniomouth 23.62 Urban 21 22
Aprilchester 21.98 Urban 49 19

Bubble Plot of Ride Sharing Data

# Bubble plot: one bubble per city, x = total rides, y = average fare,
# bubble area scaled by the city's driver count, colour by city type.

# Make sure the average fare is numeric before plotting
city_ride_df['Average Fare ($) Per City'] = pd.to_numeric(city_ride_df['Average Fare ($) Per City'])

# Create bubble plot for urban cities
urban = city_ride_df.loc[city_ride_df['City Type'] == 'Urban']
ux = urban['Total Rides Per City']
uy = urban['Average Fare ($) Per City']
uz = urban['Total Drivers Per City']

# s is the marker area; the driver count is multiplied by 5 so bubbles are visible
urban_plt = plt.scatter(ux, uy, s=uz*5, c='lightcoral', edgecolors='white', alpha=0.7, label='Urban')

# Create bubble plot for suburban cities
suburban = city_ride_df.loc[city_ride_df['City Type'] == 'Suburban']
sx = suburban['Total Rides Per City']
sy = suburban['Average Fare ($) Per City']
sz = suburban['Total Drivers Per City']

suburban_plt = plt.scatter(sx, sy, s=sz*5, c='lightskyblue', edgecolors='white', alpha=0.7, label='Suburban')

# Create bubble plot for rural cities
rural = city_ride_df.loc[city_ride_df['City Type'] == 'Rural']
rx = rural['Total Rides Per City']
ry = rural['Average Fare ($) Per City']
rz = rural['Total Drivers Per City']

rural_plt = plt.scatter(rx, ry, s=rz*5, c='gold', edgecolors='white', alpha=0.7, label='Rural')

# Set axis limits and labels
plt.ylim(15, 40)
plt.xlim(0, 35)
plt.xlabel('Total Number of Rides (Per City)')
plt.ylabel('Average Fare ($)')

# Use suptitle for entire figure
plt.suptitle('Pyber Ride Sharing Data (2016)')

# Create legend; one entry per labelled scatter call above
legend = plt.legend(loc="best", numpoints=1, fontsize=10)

# Give every legend marker the same fixed size so it does not inherit a
# data-dependent bubble size. Matplotlib 3.7 renamed Legend.legendHandles to
# legend_handles and later dropped the old name, so look up whichever exists.
legend_markers = getattr(legend, "legend_handles", None)
if legend_markers is None:
    legend_markers = legend.legendHandles
for marker in legend_markers:
    # Public setter instead of writing to the private _sizes attribute
    marker.set_sizes([30])

# Save figure
plt.savefig("pyber_rideshare_data.png")

# Show all bubble plots
plt.show()

png

Total Fares by City Type

# Pie chart: share of all fare revenue collected in each city type.

# Sum the fares of every ride within each city type (index = city type)
city_group = city_ride.groupby(['type'])
fares = city_group['fare'].sum()

# Wedge labels follow the order of the grouped index; colors and explode are
# positional and must line up with it.
# NOTE(review): groupby sorts its keys by default, so the order is presumably
# Rural, Suburban, Urban -- confirm against the data.
labels = fares.index
colors = ['gold', 'lightskyblue', 'lightcoral']
explode = [0, 0, 0.1]

# Draw the pie with a percentage printed on every wedge
plt.pie(
    fares,
    labels=labels,
    colors=colors,
    explode=explode,
    autopct='%1.1f%%',
    startangle=-60,
    shadow=True,
)

# Equal axis scaling keeps the pie circular
plt.axis('equal')

# Figure-level title
plt.suptitle('% of Total Fares by City Type')

# Write the chart to disk, then display it
plt.savefig("totalfares.png")
plt.show()

png

Total Rides by City Type

# Pie chart: share of all rides taken in each city type.
# The shares are computed from the per-type frames (urban, suburban, rural)
# rather than with a groupby.

# Total number of rides across every city
total_rides = city_ride_df['Total Rides Per City'].sum()

# Ride counts per city type
u_ride = urban['Total Rides Per City'].sum()
sub_ride = suburban['Total Rides Per City'].sum()
r_ride = rural['Total Rides Per City'].sum()

# Convert each count into a percentage of the overall total
ur_slice = u_ride/total_rides * 100
subr_slice = sub_ride/total_rides * 100
rr_slice = r_ride/total_rides * 100

# Wedge order is Urban, Rural, Suburban; labels, colors and explode are
# positional and must stay aligned with sizes. Only the first wedge is offset.
sizes = [ur_slice, rr_slice, subr_slice]
labels = ['Urban', 'Rural', 'Suburban']
colors = ['lightcoral', 'gold', 'lightskyblue']
explode = (0.1, 0, 0)

plt.pie(
    sizes,
    labels=labels,
    colors=colors,
    explode=explode,
    startangle=60,
    shadow=True,
    autopct="%.1f%%",
)

# Equal axis scaling keeps the pie circular; title applies to the whole figure
plt.axis('equal')
plt.suptitle('% of Total Rides by City Type')

# Write the chart to disk, then display it
plt.savefig("totalrides.png")
plt.show()

png

Total Drivers by City Type

# Pie chart: share of all drivers located in each city type.

# Total number of drivers across every city
total_drivers = city_ride_df['Total Drivers Per City'].sum()

# Driver totals per city type, each converted to a percentage of the overall total
u_drive = urban['Total Drivers Per City'].sum()
ud_slice = u_drive/total_drivers * 100

sub_drive = suburban['Total Drivers Per City'].sum()
subd_slice = sub_drive/total_drivers * 100

r_drive = rural['Total Drivers Per City'].sum()
rd_slice = r_drive/total_drivers * 100

# Create pie chart; labels, sizes, colors and explode are positional and must
# stay aligned (Urban, Rural, Suburban). Only the first wedge is offset.
labels = ['Urban', 'Rural', 'Suburban']
sizes = [ud_slice, rd_slice, subd_slice]
colors = ['lightcoral', 'gold', 'lightskyblue']
explode = (0.1, 0, 0)

plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct="%.1f%%", shadow=True, startangle=40)

# Create title and set axis to equal so the pie is circular
plt.suptitle('% of Total Drivers by City Type')
plt.axis('equal')

# Save figure under its own name; writing to "totalrides.png" here would
# overwrite the rides chart with the drivers chart.
plt.savefig("totaldrivers.png")

# Show pie chart
plt.show()

png