This repository is all about accessing Spotify data using Python's "Spotipy" module and saving it in a local database.
Jupyter Notebook
Boilerplate code
# Boilerplate codes. importosimportsysimportspotipyimportspotipy.utilasutilimportjsonSpotify=spotipy.Spotify## spotify account details## you can save those in your .bash_profile file and access it from there..## this way# os.environ['SPOTIPY_CLIENT_ID'] = 'your-spotify-client-id'# os.environ['SPOTIPY_CLIENT_SECRET'] = 'your-spotify-client-secret'# os.environ['SPOTIPY_REDIRECT_URI'] = 'your-app-redirect-url'## or this way. token=os.environ['SPOTIPY_CLIENT_TOKEN']
base_url='https://api.spotify.com'scope='playlist-read-private'# spotify:playlist:token=util.prompt_for_user_token( token,scope=scope)
spotify_object=Spotify(auth=token)
Creating database functions
# Creating DataBase functionsimportsqlite3fromsqlite3importError## write some functions to create batabase table and insert valuesdefcreate_connection(db_file):
""" create a database connection to the SQLite database specified by db_file :param db_file: database file :return: Connection object or None """conn=Nonetry:
conn=sqlite3.connect(db_file)
returnconnexceptErrorase:
print(e)
returnconn## Creating table functiondefcreate_table(conn, create_table_sql):
""" create a table from the create_table_sql statement :param conn: Connection object :param create_table_sql: a CREATE TABLE statement :return: """try:
c=conn.cursor()
c.execute(create_table_sql)
exceptErrorase:
pass## function to insert values definsert_values(conn, sql_statement, values):
""" create a table from the create_table_sql statement :param conn: Connection object :param create_table_sql: a CREATE TABLE statement :return: """try:
c=conn.cursor()
c.execute(sql_statement, values)
exceptErrorase:
pass
defgetting_playlist_data_to_database(playlist_id, playlist_owner, db, sql_table_statement, sql_value_statement):
## Getting a bit cleaned up. results=spotify_object.user_playlist(playlist_owner, playlist_id)
results=results['tracks']['items']
## initializing some lists.track_name= []
track_popularity= []
duration_ms= []
artist_name= []
track_id= []
album_name= []
release_date= []
## just going through the spotify object and retriving data. forresultinresults:
track_name.append(result['track']['name'])
duration_ms.append(result['track']['duration_ms'])
track_popularity.append(result['track']['popularity'])
artist_name.append(result['track']['artists'][0]['name'])
track_id.append(result['track']['id'])
album_name.append(result['track']['album']['name'])
release_date.append(result['track']['album']['release_date'])
## creating a connection with databaseconn=create_connection(db)
create_table(conn, sql_table_statement)
## looping through all the lists and inserting values. fori, n, p, d, at, alb, dtinzip(track_id,
track_name,
track_popularity,
duration_ms,
artist_name,
album_name,
release_date):
values= (i, n, p, d, at, alb, dt)
insert_values(conn, sql_value_statement, values)
conn.commit()
sqlite_db='360i_db.sqlite'## Getting playlist id and owner name of the playlist(spotify)playlist_id=results['playlists']['items'][0]['id']
playlist_owner=results['playlists']['items'][0]['owner']['id']
## sql_command for creating table. sql_table='CREATE TABLE IF NOT EXISTS tracks(track_id CHAR(20) PRIMARY KEY, name TEXT, popularity INTEGER, duration_ms INTEGER, artist_name TEXT, album TEXT, release_date TEXT);'## sql command for inserting valuesvalue_statement='INSERT INTO tracks VALUES (?,?,?,?,?,?,?)'
## this function creates database and inserts values. getting_playlist_data_to_database(playlist_id, playlist_owner, sqlite_db,sql_table, value_statement)
## Initializing a dict to store unique artist id and name. This is to avoid duplicates. artist_dict= {}
forresultinresults:
foriteminresult['track']['artists']:
ifitem['id'] inartist_dict:
continueelse:
artist_dict[item['id']] =item['name']
## using the created dict above to get popularity and followers column. forkey,valueinartist_dict.items():
try:
artist_dict[key] = (value, spotify_object.artist(key)['popularity'], spotify_object.artist(key)['followers']['total'])
except:
artist_dict[key] = (value, 'NA', 'NA')
## getting rid of values that are NA. artist_dict= {i: vfori, vinartist_dict.items() ifv[1] !='NA'}
## Creating 'artists' tableconn=sqlite3.connect(sqlite_db)
sql_statement='CREATE TABLE IF NOT EXISTS artists(artist_id CHAR(22) PRIMARY KEY, name TEXT, popularity INTEGER, followers INTEGER);'create_table(conn, sql_statement)
importpandasaspdfrompandas.ioimportsql## Writing a little function to make our life a bit easier. defQ(input_string, db=conn):
# return sql.read_sql(input_string, db)returnpd.read_sql(input_string, db)
## How many songs are there in the tracks table. Q('SELECT COUNT(*) FROM tracks')
## What are the top 5 tracks by artist follower count?Q("""SELECT tracks."name" FROM tracks JOIN artistsON tracks."artist_name" = artists."name"ORDER BY artists."followers" DESCLIMIT 5""")
## What are the top 5 tracks by artist follower count?Q("""SELECT tracks."name", artists."name", artists."followers" FROM tracks JOIN artistsON tracks."artist_name" = artists."name"ORDER BY artists."followers" DESCLIMIT 5""")
## What is the relationship between track and artist popularity?## Let's do a little analysis, df=Q("""SELECT tracks."popularity" as 'tracks_popularity', tracks."duration_ms", tracks."artist_name", artists."popularity" as 'artist_popularity',artists."followers"FROM tracksJOIN artistsON tracks."artist_name"= artists."name"""")
As you can see there is a positive correlation between tracks and artists popularity. It seems as if popular artists tend to produce popular tracks. However, sometimes things as you can see the top left data point
Bonus Analysis
sns.pairplot(df);
It looks like there are a couple of datapoint in the followers column, that are outliers. If we get rid off those couple of points we might be able to get a better look at the data. Let's find out those artist with top followers.
Well, If we look at the top right plot,we see that there are a cluster of datapoints where tracks_popularity is fairly high eventhough there isn't enough followers. However, the relationship between artist_popularity and tracks_popularity is quite significant as we have discussed earlier. May be if we have more data we can reveal something more significant.
sqlite_db='360i_db.sqlite'## Getting playlist id and owner name of the playlist(spotify)playlist_owner='maxmoroz07'playlist_id='7htu5ftbLBRFAwiuHVcUAg'## sql_command for creating table. sql_table='CREATE TABLE IF NOT EXISTS tracks_100(track_id CHAR(20) PRIMARY KEY, name TEXT, popularity INTEGER, duration_ms INTEGER, artist_name TEXT, album TEXT, release_date TEXT);'## sql command for inserting valuesvalue_statement='INSERT INTO tracks_100 VALUES (?,?,?,?,?,?,?)'
## this function creates database and inserts values. getting_playlist_data_to_database(playlist_id, playlist_owner, sqlite_db,sql_table, value_statement)