# Twitter API credentials.
# The four secrets are read from the environment instead of being embedded in
# the source, so they are never committed or shared along with the notebook.
# NOTE(review): the key/token values that were previously written in plain
# text at this spot should be treated as compromised - revoke and regenerate
# them in the Twitter developer console.
import os


def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset or empty, so a missing
            credential fails here with a clear message rather than as an
            authentication error on the first API call.
    """
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(
            "Missing Twitter credential: set the %s environment variable" % name
        )
    return value


consumer_key = _require_env('TWITTER_CONSUMER_KEY')
consumer_secret = _require_env('TWITTER_CONSUMER_SECRET')
access_token = _require_env('TWITTER_ACCESS_TOKEN')
access_token_secret = _require_env('TWITTER_ACCESS_TOKEN_SECRET')

# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# JSONParser makes the API return plain dicts/lists, which is why later code
# reads tweets with tweet["text"] / tweet['created_at'].
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())
# Loop through all target users and collect VADER sentiment scores per tweet.
# Results are appended to save_sentiment, save_compound_list and
# save_compound_list_with_date, which (like counter, len_targets, analyzer and
# target_terms) are created outside this section.
for target in target_terms:

    # Per-user accumulators, reset for every account
    compound_list = []
    compound_list_with_date = []
    positive_list = []
    negative_list = []
    neutral_list = []

    # Guard kept from the original: only process a user while the running
    # counter has not passed len_targets.
    if counter <= len_targets:

        # Use API to grab 100 tweets
        public_tweets = api.user_timeline(target, count=100)

        # Loop through all tweets
        for tweet in public_tweets:

            # Score the text once and read all four components from the
            # same result instead of re-analysing the tweet for each one.
            scores = analyzer.polarity_scores(tweet["text"])
            compound = scores["compound"]
            pos = scores["pos"]
            neu = scores["neu"]
            neg = scores["neg"]

            compound_list.append(compound)

            # Flat list of repeating (compound, created_at, user) triples;
            # it is reshaped into a three-column table further down.
            compound_list_with_date.extend(
                [compound, tweet['created_at'], target]
            )

            positive_list.append(pos)
            negative_list.append(neg)
            neutral_list.append(neu)

        # Per-user averages.  "Neutral" and "Negative" now take the mean of
        # their own lists (they were swapped before).
        sentiment = {"User": target,
                     "Compound": np.mean(compound_list),
                     "Positive": np.mean(positive_list),
                     "Neutral": np.mean(neutral_list),
                     "Negative": np.mean(negative_list),
                     "Tweet Count": len(compound_list)}

        save_sentiment.append(sentiment)
        save_compound_list.append(compound_list)
        save_compound_list_with_date.append(compound_list_with_date)

        counter = counter + 1
Getting the dataframes ready for plotting
# Each inner list of save_compound_list becomes a row; transposing turns
# those rows into columns so there is one column of compound scores per user.
compound_df = pd.DataFrame(save_compound_list).T
compound_df.head()

# Expose the row position as an 'index' column so it can be used as the x axis
compound_df = compound_df.reset_index()

# Scatter plot for one of the users (there are 5 users in total).
# NOTE(review): this relies on the columns being labelled with the user
# handles, which is not done in this section - confirm where that happens.
compound_df.plot(x='index', y='@BBCWorld', kind='scatter', subplots=False)
plt.show()
Scatter Plotting 'Compound' Sentiment Analysis for ALL Datapoints
# One marker-only series per user, all drawn on a shared Axes.
# The legend is anchored outside the plotting area (upper-left corner of the
# legend pinned to the Axes' upper-right corner), so it sits beside the graph.
fig, ax = plt.subplots(sharex=True, figsize=(8, 6))

# (column name, marker colour) in drawing order
user_colors = (
    ('@BBCWorld', 'blue'),
    ('@CBSNews', 'red'),
    ('@CNNbrk', 'green'),
    ('@FoxNews', 'black'),
    ('@nytimes', 'orange'),
)
for handle, shade in user_colors:
    compound_df.plot(x='index', y=handle, ax=ax, color=shade,
                     marker='o', markersize=5, linestyle='none')

plt.ylabel("Tweet Polarity", size=10)
plt.grid(True, color='gray', linestyle='-', linewidth=.5)
plt.xlabel("Tweets Ago", size=10)
ax.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.title('Sentiment Analysis of Media Tweets', size=17)
plt.show()
Extra Credit - Plotting TimeSeries Data
# Convert each user's flat [compound, date, user, compound, date, user, ...]
# list into a three-column DataFrame.
def _triples_to_frame(flat_values):
    """Return a DataFrame with columns Compound / Date / User.

    *flat_values* is a flat sequence made of repeating
    (compound, date, user) triples.  It is cut into rows of three by slicing,
    which has two advantages over ``np.array(...).reshape(100, 3)``:

    * any number of tweets works, not only exactly 100;
    * the values keep their own types - going through a single NumPy array
      would turn the float compound scores into strings.
    """
    rows = [flat_values[start:start + 3]
            for start in range(0, len(flat_values), 3)]
    return pd.DataFrame(rows, columns=['Compound', 'Date', 'User'])


compound_wdate_df0 = _triples_to_frame(save_compound_list_with_date[0])
compound_wdate_df1 = _triples_to_frame(save_compound_list_with_date[1])
compound_wdate_df2 = _triples_to_frame(save_compound_list_with_date[2])
compound_wdate_df3 = _triples_to_frame(save_compound_list_with_date[3])
compound_wdate_df4 = _triples_to_frame(save_compound_list_with_date[4])
# Move the 'Date' column into the index so the frame can be plotted against
# the tweet dates.
# NOTE(review): compound_final is not created in this section; presumably it
# combines the per-user compound_wdate_df* frames - confirm.
compound_final.set_index(keys=['Date'], inplace=True)
compound_final.head()

# Author's caveat: the graph shows the right data, but the x and y axes are
# not on the right scale (see the date span on the x axis and the y-axis
# labels).
tweet_dates = compound_final.index
tweet_scores = compound_final['Compound']
plt.scatter(tweet_dates, tweet_scores)
plt.show()
Plotting the Average Compound Value for Each User
# One row per user, built from the summary dicts collected in save_sentiment
sentiment_df = pd.DataFrame(save_sentiment)

# Persist the summary.  The comma separator, header row and unnamed index
# column are to_csv's defaults, so they do not need to be spelled out.
sentiment_df.to_csv('sentiment.csv')

# Bare expression: displays the frame when run as a notebook cell
sentiment_df
# Bar chart of the average compound score per user, drawn through pandas.
# Author's note: the bars all come out in the same colour with this method.
colors = ['#624ea7', 'g', 'yellow', 'k']
sentiment_df.plot(kind='bar', x='User', y='Compound',
                  color=colors, subplots=False)

plt.grid(True, color='gray', linestyle='-', linewidth=.5)
# Solid black line at zero to separate positive from negative averages
plt.axhline(0, color='k')

plt.title('Sentiment Analysis of Media Tweets', size=17)
plt.xlabel("User", size=10)
plt.ylabel("Tweet Polarity", size=10)
plt.show()
# Same chart drawn with plt.bar directly so each bar gets its own colour.
# Also notice the highlighted '0' line.
tick_label = sentiment_df['User']
data = sentiment_df['Compound']

# Number of bars follows the data instead of a hard-coded 5, so the chart
# still works when the list of users grows or shrinks.
n = len(data)

fig, ax = plt.subplots(figsize=(7, 5))
bar_locations = np.arange(n)
colors = ['#624ea7', 'g', 'yellow', 'k', 'maroon']

plt.grid(True, color='gray', linestyle='-', linewidth=.5)
plt.axhline(0, color='k')

# Kept from the original: `ax` is rebound here to the value returned by
# plt.bar (the bar container), no longer the Axes created above.
ax = plt.bar(bar_locations, data, color=colors, tick_label=tick_label)

plt.ylabel("Tweet Polarity", size=10)
plt.xlabel("Users", size=10)
plt.title('Sentiment Analysis of Media Tweets', size=17)
plt.show()
Extra Credit - Adding a data table to the bottom of my graph
# Bar chart with a data table attached underneath.
# Author's note: with more time I would have liked to have experimented
# further with this method.
#
# `data` is a list of rows (one row per table line) and `rows` a list of row
# labels.  Previously `rows` was the bare string 'Compound' (so len(rows) was
# 8) and `data[row]` picked the first user's single score, which drew every
# bar at that one value and repeated it across the table.
# NOTE(review): assumes sentiment_df has exactly one row per entry of
# target_terms, in the same order - confirm.
data = [sentiment_df['Compound'].values]
columns = target_terms
rows = ['Compound']

# Not used by the plot below; retained because it is a module-level name.
values = np.arange(-.175, .150, .025)

# Get some pastel shades for the colors (one per table row)
colors = plt.cm.BuPu(np.linspace(0, 0.5, len(rows)))
n_rows = len(data)

index = np.arange(len(columns)) + 0.3
bar_width = 0.4

# Initialize the vertical-offset for the stacked bar chart.
y_offset = np.zeros(len(columns))

# Plot bars and create text labels for the table
cell_text = []
for row in range(n_rows):
    plt.bar(index, data[row], bar_width, bottom=y_offset, color=colors[row])
    y_offset = y_offset + data[row]
    cell_text.append(['%1.1f' % score for score in y_offset])

# Reverse colors and text labels to display the last value at the top.
colors = colors[::-1]
cell_text.reverse()

# Add a table at the bottom of the axes
the_table = plt.table(cellText=cell_text,
                      rowLabels=rows,
                      rowColours=colors,
                      colLabels=columns)

# Adjust layout to make room for the table:
plt.subplots_adjust(left=0.2, bottom=0.2)

plt.title('Title')
plt.show()
# Bar chart of average compound score per user, with each bar annotated by
# its value.
x = sentiment_df['User']
y = sentiment_df['Compound']
plt.bar(x, y)

# Walk users and scores in lockstep and write each score at the top of its bar
for handle, score in zip(x, y):
    plt.text(handle, score, str(score))

# Defined here but not passed to the bar call above
colors = ['#624ea7', 'g', 'yellow', 'k', 'maroon']

plt.grid(True, color='gray', linestyle='-', linewidth=.5)
plt.axhline(0, color='k')

plt.title('Sentiment Analysis of Media Tweets', size=17)
plt.xlabel("Users", size=10)
plt.ylabel("Tweet Polarity", size=10)
plt.show()
Observations
On the surface this appeared to be a relatively straightforward assignment. There were challenges with the graphs (adding data labels and changing the color of each bar, for example). There were also challenges with lists and DataFrames, including working with DateTime values, the index, adding rows to DataFrames, etc.
It is interesting that only one news agency had a positive sentiment. It would be interesting to do more analysis on the
CBS tweets to determine why this user's score is far lower than those of two of the others, i.e. why it has the worst score.
With more time I would have loved to have done more analysis on the positive-sentiment tweets to see which words or which subjects generated positive sentiments.