This is based on:

https://www.r-bloggers.com/setting-up-the-twitter-r-package-for-text-analytics/

https://www.r-bloggers.com/greenville-on-twitter/

  1. Install the twitteR package and make it available in your R session.
#install.packages("twitteR")
#install.packages("tidytext")
#install.packages("dplyr")
#install.packages("ggplot2")
  2. On the Twitter side, you need to do a few things to get set up, if you have not done them already.
library(twitteR)
library(tidytext) # unnest_tokens(), stop_words, get_sentiments()

# Copy the Twitter API credentials into the names used below. The *_nt
# objects are not defined in this script, so they must already exist in the
# session before these lines run.
consumer_key <- consumer_key_nt
consumer_secret <- consumer_secret_nt
access_token <- access_token_nt
access_secret <- access_secret_nt

# Authenticate this session with the four credentials.
setup_twitter_oauth(
  consumer_key = consumer_key,
  consumer_secret = consumer_secret,
  access_token = access_token,
  access_secret = access_secret
)
## [1] "Using direct authentication"

Example analyses

Fake News

Now let’s search for tweets (the query in the code below is "#Houston", although this section was written around fakenews), plot the most frequent words within these tweets, and do a quick sentiment analysis.

library(tidyverse)
library(twitteR)
# Request up to 1000 English-language tweets matching "#Houston".
# NOTE(review): the section heading is "Fake News" and the extra stop words
# further down include "fakenews", but the query here is "#Houston" --
# confirm which search term is intended.
fn_twitter <- searchTwitter("#Houston", n = 1000, lang = "en")

# Convert the list returned by searchTwitter() to a data frame.
fn_twitter_df <- twListToDF(fn_twitter)

# Keep only the tweet id and text, then split the text into one word per row.
tweet_words <- fn_twitter_df %>%
  select(id, text) %>%
  unnest_tokens(word, text)

# Bar chart of the 20 most frequent words, ordered from most to least
# frequent; x labels are rotated so long words do not overlap.
tweet_words %>%
  count(word, sort = TRUE) %>%
  slice(1:20) %>%
  ggplot(aes(x = reorder(word, n, function(n) -n), y = n)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  xlab("")

# Create a list of stop words: words that are not worth including.
# Starts from `stop_words` (lexicon column dropped) and appends Twitter
# boilerplate plus noise tokens specific to this search.
# stringsAsFactors = FALSE keeps `word` a character column on R < 4.0 too,
# so bind_rows() does not have to coerce a factor.
my_stop_words <- stop_words %>%
  select(-lexicon) %>%
  bind_rows(
    data.frame(
      word = c(
        "https", "t.co", "rt", "amp",
        "4yig9gzh5t", "fyy2ceydhi", "78", "fakenews"
      ),
      stringsAsFactors = FALSE
    )
  )

# Drop every stop word; the join key is spelled out rather than guessed.
tweet_words_interesting <- tweet_words %>%
  anti_join(my_stop_words, by = "word")

# Bar chart of the 25 most frequent remaining words, most frequent first.
tweet_words_interesting %>%
  count(word, sort = TRUE) %>%
  slice(1:25) %>%
  ggplot(aes(x = reorder(word, n, function(n) -n), y = n)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  xlab("")

# Load the NRC sentiment lexicon.
# NOTE(review): recent tidytext versions appear to fetch NRC through the
# textdata package -- confirm it is installed before running.
nrc_lex <- get_sentiments("nrc")
# The lexicon loaded is NRC, not Bing; the old name is kept as an alias so
# any later code that refers to `bing_lex` still works.
bing_lex <- nrc_lex

# Attach sentiment labels to each word. Words absent from the lexicon keep a
# row with sentiment = NA.
fn_sentiment <- tweet_words_interesting %>%
  left_join(nrc_lex, by = "word")

# Number of word occurrences per sentiment, ignoring unmatched words.
fn_sentiment %>%
  filter(!is.na(sentiment)) %>%
  count(sentiment)
## # A tibble: 10 x 2
##       sentiment     n
##           <chr> <int>
##  1        anger   124
##  2 anticipation   245
##  3      disgust    80
##  4         fear   191
##  5          joy   315
##  6     negative   552
##  7     positive  1023
##  8      sadness   249
##  9     surprise   128
## 10        trust   569

U.S. Travel Ban

Let’s do a similar analysis for travel ban tweets.

library(tidyverse)
library(twitteR)
# Request up to 1000 tweets matching "travelban". lang = "en" matches the
# earlier "#Houston" search and the English stop-word list applied afterwards.
tb_twitter <- searchTwitter("travelban", n = 1000, lang = "en")

# Convert the list returned by searchTwitter() to a data frame.
tb_twitter_df <- twListToDF(tb_twitter)

# Keep only the tweet id and text, then split the text into one word per row.
# This overwrites the `tweet_words` built for the previous search.
tweet_words <- tb_twitter_df %>%
  select(id, text) %>%
  unnest_tokens(word, text)

# Bar chart of the 20 most frequent words, ordered from most to least
# frequent; x labels are rotated so long words do not overlap.
tweet_words %>%
  count(word, sort = TRUE) %>%
  slice(1:20) %>%
  ggplot(aes(x = reorder(word, n, function(n) -n), y = n)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  xlab("")

# Create a list of stop words: words that are not worth including.
# Starts from `stop_words` (lexicon column dropped) and appends Twitter
# boilerplate plus the search term itself.
# stringsAsFactors = FALSE keeps `word` a character column on R < 4.0 too,
# so bind_rows() does not have to coerce a factor.
my_stop_words <- stop_words %>%
  select(-lexicon) %>%
  bind_rows(
    data.frame(
      word = c("https", "t.co", "rt", "amp", "travelban"),
      stringsAsFactors = FALSE
    )
  )

# Drop every stop word; the join key is spelled out rather than guessed.
tweet_words_interesting <- tweet_words %>%
  anti_join(my_stop_words, by = "word")

# Bar chart of the 25 most frequent remaining words, most frequent first.
tweet_words_interesting %>%
  count(word, sort = TRUE) %>%
  slice(1:25) %>%
  ggplot(aes(x = reorder(word, n, function(n) -n), y = n)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  xlab("")