#Package to get Tweets and lots of cool stuff 
library(twitteR) 
#Package to manipulate data sets 
library(tidyverse) 
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter():   dplyr, stats
## id():       dplyr, twitteR
## lag():      dplyr, stats
## location(): dplyr, twitteR
#Package for text mining 
library(tidytext) 
## [1] "Using direct authentication"
# Search Twitter for recent English-language tweets mentioning "Macron",
# excluding retweets via the "-filter:retweets" search operator.
# NOTE(review): assumes an authenticated twitteR session has already been set
# up earlier in the session (not shown in this script) - confirm.
macron_tweets <- searchTwitter(
  searchString = "Macron -filter:retweets",
  n = 1000,                # upper bound on the number of tweets requested
  retryOnRateLimit = 120,  # keep retrying when the API rate limit is hit
  lang = "en",
  resultType = "mixed"     # no trailing comma: it passed an empty extra argument
)
print(head(macron_tweets))
## [[1]]
## [1] "HillaryClinton: Victory for Macron, for France, the EU, &amp; the world. \n\nDefeat to those interfering w/democracy. (But the media says I can't talk about that)"
## 
## [[2]]
## [1] "realDonaldTrump: Congratulations to Emmanuel Macron on his big win today as the next President of France. I look very much forward to working with him!"
## 
## [[3]]
## [1] "France24_en: \xed\xa0\xbd\xed\xb4\xb4 #BREAKING - Emmanuel #Macron elected president of France (with 65.1% of the vote) https://t.co/4Vod8NNBm4"
## 
## [[4]]
## [1] "MarkEng30768932: Much the same as @timfarron &amp; @LibDems save they will never sniff Government. https://t.co/umzGVhWcMw"
## 
## [[5]]
## [1] "mlnangalama: Macron's win papered over deep cracks in France, especially in the famed champagne region https://t.co/gP7EHmkeZ5"
## 
## [[6]]
## [1] "Ecroaker: #TISNews Emmanuel Macron Is The New French President https://t.co/mcOTCwnSo1"
# Inspect the structure of the first tweet object.
# str() prints its own output and returns NULL, so it must not be wrapped in
# print(): doing so appended a stray "NULL" to the output.
str(macron_tweets[[1]])
## Reference class 'status' [package "twitteR"] with 17 fields
##  $ text         : chr "Victory for Macron, for France, the EU, &amp; the world. \n\nDefeat to those interfering w/democracy. (But the "| __truncated__
##  $ favorited    : logi FALSE
##  $ favoriteCount: num 275888
##  $ replyToSN    : chr(0) 
##  $ created      : POSIXct[1:1], format: "2017-05-07 20:32:18"
##  $ truncated    : logi FALSE
##  $ replyToSID   : chr(0) 
##  $ id           : chr "861317789537193988"
##  $ replyToUID   : chr(0) 
##  $ statusSource : chr "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>"
##  $ screenName   : chr "HillaryClinton"
##  $ retweetCount : num 81680
##  $ isRetweet    : logi FALSE
##  $ retweeted    : logi FALSE
##  $ longitude    : chr(0) 
##  $ latitude     : chr(0) 
##  $ urls         :'data.frame':   0 obs. of  4 variables:
##   ..$ url         : chr(0) 
##   ..$ expanded_url: chr(0) 
##   ..$ dispaly_url : chr(0) 
##   ..$ indices     : num(0) 
##  and 53 methods, of which 39 are  possibly relevant:
##    getCreated, getFavoriteCount, getFavorited, getId, getIsRetweet,
##    getLatitude, getLongitude, getReplyToSID, getReplyToSN, getReplyToUID,
##    getRetweetCount, getRetweeted, getRetweeters, getRetweets,
##    getScreenName, getStatusSource, getText, getTruncated, getUrls,
##    initialize, setCreated, setFavoriteCount, setFavorited, setId,
##    setIsRetweet, setLatitude, setLongitude, setReplyToSID, setReplyToSN,
##    setReplyToUID, setRetweetCount, setRetweeted, setScreenName,
##    setStatusSource, setText, setTruncated, setUrls, toDataFrame,
##    toDataFrame#twitterObj
# Pull the raw text out of every tweet object into a character vector.
# vapply() (rather than sapply()) guarantees a character vector even when the
# search returned nothing, and fails loudly if any tweet does not yield
# exactly one string.
eth_text <- vapply(
  macron_tweets,
  function(status) status[["text"]],
  FUN.VALUE = character(1)
)
head(eth_text)
## [1] "Victory for Macron, for France, the EU, &amp; the world. \n\nDefeat to those interfering w/democracy. (But the media says I can't talk about that)"
## [2] "Congratulations to Emmanuel Macron on his big win today as the next President of France. I look very much forward to working with him!"            
## [3] "\xed\xa0\xbd\xed\xb4\xb4 #BREAKING - Emmanuel #Macron elected president of France (with 65.1% of the vote) https://t.co/4Vod8NNBm4"                            
## [4] "Much the same as @timfarron &amp; @LibDems save they will never sniff Government. https://t.co/umzGVhWcMw"                                         
## [5] "Macron's win papered over deep cracks in France, especially in the famed champagne region https://t.co/gP7EHmkeZ5"                                 
## [6] "#TISNews Emmanuel Macron Is The New French President https://t.co/mcOTCwnSo1"
# Tokenize the tweets into one lower-cased word per row, with links, HTML
# entities, retweet markers, stop words and purely numeric tokens removed.
eth_words <-
  # One row per tweet; the raw text lives in the `tweet` column.
  tibble(tweet = eth_text) %>%
  # Strip noise before tokenizing.
  mutate(
    tweet = stringr::str_replace_all(
      tweet,
      paste(
        c(
          # Whole URLs. The previous "http://[A-Za-z\\d]+" stopped at the
          # first "." and left fragments such as "co" behind as tokens.
          "https?://\\S+",
          # HTML entities left in the text by the API.
          "&amp;", "&lt;", "&gt;",
          # Retweet marker as a standalone word only, so the letters "RT"
          # inside upper-case words are left alone.
          "\\bRT\\b",
          # Scheme left over from links cut off by truncation.
          "\\bhttps?\\b",
          # Possessive 's, so "macron's" is counted as "macron" instead of
          # becoming the artificial word "macrons".
          "'s\\b",
          # Apostrophes used as quotes (not between two letters). Apostrophes
          # inside words are kept so contractions such as "can't" still match
          # the entries in `stop_words`.
          "(?<![A-Za-z])'", "'(?![A-Za-z])",
          # Double quotes.
          "\""
        ),
        collapse = "|"
      ),
      ""
    )
  ) %>%
  # Split each tweet into words. The pattern describes the separators: any
  # character that is not a letter, digit, underscore, "#", "@" or an
  # apostrophe, plus apostrophes not followed by a word character. Hashtags
  # and mentions therefore stay intact as single tokens.
  unnest_tokens(
    word, tweet,
    token = "regex",
    pattern = "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"
  ) %>%
  # Remove stop words such as "and", "or", "before", "after".
  anti_join(stop_words, by = "word") %>%
  # Keep only tokens containing at least one letter (drops pure numbers).
  filter(stringr::str_detect(word, "[a-z]"))
# Show the 25 most frequent words, most frequent first.
print(count(eth_words, word, sort = TRUE), n = 25)
## # A tibble: 2,135 × 2
##                   word     n
##                  <chr> <int>
## 1               macron   782
## 2               french   187
## 3               france   153
## 4                   le   135
## 5             emmanuel   132
## 6                  pen   128
## 7                  win   117
## 8             election   110
## 9              #macron   107
## 10           president    99
## 11               polls    94
## 12                exit    90
## 13                wins    66
## 14                vote    65
## 15 #presidentielle2017    58
## 16             macrons    57
## 17             victory    50
## 18        presidential    46
## 19             results    45
## 20                live    37
## 21           elections    34
## 22              marine    34
## 23            populism    34
## 24             frances    33
## 25             europes    32
## # ... with 2,110 more rows
library(wordcloud)
## Loading required package: RColorBrewer
# Draw a word cloud of the most frequent tokens in `tokens`.
#
# tokens     Data frame with one token per row in a `word` column.
# pattern    Regular expression tested against each token.
# invert     If FALSE keep tokens matching `pattern`; if TRUE keep the others.
# max_words  Maximum number of words to plot.
# size_range Largest and smallest word size, passed to wordcloud() as `scale`.
#            Smaller than the wordcloud() default so that long tokens such as
#            @-mentions are less likely to be dropped with a
#            "could not be fit on page" warning.
#
# Called for its plotting side effect; returns NULL invisibly.
draw_word_cloud <- function(tokens, pattern, invert = FALSE,
                            max_words = 100, size_range = c(3, 0.4)) {
  tokens %>%
    filter(grepl(pattern, word) != invert) %>%
    count(word, sort = TRUE) %>%
    with(wordcloud(
      word, n,
      max.words = max_words,
      scale = size_range,
      # Place the most frequent (largest) words first, while there is still
      # room for them, instead of in random order.
      random.order = FALSE
    ))
  invisible(NULL)
}

# Plain words: everything that is neither a hashtag nor a mention.
draw_word_cloud(eth_words, "\\#|@", invert = TRUE)

# Hashtags only.
draw_word_cloud(eth_words, "\\#")

# Mentions only.
draw_word_cloud(eth_words, "@")
## Warning in wordcloud(word, n, max.words = 100): @telegraphnews could not be
## fit on page. It will not be plotted.
## Warning in wordcloud(word, n, max.words = 100): @emmanuelmacron could not
## be fit on page. It will not be plotted.
## Warning in wordcloud(word, n, max.words = 100): @garylineker could not be
## fit on page. It will not be plotted.
## Warning in wordcloud(word, n, max.words = 100): @bbcworld could not be fit
## on page. It will not be plotted.

# Peek at ten randomly chosen entries of the "bing" sentiment lexicon.
sample_n(get_sentiments("bing"), 10)
## # A tibble: 10 × 2
##                 word sentiment
##                <chr>     <chr>
## 1                vex  negative
## 2            beseech  negative
## 3       disintegrate  negative
## 4              repel  negative
## 5             killed  negative
## 6        ineffectual  negative
## 7             warmth  positive
## 8  misrepresentation  negative
## 9           delirium  negative
## 10           anti-us  negative
# Count each word and attach its polarity from the "bing" lexicon; words that
# are not in the lexicon are dropped by the inner join.
eth_bing_sentiments <-
  eth_words %>%
  count(word, sort = TRUE) %>%
  # The lexicon scores "trump" as a positive word, but in political tweets it
  # is a surname; leaving it in inflates the positive share.
  filter(word != "trump") %>%
  inner_join(get_sentiments("bing"), by = "word")
print(eth_bing_sentiments)
## # A tibble: 283 × 3
##           word     n sentiment
##          <chr> <int>     <chr>
## 1          win   117  positive
## 2         wins    66  positive
## 3      victory    50  positive
## 4          won    24  positive
## 5      leading    17  positive
## 6        trump    17  positive
## 7         hack    12  negative
## 8      winning    12  positive
## 9  comfortable    10  positive
## 10     crushed    10  negative
## # ... with 273 more rows
# Total occurrences per sentiment and each sentiment's share of all matched
# words, rounded to two decimals. local() keeps the intermediate table out of
# the global environment; the result is still printed.
local({
  totals <- summarise(
    group_by(eth_bing_sentiments, sentiment),
    occurence = sum(n)
  )
  mutate(ungroup(totals), share = round(occurence / sum(occurence), 2))
})
## # A tibble: 2 × 3
##   sentiment occurence share
##       <chr>     <int> <dbl>
## 1  negative       291  0.38
## 2  positive       476  0.62
# Comparison cloud of negative vs. positive words. acast() reshapes the counts
# into a word-by-sentiment matrix (0 where a word lacks that sentiment), which
# is the input comparison.cloud() is given here.
comparison.cloud(
  reshape2::acast(
    eth_bing_sentiments,
    word ~ sentiment,
    value.var = "n",
    fill = 0
  ),
  colors = c("#F8766D", "#00BFC4"),
  max.words = 100
)