#Package to get Tweets and lots of cool stuff
library(twitteR)
#Package to manipulate data sets
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## id(): dplyr, twitteR
## lag(): dplyr, stats
## location(): dplyr, twitteR
#Package for text mining
library(tidytext)
## [1] "Using direct authentication"
# Fetch up to 1000 English-language tweets matching "Macron"; the
# `-filter:retweets` part of the search string is passed to the Twitter search
# as-is.
# `retryOnRateLimit` is forwarded to twitteR (see ?searchTwitter) so a long
# download can retry instead of failing when the rate limit is hit.
# The argument list deliberately ends without a trailing comma: a trailing
# comma passes an empty positional argument, which R silently matches to the
# next unfilled formal of searchTwitter().
macron_tweets <- searchTwitter(
  searchString = "Macron -filter:retweets",
  n = 1000,
  lang = "en",
  resultType = "mixed",
  retryOnRateLimit = 120
)
# Preview the first few results.
print(head(macron_tweets))
## [[1]]
## [1] "HillaryClinton: Victory for Macron, for France, the EU, & the world. \n\nDefeat to those interfering w/democracy. (But the media says I can't talk about that)"
##
## [[2]]
## [1] "realDonaldTrump: Congratulations to Emmanuel Macron on his big win today as the next President of France. I look very much forward to working with him!"
##
## [[3]]
## [1] "France24_en: \xed\xa0\xbd\xed\xb4\xb4 #BREAKING - Emmanuel #Macron elected president of France (with 65.1% of the vote) https://t.co/4Vod8NNBm4"
##
## [[4]]
## [1] "MarkEng30768932: Much the same as @timfarron & @LibDems save they will never sniff Government. https://t.co/umzGVhWcMw"
##
## [[5]]
## [1] "mlnangalama: Macron's win papered over deep cracks in France, especially in the famed champagne region https://t.co/gP7EHmkeZ5"
##
## [[6]]
## [1] "Ecroaker: #TISNews Emmanuel Macron Is The New French President https://t.co/mcOTCwnSo1"
# Inspect the structure of a single tweet object. str() prints its report
# itself and returns NULL invisibly, so it must not be wrapped in print()
# (that would add a stray "NULL" line after the report).
str(macron_tweets[[1]])
## Reference class 'status' [package "twitteR"] with 17 fields
## $ text : chr "Victory for Macron, for France, the EU, & the world. \n\nDefeat to those interfering w/democracy. (But the "| __truncated__
## $ favorited : logi FALSE
## $ favoriteCount: num 275888
## $ replyToSN : chr(0)
## $ created : POSIXct[1:1], format: "2017-05-07 20:32:18"
## $ truncated : logi FALSE
## $ replyToSID : chr(0)
## $ id : chr "861317789537193988"
## $ replyToUID : chr(0)
## $ statusSource : chr "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>"
## $ screenName : chr "HillaryClinton"
## $ retweetCount : num 81680
## $ isRetweet : logi FALSE
## $ retweeted : logi FALSE
## $ longitude : chr(0)
## $ latitude : chr(0)
## $ urls :'data.frame': 0 obs. of 4 variables:
## ..$ url : chr(0)
## ..$ expanded_url: chr(0)
## ..$ dispaly_url : chr(0)
## ..$ indices : num(0)
## and 53 methods, of which 39 are possibly relevant:
## getCreated, getFavoriteCount, getFavorited, getId, getIsRetweet,
## getLatitude, getLongitude, getReplyToSID, getReplyToSN, getReplyToUID,
## getRetweetCount, getRetweeted, getRetweeters, getRetweets,
## getScreenName, getStatusSource, getText, getTruncated, getUrls,
## initialize, setCreated, setFavoriteCount, setFavorited, setId,
## setIsRetweet, setLatitude, setLongitude, setReplyToSID, setReplyToSN,
## setReplyToUID, setRetweetCount, setRetweeted, setScreenName,
## setStatusSource, setText, setTruncated, setUrls, toDataFrame,
## toDataFrame#twitterObj
# Collect the text of every tweet into a plain character vector.
# vapply() pins the result to exactly one string per tweet, so an empty search
# result yields character(0) rather than the empty list sapply() would return
# (which would turn the `tweet` column built below into a list-column).
eth_text <- vapply(
  macron_tweets,
  function(status) status$getText(),
  character(1)
)
head(eth_text)
## [1] "Victory for Macron, for France, the EU, & the world. \n\nDefeat to those interfering w/democracy. (But the media says I can't talk about that)"
## [2] "Congratulations to Emmanuel Macron on his big win today as the next President of France. I look very much forward to working with him!"
## [3] "\xed\xa0\xbd\xed\xb4\xb4 #BREAKING - Emmanuel #Macron elected president of France (with 65.1% of the vote) https://t.co/4Vod8NNBm4"
## [4] "Much the same as @timfarron & @LibDems save they will never sniff Government. https://t.co/umzGVhWcMw"
## [5] "Macron's win papered over deep cracks in France, especially in the famed champagne region https://t.co/gP7EHmkeZ5"
## [6] "#TISNews Emmanuel Macron Is The New French President https://t.co/mcOTCwnSo1"
# Build `eth_words`: one row per word across all tweets, with links, markup
# residue, stop words and letter-free tokens removed.
eth_words <-
  # Create a tibble (a kind of data frame) with one row per tweet
  tibble(tweet = eth_text) %>%
  # Remove links, HTML entities, the "RT" marker and quote characters.
  mutate(tweet = stringr::str_replace_all(
    tweet,
    paste(
      c(
        # Shortened t.co links and the start of plain http links
        "https://t.co/[A-Za-z\\d]+",
        "http://[A-Za-z\\d]+",
        # HTML entities are matched before the bare characters; otherwise
        # "&amp;" would lose only its "&" and leave "amp" behind as a word
        "&amp;|&lt;|&gt;",
        "&|<|>",
        # "RT" only as a standalone token, so upper-case words that merely
        # contain the letters "RT" are left intact
        "\\bRT\\b",
        # Leftover "https" stubs not matched by the link patterns above
        "https",
        # Single and double quotes.
        # NOTE(review): stripping apostrophes merges possessives and
        # contractions into their stem ("macron's" -> "macrons") before the
        # stop-word join below; presumably intentional -- confirm.
        "\'|\""
      ),
      collapse = "|"
    ),
    ""
  )) %>%
  # Separate each tweet into words.
  # The pattern describes the separators: any character that is not a letter,
  # digit, underscore, "#", "@" or apostrophe, plus any apostrophe that is not
  # followed by one of those word characters. Hashtags (#) and mentions (@)
  # therefore stay attached to their word, since they are meaningful on
  # Twitter.
  unnest_tokens(
    word, tweet,
    token = "regex",
    pattern = "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"
  ) %>%
  # Remove stop words such as and, or, before, after etc.
  anti_join(stop_words, by = "word") %>%
  # Keep only tokens containing at least one lower-case letter, which drops
  # pure numbers
  filter(stringr::str_detect(word, "[a-z]"))
# Show the 25 most frequent words, most common first.
print(
  count(eth_words, word, sort = TRUE),
  n = 25
)
## # A tibble: 2,135 × 2
## word n
## <chr> <int>
## 1 macron 782
## 2 french 187
## 3 france 153
## 4 le 135
## 5 emmanuel 132
## 6 pen 128
## 7 win 117
## 8 election 110
## 9 #macron 107
## 10 president 99
## 11 polls 94
## 12 exit 90
## 13 wins 66
## 14 vote 65
## 15 #presidentielle2017 58
## 16 macrons 57
## 17 victory 50
## 18 presidential 46
## 19 results 45
## 20 live 37
## 21 elections 34
## 22 marine 34
## 23 populism 34
## 24 frances 33
## 25 europes 32
## # ... with 2,110 more rows
library(wordcloud)
## Loading required package: RColorBrewer
# Three word clouds (at most 100 words each), sized by word frequency.
# NOTE(review): long handles may not fit at the default text size, in which
# case wordcloud() skips them with a warning; a smaller `scale` is the usual
# remedy -- confirm against ?wordcloud.

# 1. Ordinary words: everything that is neither a hashtag nor a mention.
eth_words %>%
  filter(!grepl("[#@]", word)) %>%
  count(word, sort = TRUE) %>%
  with(wordcloud(word, n, max.words = 100))

# 2. Hashtags only.
eth_words %>%
  filter(grepl("#", word, fixed = TRUE)) %>%
  count(word, sort = TRUE) %>%
  with(wordcloud(word, n, max.words = 100))

# 3. Mentions only.
eth_words %>%
  filter(grepl("@", word, fixed = TRUE)) %>%
  count(word, sort = TRUE) %>%
  with(wordcloud(word, n, max.words = 100))
## Warning in wordcloud(word, n, max.words = 100): @telegraphnews could not be
## fit on page. It will not be plotted.
## Warning in wordcloud(word, n, max.words = 100): @emmanuelmacron could not
## be fit on page. It will not be plotted.
## Warning in wordcloud(word, n, max.words = 100): @garylineker could not be
## fit on page. It will not be plotted.
## Warning in wordcloud(word, n, max.words = 100): @bbcworld could not be fit
## on page. It will not be plotted.
# Peek at ten randomly chosen rows of the "bing" sentiment lexicon.
sample_n(get_sentiments("bing"), size = 10)
## # A tibble: 10 × 2
## word sentiment
## <chr> <chr>
## 1 vex negative
## 2 beseech negative
## 3 disintegrate negative
## 4 repel negative
## 5 killed negative
## 6 ineffectual negative
## 7 warmth positive
## 8 misrepresentation negative
## 9 delirium negative
## 10 anti-us negative
# Count each word, then keep only the words present in the "bing" lexicon,
# attaching their sentiment label.
# NOTE(review): the join is by spelling alone, so a name that coincides with
# a lexicon entry (e.g. "trump") is scored as well.
eth_bing_sentiments <- eth_words %>%
  count(word, sort = TRUE) %>%
  inner_join(get_sentiments("bing"), by = "word")

print(eth_bing_sentiments)
## # A tibble: 283 × 3
## word n sentiment
## <chr> <int> <chr>
## 1 win 117 positive
## 2 wins 66 positive
## 3 victory 50 positive
## 4 won 24 positive
## 5 leading 17 positive
## 6 trump 17 positive
## 7 hack 12 negative
## 8 winning 12 positive
## 9 comfortable 10 positive
## 10 crushed 10 negative
## # ... with 273 more rows
# Total occurrences per sentiment, plus each sentiment's share of all
# lexicon-matched occurrences (rounded to two decimals).
eth_bing_sentiments %>%
  group_by(sentiment) %>%
  summarise(occurence = sum(n)) %>%
  ungroup() %>%
  mutate(share = round(occurence / sum(occurence), digits = 2))
## # A tibble: 2 × 3
## sentiment occurence share
## <chr> <int> <dbl>
## 1 negative 291 0.38
## 2 positive 476 0.62
# Reshape to a word-by-sentiment matrix of counts (0 where a word does not
# carry that sentiment) and draw a comparison cloud of up to 100 words, one
# colour per sentiment column.
eth_bing_sentiments %>%
  reshape2::acast(
    formula = word ~ sentiment,
    value.var = "n",
    fill = 0
  ) %>%
  comparison.cloud(
    max.words = 100,
    colors = c("#F8766D", "#00BFC4")
  )