# Start
library(twitteR)
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## id(): dplyr, twitteR
## lag(): dplyr, stats
## location(): dplyr, twitteR
library(tidytext)
# Authenticate this session against the Twitter API. The four credential
# variables are expected to be defined before this point (not shown here).
setup_twitter_oauth(
  the_api_key,
  the_api_secret,
  access_token = the_access_token,
  access_secret = the_access_secret
)
## [1] "Using direct authentication"
# Search for English-language tweets matching "Pubg", with retweets excluded
# through the "-filter:retweets" operator embedded in the search string.
# `n`, `resultType` and `retryOnRateLimit` are handed to twitteR unchanged.
# The call no longer ends with a dangling comma, which passed an empty
# argument that R matched positionally to the next unmatched formal.
pubg_tweets <- searchTwitter(
  searchString = "Pubg -filter:retweets",
  n = 1000,
  lang = "en",
  resultType = "mixed",
  retryOnRateLimit = 120
)
# Show the first few results as a sanity check.
print(head(pubg_tweets))
## [[1]]
## [1] "G2A_com: Do you think any of the game in top 10 can dethrone #GTAV next month? #PUBG #Overwatch #CSGO #DS3 https://t.co/cJIrWN0hgh"
##
## [[2]]
## [1] "IIJERiiCHOII: Das stream iz live, Rocket League to start, then prob PUBG\n\n> https://t.co/mAOdJvZEbq"
##
## [[3]]
## [1] "kurtjmac: Get yourself someone who looks at you the way @kurtjmac looks at #PUBG chicken dinners. (h/t @jeffmorton for the sc… https://t.co/yGZdVRZwyb"
##
## [[4]]
## [1] "BCWise_: Pubg sound is the worst thing I've ever experienced"
##
## [[5]]
## [1] "WattiesGames: Having a BLAST!! in #pubg. Come chill! #twitch #SupportSmallStreamers #TeamEmmmmsie https://t.co/GqdN0kk9xu"
##
## [[6]]
## [1] "DevTwoThousand: I don't think there's a person in mankind that has ever gotten less loot in PUBG than me and @TheSuperDave did tonight, jeeze."
# Pull the "text" field out of every tweet object into a character vector.
# vapply() guarantees a character vector (character(0) for an empty search
# result) and fails loudly if an element does not yield exactly one string,
# whereas sapply() would silently return a list in those cases.
eth_text <- vapply(pubg_tweets, "[[", FUN.VALUE = character(1), "text")
head(eth_text)
## [1] "Do you think any of the game in top 10 can dethrone #GTAV next month? #PUBG #Overwatch #CSGO #DS3 https://t.co/cJIrWN0hgh"
## [2] "Das stream iz live, Rocket League to start, then prob PUBG\n\n> https://t.co/mAOdJvZEbq"
## [3] "Get yourself someone who looks at you the way @kurtjmac looks at #PUBG chicken dinners. (h/t @jeffmorton for the sc… https://t.co/yGZdVRZwyb"
## [4] "Pubg sound is the worst thing I've ever experienced"
## [5] "Having a BLAST!! in #pubg. Come chill! #twitch #SupportSmallStreamers #TeamEmmmmsie https://t.co/GqdN0kk9xu"
## [6] "I don't think there's a person in mankind that has ever gotten less loot in PUBG than me and @TheSuperDave did tonight, jeeze."
# Build a one-word-per-row tibble (`word` column) from the tweet text:
#   1. strip links, markup characters, retweet markers and quote characters,
#   2. split into tokens while keeping #hashtags, @mentions and in-word
#      apostrophes,
#   3. drop stop words and tokens that contain no lower-case letter.
eth_words <- local({
  # Text removed before tokenising. Alternatives are tried left to right, so
  # the full HTML entities are listed before the bare "&", "<" and ">".
  # NOTE(review): the entity alternatives only matter if the tweet text still
  # contains escaped entities such as "&amp;" -- confirm against the API output.
  strip_pattern <- paste(
    "https://t\\.co/[A-Za-z\\d]+",
    "http://[A-Za-z\\d]+",
    "&amp;|&lt;|&gt;",
    "&|<|>",
    # Whole-word match only, so "RT" inside upper-case words is left alone.
    "\\bRT\\b",
    "https",
    # Only quote-like apostrophes are removed. Apostrophes inside a word stay,
    # so contractions ("i'm", "don't") still match their `stop_words` entries
    # instead of surviving as "im" / "dont".
    "(?<![A-Za-z\\d])'|'(?![A-Za-z\\d])",
    "\"",
    sep = "|"
  )
  # Token separator: anything that is not a word character, "#", "@" or an
  # apostrophe, plus apostrophes that are not followed by such a character.
  split_pattern <- "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"

  tibble(tweet = eth_text) %>%
    mutate(tweet = stringr::str_replace_all(tweet, strip_pattern, "")) %>%
    unnest_tokens(word, tweet, token = "regex", pattern = split_pattern) %>%
    anti_join(stop_words, by = "word") %>%
    filter(stringr::str_detect(word, "[a-z]"))
})
# Frequency table of all tokens, most common first; show the top 25 rows.
print(count(eth_words, word, sort = TRUE), n = 25)
## # A tibble: 2,389 × 2
## word n
## <chr> <int>
## 1 pubg 763
## 2 #pubg 208
## 3 @youtube 111
## 4 playing 97
## 5 stream 95
## 6 play 94
## 7 live 92
## 8 game 77
## 9 battlegrounds 67
## 10 im 60
## 11 video 59
## 12 games 46
## 13 win 43
## 14 time 42
## 15 solo 41
## 16 tonight 41
## 17 #twitch 39
## 18 playerunknowns 39
## 19 @pubattlegrounds 35
## 20 #supportsmallstreamers 34
## 21 squad 34
## 22 chicken 32
## 23 night 32
## 24 fun 29
## 25 streaming 29
## # ... with 2,364 more rows
library(wordcloud)
## Loading required package: RColorBrewer
# Word cloud of ordinary words: tokens containing "#" or "@" are left out.
with(
  count(filter(eth_words, !grepl("\\#|@", word)), word, sort = TRUE),
  wordcloud(word, n, max.words = 100)
)
# Word cloud restricted to tokens that contain "#" (hashtags).
with(
  count(filter(eth_words, grepl("\\#", word)), word, sort = TRUE),
  wordcloud(word, n, max.words = 100)
)
# Word cloud restricted to tokens that contain "@" (mentions).
with(
  count(filter(eth_words, grepl("@", word)), word, sort = TRUE),
  wordcloud(word, n, max.words = 100)
)
# Peek at ten randomly chosen rows of the "bing" sentiment lexicon.
sample_n(get_sentiments("bing"), 10)
## # A tibble: 10 × 2
## word sentiment
## <chr> <chr>
## 1 intuitive positive
## 2 adulterate negative
## 3 snappy positive
## 4 devout positive
## 5 cataclysmic negative
## 6 evasive negative
## 7 illness negative
## 8 tanked negative
## 9 sensible positive
## 10 insinuating negative
# Per-word frequencies joined to the "bing" lexicon; the inner join keeps only
# words that carry a sentiment label.
eth_bing_sentiments <- inner_join(
  count(eth_words, word, sort = TRUE),
  get_sentiments("bing"),
  by = "word"
)
print(eth_bing_sentiments)
## # A tibble: 263 × 3
## word n sentiment
## <chr> <int> <chr>
## 1 win 43 positive
## 2 fun 29 positive
## 3 kill 19 negative
## 4 chill 18 negative
## 5 top 17 positive
## 6 love 15 positive
## 7 kills 13 negative
## 8 winner 12 positive
## 9 shit 11 negative
## 10 awesome 10 positive
## # ... with 253 more rows
# Total occurrences per sentiment label and each label's share of the total,
# rounded to two decimals.
local({
  sentiment_totals <- summarise(
    group_by(eth_bing_sentiments, sentiment),
    occurence = sum(n)
  )
  mutate(
    ungroup(sentiment_totals),
    share = round(occurence / sum(occurence), 2)
  )
})
## # A tibble: 2 × 3
## sentiment occurence share
## <chr> <int> <dbl>
## 1 negative 340 0.51
## 2 positive 324 0.49
# Reshape the counts into a word-by-sentiment matrix (0 where a word lacks a
# given sentiment) and draw a comparison cloud with one colour per column.
comparison.cloud(
  reshape2::acast(
    eth_bing_sentiments,
    word ~ sentiment,
    value.var = "n",
    fill = 0
  ),
  colors = c("#F8766D", "#00BFC4"),
  max.words = 100
)