## -----------------------------------------------------------------------------
library(dplyr)
library(ggplot2)
library(tibble)

## -----------------------------------------------------------------------------
library(shoppingwords)

## -----------------------------------------------------------------------------
# Compact column-by-column overview of the `reviews` data
reviews |>
  glimpse()

## -----------------------------------------------------------------------------
# $ rating <dbl> 5, 5, 5, 5, 5, 1, 5, 5, 5, 5, 2, 3, 1, 1, 3, 3, 1, 1, 1, 1, 1, 3, 1, 4,...
# $ comment <chr> "I gave 5 stars so that the comment would be visible I ordered a 5-pack and ...
# $ id <dbl> 3573, 3975, 4910, 4950, 5908, 6144, 6192, 6335, 6370, 6371,...

## -----------------------------------------------------------------------------
# Show rows 37 through 39 of `stopwords_tr` as a small sample
slice(stopwords_tr, 37:39)

## -----------------------------------------------------------------------------
# For comparison: the first five "tr" entries that the stopwords package
# returns from its "stopwords-iso" source
stopwords::stopwords(language = "tr", source = "stopwords-iso") |>
  head(5)

## -----------------------------------------------------------------------------
# Show rows 7 and 8 of `phrases` as a small sample
slice(phrases, 7:8)

## -----------------------------------------------------------------------------
# Remove stopwords from the full review set
cleaned_reviews <- reviews |>
  match_stopwords()

# Average number of characters left in `cleaned_text`, per rating
# (`cleaned_text` is presumably the column added by match_stopwords())
cleaned_reviews |>
  mutate(text_length = nchar(cleaned_text)) |>
  group_by(rating) |>
  summarise(avg_text_length = mean(text_length))

## -----------------------------------------------------------------------------
# Two hand-written comments with ratings, laid out row by row, to try
# match_stopwords() on a small custom tibble
reviews_sample <- tribble(
  ~comment,                              ~rating,
  "Bu ürün xs ancak fiyatı yüksek gibi", 4.5,
  "Fiyat çok pahalı ama kaliteli iyi",   3.0
)

cleaned_sample <- reviews_sample |>
  match_stopwords()

## -----------------------------------------------------------------------------
# First three rows of `reviews_test`
slice_head(reviews_test, n = 3)

## -----------------------------------------------------------------------------
# Number of rows per `emotion` value, most frequent first
count(reviews_test, emotion, sort = TRUE)

## -----------------------------------------------------------------------------
# Rows per (rating, emotion) pair, listed from the highest rating down;
# within a rating the larger count comes first
reviews_test |>
  count(rating, emotion) |>
  arrange(desc(rating), desc(n))

## ----plot-example, fig.alt = "The distribution of the reviews across all ratings"----
# Side-by-side bars: number of reviews per rating, one bar per emotion.
# The emotion codes "p" / "n" are shown in the legend as Positive / Negative.
reviews_test |>
  count(rating, emotion) |>
  ggplot(aes(factor(rating), n, fill = emotion)) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    values = c(p = "lightblue", n = "darkred"),
    labels = c(p = "Positive", n = "Negative")
  ) +
  labs(x = "User Ratings", y = "Number of Reviews", fill = "Polarity") +
  theme_minimal() +
  theme(legend.position = "right")

