443-970-2353
[email protected]
CV Resume
This is a continuation to the previous blog post on Tableau’s Level Of Detail (LOD) Expressions with R. I suggest you go over part 1 before trying this part. The data is from Tableau's website. You can download it from here. After downloading it, don't forget to unzip it. We will solve the questions below using both R Shiny and Tableau's Level of Detail Expressions. In the previous post, we saw the fixed Tableau LOD expression. In this post, we will see, include and exclude expressions. You can download the Tableau visualizations to see the calculated fields that use LOD expressions. I have also included screenshots of the calculated fields. The R Shiny codes are included at the bottom. As you can see from the code, R visualizations need more customization, which has its own pros and cons.
library(tidyverse)
library(data.table)
library(plotly)
library(lubridate)
library(shiny)
Sample_Superstore = fread("data/Sample_Superstore_orders.csv")
Sample_Superstore$`Order Date` = mdy(Sample_Superstore$`Order Date`) # change Order Date and Ship Date to appropriate date format.
Sample_Superstore$`Ship Date` = mdy(Sample_Superstore$`Ship Date`) # They are imported as character.
ui <- fluidPage(
fluidRow(
br(),
br(),
column(4, offset = 3,
selectInput("subcategory", "Select Sub-Category", unique(Sample_Superstore$`Sub-Category`), "Copiers" )
)
),
fluidRow(
tags$h4("Sales of all of Sub-Categories compared to that of a selected Sub-ategory",
style = "text-align: center" ),
column(6,
plotlyOutput("difference")
),
column(5,
plotlyOutput("sales_histogram")
)
)
)
server <- function(input, output) {
avg_of_selected = reactive({
selected = input$subcategory
temp = Sample_Superstore %>% filter(`Sub-Category` ==selected) %>%.$Sales
Sample_Superstore %>% group_by(`Sub-Category`) %>% summarise(`Difference From Selected` = sum(Sales) - sum(temp))
})
output$difference <- renderPlotly({
gg = avg_of_selected() %>% mutate(`Sub-Category` = factor(`Sub-Category`,levels = `Sub-Category`[order(`Difference From Selected`,decreasing = F)]))%>%
ggplot(aes(x = `Sub-Category`, y = `Difference From Selected`, fill = `Difference From Selected`)) +
geom_bar(stat="identity") +
scale_fill_gradient2(low='#9e3d22', mid='snow3', high='#2b5c8a', space='Lab') +
coord_flip() +
xlab("") + ylab("Difference From Selected") +
ggtitle("") +
theme(plot.title = element_text(size = 20, hjust = 0.5), legend.position = "none")
ggplotly(gg)
})
output$sales_histogram<- renderPlotly({
gg = Sample_Superstore %>% group_by(`Sub-Category`) %>% summarise(Total = sum(Sales)) %>%
mutate(`Sub-Category` = factor(`Sub-Category`,levels = `Sub-Category`[order(`Total`,decreasing = F)]))%>%
ggplot(aes(x = `Sub-Category`, y = Total)) +
geom_bar(stat="identity") +
coord_flip() +
xlab("") + ylab("Sales") +
ggtitle("") +
theme(plot.title = element_text(size = 20, hjust = 0.5))
ggplotly(gg)
})
}
shinyApp(ui = ui, server = server)
library(tidyverse)
library(data.table)
library(plotly)
library(lubridate)
Sample_Superstore = fread("data/Sample_Superstore_orders.csv")
Sample_Superstore$`Order Date` = mdy(Sample_Superstore$`Order Date`)
Sample_Superstore$`Ship Date` = mdy(Sample_Superstore$`Ship Date`)
library(shiny)
ui <- fluidPage(
fluidRow(
br(),
br(),
br(),
tags$h3("What percent of customers order once every month, 2 months, N months?",
style = "text-align: center;color: #996600" ),
br(),
plotlyOutput("order_frequency", height = "600px")
)
)
server <- function(input, output) {
avg_of_selected = reactive({
selected = input$subcategory
temp = Sample_Superstore %>% filter(`Sub-Category` ==selected) %>%.$Sales
Sample_Superstore %>% group_by(`Sub-Category`) %>% summarise(`Difference From Selected` = sum(Sales) - sum(temp))
})
output$order_frequency <- renderPlotly({
temp = Sample_Superstore %>% group_by(`Customer ID`) %>% summarise(
first_order_date = min(`Order Date`),
last_order_date = max(`Order Date`),
number_of_orders = length(unique(`Order ID`)),
active_months = interval(first_order_date, last_order_date) %/% months(1),
frequency_of_orders = active_months/(number_of_orders),
frequency_of_orders_bin = round(frequency_of_orders, 0)) %>%
group_by(frequency_of_orders_bin) %>% summarise(count = n())
temp$number_of_customers_percentage = (temp$count)/(sum(temp$count)) *100
avg_freq = sum(temp$frequency_of_orders_bin * temp$count)/sum(temp$count)
temp$difference = temp$frequency_of_orders_bin - avg_freq
gg = temp %>% ggplot(aes(x = frequency_of_orders_bin, y = number_of_customers_percentage, fill = difference)) + geom_bar(stat = "identity") +
scale_fill_gradient2(low= 'darkblue', mid = "skyblue", high = 'darkred', space='Lab') +
geom_vline(xintercept = avg_freq, lty = 2, color = "blue", lwd = 1) +
theme(legend.position = "none") + ggtitle("Order Frequency") + xlab("Frequency of orders bin") + ylab("% of customers") +
theme(plot.title = element_text(size = 16, hjust = 0.5),
axis.title = element_text(size = 14),
axis.text = element_text(size = 13)) +
annotate("text", label = paste0("Average frequency: ", round(avg_freq, 3), " months"), x = avg_freq + 2, y = 18, color = "darkred")
ggplotly(gg)
})
}
shinyApp(ui = ui, server = server)