import vizro
from vizro import Vizro
import vizro.models as vm
import vizro.actions as va
import pandas as pd
import numpy as np
import vizro.plotly.express as px
from vizro.models.types import capture
from vizro.tables import dash_ag_grid, dash_data_table
def load_superstore_data():
"""Load the Superstore dataset from GitHub"""
url = "https://raw.githubusercontent.com/WuCandice/Superstore-Sales-Analysis/main/dataset/Superstore%20Dataset.csv"
df = pd.read_csv(url)
# Clean and prepare data for JSON serialization
for col in df.columns:
if df[col].dtype == 'object':
# Fill NaN values and convert to string
df[col] = df[col].fillna('').astype(str)
elif df[col].dtype in ['float64', 'int64']:
# Fill NaN values for numeric columns
df[col] = df[col].fillna(0)
elif pd.api.types.is_datetime64_any_dtype(df[col]):
# Convert datetime to string
df[col] = df[col].dt.strftime('%Y-%m-%d').fillna('')
# Remove any columns with complex data types
df = df.select_dtypes(include=[np.number, 'object'])
return df
@capture("graph")
def monthly_sales_profit_chart(data_frame, selected_categories=None):
"""Create monthly sales and profit line chart"""
# Handle default case
if selected_categories is None:
selected_categories = list(data_frame['Category'].unique())
selected_categories = [selected_categories]
# Filter data by selected categories
df_filtered = data_frame[data_frame['Category'].isin(selected_categories)]
# Convert Order Date to datetime
df_copy = df_filtered.copy()
df_copy['Order Date'] = pd.to_datetime(df_copy['Order Date'])
# Extract year-month
df_copy['Year-Month'] = df_copy['Order Date'].dt.to_period('M')
# Group by month and sum sales and profit
monthly_data = df_copy.groupby('Year-Month').agg({
'Sales': 'sum',
'Profit': 'sum'
}).reset_index()
# Convert Period back to string for plotting
monthly_data['Year-Month'] = monthly_data['Year-Month'].astype(str)
# Melt the data for line chart
monthly_melted = monthly_data.melt(
id_vars=['Year-Month'],
value_vars=['Sales', 'Profit'],
var_name='Metric',
value_name='Amount'
)
# Create title based on selected categories
if len(selected_categories) == 1:
category_text = selected_categories[0]
else:
category_text = ", ".join(sorted(selected_categories))
# Create dynamic title
chart_title = f"Sales and Profit by Month - {category_text}"
# Create the line chart
fig = px.line(
data_frame=monthly_melted,
x="Year-Month",
y="Amount",
color="Metric",
title=chart_title,
labels={
"Year-Month": "Month",
"Amount": "Amount ($)",
"Metric": "Metric"
}
)
fig.update_layout(
height=500,
xaxis_title="Month",
yaxis_title="Sales ($)",
legend=dict(
orientation="h",
yanchor="bottom",
y=-0.15,
xanchor="center",
x=0.5
),
margin=dict(l=50, r=50, t=80, b=80)
)
fig.update_traces(line=dict(width=3))
return fig
@capture("graph")
def monthly_sales_by_category_chart(data_frame):
"""Create monthly sales grouped bar chart by category"""
# Convert Order Date to datetime
df_copy = data_frame.copy()
df_copy['Order Date'] = pd.to_datetime(df_copy['Order Date'])
# Extract year-month
df_copy['Year-Month'] = df_copy['Order Date'].dt.to_period('M')
# Group by month and category, sum sales
monthly_category_data = df_copy.groupby(['Year-Month', 'Category']).agg({
'Sales': 'sum'
}).reset_index()
# Convert Period back to string for plotting
monthly_category_data['Year-Month'] = monthly_category_data['Year-Month'].astype(str)
# Create dynamic title with drill-down hint
chart_title = f"Monthly Sales by Category - All Categories<br><sup>Click on a category bar to drill down to product details</sup>"
# Create the grouped bar chart
fig = px.bar(
data_frame=monthly_category_data,
x="Year-Month",
y="Sales",
color="Category",
title=chart_title,
labels={
"Year-Month": "Month",
"Sales": "Sales ($)",
"Category": "Category"
},
custom_data=["Category"]
)
fig.update_layout(
height=500,
xaxis_title="Month",
yaxis_title="Sales ($)",
legend=dict(
orientation="h",
yanchor="bottom",
y=-0.15,
xanchor="center",
x=0.5
),
# barmode='group',
title_y=0.95,
margin=dict(l=50, r=50, t=100, b=80)
)
return fig
@capture("graph")
def monthly_sales_by_product_chart(data_frame, selected_category=None):
"""Create monthly sales stacked bar chart by product name for a specific category"""
# Handle default case
if selected_category is None:
selected_category = sorted(data_frame['Category'].unique())[0]
# Filter data by selected category
df_filtered = data_frame[data_frame['Category'] == selected_category]
# Convert Order Date to datetime
df_copy = df_filtered.copy()
df_copy['Order Date'] = pd.to_datetime(df_copy['Order Date'])
# Extract year-month
df_copy['Year-Month'] = df_copy['Order Date'].dt.to_period('M')
# Group by month and product name, sum sales
# Get top 10 products by total sales to avoid overcrowding
top_products = df_copy.groupby('Product Name')['Sales'].sum().nlargest(10).index
df_top_products = df_copy[df_copy['Product Name'].isin(top_products)]
monthly_product_data = df_top_products.groupby(['Year-Month', 'Product Name']).agg({
'Sales': 'sum'
}).reset_index()
# Convert Period back to string for plotting
monthly_product_data['Year-Month'] = monthly_product_data['Year-Month'].astype(str)
# Create dynamic title
chart_title = f"Monthly Sales by Product - {selected_category} (Top 10 Products)"
# Create the stacked bar chart
fig = px.bar(
data_frame=monthly_product_data,
x="Year-Month",
y="Sales",
color="Product Name",
title=chart_title,
labels={
"Year-Month": "Month",
"Sales": "Sales ($)",
"Product Name": "Product"
}
)
fig.update_layout(
height=500,
xaxis_title="Month",
yaxis_title="Sales ($)",
legend=dict(
orientation="v",
yanchor="top",
y=1,
xanchor="left",
x=1.02
),
# barmode='stack',
margin=dict(l=50, r=200, t=80, b=50)
)
return fig
# Load the data
superstore_data = load_superstore_data()
print(f"Loaded data with {len(superstore_data)} rows and {len(superstore_data.columns)} columns")
print("Column types:", superstore_data.dtypes)
# Get unique categories for the parameter selector
categories = sorted(superstore_data['Category'].unique())
@capture("table")
def superstore_table(data_frame):
"""Create a table for the superstore data"""
return data_frame[:10][["Ship Mode"]]
# Define the dashboard
dashboard = vm.Dashboard(
title="Superstore Dataset Dashboard",
pages=[
vm.Page(
title="Raw Data",
components=[
vm.Table(
figure=dash_data_table(data_frame=superstore_data),
)
]
),
vm.Page(
id="summaries-page",
title="Summaries",
controls=[
vm.Parameter(
targets=["monthly-chart.selected_categories"],
id="category_filter",
selector=vm.Dropdown(
options=categories,
value=categories[0],
multi=False,
title="Select Category"
),
show_in_url=True
)
],
components=[
vm.Container(
layout=vm.Grid(grid=[[0, 1]]),
components=[
vm.Graph(
id="monthly-chart",
figure=monthly_sales_profit_chart(data_frame=superstore_data, selected_categories=categories[0])
),
vm.Graph(
id="category-bar-chart",
figure=monthly_sales_by_category_chart(data_frame=superstore_data),
actions=[
va.set_control(control="category_filter", value="customdata[0]"),
va.set_control(control="product_category_filter", value="customdata[0]"),
# va.navigate_to(page="product-drill-down")
]
)
]
)
]
),
vm.Page(
id="product-drill-down",
title="Product Drill-Down",
controls=[
vm.Parameter(
targets=["product-bar-chart.selected_category"],
id="product_category_filter",
selector=vm.Dropdown(
options=categories,
value=categories[0],
multi=False,
title="Select Category for Product Breakdown"
),
show_in_url=True
)
],
components=[
vm.Graph(
id="product-bar-chart",
figure=monthly_sales_by_product_chart(data_frame=superstore_data, selected_category=categories[0])
)
]
)
]
)
# Build and run the dashboard
Vizro().build(dashboard).run()