############ Imports ##############
import vizro.models as vm
from vizro.models.types import capture
from vizro import Vizro
import pandas as pd
from vizro.managers import data_manager
import plotly.graph_objects as go
import numpy as np
from vizro.models.types import capture
####### Function definitions ######
@capture("graph")
def horizontal_category_subcategory_sales(data_frame):
    """Build a horizontal bar chart of sales per product sub-category.

    The frame is aggregated by "Product Category" and "Product Sub-Category"
    (summing "Sales" and "Profit"), after merging a fixed list of low-volume
    stationery sub-categories into a single "Low volume stationery" row.
    Bars are ordered by category, then by descending sales within a category,
    and coloured by profit margin (sum of profit / sum of sales):

    * orange for margins <= 0 (losses),
    * a pale-to-dark blue gradient for positive margins,
    * neutral grey when the margin is undefined (e.g. zero total sales).

    A small gradient strip with labels is drawn above the bars as a legend.

    Args:
        data_frame: DataFrame with the columns "Product Category",
            "Product Sub-Category", "Sales" and "Profit".

    Returns:
        A ``plotly.graph_objects.Figure``.
    """
    # Low-volume stationery sub-categories that are collapsed into one bar.
    low_volume_items = [
        "Envelopes",
        "Pens & Art Supplies",
        "Scissors, Rulers and Trimmers",
        "Labels",
        "Rubber Bands",
    ]

    # Work on a copy so the caller's frame is never mutated.
    df_processed = data_frame.copy()
    df_processed.loc[
        df_processed["Product Sub-Category"].isin(low_volume_items),
        "Product Sub-Category",
    ] = "Low volume stationery"

    grouped = (
        df_processed.groupby(["Product Category", "Product Sub-Category"])
        .agg({"Sales": "sum", "Profit": "sum"})
        .reset_index()
    )
    grouped["Profit Margin"] = grouped["Profit"] / grouped["Sales"]
    grouped = grouped.sort_values(
        ["Product Category", "Sales"], ascending=[True, False]
    )

    # Only finite, strictly positive margins define the blue gradient range.
    # Zero total sales yields NaN/inf margins, which must not stretch the
    # range (and previously crashed in int(nan) while building the colour).
    positive_margins = [
        margin
        for margin in grouped["Profit Margin"].tolist()
        if margin > 0 and np.isfinite(margin)
    ]
    if positive_margins:
        min_positive_margin = min(positive_margins)
        max_positive_margin = max(positive_margins)
    else:
        min_positive_margin, max_positive_margin = 0, 1
    positive_range = max_positive_margin - min_positive_margin

    pale_blue = (173, 216, 230)  # #add8e6
    dark_blue = (31, 78, 121)  # #1f4e79

    def get_color(margin_value):
        """Map a profit margin to its bar colour."""
        if margin_value <= 0:
            return "#ff7f0e"  # Orange for negative margins (losses)
        if not np.isfinite(margin_value):
            return "#7f7f7f"  # Neutral grey: margin undefined (NaN / +inf)
        if positive_range == 0:
            return "#1f4e79"  # Default dark blue if no variation in positive margins
        # Linear interpolation between pale and dark blue.
        normalized = (margin_value - min_positive_margin) / positive_range
        r, g, b = (
            int(pale + (dark - pale) * normalized)
            for pale, dark in zip(pale_blue, dark_blue)
        )
        return f"rgb({r},{g},{b})"

    # The category axis is drawn bottom-to-top, so reverse the sorted rows to
    # put the first category / highest sales at the top of the chart.
    display = grouped.iloc[::-1].reset_index(drop=True)
    bar_colors = [get_color(margin) for margin in display["Profit Margin"]]
    y_labels = display["Product Sub-Category"].tolist()

    fig = go.Figure()
    fig.add_trace(
        go.Bar(
            y=display["Product Sub-Category"],
            x=display["Sales"],
            orientation="h",
            marker=dict(
                color=bar_colors,
                line=dict(width=0.5, color="rgba(255,255,255,0.4)"),
            ),
            hovertemplate="<b>%{y}</b><br>Sales: $%{x:,.0f}<br>Profit: $%{customdata[0]:,.0f}<br>Profit Margin: %{customdata[1]:.1%}<br><extra></extra>",
            customdata=list(zip(display["Profit"], display["Profit Margin"])),
            name="Sales by Sub-Category",
        )
    )

    # Dividing lines between product categories. Rows are contiguous per
    # category (sorted above), so a divider belongs wherever two adjacent rows
    # differ in category. Row k from the top sits at axis position
    # total_items - 1 - k, hence the boundary below it is at
    # total_items - (k + 1) - 0.5.
    total_items = len(grouped)
    category_per_row = grouped["Product Category"].tolist()
    for rows_above, (upper, lower) in enumerate(
        zip(category_per_row, category_per_row[1:]), start=1
    ):
        if upper != lower:
            fig.add_hline(
                y=total_items - rows_above - 0.5,
                line=dict(color="rgba(255,255,255,0.6)", width=2, dash="solid"),
            )

    # Gradient strip used as a legend for the positive-margin colours.
    gradient_x = np.linspace(0, 1, 100)
    if positive_margins:
        gradient_colors = [
            get_color(min_positive_margin + positive_range * step)
            for step in gradient_x
        ]
    else:
        gradient_colors = ["#1f4e79"] * len(gradient_x)

    # The strip lives on its own axis pair (x2/y2) above the main plot area.
    fig.add_trace(
        go.Bar(
            x=gradient_x,
            y=["Profit Margin"] * 100,
            orientation="h",
            marker=dict(color=gradient_colors, line=dict(width=0)),
            showlegend=False,
            hoverinfo="skip",
            yaxis="y2",
            xaxis="x2",
        )
    )

    def legend_label(x, y, text, xanchor, size=10):
        """Return a paper-anchored white text annotation for the legend."""
        return dict(
            x=x,
            y=y,
            xref="paper",
            yref="paper",
            text=text,
            showarrow=False,
            font=dict(size=size, color="white"),
            xanchor=xanchor,
        )

    # Dynamic height: minimum 400px, roughly 30px per bar plus padding.
    chart_height = max(400, total_items * 30 + 100)

    fig.update_layout(
        xaxis_title="Sales ($)",
        yaxis_title="Product Sub-Category",
        height=chart_height,
        hovermode="closest",
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        showlegend=False,
        yaxis=dict(categoryorder="array", categoryarray=y_labels, domain=[0, 0.85]),
        # Second y-axis for gradient legend
        yaxis2=dict(
            domain=[0.9, 0.95],
            anchor="x2",
            showticklabels=False,
            showgrid=False,
            zeroline=False,
        ),
        # Second x-axis for gradient legend
        xaxis2=dict(
            domain=[0.7, 0.98],
            anchor="y2",
            showticklabels=False,
            showgrid=False,
            zeroline=False,
        ),
        annotations=[
            legend_label(0.68, 0.925, "Low Margin", "right"),
            legend_label(1.0, 0.925, "High Margin", "left"),
            legend_label(0.84, 0.97, "<b>Profit Margin</b>", "center", size=11),
            # Orange-circle emoji (U+1F7E0), written as an escape because the
            # literal had been corrupted by a wrong-encoding round trip.
            legend_label(0.68, 0.875, "\U0001F7E0 = Loss", "right"),
        ],
    )
    return fig
# Approximate (latitude, longitude) per city name, used to place map bubbles.
# NOTE(review): keys are bare city names, so ambiguous names (e.g. "Vancouver",
# "Ontario", "Portland", "Springfield") resolve to a single location — confirm
# these match the cities actually present in the data.
_NORTH_AMERICA_CITY_COORDS = {
    "New York City": (40.71, -74.01),
    "Los Angeles": (34.05, -118.24),
    "Chicago": (41.88, -87.63),
    "Houston": (29.76, -95.37),
    "Philadelphia": (39.95, -75.17),
    "Phoenix": (33.45, -112.07),
    "San Antonio": (29.42, -98.49),
    "San Diego": (32.72, -117.16),
    "Dallas": (32.78, -96.80),
    "San Jose": (37.34, -121.89),
    "Austin": (30.27, -97.74),
    "Jacksonville": (30.33, -81.66),
    "San Francisco": (37.77, -122.42),
    "Columbus": (39.96, -82.99),
    "Charlotte": (35.23, -80.84),
    "Fort Worth": (32.76, -97.33),
    "Indianapolis": (39.77, -86.16),
    "Seattle": (47.61, -122.33),
    "Denver": (39.74, -104.99),
    "Washington": (38.91, -77.04),
    "Boston": (42.36, -71.06),
    "El Paso": (31.76, -106.49),
    "Detroit": (42.33, -83.05),
    "Nashville": (36.16, -86.78),
    "Memphis": (35.15, -90.05),
    "Portland": (45.52, -122.68),
    "Oklahoma City": (35.47, -97.52),
    "Las Vegas": (36.17, -115.14),
    "Louisville": (38.25, -85.76),
    "Baltimore": (39.29, -76.61),
    "Milwaukee": (43.04, -87.91),
    "Albuquerque": (35.08, -106.65),
    "Tucson": (32.22, -110.97),
    "Fresno": (36.74, -119.79),
    "Sacramento": (38.58, -121.49),
    "Kansas City": (39.10, -94.58),
    "Mesa": (33.42, -111.83),
    "Atlanta": (33.75, -84.39),
    "Colorado Springs": (38.83, -104.82),
    "Virginia Beach": (36.85, -75.98),
    "Raleigh": (35.78, -78.64),
    "Omaha": (41.26, -95.93),
    "Miami": (25.76, -80.19),
    "Long Beach": (33.77, -118.19),
    "Minneapolis": (44.98, -93.27),
    "Tampa": (27.95, -82.46),
    "Tulsa": (36.15, -95.99),
    "Arlington": (32.74, -97.11),
    "New Orleans": (29.95, -90.07),
    "Wichita": (37.69, -97.34),
    "Cleveland": (41.50, -81.69),
    "Bakersfield": (35.37, -119.02),
    "Aurora": (39.73, -104.83),
    "Anaheim": (33.84, -117.91),
    "Honolulu": (21.31, -157.86),
    "Santa Ana": (33.75, -117.87),
    "Corpus Christi": (27.80, -97.40),
    "Riverside": (33.95, -117.40),
    "Lexington": (38.04, -84.50),
    "Stockton": (37.96, -121.29),
    "St. Paul": (44.95, -93.09),
    "Cincinnati": (39.10, -84.51),
    "Anchorage": (61.22, -149.90),
    "Henderson": (36.04, -114.98),
    "Greensboro": (36.07, -79.79),
    "Plano": (33.02, -96.70),
    "Newark": (40.74, -74.17),
    "Lincoln": (40.81, -96.70),
    "Buffalo": (42.89, -78.88),
    "Jersey City": (40.72, -74.04),
    "Chula Vista": (32.64, -117.08),
    "Fort Wayne": (41.13, -85.14),
    "Orlando": (28.54, -81.38),
    "St. Petersburg": (27.77, -82.64),
    "Chandler": (33.31, -111.84),
    "Laredo": (27.53, -99.48),
    "Norfolk": (36.85, -76.29),
    "Durham": (35.99, -78.90),
    "Madison": (43.07, -89.40),
    "Lubbock": (33.58, -101.86),
    "Irvine": (33.68, -117.83),
    "Winston-Salem": (36.10, -80.24),
    "Glendale": (33.54, -112.19),
    "Garland": (32.91, -96.64),
    "Hialeah": (25.86, -80.28),
    "Reno": (39.53, -119.81),
    "Chesapeake": (36.77, -76.29),
    "Gilbert": (33.35, -111.79),
    "Baton Rouge": (30.45, -91.19),
    "Irving": (32.81, -96.95),
    "Scottsdale": (33.49, -111.93),
    "North Las Vegas": (36.20, -115.12),
    "Fremont": (37.55, -121.99),
    "Boise": (43.61, -116.20),
    "Richmond": (37.54, -77.44),
    "San Bernardino": (34.11, -117.29),
    "Birmingham": (33.52, -86.81),
    "Spokane": (47.66, -117.43),
    "Rochester": (43.16, -77.61),
    "Des Moines": (41.59, -93.62),
    "Modesto": (37.64, -120.99),
    "Fayetteville": (35.05, -78.88),
    "Tacoma": (47.25, -122.44),
    "Oxnard": (34.20, -119.18),
    "Fontana": (34.09, -117.44),
    "Montgomery": (32.37, -86.30),
    "Moreno Valley": (33.94, -117.23),
    "Shreveport": (32.53, -93.75),
    "Yonkers": (40.93, -73.90),
    "Akron": (41.08, -81.52),
    "Huntington Beach": (33.66, -117.99),
    "Little Rock": (34.75, -92.29),
    "Augusta": (33.47, -82.01),
    "Amarillo": (35.22, -101.83),
    "Mobile": (30.70, -88.04),
    "Grand Rapids": (42.96, -85.67),
    "Salt Lake City": (40.76, -111.89),
    "Tallahassee": (30.45, -84.28),
    "Huntsville": (34.73, -86.59),
    "Grand Prairie": (32.75, -96.99),
    "Knoxville": (35.96, -83.92),
    "Worcester": (42.26, -71.80),
    "Newport News": (37.09, -76.47),
    "Brownsville": (25.90, -97.50),
    "Overland Park": (38.98, -94.67),
    "Santa Clarita": (34.39, -118.54),
    "Providence": (41.82, -71.41),
    "Garden Grove": (33.77, -117.94),
    "Chattanooga": (35.05, -85.31),
    "Oceanside": (33.20, -117.38),
    "Jackson": (32.30, -90.18),
    "Fort Lauderdale": (26.12, -80.14),
    "Santa Rosa": (38.44, -122.71),
    "Rancho Cucamonga": (34.11, -117.59),
    "Port St. Lucie": (27.29, -80.35),
    "Tempe": (33.43, -111.94),
    "Ontario": (34.06, -117.65),
    "Vancouver": (45.64, -122.66),
    "Cape Coral": (26.56, -81.95),
    "Sioux Falls": (43.54, -96.73),
    "Springfield": (37.22, -93.30),
    "Peoria": (40.69, -89.59),
    "Pembroke Pines": (26.01, -80.30),
    "Elk Grove": (38.41, -121.37),
    "Salem": (44.94, -123.04),
    "Lancaster": (34.69, -118.15),
    "Corona": (33.88, -117.57),
    "Eugene": (44.05, -123.09),
    "Palmdale": (34.58, -118.12),
    "Salinas": (36.68, -121.66),
}


@capture("graph")
def north_america_city_bubble_map(data_frame):
    """Build a bubble map of sales and profit margin per North American city.

    Rows whose "Continent" is "North America" are aggregated per "City"
    (summing "Sales" and "Profit"). Cities found in the built-in coordinate
    table are drawn as bubbles: size scales linearly with total sales
    (diameter 5 to 50) and colour encodes profit margin (orange for margins
    <= 0, pale-to-dark blue for positive margins, neutral grey when the margin
    is undefined). Cities without known coordinates are left off the map.

    Args:
        data_frame: DataFrame with the columns "Continent", "City", "Sales"
            and "Profit".

    Returns:
        A ``plotly.graph_objects.Figure``. When there is no North America data,
        or none of its cities has known coordinates, a placeholder figure with
        an explanatory message is returned instead of a map.
    """
    na_data = data_frame[data_frame["Continent"] == "North America"].copy()
    if na_data.empty:
        fig = go.Figure()
        fig.update_layout(
            title="No North America data available",
            height=500,
            paper_bgcolor="rgba(0,0,0,0)",
            plot_bgcolor="rgba(0,0,0,0)",
        )
        return fig

    city_data = (
        na_data.groupby("City").agg({"Sales": "sum", "Profit": "sum"}).reset_index()
    )
    city_data["Profit Margin"] = city_data["Profit"] / city_data["Sales"]

    # Attach coordinates; unknown cities get missing values and are dropped.
    unknown = (None, None)
    city_data["lat"] = city_data["City"].map(
        lambda city: _NORTH_AMERICA_CITY_COORDS.get(city, unknown)[0]
    )
    city_data["lon"] = city_data["City"].map(
        lambda city: _NORTH_AMERICA_CITY_COORDS.get(city, unknown)[1]
    )
    city_data_mapped = city_data.dropna(subset=["lat", "lon"])

    if city_data_mapped.empty:
        fig = go.Figure()
        fig.add_annotation(
            text="No cities with known coordinates found in North America data",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font=dict(size=16, color="white"),
        )
        fig.update_layout(
            height=500, paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)"
        )
        return fig

    # Only finite, strictly positive margins define the blue gradient range.
    # Zero total sales yields NaN/inf margins, which must not stretch the
    # range (and previously crashed in int(nan) while building the colour).
    positive_margins = [
        margin
        for margin in city_data_mapped["Profit Margin"].tolist()
        if margin > 0 and np.isfinite(margin)
    ]
    if positive_margins:
        min_positive = min(positive_margins)
        max_positive = max(positive_margins)
    else:
        min_positive, max_positive = 0, 1
    pos_range = max_positive - min_positive

    pale_blue = (173, 216, 230)  # #add8e6
    dark_blue = (31, 78, 121)  # #1f4e79

    def get_color(margin):
        """Map a profit margin to its bubble colour."""
        if margin <= 0:
            return "#ff7f0e"  # Orange for losses
        if not np.isfinite(margin):
            return "#7f7f7f"  # Neutral grey: margin undefined (NaN / +inf)
        if pos_range == 0:
            return "#1f4e79"
        # Linear interpolation between pale and dark blue.
        normalized = (margin - min_positive) / pos_range
        r, g, b = (
            int(pale + (dark - pale) * normalized)
            for pale, dark in zip(pale_blue, dark_blue)
        )
        return f"rgb({r},{g},{b})"

    colors = [get_color(margin) for margin in city_data_mapped["Profit Margin"]]

    # Scale bubble diameters linearly into [5, 50]; when every city has the
    # same sales there is no spread to scale, so use a uniform mid size.
    min_sales = city_data_mapped["Sales"].min()
    sales_range = city_data_mapped["Sales"].max() - min_sales
    if sales_range > 0:
        sizes = 5 + 45 * (city_data_mapped["Sales"] - min_sales) / sales_range
    else:
        sizes = [25] * len(city_data_mapped)

    fig = go.Figure()
    fig.add_trace(
        go.Scattergeo(
            lat=city_data_mapped["lat"],
            lon=city_data_mapped["lon"],
            text=city_data_mapped["City"],
            mode="markers",
            marker=dict(
                size=sizes,
                color=colors,
                line=dict(width=1, color="rgba(255,255,255,0.6)"),
                sizemode="diameter",
            ),
            hovertemplate="<b>%{text}</b><br>Sales: $%{customdata[0]:,.0f}<br>Profit: $%{customdata[1]:,.0f}<br>Profit Margin: %{customdata[2]:.1%}<br><extra></extra>",
            customdata=list(
                zip(
                    city_data_mapped["Sales"],
                    city_data_mapped["Profit"],
                    city_data_mapped["Profit Margin"],
                )
            ),
            name="City Sales",
        )
    )
    fig.update_geos(
        scope="north america",
        projection_type="natural earth",
        showland=True,
        landcolor="rgba(50, 50, 50, 0.8)",
        showocean=True,
        oceancolor="rgba(20, 20, 20, 0.8)",
        showcountries=True,
        countrycolor="rgba(255, 255, 255, 0.3)",
        showlakes=True,
        lakecolor="rgba(20, 20, 20, 0.8)",
    )

    def legend_label(y, text):
        """Return a left-aligned, paper-anchored white legend annotation."""
        return dict(
            x=0.02,
            y=y,
            xref="paper",
            yref="paper",
            text=text,
            showarrow=False,
            font=dict(size=10, color="white"),
            xanchor="left",
        )

    fig.update_layout(
        height=600,
        paper_bgcolor="rgba(0,0,0,0)",
        geo=dict(bgcolor="rgba(0,0,0,0)"),
        showlegend=False,
        annotations=[
            # Circle emoji (U+1F7E0 orange, U+1F535 blue) are written as
            # escapes because the literals had been corrupted by a
            # wrong-encoding round trip.
            legend_label(0.98, "\U0001F7E0 = Loss"),
            legend_label(0.94, "\U0001F535 = Profit (darker = higher margin)"),
            legend_label(0.90, "Bubble size = Sales volume"),
        ],
    )
    return fig
####### Data Manager Settings #####
# Source CSV for the dashboard; it is downloaded once at import time and
# registered with the data manager under the name "megastore_data", which the
# graphs below reference via data_frame="megastore_data".
MEGASTORE_CSV_URL = "https://raw.githubusercontent.com/stichbury/vizro_projects/main/Megastore/MegastoreData.csv"
data_manager["megastore_data"] = pd.read_csv(MEGASTORE_CSV_URL)
########### Model code ############
# Bar chart of sales per sub-category; this is the only component the
# continent filter below targets.
sales_chart = vm.Graph(
    id="sales_chart",
    type="graph",
    figure=horizontal_category_subcategory_sales(data_frame="megastore_data"),
    title="Sales by Product Sub-Category (Color = Profit Margin)",
)

# City bubble map; the chart function itself restricts the data to
# North America, so it is not wired to the continent filter.
bubble_map = vm.Graph(
    id="bubble_map",
    type="graph",
    figure=north_america_city_bubble_map(data_frame="megastore_data"),
    title="North America: City Sales & Profit Margins",
)

# Multi-select continent dropdown with the listed continents pre-selected.
continent_selector = vm.Dropdown(
    type="dropdown",
    value=[
        "Africa",
        "Asia",
        "Europe",
        "North America",
        "South America",
    ],
    multi=True,
)
continent_filter = vm.Filter(
    type="filter",
    column="Continent",
    targets=["sales_chart"],
    selector=continent_selector,
)

analysis_page = vm.Page(
    title="Megastore Sales Analysis",
    components=[sales_chart, bubble_map],
    controls=[continent_filter],
)

model = vm.Dashboard(
    pages=[analysis_page],
    theme="vizro_dark",
    title="Megastore Dashboard",
)

# Build the app from the dashboard model and start serving it.
Vizro().build(model).run()