############ Imports ##############
import vizro.plotly.express as px
import vizro.models as vm
from vizro.models.types import capture
from vizro import Vizro
import pandas as pd
from vizro.managers import data_manager
import plotly.graph_objects as go
import requests
import vizro.plotly.express as px
from vizro.models.types import capture
####### Function definitions ######
@capture("graph")
def ranking_scatter_plot(data_frame):
    """Scatter songs whose peak chart rank was between 1 and 10 against their year.

    Args:
        data_frame: Table providing the ``year``, ``highestRank``, ``name``,
            ``song`` and ``artist`` columns.

    Returns:
        A Plotly figure with a reversed rank axis (rank 1 at the top), markers
        coloured by rank, a dashed guide line per year and a labelled colorbar.
    """
    peak_rank = data_frame["highestRank"]
    # ``between`` is inclusive on both ends, i.e. 1 <= rank <= 10.
    in_top_ten = peak_rank.between(1, 10)
    has_values = data_frame["year"].notna() & peak_rank.notna()
    top_ten = data_frame.loc[in_top_ten & has_values].copy()

    # Columns flagged True travel in customdata (in this order); the axis
    # columns are switched off so they do not appear in the hover box.
    hover_columns = {
        **dict.fromkeys(["name", "song", "artist"], True),
        **dict.fromkeys(["year", "highestRank"], False),
    }

    fig = px.scatter(
        top_ten,
        x="year",
        y="highestRank",
        title="",
        labels={"year": "Year", "highestRank": "Top 10"},
        hover_data=hover_columns,
        color="highestRank",
        color_continuous_scale="RdBu",  # red end for rank 1, blue end for rank 10
    )

    # Rank 1 belongs at the top of the chart.
    fig.update_yaxes(autorange="reversed")

    # One faint dashed guide line for every year that has at least one point.
    guide_style = {
        "line_dash": "dash",
        "line_color": "rgba(128,128,128,0.3)",
        "line_width": 1,
    }
    for guide_year in sorted(top_ten["year"].unique()):
        fig.add_vline(x=guide_year, **guide_style)

    # Hover box shows name, song and artist taken from customdata.
    fig.update_traces(
        hovertemplate="<b>%{customdata[0]}</b><br>Song: %{customdata[1]}<br>Artist: %{customdata[2]}<extra></extra>"
    )

    # Label only the odd ranks on the y-axis.
    odd_ranks = list(range(1, 10, 2))
    fig.update_layout(
        xaxis={"title": "Year"},
        yaxis={
            "title": "Top 10",
            "tickvals": odd_ranks,
            "ticktext": [str(rank) for rank in odd_ranks],
        },
        showlegend=False,
    )

    # Keep the colorbar visible and label its two ends and midpoint.
    fig.update_coloraxes(
        showscale=True,
        colorbar={
            "title": "Billboard<br>Ranking",
            "tickvals": [1, 5.5, 10],
            "ticktext": ["#1", "Mid", "#10"],
        },
    )
    return fig
# Source of the US name counts used by the "Most popular names in the US" option.
_US_NAMES_URL = "https://raw.githubusercontent.com/the-pudding/data/master/names-in-songs/onlyNames.csv"
# How many names each ranking shows.
_TOP_N_NAMES = 15
# Upper bound for the download so a stalled connection cannot hang a callback.
_US_REQUEST_TIMEOUT_SECONDS = 10
_BUBBLE_HOVER_COLUMNS = ["name", "song", "artist", "year"]
_BUBBLE_HOVERTEMPLATE = "<b>%{customdata[0]}</b><br>Song: %{customdata[1]}<br>Artist: %{customdata[2]}<br>Year: %{customdata[3]}<extra></extra>"
_OVERLAP_HOVERTEMPLATE = "<b>%{customdata[0]} (US Popular)</b><br>Song: %{customdata[1]}<br>Artist: %{customdata[2]}<br>Year: %{customdata[3]}<extra></extra>"


def _first_positions(names, offset=0):
    """Map each name to ``offset`` plus the index of its first occurrence in ``names``."""
    positions = {}
    for index, name in enumerate(names):
        positions.setdefault(name, offset + index)
    return positions


def _stack_songs_by_name(data_frame, positions):
    """Return one row per (name, song) for the names in ``positions``, with bubble coordinates.

    ``x_position`` is looked up from ``positions``; ``y_position`` counts 1, 2, 3...
    within each name after sorting by song, so the bubbles of a name stack vertically.
    """
    selected = data_frame[data_frame["name"].isin(list(positions))]
    stacked = selected.drop_duplicates(["name", "song"]).copy()
    stacked["x_position"] = stacked["name"].map(positions)
    stacked = stacked.sort_values(["x_position", "song"]).reset_index(drop=True)
    stacked["y_position"] = stacked.groupby("name").cumcount() + 1
    return stacked


def _fetch_us_name_counts():
    """Download the US name counts.

    Returns:
        A tuple ``(top_names, count_lookup)``: the names with the largest
        ``count`` (at most ``_TOP_N_NAMES``, highest first) and a dict mapping
        every name in the file to its count.

    Raises:
        requests.RequestException: On network failure, timeout or HTTP error status.
        KeyError, TypeError, ValueError: If the payload is not the expected CSV.
    """
    import io  # local import keeps this block independent of the file's import section

    response = requests.get(_US_NAMES_URL, timeout=_US_REQUEST_TIMEOUT_SECONDS)
    # Without this an error page would be parsed as if it were the CSV.
    response.raise_for_status()
    only_names_data = pd.read_csv(io.StringIO(response.text))
    top_common = only_names_data.nlargest(_TOP_N_NAMES, "count").sort_values(
        "count", ascending=False
    )
    count_lookup = dict(zip(only_names_data["name"], only_names_data["count"]))
    return top_common["name"].tolist(), count_lookup


def _us_colorbar(x, y, us_min, us_mid, us_max):
    """Build the colorbar settings for the US-count colour scale."""
    return dict(
        title="US Count",
        x=x,
        len=0.4,
        y=y,
        tickvals=[us_min, us_mid, us_max],
        ticktext=[str(us_min), str(us_mid), str(us_max)],
    )


def _us_bubble_trace(
    stacked,
    y_values,
    us_count_lookup,
    color_range,
    *,
    size,
    name,
    colorbar,
    hovertemplate,
    showlegend=None,
):
    """Build a scatter trace whose bubbles are coloured by US name count.

    ``colorbar`` is the colorbar settings dict, or ``None`` to hide the scale
    for this trace. ``color_range`` is ``(us_min, us_max)``.
    """
    us_min, us_max = color_range
    marker = dict(
        size=size,
        color=[us_count_lookup.get(name_, 1) for name_ in stacked["name"]],
        colorscale="Oranges",
        showscale=colorbar is not None,
        colorbar=colorbar,
        line=dict(width=1, color="white"),
    )
    if us_min < us_max:
        # Pin the colour domain to the range the colorbar ticks describe, so
        # every US trace maps counts to colours identically instead of each
        # trace auto-scaling to its own subset of names.
        marker.update(cmin=us_min, cmax=us_max)
    return go.Scatter(
        x=stacked["x_position"].tolist(),
        y=y_values,
        mode="markers",
        marker=marker,
        name=name,
        showlegend=showlegend,
        customdata=stacked[_BUBBLE_HOVER_COLUMNS],
        hovertemplate=hovertemplate,
    )


@capture("graph")
def combined_names_bubble_chart(
    data_frame,
    # NOTE: list default kept for interface compatibility; it is never mutated here.
    chart_selection=["Most popular names in songs", "Most popular names in the US"],
):
    """Stacked bubble chart of the songs behind the most popular names.

    Each bubble is one distinct (name, song) pair; bubbles of the same name are
    stacked vertically above that name's tick on the x-axis.

    Args:
        data_frame: Table with ``name``, ``song``, ``artist`` and ``year`` columns.
        chart_selection: Which rankings to draw. Recognised entries are
            ``"Most popular names in songs"`` (names ranked by number of
            distinct songs in ``data_frame``, blue scale) and
            ``"Most popular names in the US"`` (names ranked by the downloaded
            US counts, orange scale). A non-list value is wrapped in a list;
            a falsy value counts as an empty selection.

    Returns:
        A Plotly figure. With an empty selection it only carries a message
        asking for a selection. If the US counts cannot be downloaded, the US
        part is omitted and a warning is logged.
    """
    import logging  # local import keeps this block independent of the file's import section

    if not isinstance(chart_selection, list):
        chart_selection = [chart_selection] if chart_selection else []

    fig = go.Figure()

    # Nothing selected: answer before doing any data work or network access.
    if not chart_selection or chart_selection == [""]:
        fig.add_annotation(
            x=0.5,
            y=0.5,
            xref="paper",
            yref="paper",
            text="Please select at least one chart type",
            showarrow=False,
            font=dict(size=16),
        )
        return fig

    show_songs = "Most popular names in songs" in chart_selection
    show_us = "Most popular names in the US" in chart_selection

    # Rank names by how many distinct songs mention them.
    songs_fixed_order = []
    song_count_lookup = {}
    if show_songs:
        name_counts_songs = data_frame.groupby("name")["song"].nunique().reset_index()
        name_counts_songs.columns = ["name", "song_count"]
        top_by_songs = name_counts_songs.nlargest(
            _TOP_N_NAMES, "song_count"
        ).sort_values("song_count", ascending=False)
        songs_fixed_order = top_by_songs["name"].tolist()
        song_count_lookup = dict(
            zip(name_counts_songs["name"], name_counts_songs["song_count"])
        )

    # Rank names by US popularity; best effort, the chart still renders without it.
    us_names_order = []
    us_count_lookup = {}
    if show_us:
        try:
            us_names_order, us_count_lookup = _fetch_us_name_counts()
        except (requests.RequestException, KeyError, TypeError, ValueError) as exc:
            logging.getLogger(__name__).warning(
                "Could not load US name counts from %s: %s", _US_NAMES_URL, exc
            )

    # Range of the US colour scale, shared by every US trace.
    us_counts = [us_count_lookup.get(name, 1) for name in us_names_order]
    us_max = max(us_counts) if us_counts else 1
    us_min = min(us_counts) if us_counts else 1
    us_mid = (us_max + us_min) // 2
    us_range = (us_min, us_max)

    x_labels = []
    x_positions_ticks = []

    if show_songs:
        songs_positions = _first_positions(songs_fixed_order)
        x_labels.extend(songs_fixed_order)
        # One tick per ranked name; there may be fewer than _TOP_N_NAMES of them.
        x_positions_ticks.extend(range(len(songs_fixed_order)))

        unique_songs = _stack_songs_by_name(data_frame, songs_positions)
        # An empty table has no counts to take min/max of, so draw no trace.
        if not unique_songs.empty:
            song_count_values = [
                song_count_lookup.get(name, 1) for name in unique_songs["name"]
            ]
            max_songs = max(song_count_values)
            min_songs = min(song_count_values)
            mid_songs = (max_songs + min_songs) // 2
            fig.add_trace(
                go.Scatter(
                    x=unique_songs["x_position"].tolist(),
                    y=unique_songs["y_position"],
                    mode="markers",
                    marker=dict(
                        size=25,
                        color=song_count_values,
                        colorscale="Blues",
                        showscale=True,
                        colorbar=dict(
                            title="Songs<br>Count",
                            x=1.02,
                            len=0.4,
                            y=0.8,
                            tickvals=[min_songs, mid_songs, max_songs],
                            ticktext=[str(min_songs), str(mid_songs), str(max_songs)],
                        ),
                        line=dict(width=1, color="white"),
                    ),
                    name="Most popular names in song lyrics",
                    customdata=unique_songs[_BUBBLE_HOVER_COLUMNS],
                    hovertemplate=_BUBBLE_HOVERTEMPLATE,
                )
            )

    if show_us and us_names_order:
        if show_songs:
            # Both rankings selected: names present in both get a second, smaller
            # bubble on top of the song bubble; US-only names go to the right.
            songs_set = set(songs_fixed_order)
            us_set = set(us_names_order)
            overlap_positions = {
                name: songs_positions[name]
                for name in songs_fixed_order
                if name in us_set
            }
            us_only_ordered = [
                name for name in us_names_order if name not in songs_set
            ]
            us_colorbar_shown = False

            if overlap_positions:
                unique_overlap = _stack_songs_by_name(data_frame, overlap_positions)
                fig.add_trace(
                    _us_bubble_trace(
                        unique_overlap,
                        # Nudge upward so the song bubble underneath stays visible.
                        [y + 0.15 for y in unique_overlap["y_position"]],
                        us_count_lookup,
                        us_range,
                        size=20,
                        name="",
                        colorbar=_us_colorbar(1.12, 0.3, us_min, us_mid, us_max),
                        hovertemplate=_OVERLAP_HOVERTEMPLATE,
                        showlegend=False,
                    )
                )
                us_colorbar_shown = True

            if us_only_ordered:
                # Continue directly after the last song-ranked name.
                start_pos = len(songs_fixed_order)
                x_labels.extend(us_only_ordered)
                x_positions_ticks.extend(
                    range(start_pos, start_pos + len(us_only_ordered))
                )
                unique_us_only = _stack_songs_by_name(
                    data_frame, _first_positions(us_only_ordered, offset=start_pos)
                )
                fig.add_trace(
                    _us_bubble_trace(
                        unique_us_only,
                        unique_us_only["y_position"],
                        us_count_lookup,
                        us_range,
                        size=25,
                        name="Popular name choices in the US",
                        # Only one US colorbar is drawn for the whole figure.
                        colorbar=(
                            None
                            if us_colorbar_shown
                            else _us_colorbar(1.02, 0.3, us_min, us_mid, us_max)
                        ),
                        hovertemplate=_BUBBLE_HOVERTEMPLATE,
                    )
                )
        else:
            # Only the US ranking selected: show its names in ranking order.
            x_labels = us_names_order
            x_positions_ticks = list(range(len(us_names_order)))
            unique_us = _stack_songs_by_name(
                data_frame, _first_positions(us_names_order)
            )
            fig.add_trace(
                _us_bubble_trace(
                    unique_us,
                    unique_us["y_position"],
                    us_count_lookup,
                    us_range,
                    size=25,
                    name="Popular name choices in the US",
                    colorbar=_us_colorbar(1.02, 0.5, us_min, us_mid, us_max),
                    hovertemplate=_BUBBLE_HOVERTEMPLATE,
                )
            )

    fig.update_layout(
        xaxis_title="Name",
        yaxis_title="Songs",
        xaxis=dict(
            tickmode="array",
            tickvals=x_positions_ticks,
            ticktext=x_labels,
            tickangle=-45,
        ),
        showlegend=True,
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
    )
    return fig
####### Data Manager Settings #####
# Register each remote CSV with the Vizro data manager under the key that
# components use to refer to it (e.g. data_frame="people_minus_baby").
for _dataset_key, _dataset_url in (
    (
        "people_minus_baby",
        "https://raw.githubusercontent.com/stichbury/vizro_projects/main/songs/people-minus-baby.csv",
    ),
    (
        "only_names",
        "https://raw.githubusercontent.com/the-pudding/data/master/names-in-songs/onlyNames.csv",
    ),
):
    data_manager[_dataset_key] = pd.read_csv(_dataset_url)
########### Model code ############
# Labels offered by the checklist; they are also the values handed to the
# chart's ``chart_selection`` argument.
chart_options = ["Most popular names in songs", "Most popular names in the US"]

# Components are created in the order they appear on the page.
names_graph = vm.Graph(
    id="combined_names_chart",
    type="graph",
    figure=combined_names_bubble_chart(
        data_frame="people_minus_baby",
        chart_selection=list(chart_options),
    ),
    title="Names in Songs: Stacked Bubble Comparison",
)

rankings_graph = vm.Graph(
    id="billboard_rankings",
    type="graph",
    figure=ranking_scatter_plot(data_frame="people_minus_baby"),
    title="Names in songs that ranked in the top 10 of the US Billboard Hot 100 by year",
)

# Single column: component 0 on the first row, component 1 on the second.
page_layout = vm.Grid(
    type="grid",
    grid=[[0], [1]],
    row_gap="24px",
    col_gap="24px",
    row_min_height="400px",
)

# Checklist that drives the bubble chart's ``chart_selection`` argument.
chart_selector = vm.Parameter(
    id="chart_selector",
    type="parameter",
    targets=["combined_names_chart.chart_selection"],
    selector=vm.Checklist(
        type="checklist",
        options=list(chart_options),
        value=list(chart_options),
        title="Select Chart Data",
        show_select_all=False,
    ),
)

music_page = vm.Page(
    components=[names_graph, rankings_graph],
    title="Music Names Dashboard",
    layout=page_layout,
    controls=[chart_selector],
)

model = vm.Dashboard(pages=[music_page], theme="vizro_dark")

Vizro().build(model).run()