# -*- coding: utf-8 -*-
"""
Created on Wed Oct 15 17:03:19 2025
@author: win11
"""
import pandas as pd
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output, callback
import dash_bootstrap_components as dbc
from dash_bootstrap_templates import load_figure_template
from plotly.subplots import make_subplots
import dash_ag_grid as dag
#load figure template
load_figure_template("cyborg")
#load theme bootstrap css
dbc_css = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates/dbc.min.css"
data_raw = pd.read_csv('data.csv')
columns = data_raw.columns
df = data_raw.iloc[:, [0,1,2,3,8,14,15,41,42,43,44,45,46,76]].copy()
#helperfunction to convert the date column
def parse_mixed_date(s):
# Parse using two-digit year format
dt = pd.to_datetime(s, format="%d-%b-%y", errors="coerce")
if pd.isna(dt):
return pd.NaT
year = dt.year
# Assume anything before 1970 is actually 2000+
if year > 2025:
dt = dt.replace(year=year - 100)
return dt
#convert datatypes
df['Date'] = df['Date'].apply(parse_mixed_date)
df['Year'] = df['Date'].dt.year
# Fixed the dropdown_list (added missing closing quote)
dropdown_list = ['Overall Rating','Energy','Danceability','Happiness', 'Loudness (dB)','Length (Sec)','BPM']
def card_content(title, dfin):
dff = dfin.loc[dfin[title] == dfin[title].max()]
row=dff.sample(n=1)
footer=f"One out of {len(dff)}" if len(dff) > 1 else ""
card_content = [
dbc.CardHeader(f"Highest {title}"),
dbc.CardBody(
[
html.H5(row['Song'], className="card-title"),
html.P(
row['Artist'],
className="card-text",
),
]
),
dbc.CardFooter(f"{footer}")
]
return card_content
#grouped df by year for some scatterplots
#date/year is connected to the first day the song reached
#the number one position
# Only aggregate columns that exist in dropdown_list
agg_dict = {'Song':'count', 'Weeks at Number One': 'mean'}
for col in dropdown_list:
if col in df.columns:
agg_dict[col] = 'mean'
dfg = df.groupby('Year').agg(agg_dict).reset_index()
# Initialize the app
app = Dash(__name__, suppress_callback_exceptions=True,external_stylesheets=[dbc.themes.CYBORG, dbc.icons.FONT_AWESOME, dbc_css])
# Define the layout with two dropdowns and a graph
app.layout = dbc.Container([
dbc.Row([
dbc.Col([
html.H1("Billboard hot 100 number 1 songs", className="mb-4", style={"fontSize":"32px"}),
html.P("Thank you to Chris Dalla Riva for the data.", className="mb-4"),
dbc.Row([
dbc.Col([
dbc.Label("Select X-axis variable:"),
dcc.Dropdown(
id='x-dropdown',
options=[{'label': col, 'value': col} for col in dropdown_list],
value='Overall Rating', # Default value
clearable=False
)
], width=4),
dbc.Col([
dbc.Label("Select Y-axis variable:"),
dcc.Dropdown(
id='y-dropdown',
options=[{'label': col, 'value': col} for col in dropdown_list if col != 'Overall Rating'],
value='Happiness', # Default value
clearable=False
)
], width=4)
], className="mb-4"),
html.H3("Song trends based on yearly average values", className="mb-3", style={"fontSize":"24px"}),
html.P("Markersize indicates only the number of songs, click on a marker to see the songs.", className="mb-3", style={"fontSize":"16px"}),
dcc.Graph(id='scatter-plot')
], width=6),
dbc.Col([
dbc.Row([
dbc.Col([
html.H3(id='songs-title', className="text-center mb-3", style={"fontSize":"24px"}),
html.Div(id="static-cards"),
dag.AgGrid(
id='songs-grid',
columnDefs=[],
rowData=[],
dashGridOptions={
"animateRows": True,
"pagination": True,
"paginationPageSize": 10,
"domLayout": "autoHeight"
},
style={"height": None},
className="ag-theme-alpine-dark"
)
], width=11)
], className="mb-5", style={"display": "none"}, id="grid-section"),
], width=6)
], className="mb-5"),
], style={'margin':'2rem'},fluid=True)
# Callback to update y-dropdown options based on x-dropdown selection
@app.callback(
Output('y-dropdown', 'options'),
Output('y-dropdown', 'value'),
Input('x-dropdown', 'value')
)
def update_y_dropdown(x_value):
# Filter out the selected x value from y dropdown options
y_options = [{'label': col, 'value': col} for col in dropdown_list if col != x_value]
# If current y value is same as x value, change it to the first available option
current_y = 'Happiness' if x_value != 'Happiness' else dropdown_list[0] if dropdown_list[0] != x_value else dropdown_list[1]
return y_options, current_y
# Callback to update the scatter plot based on dropdown selections
@app.callback(
Output('scatter-plot', 'figure'),
Input('x-dropdown', 'value'),
Input('y-dropdown', 'value')
)
def update_scatter_plot(x_col, y_col):
# Check if selected columns exist in dfg
if x_col not in dfg.columns or y_col not in dfg.columns:
# Return empty figure with error message
fig = go.Figure()
fig.add_annotation(
text=f"Error: Column '{x_col}' or '{y_col}' not found in aggregated data",
xref="paper", yref="paper",
x=0.5, y=0.5, showarrow=False
)
return fig
# Create the scatter plot with year information
fig = go.Figure(data=go.Scatter(
x=dfg[x_col],
y=dfg[y_col],
mode='markers',
marker=dict(
color=dfg['Year'], #set color equal to a variable
colorscale='Viridis', # one of plotly colorscales
showscale=True,
size=dfg['Song'],
),
text=[f"Year: {year}<br>{x_col}: {x:.2f}<br>{y_col}: {y:.2f}<br>Number of Songs: {songs}"
for year, x, y, songs in zip(dfg['Year'], dfg[x_col], dfg[y_col], dfg['Song'])],
hovertemplate='%{text}<extra></extra>',
customdata=dfg[['Year']].values # Pass year as customdata
))
# Update layout
fig.update_layout(
title=dict(
text=f"{y_col} vs {x_col} by Year",
font=dict(size=20)
),
xaxis=dict(
title=dict(
text=x_col,
font=dict(size=14)
)
),
yaxis=dict(
title=dict(
text=y_col,
font=dict(size=14)
)
),
width=800,
height=600,
hovermode='closest',
clickmode='event+select'
)
# Set appropriate axis ranges based on the selected variables
if x_col == 'Overall Rating':
fig.update_xaxes(range=[0, 10])
elif x_col in ['Energy', 'Danceability', 'Happiness']:
fig.update_xaxes(range=[0, 100])
elif x_col == 'Loudness (dB)':
fig.update_xaxes(range=[-20, 0])
elif x_col == 'Length (Sec)':
fig.update_xaxes(range=[100, 400])
if y_col == 'Overall Rating':
fig.update_yaxes(range=[0, 10])
elif y_col in ['Energy', 'Danceability', 'Happiness']:
fig.update_yaxes(range=[0, 100])
elif y_col == 'Loudness (dB)':
fig.update_yaxes(range=[-20, 0])
elif y_col == 'Length (Sec)':
fig.update_yaxes(range=[100, 400])
return fig
# Callback to update AG Grid based on scatter plot click
@app.callback(
Output('songs-grid', 'rowData'),
Output('songs-grid', 'columnDefs'),
Output('grid-section', 'style'),
Output('songs-title', 'children'),
Output('static-cards','children'),
Input('scatter-plot', 'clickData')
)
def display_click_data(clickData):
# Create column definitions for AG Grid
columnDefs = []
for col in df.columns:
col_def = {
"field": col,
"headerName": col,
"sortable": True,
"filter": True,
"resizable": True,
"minWidth": 50
}
# Format numeric columns
if col in ['Overall Rating', 'Energy', 'Danceability', 'Happiness', 'Loudness (dB)', 'Length (Sec)']:
col_def["valueFormatter"] = {"function": "d3.format('.2f')(params.value)"}
# Make some columns wider
if col in ['Song', 'Artist']:
col_def["minWidth"] = 200
columnDefs.append(col_def)
if clickData is None:
static_cards = dbc.Row(
[
dbc.Col(dbc.Card(card_content('Overall Rating',df), color="primary", outline=True)),
dbc.Col(dbc.Card(card_content('Happiness',df), color="secondary", outline=True)),
dbc.Col(dbc.Card(card_content('Danceability',df), color="info", outline=True)),
],
className="mb-4",
)
# Convert Date to string for display
# if 'Date' in df.columns:
# df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')
return df.to_dict('records'), columnDefs, {"display": "block"}, f"All {len(df)} songs since 1958", static_cards
# Extract the year from the clicked point
# The customdata is passed as a list, so we need to access the first element
point = clickData['points'][0]
# Get year from customdata or from the marker color (which represents year)
if 'customdata' in point:
year = int(point['customdata'][0])
else:
# Fallback: extract from the text field using string parsing
text = point.get('text', '')
year_line = [line for line in text.split('<br>') if line.startswith('Year:')]
if year_line:
year = int(year_line[0].split(':')[1].strip())
else:
# Last fallback: use marker color which represents year
year = int(point.get('marker.color', 0))
# Filter dataframe for the selected year
filtered_df = df[df['Year'] == year].copy()
# Convert dataframe to records for AG Grid
data = filtered_df.to_dict('records')
title = f"Songs from {year} ({len(filtered_df)} songs)"
static_cards = dbc.Row(
[
dbc.Col(dbc.Card(card_content('Overall Rating',filtered_df), color="primary", outline=True)),
dbc.Col(dbc.Card(card_content('Happiness',filtered_df), color="secondary", outline=True)),
dbc.Col(dbc.Card(card_content('Danceability',filtered_df), color="info", outline=True)),
],
className="mb-4",
)
return data, columnDefs, {"display": "block"}, title,static_cards
if __name__ == '__main__':
app.run(debug=True)