# -*- coding: utf-8 -*-
"""
Created on Sat Feb 8 07:47:20 2025
@author: win11
"""
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import dash
import dash_bootstrap_components as dbc
from dash import Input, Output, State, dcc, html, callback_context, callback
from data_functions import prepare_df_base #data etl
# Load the base DataFrame once, at import time. prepare_df_base (imported from
# data_functions) does the ETL, including some basic drops and type conversions.
# This module-level frame is also the default input of the layout builders below.
df = prepare_df_base()
def create_listgroup(frame, subject):
    """Build a numbered, flush ``dbc.ListGroup`` with one item per row of *frame*.

    *subject* selects how each row is rendered:

    - ``'author-titles'``    -> ``"<author> (<title> titles)"``
    - ``'author-weeks'``     -> ``"<author> (<year-week> mentions)"``
    - ``'publisher-titles'`` -> ``"<publisher> (<title> titles)"``
    - ``'publisher-weeks'``  -> ``"<publisher> (<year-week> mentions)"``

    For any other subject the row itself is passed to the list item unchanged.
    """
    # One formatter per known subject; resolved once instead of once per row.
    row_formatters = {
        'author-titles': lambda r: f"{r['author']} ({r['title']} titles)",
        'author-weeks': lambda r: f"{r['author']} ({r['year-week']} mentions)",
        'publisher-titles': lambda r: f"{r['publisher']} ({r['title']} titles)",
        'publisher-weeks': lambda r: f"{r['publisher']} ({r['year-week']} mentions)",
    }
    # Fallback for unknown subjects: hand the raw row to the list item.
    render = row_formatters.get(subject, lambda r: r)

    items = [dbc.ListGroupItem(render(row)) for _, row in frame.iterrows()]
    return dbc.ListGroup(children=items, flush=True, numbered=True)
def create_card(frame, subject, df_filtered):
    """Build the "top 3" card: an image on the left, a ranked list on the right.

    Parameters
    ----------
    frame : DataFrame
        The ranked rows to list (e.g. the top 3 authors). It must carry an
        ``'author'`` column for the author subjects and a ``'publisher'``
        column otherwise.
    subject : str
        One of ``'author-titles'``, ``'author-weeks'``, ``'publisher-titles'``,
        ``'publisher-weeks'``. Selects the card title, the explanation text and
        the row format used by ``create_listgroup``.
    df_filtered : DataFrame
        The underlying data; used only to look up an ``'image_url'`` for the
        entry ranked first in *frame*.

    Returns
    -------
    dbc.Card

    Notes
    -----
    The image is the first ``image_url`` found in *df_filtered* for the number
    one author/publisher. That is simply the first matching row, not
    necessarily the cover of the most successful book. When *frame* is empty
    or no row matches, the card is rendered without an image source instead of
    raising ``IndexError``.
    """
    card_titles = {
        'author-titles': 'Most bestsellers listed',
        'author-weeks': 'Most mentions',
        'publisher-titles': 'Most bestsellers listed',
        'publisher-weeks': 'Most mentions',
    }
    explanations = {
        'author-titles': 'The author on top had most different books in the bestseller list.',
        'author-weeks': 'The author on top had most listings with a title (or more) on the bestseller lists.',
        'publisher-titles': 'The publisher on top had most different books in the bestseller list.',
        'publisher-weeks': 'The publisher on top had most titles mentioned in the bestseller lists.',
    }

    list_group = create_listgroup(frame, subject)

    # Author subjects look the image up by author; everything else by publisher.
    key_column = 'author' if subject in ('author-titles', 'author-weeks') else 'publisher'

    # The ranking frames carry no image, so take the first image_url that
    # df_filtered holds for the entry ranked number one.
    first_image = None
    if not frame.empty:
        leader = frame[key_column].iloc[0]
        candidates = df_filtered['image_url'].loc[df_filtered[key_column] == leader]
        if not candidates.empty:
            first_image = candidates.iloc[0]

    image_column = dbc.Col(
        dbc.CardImg(
            src=first_image,
            className="img-fluid rounded-start",
        ),
        className="col-md-4",
    )
    text_column = dbc.Col(
        dbc.CardBody(
            [
                html.H2(card_titles.get(subject), className="card-title"),
                list_group,
                html.Small(explanations.get(subject), className='card-text text-muted'),
            ]
        ),
        className="col-md-8",
    )

    return dbc.Card(
        [
            dbc.Row(
                [image_column, text_column],
                className=" g-0 d-flex align-items-top",
            )
        ]
    )
def create_basic_card(subject, df_filtered = df):
    """Build the summary card shown at the left of a layout row.

    *subject* is a column name of *df_filtered* (``'author'`` or
    ``'publisher'``). The card reports how many distinct ``published_date``
    values the selection holds (shown to the user as weeks) and how many
    distinct values the *subject* column has, followed by a "Dive into ..."
    button. No link target is set on the button in this function.
    """
    plural_labels = {'author': 'authors', 'publisher': 'publishers'}
    plural = plural_labels.get(subject)

    week_count = df_filtered['published_date'].nunique()
    distinct_count = df_filtered[subject].nunique()

    body_children = [
        html.P(
            f"During the {week_count} weeks you selected:",
            className="card-text",
        ),
        html.H3(f"{distinct_count} different {plural}"),
        html.P('had a book on the NYT bestsellerlist', className="card-text"),
        dbc.Button(f"Dive into {plural}", color="info", size="lg", className="me-1"),
    ]
    return dbc.Card(dbc.CardBody(body_children), className='col-md-12')
def content_layout_author(df_filtered = df):
    """Build the author row: summary card, arrow image and two top-3 cards.

    The two rankings are computed from *df_filtered*:

    - distinct ``title`` values per author (most different books listed);
    - number of ``year-week`` entries per author (most mentions on the list).
    """
    per_author = df_filtered.groupby(['author'])

    # Top 3 authors by number of different titles on the list.
    top3_by_titles = per_author['title'].nunique().reset_index().nlargest(3, ['title'])
    # Top 3 authors by how often they were mentioned on the list.
    top3_by_mentions = per_author['year-week'].count().reset_index().nlargest(3, ['year-week'])

    columns = [
        # General author KPI; the figures are calculated inside create_basic_card.
        dbc.Col(create_basic_card('author', df_filtered), className='col-md-3'),
        dbc.Col(
            html.Img(src='assets/images/curltje.png', style={'maxWidth':'80%'}),
            className='col-md-1',
            style={'alignSelf':'center'},
        ),
        dbc.Col(create_card(top3_by_titles, 'author-titles', df_filtered), className='col-md-4'),
        dbc.Col(create_card(top3_by_mentions, 'author-weeks', df_filtered), className='col-md-4'),
    ]
    return dbc.Row(columns, style={'marginBottom':'2rem'})
def content_layout_publisher (df_filtered = df):
    """Build the publisher row: summary card, arrow image and two top-3 cards.

    The two rankings are computed from *df_filtered*:

    - distinct ``title`` values per publisher (most different books listed);
    - number of ``year-week`` entries per publisher (most mentions on the list).
    """
    per_publisher = df_filtered.groupby(['publisher'])

    # Top 3 publishers by number of different titles on the list.
    top3_by_titles = per_publisher['title'].nunique().reset_index().nlargest(3, ['title'])
    # Top 3 publishers by how often they were mentioned on the list.
    top3_by_mentions = per_publisher['year-week'].count().reset_index().nlargest(3, ['year-week'])

    columns = [
        # General publisher KPI; the figures are calculated inside create_basic_card.
        dbc.Col(create_basic_card('publisher', df_filtered), className='col-md-3'),
        dbc.Col(
            html.Img(src='assets/images/curltje.png', style={'maxWidth':'80%'}),
            className='col-md-1',
            style={'alignSelf':'center'},
        ),
        dbc.Col(create_card(top3_by_titles, 'publisher-titles', df_filtered), className=' col-md-4'),
        dbc.Col(create_card(top3_by_mentions, 'publisher-weeks', df_filtered), className='col-md-4'),
    ]
    return dbc.Row(columns)
def header(df_filtered):
    """Build the page header: the title block plus the year range slider.

    The slider (id ``'selected_range_years'``) takes its minimum, maximum and
    marks from the distinct values of ``df_filtered['year']``.
    """
    years = df_filtered['year'].unique()
    # Explicit string labels, to prevent the marks from being shown as 2k, 2k, 2k ...
    year_marks = {int(year): str(year) for year in years}

    title_column = dbc.Col(
        [
            html.H1('New York Times,'),
            html.H3(' bestseller lists (2011-2025)'),
        ],
        className='col-sm-12 col-md-4',
        style={'textAlign':'center', 'color':'white'},
    )

    year_slider = dcc.RangeSlider(
        id='selected_range_years',
        min=years.min(),
        max=years.max(),
        step=1,
        marks=year_marks,
        # NOTE(review): the initial selection is hard-coded while min/max come
        # from the data - confirm this is intended if the dataset changes.
        value=[2011, 2025],
        # Handles are kept one step apart (original note: minimum = 52 weeks).
        pushable=1,
    )
    slider_column = dbc.Col(
        [year_slider],
        className='col-sm-12 col-md-8',
        style={'backgroundColor':'white','paddingTop':'1rem', 'borderTopRightRadius': '4px','borderBottomRightRadius': '4px'},
    )

    return dbc.Row(
        [title_column, slider_column],
        className='bg-info',
        style={'margin':'0rem','marginBottom':'2rem','justifyContent':'space-between','alignItems':'center' },
    )
app = dash.Dash(
    __name__,
    use_pages=False,
    external_stylesheets=[dbc.themes.SANDSTONE, dbc.icons.FONT_AWESOME],
)

# Initial page: header with the year slider, then the author and publisher rows
# built from the complete DataFrame. The two wrapper divs are the callback outputs.
app.layout = dbc.Container(
    [
        header(df),
        html.Div(content_layout_author(), id='authorlayout'),
        html.Div(content_layout_publisher(), id='publisherlayout'),
    ],
    className='bg-primary vh-100',
    style={'padding':'2rem'},
    fluid=True,
)


@callback(
    Output('authorlayout', 'children'),
    Output('publisherlayout', 'children'),
    Input('selected_range_years', 'value'))
def update_output(value):
    """Rebuild the author and publisher rows for the selected year range.

    *value* is the ``[low, high]`` pair of the range slider. Rows whose
    ``year`` lies between ``low`` and ``high - 1`` (both inclusive) are kept,
    so the upper handle acts as an exclusive bound.

    Returns the new children for ``'authorlayout'`` and ``'publisherlayout'``.
    """
    # NOTE(review): with the exclusive upper bound the highest year on the
    # slider is never part of the selection - confirm this is intended.
    df_filtered = df[df['year'].between(value[0], value[1]-1)]
    return content_layout_author(df_filtered), content_layout_publisher(df_filtered)


# The server must be started only after every callback has been declared: the
# run call blocks, so a callback defined below it would never be registered.
# The guard keeps an import of this module from starting a server.
if __name__ == '__main__':
    # app.run replaces the deprecated app.run_server.
    app.run(debug=True)