import panel as pn
import pandas as pd
import numpy as np
import altair as alt
from scipy import stats
import re
pn.extension('vega', theme='dark')
df = pd.read_csv('Case Study Data_cleaned.csv')
df_clean = df.dropna(subset=['Month'])
engagement_df = pd.read_csv('engagement.csv')
df_periods = {'Nov-14': df_clean[df_clean['Month'] == 'Nov-14'], 'Dec-14': df_clean[df_clean['Month'] == 'Dec-14'],
'Jan-15': df_clean[df_clean['Month'] == 'Jan-15'], 'Overall': df_clean}
radio_group = pn.widgets.RadioButtonGroup(name='Period Selector', options=list(df_periods.keys()), value='Nov-14',
button_type='primary', align='center')
activity_filter = pn.widgets.RadioBoxGroup(name="Account Activity", options=["All", "Active", "Inactive"],
value="All", inline=True, align='center', styles={'font-size': '11pt'})
account_id_slider = pn.widgets.IntRangeSlider(name='Filter by Top/Bottom Active Accounts:', start=0, end=60, value=(0, 60), step=1, width = 500)
selected_period_text = pn.pane.Markdown(align='center')
plot_pane = pn.pane.Vega()
action_plot_pane = pn.pane.Vega()
indicators_pane = pn.Column()
def parse_number_and_percentage(value):
match = re.match(r"(\d+)\s*\(([\d.]+)%\)", str(value))
if match:
number = int(match.group(1))
percentage = float(match.group(2))
return number, percentage
try:
return float(value), None
except Exception:
return 0, None
def generate_indicators(period):
row = engagement_df[engagement_df['Month'] == period]
row = row.iloc[0]
active_users, pct_users = parse_number_and_percentage(row["Active Users"])
active_accounts, pct_accounts = parse_number_and_percentage(row["Active Accounts"])
active_jobs, pct_jobs = parse_number_and_percentage(row["Active Job Ads"])
inactive_sessions = int(row["Inactive Sessions"])
avg_users_per_account = float(row["(Avg) No. Active Users per Active Account"])
indicators = [
pn.indicators.Number(name='Active Users', value=active_users, format=f"{active_users} ({pct_users}%)", title_size='15pt', font_size='20pt', default_color='white', styles={'color': 'white'}),
pn.indicators.Number(name="Active Accounts", value=active_accounts, format=f"{active_accounts} ({pct_accounts}%)", title_size='15pt', font_size='20pt', default_color='white', styles={'color': 'white'}),
pn.indicators.Number(name="Active Job Ads", value=active_jobs, format=f"{active_jobs} ({pct_jobs}%)", title_size='15pt', font_size='20pt', default_color='white', styles={'color': 'white'}),
pn.indicators.Number(name="Inactive User Sessions", value=inactive_sessions, format='{value:.0f}', title_size='15pt', font_size='20pt', default_color='white', styles={'color': 'white'}),
pn.indicators.Number(name="Avg Act_Usr/Act_Acc", value=avg_users_per_account, format='{value:.2f}', title_size='15pt', font_size='20pt', default_color='white', styles={'color': 'white'}),
]
indicator_grids = [
pn.GridBox(indicator, ncols=1, sizing_mode='stretch_width',
styles={'background': 'cornflowerblue', 'padding': '0px', 'border-radius': '10px', 'margin': '5px'}
) for indicator in indicators
]
return pn.Row(*indicator_grids, sizing_mode='stretch_width')
def generate_altair_plot(data, label, activity_filter="All"):
account_stats = data.groupby("Account ID").agg(
unique_jobs=("Job Ad (Role) ID", pd.Series.nunique),
total_sessions=("Sessions", "sum"),
messages_sent=("Message Sends", "sum"),
jobs_sent=("Job Sends", "sum"),
cvs_accessed=("CVs Accessed", "sum"),
users_per_account=("User ID", pd.Series.nunique)
).reset_index()
if activity_filter == "Active":
account_stats = account_stats[
(account_stats["messages_sent"] > 0) |
(account_stats["jobs_sent"] > 0) |
(account_stats["cvs_accessed"] > 0)
]
elif activity_filter == "Inactive":
account_stats = account_stats[
(account_stats["messages_sent"] == 0) &
(account_stats["jobs_sent"] == 0) &
(account_stats["cvs_accessed"] == 0)
]
if account_stats.empty:
return alt.Chart(pd.DataFrame({"x": [], "y": []})).mark_point().properties( title=f"No Data for Filter '{activity_filter}'", width=500, height=400)
x = account_stats["unique_jobs"].values
y = account_stats["total_sessions"].values
slope, intercept, r_value, _, _ = stats.linregress(x, y)
y_pred = intercept + slope * x
account_stats["y_pred"] = y_pred
max_x = account_stats["unique_jobs"].max()
display_max_x = max_x + 4
max_y_pred = intercept + slope * max_x
label_data = pd.DataFrame({
"x": [max_x],
"y": [max_y_pred],
"label": [f"R = {r_value:.2f}"]
})
zoom = alt.selection_interval(bind='scales')
points = alt.Chart(account_stats).mark_circle(opacity=0.75, color="#faedff").encode(
x=alt.X("unique_jobs", title="Number of Unique Jobs per Account", scale=alt.Scale(domain=(0, display_max_x))),
y=alt.Y("total_sessions", title="Total Sessions per Account"),
tooltip=["Account ID", "unique_jobs", "total_sessions", "messages_sent", "jobs_sent", "cvs_accessed", "users_per_account"])
line = alt.Chart(account_stats).mark_line(color='red').encode(x="unique_jobs", y="y_pred")
corr_label = alt.Chart(label_data).mark_text(
align='left',
baseline='middle',
dx=5,
dy=0,
fontSize=12,
fontStyle='bold',
color='#f55d76'
).encode(
x='x:Q',
y='y:Q',
text='label:N'
)
return (points + line + corr_label).add_selection(zoom).properties(width=500,height=400,
background='#15264a'
).configure_axis(
labelColor='white',
titleColor='white', gridOpacity=0.4
).configure_title(
color='white'
).configure_legend(
labelColor='white',
titleColor='white'
)
def generate_action_bar_chart(data, account_id_range=(0, 1000)):
grouped = data.groupby("Account ID")[["Message Sends", "Job Sends", "CVs Accessed"]].sum()
grouped["Total Activity"] = grouped.sum(axis=1)
active_accounts = grouped[grouped["Total Activity"] > 0].reset_index()
active_accounts = active_accounts.sort_values("Total Activity", ascending=False).reset_index(drop=True)
sliced_accounts = active_accounts.iloc[account_id_range[0]:account_id_range[1]+1].copy()
sliced_accounts["Credits"] = (
sliced_accounts["Message Sends"] * 200 +
sliced_accounts["Job Sends"] * 100 +
sliced_accounts["CVs Accessed"] * 500
)
sliced_accounts["Total Actions"] = (
sliced_accounts["Message Sends"] +
sliced_accounts["Job Sends"] +
sliced_accounts["CVs Accessed"]
)
melted_df = sliced_accounts.melt(
id_vars=["Account ID", "Credits", "Total Actions"],
value_vars=["Message Sends", "Job Sends", "CVs Accessed"],
var_name="Action Type",
value_name="Count"
)
account_order = sliced_accounts["Account ID"].tolist()
bars = alt.Chart(melted_df).mark_bar().encode(
x=alt.X('Account ID:N', sort=account_order, title='Account ID'),
y=alt.Y('Count:Q', stack='zero', title='Total Actions'),
color=alt.Color('Action Type:N',title='Action Type',
legend=alt.Legend(orient='top-right', direction='vertical', offset=0, padding = 10)),
tooltip=['Account ID:N', 'Action Type:N', 'Count:Q']
)
text = alt.Chart(sliced_accounts).mark_text(
angle=270,
dx=25,
dy=0,
fontSize=9,
fontWeight='bold', color='white'
).encode(
x=alt.X('Account ID:N', sort=account_order),
y=alt.Y('Total Actions:Q'),
text=alt.Text('Credits:Q', format='~s')
)
chart = (bars + text).properties(
width=500,
height=400,
background='#15264a'
).configure_axis(
labelColor='white',
titleColor='white', gridOpacity=0.4
).configure_axisX(labelAngle=-45).configure_legend(
labelColor='white',
titleColor='white'
).configure_title(
color='white'
)
return chart
def update_dashboard(event=None):
period = radio_group.value
data = df_periods[period]
selected_period_text.object = f"# SEEK Product Data Dashboard: {period}"
plot_pane.object = generate_altair_plot(data, period, activity_filter.value)
grouped = data.groupby("Account ID")[["Message Sends", "Job Sends", "CVs Accessed"]].sum()
grouped["Total Activity"] = grouped.sum(axis=1)
active_accounts = grouped[grouped["Total Activity"] > 0].reset_index()
active_accounts = active_accounts.sort_values("Total Activity", ascending=False).reset_index(drop=True)
slider_max = len(active_accounts) - 1
account_id_slider.start = 0
account_id_slider.end = slider_max if slider_max > 0 else 1
account_id_slider.value = (0, slider_max if slider_max > 0 else 1)
indicators_pane.objects = [generate_indicators(period)]
update_action_plot()
def update_action_plot(event=None):
period = radio_group.value
data = df_periods[period]
action_plot_pane.object = generate_action_bar_chart(data, account_id_range=account_id_slider.value)
radio_group.param.watch(update_dashboard, 'value')
activity_filter.param.watch(update_dashboard, 'value')
account_id_slider.param.watch(update_action_plot, 'value')
update_dashboard()
layout = pn.Column(
pn.Row(selected_period_text, radio_group),
pn.Row(indicators_pane, align='center', sizing_mode='stretch_width'),
pn.Spacer(height=10),
pn.Row(
pn.Column(pn.Row(pn.pane.Markdown("### Filter by Account Activity", align='center'), activity_filter), plot_pane),
pn.Column(account_id_slider, action_plot_pane),
sizing_mode='stretch_width'))
layout.servable()