import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc
# --- Data loading and cleaning ---
# Read the raw company table, then drop the website / filing link columns.
df = pd.read_csv("SaaS-businesses-NYSE-NASDAQ.csv")
df = df.drop(
    columns=[
        'Company Website',
        'Company Investor Relations Page',
        'S-1 Filing',
        '2023 10-K Filing',
    ]
)

# Strip the '%' sign from "Net Income Margin" and convert it to a numeric column.
df['Net Income Margin'] = pd.to_numeric(df['Net Income Margin'].str.replace('%', ''))

# Keep only rows with a positive net income margin, restricted to the first 28 columns.
net_income_positive = df.loc[df['Net Income Margin'] > 0].iloc[:, :28]

# Columns fed to the clustering step.
features = [
    'Market Cap',
    'Annualized Revenue',
    'YoY Growth%',
    'Revenue Multiple',
    'EBITDA Margin',
    'Net Income Margin',
]

# Remove every character that is not a digit, dot or minus sign from string
# cells, coerce the result to numbers (unparseable cells become NaN), and
# drop any row that still has a missing value.
data_selected = (
    net_income_positive[features]
    .replace(r'[^\d.-]', '', regex=True)
    .apply(pd.to_numeric, errors='coerce')
    .dropna()
)
# Normalize/Standardize the data so every feature contributes on the same scale
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data_selected)

# Apply K-Means clustering (three clusters, fixed seed for repeatable runs)
kmeans = KMeans(n_clusters=3, random_state=42)
clusters = kmeans.fit_predict(data_scaled)

# Add cluster labels to the dataset, stored as strings so they can be mapped
# to the descriptive names below.
data_selected['Cluster'] = clusters
data_selected['Cluster'] = data_selected.Cluster.astype("str")

# Attach the company names. `join="inner"` keeps only the rows that were
# actually clustered: the default outer join would bring back every row that
# `dropna()` removed, with NaN features and no cluster.
cluster_companies = pd.concat(
    [data_selected, net_income_positive['Company']],
    axis=1,
    join="inner",
)

# NOTE(review): K-Means label ids carry no inherent meaning; this mapping
# presumably matches the labelling obtained for this dataset with
# random_state=42 -- re-verify if the data or scikit-learn version changes.
cluster_dict = {
    "0": "Mega-Cap High-Growth Leaders",
    "1": "Emerging Mid-Cap Players",
    "2": "Established Large-Cap Companies",
}
cluster_companies['Cluster'] = cluster_companies.Cluster.map(cluster_dict)

# Persist the clustered table, then read it back. The file is written with
# its index, so the first column is dropped on re-read.
cluster_companies.to_csv("cluster_saas_business.csv")
cluster_data = pd.read_csv("cluster_saas_business.csv").iloc[:, 1:]
# Dash app setup: one application instance using the Bootstrap theme stylesheet.
app = dash.Dash(
    external_stylesheets=[dbc.themes.BOOTSTRAP],
)
# Inline styles for the page skeleton. Keys are camelCased, which is the
# convention Dash documents for `style` dictionaries and the one the card
# styles below already follow.

# Sidebar: fixed to the left edge of the viewport, spanning its full height.
SIDEBAR_STYLE = {
    "position": "fixed",
    "top": 0,
    "left": 0,
    "bottom": 0,
    "width": "14rem",
    "padding": "2rem 1rem",
    "backgroundColor": "#f8f9fa",
}

# Main content: the left margin is wider than the sidebar so the two do not overlap.
CONTENT_STYLE = {
    "marginLeft": "18rem",
    "marginRight": "2rem",
    "padding": "2rem 1rem",
}

# Background colour of the summary card for each cluster.
card_style_cluster_0 = {'backgroundColor': '#94bdff'}
card_style_cluster_1 = {'backgroundColor': '#ffb8f3'}
card_style_cluster_2 = {'backgroundColor': '#ccc8cb'}
# Sidebar: title, short description of the dataset, and the radio buttons
# that choose which metric the bar chart displays.
sidebar = html.Div([
    html.H6("SaaS", className="display-6"),
    html.Hr(),
    html.P(
        "A free database of 170+ SaaS businesses listed on the U.S. Stock exchanges NYSE and NASDAQ",
        className="lead",
    ),
    html.Hr(),
    dcc.RadioItems(
        id='features-radioitems',
        options=[
            {'label': 'Market Cap', 'value': 'Market Cap'},
            {'label': 'YoY Growth%', 'value': 'YoY Growth%'},
            {'label': 'Net Income Margin', 'value': 'Net Income Margin'}
        ],
        # Metric shown when the page first loads.
        value='Net Income Margin',
        className='card border-primary mb-3',
        # camelCased key, per the Dash convention for `style` dictionaries.
        style={'textAlign': 'left', 'padding': '10px'},
    )
], style=SIDEBAR_STYLE)
def _cluster_card(title, summary, style):
    """Build one cluster summary card wrapped in a column.

    Args:
        title: Cluster name shown in the card header.
        summary: Text shown in the card body.
        style: Style dictionary applied to both the header and the body.

    Returns:
        A ``dbc.Col`` containing the assembled ``dbc.Card``.
    """
    header = dbc.CardHeader(
        html.H6(title, style={'textAlign': 'center', 'padding': '0px', 'margin': '0px'}),
        style=style,
    )
    body = dbc.CardBody(html.H6(summary), style=style)
    return dbc.Col(dbc.Card([header, body], className="card border-light mb-3"))


# Main content area: page heading, one summary card per cluster, and the bar
# chart that the callback fills in. The figures in the card texts are static
# strings; they are not recomputed from the clustered data.
content = html.Div([
    dbc.Row([
        dbc.Col(
            html.H4("SaaS Financial Metrics Explorer: Market Cap, YoY Growth%, Net Income Margin"),
            # camelCased key, per the Dash convention for `style` dictionaries.
            style={'textAlign': 'center', 'margin': '10px', 'padding': '10px'},
            width=12,
        ),
        dbc.Row([
            _cluster_card(
                "Mega-Cap High-Growth Leaders",
                "Average Net Income Margin 30%, Average YoY Growth% 18%, Average Market Cap 246 US$ Billion",
                card_style_cluster_0,
            ),
            _cluster_card(
                "Emerging Mid-Cap Players",
                "Average Net Income Margin 5%, Average YoY Growth% 13%, Average Market Cap 17.68 US$ Billion",
                card_style_cluster_1,
            ),
            _cluster_card(
                "Established Large-Cap Companies",
                "Average Net Income Margin 16%, Average YoY Growth% 9.5%, Average Market Cap 21.50 US$ Billion",
                card_style_cluster_2,
            ),
        ]),
        html.Hr(),
        dbc.Col(dcc.Graph(id='bar-plot'))
    ])
], style=CONTENT_STYLE)
# Page title, and the overall layout: sidebar followed by the main content.
app.title = "SaaS Business"
app.layout = html.Div(children=[sidebar, content])
# Callback: redraw the bar chart whenever a different metric is selected.
@app.callback(
    Output('bar-plot', 'figure'),
    Input('features-radioitems', 'value')
)
def update_graph(feature):
    """Return a bar chart of the chosen metric for every company.

    Args:
        feature: Name of the ``cluster_data`` column selected in the
            radio buttons; it is used as the y-axis and as the sort key.

    Returns:
        A Plotly figure with one bar per company, coloured by cluster.
    """
    # One colour per cluster name; the insertion order of this mapping is
    # also used as the cluster ordering passed to Plotly.
    palette = {
        "Mega-Cap High-Growth Leaders": "#94bdff",
        "Emerging Mid-Cap Players": "#ffb8f3",
        "Established Large-Cap Companies": "#ccc8cb",
    }
    ordering = {'Cluster': list(palette)}

    # Rows are sorted by the selected metric, largest first, before plotting.
    ranked = cluster_data.sort_values(by=feature, ascending=False)

    fig = px.bar(
        ranked,
        x='Company',
        y=feature,
        color='Cluster',
        hover_name='Company',
        height=600,
        template='plotly_white',
        labels={'Company': ''},
        category_orders=ordering,
        color_discrete_map=palette,
    )

    # Horizontal, untitled legend centred above the plotting area.
    legend_config = {
        "title": None,
        "orientation": "h",
        "y": 1.1,
        "yanchor": "bottom",
        "x": 0.5,
        "xanchor": "center",
        "font": {"size": 18},
    }
    fig.update_layout(legend=legend_config)
    return fig
if __name__ == "__main__":
    # `app.run_server` is deprecated in favour of `app.run` and is rejected
    # by Dash 3. Prefer `run` when the installed Dash provides it, and fall
    # back to `run_server` on older releases that do not have `run` yet.
    start_server = getattr(app, "run", None) or app.run_server
    start_server(debug=True)