Py.Cafe

owinoagola00/

iris-dataset-exploration

🌸 Interactive Iris Dataset Exploration

Docs | Pricing
  • app.py
  • requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# app.py
import streamlit as st
import pandas as pd
import plotly.express as px
from sklearn.datasets import load_iris

# Page configuration
# set_page_config is kept as the first Streamlit call in the script.
st.set_page_config(
    page_title="Iris Interactive Dashboard",
    layout="wide"
)

# The title starts with the cherry-blossom emoji (U+1F338). It had been
# corrupted into mojibake by decoding its UTF-8 bytes with the wrong codec.
st.title("🌸 Interactive Iris Dataset Dashboard")
st.write(
    "This dashboard allows users to explore the Iris dataset "
    "using interactive filters and dynamic visualizations."
)

# Load and prepare data
@st.cache_data
def load_data():
    """Build the Iris table used throughout the dashboard.

    Loads the dataset with ``load_iris(as_frame=True)`` and adds a
    ``species`` column by translating each code in ``target`` to the name
    found at the same position in the dataset's ``target_names``.

    Returns:
        A ``(frame, feature_names)`` tuple: the DataFrame including the
        added ``species`` column, and the dataset's ``feature_names``.
    """
    dataset = load_iris(as_frame=True)
    # Position in target_names -> species name.
    name_by_code = {
        code: name for code, name in enumerate(dataset.target_names)
    }
    frame = dataset.frame
    frame["species"] = frame["target"].map(name_by_code)
    return frame, dataset.feature_names

df, feature_names = load_data()

# Sidebar controls
sidebar = st.sidebar
sidebar.header("Filter Options")

# Every species is offered and pre-selected, so the page starts unfiltered.
all_species = df["species"].unique()
species_selected = sidebar.multiselect(
    "Select species:", options=all_species, default=all_species
)

# The axis pickers start on the first and second feature respectively.
x_axis = sidebar.selectbox("Select X-axis:", options=feature_names, index=0)
y_axis = sidebar.selectbox("Select Y-axis:", options=feature_names, index=1)

# Filter data
filtered_df = df[df["species"].isin(species_selected)]

# With every species deselected the frame is empty: the mean below would be
# NaN and the sections that follow would have no rows to show. Tell the user
# what to do and end this script run instead.
if filtered_df.empty:
    st.warning("No species selected. Choose at least one species in the sidebar.")
    st.stop()

# Key metrics
st.subheader("Key Statistics")

col1, col2, col3 = st.columns(3)

col1.metric("Total Observations", len(filtered_df))
col2.metric("Species Count", filtered_df["species"].nunique())
col3.metric(
    "Average Sepal Length (cm)",
    round(filtered_df["sepal length (cm)"].mean(), 2)
)

# Scatter plot
st.subheader("Feature Relationship")

# Points are coloured per species; the hover data lists every feature column.
scatter_title = f"{y_axis} vs {x_axis}"
scatter_fig = px.scatter(
    data_frame=filtered_df,
    x=x_axis,
    y=y_axis,
    color="species",
    hover_data=feature_names,
    title=scatter_title,
)
st.plotly_chart(scatter_fig, use_container_width=True)

# Distribution plot
st.subheader("Feature Distribution")

feature_selected = st.selectbox(
    label="Select feature to view distribution:",
    options=feature_names,
)

# One histogram per species, overlaid with partial opacity.
hist_title = f"Distribution of {feature_selected}"
hist_fig = px.histogram(
    data_frame=filtered_df,
    x=feature_selected,
    color="species",
    barmode="overlay",
    opacity=0.7,
    title=hist_title,
)
st.plotly_chart(hist_fig, use_container_width=True)

# Data preview
# The filtered rows are rendered inside the expander created here.
preview_panel = st.expander("View Filtered Data")
preview_panel.dataframe(filtered_df)
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# app.py
import streamlit as st
import pandas as pd
import plotly.express as px
from sklearn.datasets import load_iris

# Page configuration
# set_page_config is kept as the first Streamlit call in the script.
st.set_page_config(
    page_title="Iris Interactive Dashboard",
    layout="wide"
)

# The title starts with the cherry-blossom emoji (U+1F338). It had been
# corrupted into mojibake by decoding its UTF-8 bytes with the wrong codec.
st.title("🌸 Interactive Iris Dataset Dashboard")
st.write(
    "This dashboard allows users to explore the Iris dataset "
    "using interactive filters and dynamic visualizations."
)

# Load and prepare data
@st.cache_data
def load_data():
    """Build the Iris table used throughout the dashboard.

    Loads the dataset with ``load_iris(as_frame=True)`` and adds a
    ``species`` column by translating each code in ``target`` to the name
    found at the same position in the dataset's ``target_names``.

    Returns:
        A ``(frame, feature_names)`` tuple: the DataFrame including the
        added ``species`` column, and the dataset's ``feature_names``.
    """
    dataset = load_iris(as_frame=True)
    # Position in target_names -> species name.
    name_by_code = {
        code: name for code, name in enumerate(dataset.target_names)
    }
    frame = dataset.frame
    frame["species"] = frame["target"].map(name_by_code)
    return frame, dataset.feature_names

df, feature_names = load_data()

# Sidebar controls
sidebar = st.sidebar
sidebar.header("Filter Options")

# Every species is offered and pre-selected, so the page starts unfiltered.
all_species = df["species"].unique()
species_selected = sidebar.multiselect(
    "Select species:", options=all_species, default=all_species
)

# The axis pickers start on the first and second feature respectively.
x_axis = sidebar.selectbox("Select X-axis:", options=feature_names, index=0)
y_axis = sidebar.selectbox("Select Y-axis:", options=feature_names, index=1)

# Filter data
filtered_df = df[df["species"].isin(species_selected)]

# With every species deselected the frame is empty: the mean below would be
# NaN and the sections that follow would have no rows to show. Tell the user
# what to do and end this script run instead.
if filtered_df.empty:
    st.warning("No species selected. Choose at least one species in the sidebar.")
    st.stop()

# Key metrics
st.subheader("Key Statistics")

col1, col2, col3 = st.columns(3)

col1.metric("Total Observations", len(filtered_df))
col2.metric("Species Count", filtered_df["species"].nunique())
col3.metric(
    "Average Sepal Length (cm)",
    round(filtered_df["sepal length (cm)"].mean(), 2)
)

# Scatter plot
st.subheader("Feature Relationship")

# Points are coloured per species; the hover data lists every feature column.
scatter_title = f"{y_axis} vs {x_axis}"
scatter_fig = px.scatter(
    data_frame=filtered_df,
    x=x_axis,
    y=y_axis,
    color="species",
    hover_data=feature_names,
    title=scatter_title,
)
st.plotly_chart(scatter_fig, use_container_width=True)

# Distribution plot
st.subheader("Feature Distribution")

feature_selected = st.selectbox(
    label="Select feature to view distribution:",
    options=feature_names,
)

# One histogram per species, overlaid with partial opacity.
hist_title = f"Distribution of {feature_selected}"
hist_fig = px.histogram(
    data_frame=filtered_df,
    x=feature_selected,
    color="species",
    barmode="overlay",
    opacity=0.7,
    title=hist_title,
)
st.plotly_chart(hist_fig, use_container_width=True)

# Data preview
# The filtered rows are rendered inside the expander created here.
preview_panel = st.expander("View Filtered Data")
preview_panel.dataframe(filtered_df)