Py.Cafe

lingyielia/

vizro-data-quality-issues

Data Quality Issues Overview

DocsPricing
  • assets/
  • app.py
  • finance_customer_quality_report.csv
  • requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Vizro is an open-source toolkit for creating modular data visualization applications.
# Check out https://github.com/mckinsey/vizro for more information about Vizro,
# and see https://vizro.readthedocs.io/en/stable/ for the documentation.

############ Imports ##############
import vizro.plotly.express as px
import vizro.models as vm
from vizro import Vizro
import pandas as pd
from vizro.managers import data_manager
import dash_mantine_components as dmc
from dash import html
from vizro.models.types import capture
from typing import Optional

####### Data Manager Settings #####
# Register the CSV report under the "DataQuality" key so that dashboard
# figures can refer to the data set by name instead of holding the frame.
data_manager["DataQuality"] = pd.read_csv("finance_customer_quality_report.csv")

########### Custom Figure Functions ############
@capture("figure")
def data_quality_cards(data_frame: pd.DataFrame, max_cards: Optional[int] = None) -> html.Div:
    """Render one collapsible card per data quality issue.

    Each row of ``data_frame`` becomes a ``dmc.Card`` showing the affected
    column name, a colored badge for the issue type, and an accordion whose
    header is the issue description and whose panel lists the recommendation,
    implementation and assessment texts.

    Args:
        data_frame: Issues to display. Must provide the columns
            "Issue Type", "Column Name", "Description", "Recommendation",
            "Implementation" and "Assessment".
        max_cards: Maximum number of rows to render, taken from the top of
            ``data_frame``. ``None`` (the default) renders every row.

    Returns:
        An ``html.Div`` containing the cards in row order.
    """
    # Badge color per known issue type; any other value falls back to gray.
    issue_colors = {
        "missing_values": "#00b4ff",
        "outliers": "#ff9222",
        "invalid_values": "#ff5267",
    }

    # (heading, source column) pairs shown in the accordion panel, in order.
    detail_sections = (
        ("Recommendation:", "Recommendation"),
        ("Implementation:", "Implementation"),
        ("Assessment:", "Assessment"),
    )

    # Compare against None so that an explicit max_cards=0 yields no cards
    # instead of silently disabling the limit.
    df_subset = data_frame if max_cards is None else data_frame.head(max_cards)

    cards = []
    for row_label, row in df_subset.iterrows():
        issue_type = row["Issue Type"]
        color = issue_colors.get(issue_type, "gray")
        # Coerce to str so a non-string cell (such as a missing value)
        # cannot raise AttributeError on .replace().
        badge_label = str(issue_type).replace("_", " ").title()

        # Every section contributes a spacer, a bold heading and dimmed text.
        panel_children = []
        for heading, column in detail_sections:
            panel_children.extend(
                [
                    dmc.Space(h="sm"),
                    dmc.Text(heading, fw=500, size="sm"),
                    dmc.Text(row[column], size="sm", c="dimmed"),
                ]
            )

        card = dmc.Card(
            children=[
                # Header line: column name on the left, issue badge on the right.
                dmc.Group(
                    [
                        dmc.Text(row["Column Name"], size="lg", fw=500),
                        dmc.Badge(badge_label, color=color, variant="filled"),
                    ],
                    justify="space-between",
                ),
                dmc.Space(h="md"),
                dmc.Accordion(
                    children=[
                        dmc.AccordionItem(
                            children=[
                                dmc.AccordionControl(row["Description"]),
                                dmc.AccordionPanel(panel_children),
                            ],
                            # The row's index label keeps the item value
                            # distinct from the other rows' items.
                            value=f"item-{row_label}",
                        )
                    ]
                ),
            ],
            withBorder=True,
            shadow="sm",
            style={"marginBottom": "16px"},
        )
        cards.append(card)

    return html.Div(cards)

########### Model code ############
# Single-page dashboard: the page shows the card figure built from the
# "DataQuality" data set and exposes two filters over that data.
model = vm.Dashboard(
    title="Data Quality Report",
    pages=[
        vm.Page(
            title="Data Quality Report",
            components=[
                # data_frame is given as the data manager key, not a DataFrame.
                vm.Figure(
                    id="data_quality_cards_figure",
                    figure=data_quality_cards(data_frame="DataQuality"),
                ),
            ],
            controls=[
                # Issue types are picked through an explicit checklist selector.
                vm.Filter(column="Issue Type", selector=vm.Checklist()),
                # No selector given here, so Vizro's default one is used.
                vm.Filter(column="Column Name"),
            ],
        ),
    ],
)

# Build the app from the model and start serving it.
Vizro().build(model).run()