Py.Cafe

huong-li-nguyen/

vizro-polars-user-example

US Presidential Election Winners by State (1976-2020)

DocsPricing
  • 1976-2020-president.csv
  • app.py
  • requirements.txt
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# Vizro is an open-source toolkit for creating modular data visualization applications.
# check out https://github.com/mckinsey/vizro for more info about Vizro
# and checkout https://vizro.readthedocs.io/en/stable/ for documentation.

import polars as pl
import vizro.models as vm
import vizro.plotly.express as px
from vizro import Vizro
from vizro.actions import filter_interaction
from vizro.tables import dash_data_table

PATH = "1976-2020-president.csv"

df = pl.read_csv(
    PATH,
    infer_schema_length=10000,
    schema_overrides={"writein": pl.Utf8},  # Assuming 'writein' is better handled as a string
    null_values="NA",
    ignore_errors=True,
)

# Get the winner by year and state
winner_by_year_state = df.group_by(["year", "state_po"]).agg(
    [
        pl.col("party_detailed")
        .filter(pl.col("candidatevotes") == pl.col("candidatevotes").max())
        .first()
        .alias("Winning_Party"),
        pl.col("candidatevotes").max().alias("candidatevotes"),
        pl.col("totalvotes").first(),
    ]
)

# Format party column to be title case
winner_by_year_state = winner_by_year_state.with_columns(pl.col("Winning_Party"))

# Calculate the vote percentage for each winner
winner_by_year_state = winner_by_year_state.with_columns(
    (pl.col("candidatevotes") / pl.col("totalvotes")).round(4).alias("Votes")
)

# Select relevant columns
winner_by_year_state = winner_by_year_state.select(["year", "state_po", "Winning_Party", "Votes"])

# Filter data for a specific year (e.g., 2020)
# selected_year = 2020
# filtered_df = winner_by_year_state.filter(pl.col('year') == selected_year)

# Define color mapping for the parties
party_colors = {"DEMOCRAT": "blue", "REPUBLICAN": "red", "DEMOCRATIC-FARMER-LABOR": "orange"}  # Add more parties if necessary

# Group by state and aggregate necessary columns
result = (
    df.group_by(["state", "state_po"])
    .agg(
        [
            pl.col("totalvotes").sum().alias("Total Votes"),
            pl.col("candidatevotes").sum().alias("Total Candidate Votes"),
            (pl.col("candidatevotes").sum() / pl.col("totalvotes").sum() * 100).alias("Winning Vote Percentage"),
            pl.col("candidate").first().alias("Winning Candidate"),
            pl.col("party_simplified").first().alias("Winning_Party"),
        ]
    )
    .sort("state_po")
)

# Convert to pandas for Plotly compatibility
winner_by_year_state = winner_by_year_state.to_pandas()
result = result.to_pandas()

page2 = vm.Page(
    title="Page 2",
    path="my-custom-url",
    components=[
        vm.Graph(
            id="location",
            figure=px.choropleth(
                winner_by_year_state,
                locations="state_po",
                locationmode="USA-states",
                color="Winning_Party",
                hover_name="Winning_Party",
                hover_data=["Winning_Party", "Votes"],
                scope="usa",
                labels={"Votes": "Vote Percentage"},
                color_discrete_map=party_colors,
                custom_data=["state_po"],
            ),
            actions=[vm.Action(function=filter_interaction(targets=["result_table"]))],
        ),
        vm.Table(id="result_table", title="Election Result", figure=dash_data_table(data_frame=result)),
    ],
    controls=[
        vm.Filter(column="year", targets=["location"]),
    ],
)

dashboard = vm.Dashboard(pages=[page2])

Vizro().build(dashboard).run()