import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html
import dash_ag_grid as dag
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
# Load the dataset from the CSV file
df = pd.read_csv('Post45_NEAData_Final.csv')

# Clean dataset: fill missing values column by column.
# The birth-year median is computed from the values present before filling.
# NOTE(review): the key below is 'state', but later code reads the
# 'us_state' column; fillna leaves columns not named here untouched,
# so confirm which column was meant.
missing_value_defaults = {
    'birth_year': df['birth_year'].median(),
    'state': 'Unknown',
    'hometown': 'Unknown',
}
df = df.fillna(value=missing_value_defaults)
# Analyze dataset
# Create a new feature: 'age of writer' (grant year minus birth year)
df['age of writer'] = df['nea_grant_year'] - df['birth_year']
# Categorize writers based on age groups.
# right=False makes every bin [lower, upper), so the labels mean what they
# say: a 30-year-old falls in '30-50' (not '<30') and a 70-year-old in '70+'.
# The last edge is open-ended so ages of 100 and above still count as '70+'
# instead of becoming NaN.
df['age_group'] = pd.cut(
    df['age of writer'],
    bins=[0, 30, 50, 70, float('inf')],
    labels=['<30', '30-50', '50-70', '70+'],
    right=False,
)
# Count of grants by state: one row per 'us_state' value with its frequency
grants_by_state = (
    df['us_state']
    .value_counts()
    .rename_axis('us_state')
    .reset_index(name='grant_count')
)
# Count of writers by hometown: one row per 'hometown' value with its frequency
writers_by_hometown = (
    df['hometown']
    .value_counts()
    .rename_axis('hometown')
    .reset_index(name='writer_count')
)
# Geocoding hometowns for map visualization
geolocator = Nominatim(user_agent='nea_analysis')
# Wrap the geocoder so consecutive requests are at least one second apart
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)


def _geocode_hometown(hometown):
    """Return the geocoded location for *hometown*, or None if it is unknown.

    Missing hometowns are filled with the placeholder 'Unknown' during
    cleaning; sending that string to the geocoder would waste a rate-limited
    request and could resolve to an unrelated place on the map.
    """
    if hometown == 'Unknown':
        return None
    return geocode(hometown)


# Adding latitude and longitude for hometowns (None when no location is found)
writers_by_hometown['location'] = writers_by_hometown['hometown'].apply(_geocode_hometown)
writers_by_hometown['latitude'] = writers_by_hometown['location'].apply(
    lambda loc: loc.latitude if loc else None
)
writers_by_hometown['longitude'] = writers_by_hometown['location'].apply(
    lambda loc: loc.longitude if loc else None
)
# Filter out rows without location data for visualization
writers_by_hometown_map = writers_by_hometown.dropna(subset=['latitude', 'longitude'])
# Counting universities: number of non-null entries per row across these columns
# NOTE(review): 'post45_hathi_entry' does not look like a degree column;
# confirm it is meant to be included in the count.
institution_columns = ['ba', 'ba2', 'ma', 'ma2', 'phd', 'mfa', 'mfa2', 'post45_hathi_entry']
has_entry = df[institution_columns].notna()
df['university_count'] = has_entry.sum(axis=1)
# Count of grants per year: number of rows for each 'nea_grant_year'
grants_per_year = (
    df.groupby('nea_grant_year')
    .size()
    .rename('grant_count')
    .reset_index()
)
# Build the figures displayed in the dashboard
# Visualization: histogram of 'university_count' across all rows of df
fig_university_count = px.histogram(
    data_frame=df,
    x='university_count',
    nbins=10,
    title='Distribution of University Count per Writer',
    labels=dict(university_count='Number of Universities'),
    template='plotly_dark',
).update_layout(
    # Same dark-grey background and white font as the other figures
    paper_bgcolor='#1e1e1e',
    plot_bgcolor='#1e1e1e',
    font_color='white',
)
# Visualization: number of rows (grants) for each university_count value
grants_by_university_count = (
    df.groupby('university_count').size().reset_index(name='grant_count')
)
fig_university_vs_grant = px.bar(
    data_frame=grants_by_university_count,
    x='university_count',
    y='grant_count',
    title='University Count vs. Grant Count',
    labels=dict(
        university_count='Number of Universities',
        grant_count='Number of Grants',
    ),
    template='plotly_dark',
).update_layout(
    # Same dark-grey background and white font as the other figures
    paper_bgcolor='#1e1e1e',
    plot_bgcolor='#1e1e1e',
    font_color='white',
)
# Visualization: histogram of writer age at grant time, coloured by age group
fig_histogram = px.histogram(
    data_frame=df,
    x='age of writer',
    color='age_group',
    nbins=20,
    title='Distribution of Writers by Age at Time of Grant',
    labels={'age of writer': 'Age of Writer'},
    template='plotly_dark',
).update_layout(
    # Same dark-grey background and white font as the other figures
    paper_bgcolor='#1e1e1e',
    plot_bgcolor='#1e1e1e',
    font_color='white',
)
# Visualization: pie chart of how the rows of df split across 'age_group'
fig_pie = px.pie(
    data_frame=df,
    names='age_group',
    title='Age Group Distribution of Writers',
    template='plotly_dark',
).update_layout(
    # Same dark-grey background and white font as the other figures
    paper_bgcolor='#1e1e1e',
    plot_bgcolor='#1e1e1e',
    font_color='white',
)
# Visualization: Grants by State (Map)
# 'us_state' values are interpreted as state codes by locationmode='USA-states'.
fig_map = px.choropleth(
    grants_by_state,
    locations='us_state',
    locationmode='USA-states',
    color='grant_count',
    color_continuous_scale='Viridis',
    scope='usa',
    title='Grants by State',
    # Every other figure in this dashboard uses the dark template; without it
    # the map area and colour bar keep light-theme defaults on the dark page.
    template='plotly_dark'
)
fig_map.update_layout(
    plot_bgcolor='#1e1e1e',
    paper_bgcolor='#1e1e1e',
    font_color='white'
)
# Visualization: Writers by Hometown (Bar Chart)
# Missing hometowns were filled with the placeholder 'Unknown' during
# cleaning. Exclude that placeholder by value rather than by skipping the
# first row: a positional skip drops the real top hometown whenever
# 'Unknown' is not the most frequent entry.
known_hometowns = writers_by_hometown[writers_by_hometown['hometown'] != 'Unknown']
top_hometowns = known_hometowns.nlargest(10, 'writer_count')
fig_hometown = px.bar(
    top_hometowns,
    x='hometown',
    y='writer_count',
    title='Top 10 Hometowns of Writers',
    labels={
        'hometown': 'Hometown',
        'writer_count': 'Writer Count'
    },
    template='plotly_dark'
)
fig_hometown.update_layout(
    plot_bgcolor='#1e1e1e',
    paper_bgcolor='#1e1e1e',
    font_color='white'
)
# Visualization: one marker per geocoded hometown, sized by its writer count
hometown_geo_style = {
    'showland': True,
    'landcolor': '#1e1e1e',
    'showocean': True,
    'oceancolor': '#000033',
    'projection_type': 'natural earth',
}
fig_hometown_map = px.scatter_geo(
    data_frame=writers_by_hometown_map,
    lat='latitude',
    lon='longitude',
    size='writer_count',
    hover_name='hometown',
    title='Writers by Hometown (Map)',
    template='plotly_dark',
).update_layout(
    geo=hometown_geo_style,
    # Same dark-grey background and white font as the other figures
    paper_bgcolor='#1e1e1e',
    plot_bgcolor='#1e1e1e',
    font_color='white',
)
# Visualization: line chart of the number of grants in each grant year
fig_grants_per_year = px.line(
    data_frame=grants_per_year,
    x='nea_grant_year',
    y='grant_count',
    title='Grants per Year',
    labels=dict(
        nea_grant_year='Year',
        grant_count='Number of Grants',
    ),
    template='plotly_dark',
).update_layout(
    # Same dark-grey background and white font as the other figures
    paper_bgcolor='#1e1e1e',
    plot_bgcolor='#1e1e1e',
    font_color='white',
)
# Create the Dash app
app = Dash()

# Data grid over the full DataFrame: one column definition per df column,
# with pagination switched on
column_defs = [{"field": column_name} for column_name in df.columns]
grid = dag.AgGrid(
    columnDefs=column_defs,
    rowData=df.to_dict(orient="records"),
    dashGridOptions={"pagination": True},
)

# Page layout: the grid first, then each figure in display order
dashboard_figures = (
    fig_histogram,
    fig_pie,
    fig_university_count,
    fig_university_vs_grant,
    fig_map,
    fig_hometown,
    fig_hometown_map,
    fig_grants_per_year,
)
app.layout = html.Div(
    [grid, *(dcc.Graph(figure=figure) for figure in dashboard_figures)]
)
# Start the development server only when this file is executed as a script.
if __name__ == "__main__":
    # app.run replaces app.run_server, which is deprecated in Dash 2.x and
    # no longer available in Dash 3.
    app.run(debug=True)