PyCafe - Panel - Complex Tennis Player Statistics

app.py
novakkk.jpg
player_stats.csv
player_stats.xlsx
requirements.txt
wimbledon_grass_court.jpg
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd

# The modified link to the Google Sheets CSV export

# Load the data into a DataFrame directly from the CSV export link
player_stats = pd.read_csv('player_stats.csv')



# In[2]:


import panel as pn
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.image as mpimg
import os
from PIL import Image  # Add this import statement
import numpy as np
import requests
from io import BytesIO
import matplotlib.colors as mcolors


# Initialize Panel extension
pn.extension()

# Title and Introduction
header = pn.pane.Markdown("## Player Statistics Dashboard presented by Minh Trinh, collaborated with Professor Eren Bilen")
intro = pn.pane.Markdown("""
Welcome to the Complex Tennis Player Statistics Interactive Dashboard!
This interactive dashboard illustrates a comprehensive overview of individual player statistics in Grand Slam events from 2019 to 2023. You can compare the selected player’s performance against the averages of other
elite and non-elite players, as well as visualize their serve distribution.""")
comparison_table_title = pn.pane.Markdown("""
### Comparison Table: Player vs Elite vs Non-Elite Statistics 
##### Elite players are the players with the best performances in Grand Slam from 2019-2023. The 'elite players' list includes: Novak Djokovic, Alexander Zverev, Andrey Rublev, Carlos Alcaraz, Casper Ruud, Daniil Medvedev, Dominic Thiem, Felix Auger Aliassime, Matteo Berrettini, Rafael Nadal, Roger Federer, and Stefanos Tsitsipas
""")
# Sidebar Dropdown for Player Selection
selected_player_widget = pn.widgets.Select(
    name="Select Player", 
    options=list(player_stats['player_name'].unique()),  # Convert to list
    value=player_stats['player_name'].iloc[0],
    width=200,
)
image_path = "novakkk.jpg"  # Replace with the correct path if it's not in the working directory

if os.path.exists(image_path):
    sidebar_image = pn.layout.Column(
        pn.pane.JPG(image_path, width=300, height=200),
        align="center"
    )
else:
    print(f"Image '{image_path}' not found. Using a placeholder.")
    sidebar_image = pn.pane.Markdown("### [Image Not Found]")

# Calculate Average Stats for Elite and Non-Elite Players
elite_avg_stats = player_stats[player_stats["elite_numeric"] == 1].mean(numeric_only=True)
non_elite_avg_stats = player_stats[player_stats["elite_numeric"] == 0].mean(numeric_only=True)

# Function to create comparison table
def create_comparison_table(selected_player):
    player_data = player_stats[player_stats["player_name"] == selected_player].iloc[0]
    
    comparison_table = pd.DataFrame({
        "Metric": [
            "Grand Slam Matches Win%","Avg 1st Serve Speed (KM/H)", "Avg 2nd Serve Speed (KM/H)", "Matches Played", 
            "Aces per Match", "Double Faults per Match", "Deep Return %",
             "1st Serve In per Match",
            "1st Serve Won per Match", "2nd Serve Won per Match", "Break Points Saved per Match",
            "Break Points Faced per Match", "Break Points Saved Percentage", "Deficit per Match",
            "Forehand Winners per Match", "Backhand Winners per Match", "0-4 Rallies Win %", "5-8 Rallies Win %"
            , "9+ Rallies Win %"
        ],
        f"{selected_player}'s Stats": [
            player_data["Match_Win_Percentage"],player_data["Avg_1st_ServeSpeed"], player_data["Avg_2nd_ServeSpeed"], player_data["match_played"],
            player_data["avg_ace"], player_data["avg_df"], player_data["Deep_Return%"],
            player_data["1stIn_per_match"], 
            player_data["1stWon_per_match"], player_data["2ndWon_per_match"], player_data["bpSaved_per_match"],
            player_data["bpFaced_per_match"], player_data["bpSaved%"], player_data["avg_deficit"],
            player_data["avg_Forehand"], player_data["avg_Backhand"],player_data["0-4_%"],
            player_data["5-8_%"],player_data["9+_%"]
        ],
        "Elite Players' Average": [
            elite_avg_stats["Match_Win_Percentage"],elite_avg_stats["Avg_1st_ServeSpeed"], elite_avg_stats["Avg_2nd_ServeSpeed"], elite_avg_stats["match_played"],
            elite_avg_stats["avg_ace"], elite_avg_stats["avg_df"], elite_avg_stats["Deep_Return%"],
            elite_avg_stats["1stIn_per_match"],
            elite_avg_stats["1stWon_per_match"], elite_avg_stats["2ndWon_per_match"], elite_avg_stats["bpSaved_per_match"],
            elite_avg_stats["bpFaced_per_match"], elite_avg_stats["bpSaved%"], elite_avg_stats["avg_deficit"],
            elite_avg_stats["avg_Forehand"], elite_avg_stats["avg_Backhand"],elite_avg_stats["0-4_%"],
            elite_avg_stats["5-8_%"],elite_avg_stats["9+_%"]
        ],
        "Non-Elite Players' Average": [
            non_elite_avg_stats["Match_Win_Percentage"],non_elite_avg_stats["Avg_1st_ServeSpeed"], non_elite_avg_stats["Avg_2nd_ServeSpeed"], non_elite_avg_stats["match_played"],
            non_elite_avg_stats["avg_ace"], non_elite_avg_stats["avg_df"], non_elite_avg_stats["Deep_Return%"],
            non_elite_avg_stats["1stIn_per_match"],
            non_elite_avg_stats["1stWon_per_match"], non_elite_avg_stats["2ndWon_per_match"], non_elite_avg_stats["bpSaved_per_match"],
            non_elite_avg_stats["bpFaced_per_match"], non_elite_avg_stats["bpSaved%"], non_elite_avg_stats["avg_deficit"],
           non_elite_avg_stats["avg_Forehand"], non_elite_avg_stats["avg_Backhand"], non_elite_avg_stats["0-4_%"],
            non_elite_avg_stats["5-8_%"],non_elite_avg_stats["9+_%"]
        ]
    })
    
    # Ensure that values are numeric and format with 2 decimals without trailing zeros
    for col in comparison_table.columns[1:]:
        comparison_table[col] = comparison_table[col].apply(lambda x: f"{x:.2f}".rstrip('0').rstrip('.'))
    
    return comparison_table

def highlight_values(row):
    """
    Apply color formatting to highlight the highest and lowest values in a row.
    Green for the largest value, red for the smallest value.

    Args:
        row (pd.Series): A row of numeric values.

    Returns:
        list: A list of CSS styles for each value in the row.
    """
    # Convert row to numeric values, ignoring non-numeric values
    numeric_values = pd.to_numeric(row, errors='coerce')

    # Identify max and min values (ignoring NaNs)
    max_val = numeric_values.max()
    min_val = numeric_values.min()

    # Apply colors based on max and min values
    colors = [
        "color: green;" if value == max_val else
        "color: red;" if value == min_val else
        ""
        for value in numeric_values
    ]
    return colors
# Function to update the dashboard dynamically
def update_dashboard(event):
    selected_player = selected_player_widget.value
    comparison_table = create_comparison_table(selected_player)
    
    # Apply styling to the table
    styled_table = (
        comparison_table.set_index("Metric")
        .style.apply(highlight_values, axis=1, subset=[f"{selected_player}'s Stats", "Elite Players' Average", "Non-Elite Players' Average"])
    )
    
    comparison_table_pane.object = styled_table

# Create initial comparison table for the first player
comparison_table_pane = pn.pane.DataFrame(create_comparison_table(selected_player_widget.value), width=600, height=200)

# Watch the player selection to update the table
selected_player_widget.param.watch(update_dashboard, 'value')


# Watch the player selection to update the table
selected_player_widget.param.watch(update_dashboard, 'value')

note = pn.pane.Markdown("""
**Note:**
- The comparison table shows the player's statistics against the averages of elite and non-elite players.
- **Elite players** are those ranked in the top 10, while **Non-Elite players** are those ranked outside of the top 10.
- This dashboard helps to visualize the player's performance in comparison to their peers in Grand Slam events.
""", width=400, height=200)

# Arrange the comparison table and the note side by side
layout = pn.layout.Column(
    pn.Row(comparison_table_pane, note),
    selected_player_widget
)


####################################################################################################################

# Define court regions
regions_ctl = {
    'W': {'x_min': 14, 'x_max': 16.25, 'y_min': 13, 'y_max': 18, 'color': '#d7d7d7'},
    'BW': {'x_min': 16.25, 'x_max': 18.5, 'y_min': 13, 'y_max': 18, 'color': '#1f1f1f'},
    'B': {'x_min': 18.5, 'x_max': 20.75, 'y_min': 13, 'y_max': 18, 'color': '#141414'},
    'BC': {'x_min': 20.75, 'x_max': 23, 'y_min': 13, 'y_max': 18, 'color': '#2d2d2d'},
    'C': {'x_min': 23, 'x_max': 25, 'y_min': 13, 'y_max': 18, 'color': '#484848'}
}
regions_nctl = {
    'W': {'x_min': 14, 'x_max': 16.25, 'y_min': 18, 'y_max': 23, 'color': '#4d4d4d'},
    'BW': {'x_min': 16.25, 'x_max': 18.5, 'y_min': 18, 'y_max': 23, 'color': '#a7a7a7'},
    'B': {'x_min': 18.5, 'x_max': 20.75, 'y_min': 18, 'y_max': 23, 'color': '#636363'},
    'BC': {'x_min': 20.75, 'x_max': 23, 'y_min': 18, 'y_max': 23, 'color': '#888888'},
    'C': {'x_min': 23, 'x_max': 25, 'y_min': 18, 'y_max': 23, 'color': '#ffffff'}
}

# Define label positions
label_positions = {
    'NCTL_C': {'x': 45, 'y': 35},
    'NCTL_BC': {'x': 40, 'y': 45},
    'NCTL_B': {'x': 25, 'y': 45},
    'NCTL_BW': {'x': 15, 'y': 35},
    'NCTL_W': {'x': 5, 'y': 45},
    'CTL_C': {'x': 45, 'y': 15},
    'CTL_BC': {'x': 42, 'y': 5},
    'CTL_B': {'x': 22, 'y': 7},
    'CTL_BW': {'x': 7, 'y': 5},
    'CTL_W': {'x': 5, 'y': 15}
}

# Function to prepare serve data for a player
def prepare_player_serve_data(player_name):
    player_data = player_stats[player_stats['player_name'] == player_name]
    serve_data = []
    for col in player_data.columns:
        if col.startswith("CTL") or col.startswith("NCTL"):
            serve_depth, serve_width = col.split("_")
            count = player_data[col].values[0]
            serve_data.append({
                "ServeDepth": serve_depth,
                "ServeWidth": serve_width,
                "count": count
            })
    serve_df = pd.DataFrame(serve_data)
    total_shots = serve_df['count'].sum()
    serve_df['percentage'] = (serve_df['count'] / total_shots) * 100
    return serve_df

# Function to add labels
def add_labels_for_regions(data_counts, label_positions, region_type, regions):
    for _, row in data_counts.iterrows():
        serve_width = row['ServeWidth']
        count = row['count']
        percentage = row['percentage']

        # Construct the key dynamically
        label_key = f"{region_type}_{serve_width}"

        # Check if label_key exists in label_positions
        if label_key in label_positions:
            x = label_positions[label_key]['x']
            y = label_positions[label_key]['y']
        else:
            raise ValueError(f"Label position for '{label_key}' not defined in label_positions.")

        # Label content
        label = f"{region_type} {serve_width}\n{count} shots\n({percentage:.2f}%)"
        
        # Draw line to region center
        region_center_x = (regions[serve_width]['x_min'] + regions[serve_width]['x_max']) / 2
        region_center_y = (regions[serve_width]['y_min'] + regions[serve_width]['y_max']) / 2
        plt.plot([x, region_center_x], [y, region_center_y], color='white', lw=1)

        # Add label background box
        label_box = patches.FancyBboxPatch(
            (x - 6, y - 3.5), 12, 7, boxstyle="round,pad=0.3",
            edgecolor='none', facecolor=regions[serve_width]['color'], alpha=0.6
        )
        plt.gca().add_patch(label_box)
        
        # Add text
        plt.text(x, y, label, fontsize=9, ha='center', va='center', color='white')

# Function to dynamically assign colors based on serve percentages
def assign_dynamic_colors(regions, data_counts):
    """
    Assign colors to regions dynamically based on serve percentages, using a seismic color scale.

    Args:
        regions (dict): Dictionary of regions (CTL or NCTL).
        data_counts (pd.DataFrame): Serve data with percentage for each region.

    Returns:
        dict: Updated regions with dynamically assigned colors.
    """
    # Normalize percentages between -1 and 1 for seismic color mapping
    percentages = data_counts['percentage'].values
    normalized = 2 * ((percentages - percentages.min()) / (percentages.max() - percentages.min())) - 1 if len(percentages) > 1 else [0] * len(percentages)

    # Map normalized values to the seismic color scale
    for region, norm_val in zip(data_counts['ServeWidth'], normalized):
        if norm_val < 0:
            # Blue to white (low values): interpolate between Blue (#0000FF) and White (#FFFFFF)
            r = int(255 * (1 + norm_val))  # Increase red
            g = int(255 * (1 + norm_val))  # Increase green
            b = 255  # Blue remains max
        else:
            # White to red (high values): interpolate between White (#FFFFFF) and Red (#FF0000)
            r = 255  # Red remains max
            g = int(255 * (1 - norm_val))  # Decrease green
            b = int(255 * (1 - norm_val))  # Decrease blue

        color = f'#{r:02x}{g:02x}{b:02x}'  # Seismic hex color
        regions[region]['color'] = color

    return regions

def load_court_image():
    court_img_path = 'wimbledon_grass_court.jpg'
    if os.path.exists(court_img_path):
        img = Image.open(court_img_path)
        return np.array(img)
    else:
        print("Court image not found. Using a placeholder.")
        return np.ones((500, 500, 3))  # Placeholder
    

def plot_combined_serve_distribution(player_name):
    """
    Plot the combined serve distribution for the selected player.
    Args:
        player_name (str): Name of the selected player.
    Returns:
        matplotlib.figure.Figure: The plot figure for Panel rendering.
    """
    court_image = load_court_image()
    fig, ax = plt.subplots()
    # Prepare serve data
    serve_df = prepare_player_serve_data(player_name)

    # Separate CTL and NCTL data
    ctl_data = serve_df[serve_df['ServeDepth'] == 'CTL']
    nctl_data = serve_df[serve_df['ServeDepth'] == 'NCTL']

    # Assign dynamic colors based on percentages
    assign_dynamic_colors(regions_ctl, ctl_data)
    assign_dynamic_colors(regions_nctl, nctl_data)

    # Create the plot
    fig, ax = plt.subplots(figsize=(12, 7))  # Expanded figure size
    court_img = load_court_image()
    ax.imshow(court_img, extent=[0, 50, 0, 50])
    ax.set_xlim(0, 50)
    ax.set_ylim(0, 50)

    # Plot CTL regions
    for region, data in regions_ctl.items():
        ax.add_patch(patches.Rectangle(
            (data['x_min'], data['y_min']),
            data['x_max'] - data['x_min'], 
            data['y_max'] - data['y_min'], 
            linewidth=1, edgecolor='black', facecolor=data['color'], alpha=0.5
        ))

    # Plot NCTL regions
    for region, data in regions_nctl.items():
        ax.add_patch(patches.Rectangle(
            (data['x_min'], data['y_min']),
            data['x_max'] - data['x_min'], 
            data['y_max'] - data['y_min'], 
            linewidth=1, edgecolor='black', facecolor=data['color'], alpha=0.5
        ))

    # Add labels for CTL and NCTL regions
    add_labels_for_regions(ctl_data, label_positions, 'CTL', regions_ctl)
    add_labels_for_regions(nctl_data, label_positions, 'NCTL', regions_nctl)

    # Titles and axis labels
    notes = (
        "Notes:\n"
        "1. ServeWidth: Categorizes serve placement horizontally:\n"
        "   - W: Wide\n"
        "   - BW: Body Wide\n"
        "   - B: Body\n"
        "   - BC: Body Center\n"
        "   - C: Center\n\n"
        "2. ServeDepth: Categorizes serve placement vertically:\n"
        "   - CTL: Close to the Service Line\n"
        "   - NCTL: Not Close to the Service Line\n\n"
        "3. Dynamic Colors: Represent the proportion of serves in each region, with darker red shades indicating higher percentages, and darker blue shades indicating lower percentages."
    )

    # Add the notes to the right of the graph
    ax_notes = fig.add_axes([0.65, 0.1, 0.2, 0.8])  # Add new axes for the notes
    ax_notes.axis('off')  # Turn off axes for the notes
    ax_notes.text(
        0, 1, notes, fontsize=10, verticalalignment='top',
        bbox=dict(boxstyle="round", facecolor="white", alpha=0.8), wrap=True
    )
    # Add a color legend below the notes
    ax_colorbar = fig.add_axes([0.65, 0.475, 0.25, 0.02])  # Position for the colorbar
    cmap = mcolors.LinearSegmentedColormap.from_list("", ["darkblue", "red"])
    norm = mcolors.Normalize(vmin=0, vmax=100)
    cb = plt.colorbar(
        plt.cm.ScalarMappable(norm=norm, cmap=cmap),
        cax=ax_colorbar, orientation='horizontal'
    )
    cb.set_label("Percentage of Serves (%)", fontsize=10)
    cb.ax.tick_params(labelsize=8)
    
    ax.set_title(f'Serve Distribution by {player_name}', fontsize=18, weight='bold')
    ax.set_xlabel('Serve Width', fontsize=12)
    ax.set_ylabel('Serve Depth', fontsize=12, rotation=90)

    # Adjust layout to avoid clipping
    plt.tight_layout(rect=[0, 0, 0.7, 1])  # Leave space for the notes on the right

    return fig

def plot_player_line_graph(player_name):
    # Get the percentages for the player
    player_data = player_stats[player_stats['player_name'] == player_name][['1_%', '2_%', '3_%', '4_%', 
                                                                            '5_%', '6_%', '7_%', '8_%', '9_%', '10_%', 
                                                                            '11_%', '12_%', '13_%', '14_%', '15+_%']].values.flatten()
    
    # Get the average percentages for elite players
    elite_data = player_stats[player_stats['elite_numeric'] == 1][['1_%', '2_%', '3_%', '4_%', 
                                                                   '5_%', '6_%', '7_%', '8_%', '9_%', '10_%', 
                                                                   '11_%', '12_%', '13_%', '14_%', '15+_%']].mean().values

    # Plot the line graph
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(range(1, 16), player_data, marker='o', color='b', label=player_name)
    ax.plot(range(1, 16), elite_data, marker='v', color='r', label='Elite Players (Average)')

    ax.set_title(f'Rally Category Percentages for {player_name}')
    ax.set_xlabel('Rally Length (Points)')
    ax.set_ylabel('Percentage (%)')
    ax.set_xticks(range(1, 16))
    ax.set_xticklabels([f'{i}' for i in range(1, 16)])
    ax.legend()
    
    return fig

# Updated: Panel integration for serve distribution
serve_distribution_pane = pn.pane.Matplotlib(
    plot_combined_serve_distribution(selected_player_widget.value), 
    width=800, height=600
)
# Create a new Matplotlib pane for the line graph
line_graph_pane = pn.pane.Matplotlib(
    plot_player_line_graph(selected_player_widget.value), 
    width=800, height=600
)

# Updated: Watch player selection for serve graph and line graph
def update_graphs(event):
    # Update the serve distribution graph
    serve_distribution_pane.object = plot_combined_serve_distribution(selected_player_widget.value)
    
    # Update the line graph
    line_graph_pane.object = plot_player_line_graph(selected_player_widget.value)

# Watch for player selection changes
selected_player_widget.param.watch(update_graphs, 'value')

# Updated: Create comparison table dynamically
comparison_table_pane = pn.pane.DataFrame(
    create_comparison_table(selected_player_widget.value), 
    width=600, height=400
)

# Updated: Dynamic dashboard layout with line graph
template = pn.template.FastListTemplate(
    title='Complex Professional Tennis Player Statistics in Grand Slams from 2019-2023', 
    sidebar=[
        pn.pane.Markdown("# Complex Player Statistics Interactive Dashboard"), 
        pn.pane.Markdown("### This dashboard is presented by Minh Trinh in collaboration with professor Eren Bilen. \n Contact: quangminh711@gmail.com"),
        intro,
        sidebar_image,
        selected_player_widget
    ],
    main=[
        pn.Row(
            pn.Column(width=150),
            pn.Column(comparison_table_title, comparison_table_pane, margin=(0, 25), width=300)
        ),
        pn.Row(
            serve_distribution_pane
        ),
        pn.Row(
            line_graph_pane  # Add the line graph below the serve distribution
        )
    ],
    accent_base_color="#90EE90",  
    header_background="#90EE90", 
    
    )


template.servable()
template


# In[3]:


#get_ipython().system('jupyter nbconvert --to script test.ipynb')


# In[ ]:
Py.Cafe

panel-tennis-stats

Complex Tennis Player Statistics