import asyncio
from panel.io.pyodide import init_doc, write_doc

init_doc()

import folium
from matplotlib.figure import Figure
import panel as pn
import numpy as np
import pandas as pd
from datetime import timedelta

# Need to configure getting this in py-config tag
url = r'dallasdata.csv.zip'
data = pd.read_csv(url)
data['begin'] = pd.to_datetime(data['begin'])
data['end'] = pd.to_datetime(data['end'])

#################################################
# Prepping dates

# Min/Max in dataset
date_min = data['begin'].min().date()
date_max = data['begin'].max().date()

# Figuring out week aggregation
# Monday = 0, Sunday = 6
begin_week = date_min + timedelta(7 - date_min.weekday())  # sets to following Monday
week_df = pd.date_range(begin_week, date_max, freq="7D")[:-1]
week_df = pd.DataFrame(week_df, columns=['Week']).reset_index()
week_df.columns = ['WeekIndex', 'WeekBegin']
data['WeekIndex'] = np.floor((data['begin'] - pd.to_datetime(begin_week)).dt.days/7).astype(int)

# Last 30 days flag
prior30 = date_max + timedelta(-30)
data['Prior30'] = 1*(data['begin'].dt.date > prior30)

# Figuring out month aggregation
begin_month = date_min + pd.offsets.MonthBegin()
end_month = date_max + pd.offsets.MonthEnd(-1)
month_df = pd.date_range(begin_month, end_month, freq="M") + pd.offsets.MonthBegin(-1)
month_df = pd.DataFrame(month_df, columns=['Month']).reset_index()
month_df.columns = ['MonthIndex', 'MonthBegin']
data['Year'] = data['begin'].dt.year
data['Month'] = data['begin'].dt.month
data['Day'] = 1
data['MonthBegin'] = pd.to_datetime(data[['Year', 'Month', 'Day']])
data['Total'] = data['Day']  # constant 1 per incident, used as the counting variable


# Function to aggregate to weeks
def agg_weeks(data, field_sum=['Total'], week_index='WeekIndex', weeks=week_df,
              prior=8, drop_first=True):
    # Groupby
    ws = data.groupby(week_index, as_index=False)[field_sum].sum()
    ws = weeks.merge(ws, on=week_index, how='left')
    ws[field_sum] = ws[field_sum].fillna(0)
    # calculate moving averages for prior weeks, plus error bands
    # via the square-root (Poisson Z-score) transform
    for v in field_sum:
        p8v = f'Prior{prior}_{v}'
        ws[p8v] = ws[v].rolling(prior+1).sum()
        ws[p8v] = (ws[p8v] - ws[v])/prior
        ws[f'Low_{v}'] = ((-3/2 + np.sqrt(ws[p8v]))**2).clip(0)
        ws[f'Hig_{v}'] = (3/2 + np.sqrt(ws[p8v]))**2
    # drop the initial weeks without a full prior window
    if drop_first:
        ws = ws[~ws[p8v].isna()].reset_index(drop=True)
    return ws


# Function to aggregate to months
def agg_months(data, field_sum=['Total'], month_index='MonthBegin', months=month_df):
    ms = data.groupby(month_index, as_index=False)[field_sum].sum()
    ms = months.merge(ms, on=month_index, how='left')
    ms[field_sum] = ms[field_sum].fillna(0).astype(int)
    return ms
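
# Note on the error bands in agg_weeks (illustrative worked example, not part of
# the original pipeline): the square-root transform approximately stabilizes the
# variance of a Poisson count (Var(sqrt(X)) ~ 1/4), so +/- 3/2 on the square-root
# scale is roughly a +/- 3 standard deviation band. For example, with a prior
# 8-week average of 100 crimes per week:
#   sqrt(100) = 10
#   Low = (10 - 3/2)**2 =  72.25
#   Hig = (10 + 3/2)**2 = 132.25
# so an observed weekly count outside roughly 72-132 falls outside the shaded band
# drawn in week_line below.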

#################################################
# Sidebar text and widgets

htmlM = "This is a demonstration dashboard for **CRIME De-Coder**. Data are at most one day old, via the Dallas Open Data site."
htmlM += " See [here](https://github.com/apwheele/apwheele/blob/main/dallas_data.py) for how the data are generated."
htmlM += f"\n\nData spans from {date_min} to {date_max}."

html_pane = pn.pane.Markdown(htmlM, width=300)

style = {'background-color': '#DDDDDD', 'font-family': "Helvetica,Arial,sans-serif"}

# Mapping location codes to labels
loc_label = {0: 'Street',
             1: 'Apartment/Residence',
             2: 'Bar/Restaurant',
             3: 'Commercial',
             4: 'Gas/Convenience',
             5: 'Hotel/Motel',
             6: 'Other',
             7: 'Outdoor',
             8: 'Parking Lot',
             9: 'Store',
             10: 'School'}

# Mapping NIBRS categories
nibr_num = {'LARCENY/ THEFT OFFENSES': 0,
            'MOTOR VEHICLE THEFT': 1,
            'DESTRUCTION/ DAMAGE/ VANDALISM OF PROPERTY': 2,
            'ASSAULT OFFENSES': 3,
            'DRUG/ NARCOTIC VIOLATIONS': 4,
            'BURGLARY/ BREAKING & ENTERING': 5,
            'ALL OTHER OFFENSES': 6,
            'TRAFFIC VIOLATION - HAZARDOUS': 7,
            'ROBBERY': 8,
            'PUBLIC INTOXICATION': 9,
            'WEAPON LAW VIOLATIONS': 10,
            'FRAUD OFFENSES': 11,
            'DRIVING UNDER THE INFLUENCE': 12,
            'TRESPASS OF REAL PROPERTY': 13,
            'FAMILY OFFENSES, NONVIOLENT': 14,
            'STOLEN PROPERTY OFFENSES': 15,
            'EMBEZZELMENT': 16,
            'COUNTERFEITING / FORGERY': 17}
nibr_lab = {v: k for k, v in nibr_num.items()}

# Recode numeric categories to their labels
data['nibrs_cat'] = data['nibrs_cat'].replace(nibr_lab)
data['location'] = data['location'].replace(loc_label)

#################################################
#################################################
# Functions to return the dashboard panes

nibrs = list(nibr_num.keys())
location = list(loc_label.values())

# Alternative widget types
#nibrs_widget = pn.widgets.CheckBoxGroup(name='NIBRS', options=nibrs, value=nibrs)
#loc_widget = pn.widgets.CheckBoxGroup(name='Location selection', options=location, value=location)
#loc_widget = pn.widgets.Select(name='Location', options=location)

nibrs_widget = pn.widgets.MultiSelect(name='NIBRS selection', options=nibrs, value=nibrs)
loc_widget = pn.widgets.MultiSelect(name='Location selection', options=location, value=location)


# Weekly counts with error bands around the prior 8-week average
@pn.depends(nibrs_widget.param.value, loc_widget.param.value)
def week_line(nibrs_widget, loc_widget):
    sd = data['nibrs_cat'].isin(nibrs_widget) & data['location'].isin(loc_widget)
    week_agg = agg_weeks(data[sd])
    fig0 = Figure(figsize=(7.5, 3))
    ax = fig0.subplots()
    # Alternative hvplot/Bokeh version, kept for reference
    #tools = ['reset','hover']
    #wp = week_agg.hvplot.line(title='Weekly Counts', x='WeekBegin', y='Total',
    #                          height=500, width=620, color='black', hover=False, tools=tools)
    #ws = week_agg.hvplot.scatter(x='WeekBegin', y='Total', fill_color='black', line_color='white',
    #                             size=50, padding=0.03, xlabel='', ylabel='', tools=tools)
    #ws.yaxis.minor_tick_line_color = None
    #ws.xaxis.minor_tick_line_color = None
    #return wp*ws
    ax.plot(week_agg['WeekBegin'], week_agg['Prior8_Total'], color='k',
            label='Prior 8 Weeks', zorder=3)
    ax.fill_between(week_agg['WeekBegin'], week_agg['Low_Total'], week_agg['Hig_Total'],
                    alpha=0.2, zorder=2, color='k', label='Prior 8 Weeks')
    ax.plot(week_agg['WeekBegin'], week_agg['Total'], color="#286090", linewidth=1.5,
            marker='o', markeredgecolor='w', label='Actual', markersize=5, zorder=1)
    ax.set_ylabel(None)
    # Making a nicer legend, combining the prior line and band into one entry
    handler, labeler = ax.get_legend_handles_labels()
    hd = [(handler[0], handler[1]), handler[2]]
    ax.legend(hd, [labeler[0], labeler[2]], bbox_to_anchor=(1.02, 1.0),
              loc='upper left', prop={'size': 10})
    ax.set_title('Crime Counts per Week')
    sup_title = 'Error bars generated via Poisson Z-Scores'
    ax.annotate(sup_title, (0, 0), (0, -25), xycoords='axes fraction',
                textcoords='offset points', va='top')
    return pn.pane.Matplotlib(fig0, width=750, height=300, tight=True, dpi=300)
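
# Side note (not in the original app): newer Panel versions also expose pn.bind,
# which gives the same reactive behavior as the @pn.depends decorator without
# shadowing the widget names inside the function. A rough sketch:
#
#   def week_line_fn(nibrs_sel, loc_sel):
#       ...  # same body as week_line above
#   week_line_bound = pn.bind(week_line_fn, nibrs_widget, loc_widget)
#
# Either form can be passed directly into a Panel layout such as pn.Column.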

# For the table/map, eliminate locations that are Dallas PD headquarters or substations
# https://dallaspolice.net/about/Pages/Contact.aspx
pd_locs = ['1400 BOTHAM JEAN BLVD',
           '6969 MCCALLUM BLVD',
           '9915 E NORTHWEST HWY',
           '9801 HARRY HINES BLVD',
           '4230 W ILLINOIS AVE',
           '725 N JIM MILLER RD',
           '334 S HALL ST',
           '1999 E CAMP WISDOM RD']

# CSS for the repeat address table
table_css = '''
/* Alternate row coloring */
tr:nth-child(even) { background-color: #DDDDDD; }
tr:nth-child(odd) { background-color: #FFFFFF; }

/* First column larger */
td:nth-child(2), th:nth-child(2) { width: 150px; }

/* Right align columns example
td:nth-child(2), td:nth-child(3),
th:nth-child(2), th:nth-child(3) { text-align: right; }
*/

/* Background color of header */
th { background-color: #999999 }

/* No borders in table, no vertical borders in header and cells */
table, th, td {
  font-family: Menlo,Monaco,Consolas,"Courier New",monospace;
  border: none;
  border-collapse: collapse;
  border-left: none;
  border-right: none;
  border-bottom: none;
  border-top: none;
}

/* Cell padding */
th, td { padding: 0% 2% 0% 2%; }
'''


# Table of the top 10 repeat addresses
@pn.depends(nibrs_widget.param.value, loc_widget.param.value)
def table_agg(nibrs_widget, loc_widget):
    sd = data[data['nibrs_cat'].isin(nibrs_widget) & data['location'].isin(loc_widget)]
    sd = sd[~sd['address'].isin(pd_locs)]  # no police departments for this table
    agg_locs = sd.groupby(['address', 'lat', 'lon'], as_index=False)[['Prior30', 'Total']].sum()
    agg_locs = agg_locs.sort_values(by='Total', ascending=False, ignore_index=True).head(10)
    agg_locs['Rank'] = range(agg_locs.shape[0])
    agg_locs = agg_locs[['Rank', 'address', 'Total', 'Prior30']]
    agg_locs.columns = ['Rank', 'Top 10 Repeat Addresses', 'Total Count', 'Prior 30 Days']
    return pn.pane.DataFrame(agg_locs, index=False, width=400, stylesheets=[table_css])
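
# Side note (assumption about the rendering path): pn.pane.DataFrame renders the
# frame as a plain HTML table (pandas DataFrame.to_html under the hood), which is
# why the table_css rules above can target tr/th/td selectors directly. Passing
# the CSS via stylesheets scopes it to this pane rather than the whole page.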

# Folium map of the top 10 repeat addresses
@pn.depends(nibrs_widget.param.value, loc_widget.param.value)
def fol_map(nibrs_widget, loc_widget):
    sd = data[data['nibrs_cat'].isin(nibrs_widget) & data['location'].isin(loc_widget)]
    sd = sd[~sd['address'].isin(pd_locs)]  # no police departments for this map
    agg_locs = sd.groupby(['address', 'lat', 'lon'], as_index=False)[['Prior30', 'Total']].sum()
    agg_locs = agg_locs.sort_values(by='Total', ascending=False, ignore_index=True).head(10)
    m = folium.Map(location=[32.787, -96.798], zoom_start=10)
    for i in range(agg_locs.shape[0]):
        li = agg_locs.loc[i]
        ll = li[['lat', 'lon']].tolist()
        if i < 3:
            ico = folium.Icon("blue", icon_color="white", prefix="fa", icon=f"{i}")  #, icon="fa-location-dot"
        else:
            ico = folium.Icon("blue", icon_color="white", prefix="fa", icon=f"{i}")
        popup = (f'Address: {li["address"]}<br>'
                 f'Prior 30 Days: {li["Prior30"]}<br>'
                 f'Total: {li["Total"]}')
        popup = folium.Popup(popup, max_width=200)
        ma = folium.Marker(ll, popup=popup, icon=ico)
        ma.add_to(m)
    return pn.pane.plot.Folium(m, height=400, width=400)

#############################
# Serving the app

pn.extension()

custom_style = {"background-color": "#f2f2f2"}

sidebar = pn.layout.WidgetBox(html_pane,
                              pn.WidgetBox(nibrs_widget),
                              pn.WidgetBox(loc_widget),
                              styles=custom_style)

main = pn.Tabs(('Repeat Address', pn.Column(pn.Row(table_agg, fol_map))),
               ('Time Series Analysis', pn.Column(week_line, width=600)))

pn.Row(sidebar, main).servable();

#############################
asyncio.ensure_future(write_doc());
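
# Deployment note (an assumption about the build, not from the original file): the
# init_doc()/write_doc() calls and the asyncio line above are the boilerplate that
# `panel convert` emits when targeting Pyodide, e.g. something along the lines of:
#
#   panel convert dallas_dashboard.py --to pyodide-worker --out docs
#
# where dallas_dashboard.py is a hypothetical name for the plain `panel serve`
# version of this script. The generated HTML/JS then runs entirely in the browser,
# which is why the zipped CSV needs to be reachable relative to the page (see the
# py-config note near the top of the file).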