import asyncio
from panel.io.pyodide import init_doc, write_doc
init_doc()
import folium
from matplotlib.figure import Figure
import panel as pn
import numpy as np
import pandas as pd
from datetime import timedelta
# Need to configure fetching this file via the py-config tag
url = r'dallasdata.csv.zip'
data = pd.read_csv(url)
data['begin'] = pd.to_datetime(data['begin'])
data['end'] = pd.to_datetime(data['end'])
#################################################
# Prepping dates
# Min/Max in dataset
date_min = data['begin'].min().date()
date_max = data['begin'].max().date()
# Figuring out week aggregation
# Monday = 0, Sunday = 6 (date.weekday())
begin_week = date_min + timedelta(7 - date_min.weekday()) # sets to following Monday
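# weekly bins run from the first full Monday; the final week, which may be only partially observed, is dropped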
week_df = pd.date_range(begin_week,date_max,freq="7D")[:-1]
week_df = pd.DataFrame(week_df,columns=['Week']).reset_index()
week_df.columns = ['WeekIndex','WeekBegin']
data['WeekIndex'] = np.floor((data['begin'] - pd.to_datetime(begin_week)).dt.days/7).astype(int)
# Last 30 days flag
prior30 = date_max + timedelta(-30)
data['Prior30'] = 1*(data['begin'].dt.date > prior30 )
# Figuring out month aggregation
begin_month = date_min + pd.offsets.MonthBegin()
end_month = date_max + pd.offsets.MonthEnd(-1)
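# note: freq="M" means month-end frequency here; newer pandas versions prefer freq="ME"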
month_df = pd.date_range(begin_month,end_month,freq="M") + pd.offsets.MonthBegin(-1)
month_df = pd.DataFrame(month_df,columns=['Month']).reset_index()
month_df.columns = ['MonthIndex','MonthBegin']
data['Year'] = data['begin'].dt.year
data['Month'] = data['begin'].dt.month
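# dummy columns: Day=1 builds the month-begin date below, Total=1 gives a per-incident count to sum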
data['Day'] = 1
data['MonthBegin'] = pd.to_datetime(data[['Year','Month','Day']])
data['Total'] = data['Day']
# Function to aggregate to weeks
def agg_weeks(data, field_sum=['Total'],
              week_index='WeekIndex', weeks=week_df,
              prior=8, drop_first=True):
    # Groupby week and sum the count fields
    ws = data.groupby(week_index, as_index=False)[field_sum].sum()
    ws = weeks.merge(ws, on=week_index, how='left')
    ws[field_sum] = ws[field_sum].fillna(0)
    # calculate moving averages for prior weeks
    for v in field_sum:
        p8v = f'Prior{prior}_{v}'
        ws[p8v] = ws[v].rolling(prior+1).sum()
        ws[p8v] = (ws[p8v] - ws[v])/prior
        # error bands via the square root (variance stabilizing) transform for
        # Poisson counts, i.e. (sqrt(mean) +/- 3/2)^2 for a Z-score of 3
        ws[f'Low_{v}'] = ((-3/2 + np.sqrt(ws[p8v]))**2).clip(0)
        ws[f'Hig_{v}'] = (3/2 + np.sqrt(ws[p8v]))**2
    if drop_first:
        # drop the leading weeks that do not have a full prior window
        ws = ws[~ws[p8v].isna()].reset_index(drop=True)
    return ws
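# e.g. agg_weeks(data) returns one row per week with Total, the trailing
# Prior8_Total average, and the Low_Total/Hig_Total error band columns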
# Function to aggregate to months
def agg_months(data, field_sum=['Total'], month_index='MonthBegin', months=month_df):
    ms = data.groupby(month_index, as_index=False)[field_sum].sum()
    ms = months.merge(ms, on=month_index, how='left')
    ms[field_sum] = ms[field_sum].fillna(0).astype(int)
    return ms
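# e.g. agg_months(data) returns one row per month with integer Total counts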
htmlM = "This is a demonstration dashboard for **CRIME De-Coder**. Data is at most one day old, via Dallas Open Data site."
htmlM += " See [here](https://github.com/apwheele/apwheele/blob/main/dallas_data.py) for how data is generated."
htmlM += f"\n\nData span from {date_min} to {date_max}."
html_pane = pn.pane.Markdown(htmlM, width=300)
style = {'background-color': '#DDDDDD',
'font-family': "Helvetica,Arial,sans-serif"}
loc_label = {0: 'Street',
1: 'Apartment/Residence',
2: 'Bar/Restaurant',
3: 'Commercial',
4: 'Gas/Convenience',
5: 'Hotel/Motel',
6: 'Other',
7: 'Outdoor',
8: 'Parking Lot',
9: 'Store',
10: 'School'}
# Mapping NIBRS categories
nibr_num = {'LARCENY/ THEFT OFFENSES': 0,
'MOTOR VEHICLE THEFT': 1,
'DESTRUCTION/ DAMAGE/ VANDALISM OF PROPERTY': 2,
'ASSAULT OFFENSES': 3,
'DRUG/ NARCOTIC VIOLATIONS': 4,
'BURGLARY/ BREAKING & ENTERING': 5,
'ALL OTHER OFFENSES': 6,
'TRAFFIC VIOLATION - HAZARDOUS': 7,
'ROBBERY': 8,
'PUBLIC INTOXICATION': 9,
'WEAPON LAW VIOLATIONS': 10,
'FRAUD OFFENSES': 11,
'DRIVING UNDER THE INFLUENCE': 12,
'TRESPASS OF REAL PROPERTY': 13,
'FAMILY OFFENSES, NONVIOLENT': 14,
'STOLEN PROPERTY OFFENSES': 15,
'EMBEZZELMENT': 16,
'COUNTERFEITING / FORGERY': 17}
nibr_lab = {v:k for k,v in nibr_num.items()}
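# the zipped CSV encodes these fields as integer codes; map them back to readable labels for filtering and display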
data['nibrs_cat'] = data['nibrs_cat'].replace(nibr_lab)
data['location'] = data['location'].replace(loc_label)
#################################################
#################################################
# Functions to return Bokeh plots
nibrs = list(nibr_num.keys())
location = list(loc_label.values())
# Alternative widget types
#nibrs_widget = pn.widgets.CheckBoxGroup(name='NIBRS', options=nibrs, value=nibrs)
#loc_widget = pn.widgets.CheckBoxGroup(name='Location selection',options=location, value=location)
#loc_widget = pn.widgets.Select(name='Location',options=location)
nibrs_widget = pn.widgets.MultiSelect(name='NIBRS selection', options=nibrs, value=nibrs)
loc_widget = pn.widgets.MultiSelect(name='Location selection',options=location, value=location)
@pn.depends(nibrs_widget.param.value, loc_widget.param.value)
def week_line(nibrs_widget, loc_widget):
    sd = data['nibrs_cat'].isin(nibrs_widget) & data['location'].isin(loc_widget)
    week_agg = agg_weeks(data[sd])
    fig0 = Figure(figsize=(7.5, 3))
    ax = fig0.subplots()
    # Alternative hvplot/Bokeh version
    #tools = ['reset','hover']
    #wp = week_agg.hvplot.line(title='Weekly Counts',x='WeekBegin', y='Total',
    #                          height=500, width=620,color='black',hover=False,tools=tools)
    #ws = week_agg.hvplot.scatter(x='WeekBegin',y='Total',fill_color='black',line_color='white',
    #                             size=50,padding=0.03,xlabel='',ylabel='',tools=tools)
    #ws.yaxis.minor_tick_line_color = None
    #ws.xaxis.minor_tick_line_color = None
    #return wp*ws
    ax.plot(week_agg['WeekBegin'], week_agg['Prior8_Total'], color='k', label='Prior 8 Weeks', zorder=3)
    ax.fill_between(week_agg['WeekBegin'], week_agg['Low_Total'], week_agg['Hig_Total'],
                    alpha=0.2, zorder=2, color='k', label='Prior 8 Weeks')
    ax.plot(week_agg['WeekBegin'], week_agg['Total'], color="#286090", linewidth=1.5,
            marker='o', markeredgecolor='w', label='Actual', markersize=5, zorder=1)
    ax.set_ylabel(None)
    # Making a nicer legend, combining the trailing average line and its error band into one entry
    handler, labeler = ax.get_legend_handles_labels()
    hd = [(handler[0], handler[1]), handler[2]]
    ax.legend(hd, [labeler[0], labeler[2]], bbox_to_anchor=(1.02, 1.0), loc='upper left', prop={'size': 10})
    ax.set_title('Crime Counts per Week')
    sup_title = 'Error bars generated via Poisson Z-Scores'
    ax.annotate(sup_title, (0, 0), (0, -25), xycoords='axes fraction', textcoords='offset points', va='top')
    return pn.pane.Matplotlib(fig0, width=750, height=300, tight=True, dpi=300)
# For the table, eliminate locations that are Dallas PD headquarters
# https://dallaspolice.net/about/Pages/Contact.aspx
pd_locs = ['1400 BOTHAM JEAN BLVD',
'6969 MCCALLUM BLVD',
'9915 E NORTHWEST HWY',
'9801 HARRY HINES BLVD',
'4230 W ILLINOIS AVE',
'725 N JIM MILLER RD',
'334 S HALL ST',
'1999 E CAMP WISDOM RD']
table_css = '''
/* Alternate row coloring */
tr:nth-child(even) {
background-color: #DDDDDD;
}
tr:nth-child(odd) {
background-color: #FFFFFF;
}
/* Second column (address) wider */
td:nth-child(2), th:nth-child(2) {
width: 150px;
}
/* Right align columns example
td:nth-child(2), td:nth-child(3),
th:nth-child(2), th:nth-child(3) {
text-align: right;
}
*/
/* Background color of header */
th {
background-color: #999999
}
/* No borders in Table */
/* No vertical borders in header and cells */
table, th, td {
font-family: Menlo,Monaco,Consolas,"Courier New",monospace;
border: none;
border-collapse: collapse;
border-left: none;
border-right: none;
border-bottom: none;
border-top: none;
}
/* Cell padding */
th, td {
padding: 0% 2% 0% 2%;
}
'''
@pn.depends(nibrs_widget.param.value, loc_widget.param.value)
def table_agg(nibrs_widget, loc_widget):
    sd = data[data['nibrs_cat'].isin(nibrs_widget) & data['location'].isin(loc_widget)]
    sd = sd[~sd['address'].isin(pd_locs)] # no police departments for this table
    agg_locs = sd.groupby(['address','lat','lon'], as_index=False)[['Prior30','Total']].sum()
    agg_locs = agg_locs.sort_values(by='Total', ascending=False, ignore_index=True).head(10)
    agg_locs['Rank'] = range(agg_locs.shape[0])
    agg_locs = agg_locs[['Rank','address','Total','Prior30']]
    agg_locs.columns = ['Rank','Top 10 Repeat Addresses','Total Count','Prior 30 Days']
    return pn.pane.DataFrame(agg_locs, index=False, width=400, stylesheets=[table_css])
@pn.depends(nibrs_widget.param.value, loc_widget.param.value)
def fol_map(nibrs_widget, loc_widget):
    sd = data[data['nibrs_cat'].isin(nibrs_widget) & data['location'].isin(loc_widget)]
    sd = sd[~sd['address'].isin(pd_locs)] # no police departments for this map
    agg_locs = sd.groupby(['address','lat','lon'], as_index=False)[['Prior30','Total']].sum()
    agg_locs = agg_locs.sort_values(by='Total', ascending=False, ignore_index=True).head(10)
    m = folium.Map(location=[32.787, -96.798], zoom_start=10)
    for i in range(agg_locs.shape[0]):
        li = agg_locs.loc[i]
        ll = li[['lat','lon']].tolist()
        # marker icon shows the rank of the address (matches the Rank column in the table)
        ico = folium.Icon("blue", icon_color="white", prefix="fa", icon=f"{i}") #, icon = "fa-location-dot"
        # popup content is rendered as HTML, so use <br> for line breaks
        popup = (f'Address: {li["address"]}<br>'
                 f'Prior 30 Days: {li["Prior30"]}<br>'
                 f'Total: {li["Total"]}')
        popup = folium.Popup(popup, max_width=200)
        ma = folium.Marker(ll, popup=popup, icon=ico)
        ma.add_to(m)
    return pn.pane.plot.Folium(m, height=400, width=400)
#############################
# Serving the app
pn.extension()
custom_style = {"background-color": "#f2f2f2"}
sidebar = pn.layout.WidgetBox(html_pane, pn.WidgetBox(nibrs_widget), pn.WidgetBox(loc_widget), styles=custom_style)
main = pn.Tabs(('Repeat Address', pn.Column(pn.Row(table_agg, fol_map))),
('Time Series Analysis', pn.Column(week_line, width=600)))
pn.Row(sidebar, main).servable();
#############################
asyncio.ensure_future(write_doc());