import pandas as pd
import numpy as np 
import requests
import folium
from folium.plugins import HeatMap
import matplotlib.pyplot as plt


response = requests.get("https://data.cityofnewyork.us/resource/uip8-fykc.json?$limit=60000")

url = 'https://data.cityofnewyork.us/resource/uip8-fykc.json'
params = {
    '$select': 'arrest_date,arrest_boro,arrest_precinct,age_group,perp_sex,perp_race,ofns_desc,latitude,longitude', 
    '$limit':60000
}
response = requests.get(url, params=params)
data = response.json()
df1 = pd.json_normalize(data)
df1.shape

(54576, 9)


# Set the date range for the data you want to retrieve
start_date = '2018-01-01T00:00:00.000'
end_date = '2023-05-04T23:59:59.999'

# Build the API request with the date filter
url = 'https://data.cityofnewyork.us/resource/8h9b-rp9u.json'
params = {
    '$select': 'arrest_date,arrest_boro,arrest_precinct,age_group,perp_sex,perp_race,ofns_desc,latitude,longitude',
    '$where': f"arrest_date between '{start_date}' and '{end_date}'", 
    '$limit':5000000
}
response = requests.get(url, params=params)
data = response.json()

# Convert the data to a pandas dataframe
df2 = pd.json_normalize(data)
df2.shape

(947084, 9)


#print(response.content)
#df1=pd.json_normalize(response.json())
#df1 = pd.DataFrame(response.json()[1:], columns=response.json()[0]) 
#print(df1.head())
df1


df1['arrest_date'] = pd.to_datetime(df1['arrest_date'])
df1['arrest_date'] = df1['arrest_date'].dt.strftime('%B %d, %Y')
print(df1['arrest_date'])

0        February 07, 2023
1         January 04, 2023
2         January 10, 2023
3        February 27, 2023
4         January 07, 2023
               ...        
54571       March 20, 2023
54572       March 20, 2023
54573       March 13, 2023
54574       March 17, 2023
54575       March 31, 2023
Name: arrest_date, Length: 54576, dtype: object

df2


df1 = df1.rename(columns={
    'arrest_boro': 'Borough',
    'ofns_desc': 'Offense',
    'perp_race': 'Race',
    'perp_sex': 'Sex', 
    'arrest_date':'Date',
    'age_group':'Age Group',
    'arrest_precinct':'Precinct'
})
df1


df2 = df2.rename(columns={
    'arrest_boro': 'Borough',
    'ofns_desc': 'Offense',
    'perp_race': 'Race',
    'perp_sex': 'Sex', 
    'arrest_date':'Date',
    'age_group':'Age Group',
    'arrest_precinct':'Precinct'
})
df2


abbr_to_full = {'M': 'Manhattan', 'B': 'Bronx', 'K': 'Brooklyn','Q': 'Queens','S': 'Staten Island'}

df1['Borough'] = df1['Borough'].replace(abbr_to_full)


abbr_to_full = {'M': 'Manhattan', 'B': 'Bronx', 'K': 'Brooklyn','Q': 'Queens','S': 'Staten Island'}

df2['Borough'] = df2['Borough'].replace(abbr_to_full)

df1

df2


df1.dropna()


df2.dropna()


df2['Date'] = pd.to_datetime(df2['Date'])
df2 = df2.set_index('Date')
weekly_offenses = df2['Offense'].resample('W').count()
plt.figure(figsize=(20, 10))
plt.plot(weekly_offenses)
plt.xlabel('Date')
plt.ylabel('Number of Offenses')
plt.title('Offense Trends')
plt.show()


df1['Offense'].value_counts().sort_values().plot(kind='barh', figsize=(9.5,10), color='Red',grid=True)
plt.xlabel('Frequency')
plt.ylabel('Offense')
plt.title('Count of Offenses by Type')

Text(0.5, 1.0, 'Count of Offenses by Type')


df1.groupby(['Offense', 'Age Group']).size().unstack().plot(kind='bar', stacked=True,figsize=(10,5))
plt.title('Type of Offense by Age Group')

Text(0.5, 1.0, 'Type of Offense by Age Group')


df1.groupby(['Offense', 'Age Group']).size().unstack().plot(kind='barh', stacked=True, figsize=(10, 12))
plt.title('Type of Offense by Age Group')
plt.xlabel('Number of Offenses')
plt.ylabel('Offense')
plt.show()


df_counts = df1.groupby(['Offense', 'Age Group']).size().unstack()
df_counts_sorted = df_counts.sum(axis=1).sort_values(ascending=True)  # Change to ascending=True
df_sorted = df_counts.loc[df_counts_sorted.index]

df_sorted.plot(kind='barh', stacked=True, figsize=(10, 11))
plt.title('Type of Offense by Age Group')
plt.xlabel('Number of Offenses')
plt.ylabel('Offense')
plt.show()


df_counts = df1.groupby(['Offense', 'Race']).size().unstack()
df_counts_sorted = df_counts.sum(axis=1).sort_values(ascending=True)  # Change to ascending=True
df_sorted = df_counts.loc[df_counts_sorted.index]

df_sorted.plot(kind='barh', stacked=True, figsize=(10, 12))
plt.title('Type of Offense by Race')
plt.xlabel('Number of Offenses')
plt.ylabel('Offense')
plt.show()


df_counts = df1.groupby(['Offense', 'Sex']).size().unstack()
df_counts_sorted = df_counts.sum(axis=1).sort_values(ascending=True)  # Change to ascending=True
df_sorted = df_counts.loc[df_counts_sorted.index]

df_sorted.plot(kind='barh', stacked=True, figsize=(10, 11))
plt.title('Type of Offense by Sex')
plt.xlabel('Number of Offenses')
plt.ylabel('Offense')
plt.show()


df_counts = df1.groupby(['Offense', 'Borough']).size().unstack()
df_counts_sorted = df_counts.sum(axis=1).sort_values(ascending=True)  # Change to ascending=True
df_sorted = df_counts.loc[df_counts_sorted.index]

df_sorted.plot(kind='barh', stacked=True, figsize=(10, 11))
plt.title('Type of Offense by Borough')
plt.xlabel('Number of Offenses')
plt.ylabel('Offense')
plt.show()


df_counts = df1.groupby(['Borough', 'Race']).size().unstack()
df_counts_sorted = df_counts.sum(axis=1).sort_values(ascending=True)  # Change to ascending=True
df_sorted = df_counts.loc[df_counts_sorted.index]

df_sorted.plot(kind='barh', stacked=True, figsize=(12, 8))
plt.title('Number of Offenses by Borough and Race')
plt.xlabel('Number of Offenses')
plt.ylabel('Borough')
plt.show()


df_counts = df1.groupby(['Borough', 'Age Group']).size().unstack()
df_counts_sorted = df_counts.sum(axis=1).sort_values(ascending=True)  # Change to ascending=True
df_sorted = df_counts.loc[df_counts_sorted.index]

df_sorted.plot(kind='barh', stacked=True, figsize=(12, 8))
plt.title('Number of Offenses by Borough and Age Group')
plt.xlabel('Number of Offenses')
plt.ylabel('Borough')
plt.show()


# Group the dataframe by precinct and count the number of offenses
df_grouped = df1.groupby(['Precinct', 'latitude', 'longitude']).agg({'Offense': 'count'}).reset_index()

# Create a map centered at New York City
ny_map = folium.Map(location=[40.7128, -74.0060], zoom_start=10)

# Create a heat map layer with the latitude, longitude, and offense count
heat_data = [[row['latitude'], row['longitude'], row['Offense']] for index, row in df_grouped.iterrows()]
HeatMap(heat_data).add_to(ny_map)

# Display the map
ny_map


offense_categories = df1['Offense'].unique()
category_dict = {offense_categories[i]: i+1 for i in range(len(offense_categories))}
df1['Offense Category'] = df1['Offense'].map(category_dict)

# Perform describe() on the counts of each category
category_counts = df1['Offense Category'].value_counts()
category_describe = category_counts.describe()

print(category_describe)

count      61.000000
mean      894.688525
std      1577.582270
min         1.000000
25%        16.000000
50%       174.000000
75%      1038.000000
max      7888.000000
Name: Offense Category, dtype: float64


# Create a dictionary to map offense categories to numbers
offense_categories = df1['Offense'].unique()
category_dict = {offense_categories[i]: i+1 for i in range(len(offense_categories))}
df1['Offense Category'] = df1['Offense'].map(category_dict)

# Perform describe() on the counts of each category by age group
age_group_categories = df1.groupby('Age Group')['Offense Category']
age_group_describe = age_group_categories.value_counts().groupby(level=0).describe()

print(age_group_describe)

           count        mean         std  min    25%    50%     75%     max
Age Group                                                                  
18-24       53.0  178.698113  273.629563  1.0   4.00   44.0  229.00  1336.0
25-44       58.0  542.379310  931.071121  2.0  13.75  109.0  719.25  4554.0
45-64       55.0  195.490909  368.336796  1.0   6.50   36.0  199.50  1764.0
65+         41.0   19.634146   35.655824  1.0   2.00    5.0   22.00   187.0
<18         34.0   61.470588  118.534290  1.0   3.00   16.0   46.75   620.0


# Create a dictionary to map offense categories to numbers
offense_categories = df1['Offense'].unique()
category_dict = {offense_categories[i]: i+1 for i in range(len(offense_categories))}
df1['Offense Category'] = df1['Offense'].map(category_dict)

# Perform describe() on the counts of each category by sex
sex_categories = df1.groupby('Sex')['Offense Category']
sex_describe = sex_categories.value_counts().groupby(level=0).describe()

print(sex_describe)

     count        mean          std  min   25%    50%    75%     max
Sex                                                                 
F     51.0  175.941176   368.174383  1.0   7.5   22.0  183.5  1987.0
M     61.0  738.885246  1242.842791  1.0  11.0  154.0  860.0  5812.0
U     36.0   14.750000    20.024806  1.0   2.0    5.0   21.0    89.0


# Create a dictionary to map offense categories to numbers
offense_categories = df1['Offense'].unique()
category_dict = {offense_categories[i]: i+1 for i in range(len(offense_categories))}
df1['Offense Category'] = df1['Offense'].map(category_dict)

# Perform describe() on the counts of each category by age group
age_group_categories = df1.groupby('Race')['Offense Category']
age_group_describe = age_group_categories.value_counts().groupby(level=0).describe()

print(age_group_describe)

                                count        mean         std  min    25%  \
Race                                                                        
AMERICAN INDIAN/ALASKAN NATIVE   28.0    6.750000    8.382632  1.0   1.75   
ASIAN / PACIFIC ISLANDER         49.0   59.081633   92.045242  1.0   7.00   
BLACK                            55.0  484.854545  799.600002  2.0  11.00   
BLACK HISPANIC                   50.0  101.100000  148.104383  1.0   5.00   
UNKNOWN                          40.0   12.000000   14.609445  1.0   2.00   
WHITE                            49.0  111.510204  195.416662  1.0   5.00   
WHITE HISPANIC                   58.0  238.379310  411.845007  1.0   7.25   

                                 50%     75%     max  
Race                                                  
AMERICAN INDIAN/ALASKAN NATIVE   3.0    7.00    36.0  
ASIAN / PACIFIC ISLANDER        27.0   57.00   478.0  
BLACK                           90.0  559.00  3825.0  
BLACK HISPANIC                  43.5  130.00   717.0  
UNKNOWN                          7.5   16.25    67.0  
WHITE                           31.0  106.00   976.0  
WHITE HISPANIC                  49.5  267.25  2086.0


female_count = df1[df1['Sex'] == 'F'].shape[0]
print("Count of females:", female_count)

Count of females: 8973


male_count = df1[df1['Sex'] == 'M'].shape[0]
print("Count of Males:", male_count)

Count of Males: 45072


race_proportions = df1['Race'].value_counts(normalize=True)
print(race_proportions)

BLACK                             0.488621
WHITE HISPANIC                    0.253335
WHITE                             0.100117
BLACK HISPANIC                    0.092623
ASIAN / PACIFIC ISLANDER          0.053045
UNKNOWN                           0.008795
AMERICAN INDIAN/ALASKAN NATIVE    0.003463
Name: Race, dtype: float64


race_proportions = df1['Race'].value_counts(normalize=True)
labels = race_proportions.index
sizes = race_proportions.values
fig, ax = plt.subplots(figsize=(10, 8))
ax.pie(sizes, labels=labels, autopct='%1.1f%%')
ax.axis('equal')
ax.set_title('Race Proportions')
plt.show()


age_proportions = df1['Age Group'].value_counts(normalize=True)
print(age_proportions)

25-44    0.576407
45-64    0.197010
18-24    0.173538
<18      0.038295
65+      0.014750
Name: Age Group, dtype: float64


age_proportions = df1['Age Group'].value_counts(normalize=True)
labels = age_proportions.index
sizes = age_proportions.values
fig, ax = plt.subplots(figsize=(10, 8))
ax.pie(sizes, labels=labels, autopct='%1.1f%%')
ax.axis('equal')
ax.set_title('Age Group Proportions')
plt.show()


import sqlite3

# create a connection to the database
conn = sqlite3.connect('C:/path/to/mock/database.db')

# export the df1 DataFrame to the database
df1.to_sql('NYC_Arrest_Data', conn, if_exists='replace', index=False)

# close the database connection
conn.close()

	arrest_date	arrest_boro	arrest_precinct	age_group	perp_sex	perp_race	ofns_desc	latitude	longitude
0	2023-02-07T00:00:00.000	S	120	25-44	M	BLACK	FELONY ASSAULT	40.636362	-74.117505
1	2023-01-04T00:00:00.000	M	34	25-44	F	BLACK	(null)	40.856519	-73.927564
2	2023-01-10T00:00:00.000	B	42	18-24	F	WHITE HISPANIC	ASSAULT 3 & RELATED OFFENSES	40.822355	-73.911463
3	2023-02-27T00:00:00.000	Q	113	25-44	M	BLACK	(null)	40.66588105	-73.80183654
4	2023-01-07T00:00:00.000	M	18	<18	M	BLACK	ROBBERY	40.76539	-73.985702
...	...	...	...	...	...	...	...	...	...
54571	2023-03-20T00:00:00.000	Q	113	25-44	M	ASIAN / PACIFIC ISLANDER	FELONY ASSAULT	40.680077	-73.775929
54572	2023-03-20T00:00:00.000	M	33	18-24	M	WHITE HISPANIC	ASSAULT 3 & RELATED OFFENSES	40.837374	-73.941122
54573	2023-03-13T00:00:00.000	Q	113	18-24	M	WHITE	GRAND LARCENY	40.680077	-73.775929
54574	2023-03-17T00:00:00.000	Q	112	65+	M	ASIAN / PACIFIC ISLANDER	ASSAULT 3 & RELATED OFFENSES	40.709999	-73.853973
54575	2023-03-31T00:00:00.000	Q	114	25-44	M	WHITE	DANGEROUS WEAPONS	40.7714791	-73.91823339

	arrest_date	arrest_boro	arrest_precinct	age_group	perp_sex	perp_race	ofns_desc	latitude	longitude
0	2018-01-01T00:00:00.000	K	66	25-44	M	ASIAN / PACIFIC ISLANDER	FELONY ASSAULT	40.62572501600005	-73.99104937999994
1	2018-01-01T00:00:00.000	M	17	25-44	F	BLACK	DANGEROUS DRUGS	40.752228400000035	-73.97192485799998
2	2018-01-01T00:00:00.000	S	121	<18	F	BLACK	ASSAULT 3 & RELATED OFFENSES	40.63194388100004	-74.16170488099993
3	2018-01-01T00:00:00.000	B	40	45-64	M	BLACK HISPANIC	INTOXICATED/IMPAIRED DRIVING	40.81553235500007	-73.91861784599996
4	2018-01-01T00:00:00.000	B	46	18-24	M	BLACK HISPANIC	ASSAULT 3 & RELATED OFFENSES	40.84643225400004	-73.91104764699998
...	...	...	...	...	...	...	...	...	...
947079	2022-12-31T00:00:00.000	M	7	18-24	M	WHITE	ASSAULT 3 & RELATED OFFENSES	40.718911	-73.98939
947080	2022-12-31T00:00:00.000	K	73	25-44	M	BLACK	CRIMINAL MISCHIEF & RELATED OF	40.666387	-73.90458
947081	2022-12-31T00:00:00.000	M	32	25-44	F	BLACK	ASSAULT 3 & RELATED OFFENSES	40.822332	-73.936708
947082	2022-12-31T00:00:00.000	M	24	25-44	M	WHITE HISPANIC	GRAND LARCENY	40.794146	-73.968443
947083	2022-12-31T00:00:00.000	M	24	25-44	M	BLACK	MISCELLANEOUS PENAL LAW	40.789701	-73.967972

	Date	Borough	Precinct	Age Group	Sex	Race	Offense	latitude	longitude
0	February 07, 2023	S	120	25-44	M	BLACK	FELONY ASSAULT	40.636362	-74.117505
1	January 04, 2023	M	34	25-44	F	BLACK	(null)	40.856519	-73.927564
2	January 10, 2023	B	42	18-24	F	WHITE HISPANIC	ASSAULT 3 & RELATED OFFENSES	40.822355	-73.911463
3	February 27, 2023	Q	113	25-44	M	BLACK	(null)	40.66588105	-73.80183654
4	January 07, 2023	M	18	<18	M	BLACK	ROBBERY	40.76539	-73.985702
...	...	...	...	...	...	...	...	...	...
54571	March 20, 2023	Q	113	25-44	M	ASIAN / PACIFIC ISLANDER	FELONY ASSAULT	40.680077	-73.775929
54572	March 20, 2023	M	33	18-24	M	WHITE HISPANIC	ASSAULT 3 & RELATED OFFENSES	40.837374	-73.941122
54573	March 13, 2023	Q	113	18-24	M	WHITE	GRAND LARCENY	40.680077	-73.775929
54574	March 17, 2023	Q	112	65+	M	ASIAN / PACIFIC ISLANDER	ASSAULT 3 & RELATED OFFENSES	40.709999	-73.853973
54575	March 31, 2023	Q	114	25-44	M	WHITE	DANGEROUS WEAPONS	40.7714791	-73.91823339

	Date	Borough	Precinct	Age Group	Sex	Race	Offense	latitude	longitude
0	February 07, 2023	Staten Island	120	25-44	M	BLACK	FELONY ASSAULT	40.636362	-74.117505
1	January 04, 2023	Manhattan	34	25-44	F	BLACK	(null)	40.856519	-73.927564
2	January 10, 2023	Bronx	42	18-24	F	WHITE HISPANIC	ASSAULT 3 & RELATED OFFENSES	40.822355	-73.911463
3	February 27, 2023	Queens	113	25-44	M	BLACK	(null)	40.66588105	-73.80183654
4	January 07, 2023	Manhattan	18	<18	M	BLACK	ROBBERY	40.76539	-73.985702
...	...	...	...	...	...	...	...	...	...
54571	March 20, 2023	Queens	113	25-44	M	ASIAN / PACIFIC ISLANDER	FELONY ASSAULT	40.680077	-73.775929
54572	March 20, 2023	Manhattan	33	18-24	M	WHITE HISPANIC	ASSAULT 3 & RELATED OFFENSES	40.837374	-73.941122
54573	March 13, 2023	Queens	113	18-24	M	WHITE	GRAND LARCENY	40.680077	-73.775929
54574	March 17, 2023	Queens	112	65+	M	ASIAN / PACIFIC ISLANDER	ASSAULT 3 & RELATED OFFENSES	40.709999	-73.853973
54575	March 31, 2023	Queens	114	25-44	M	WHITE	DANGEROUS WEAPONS	40.7714791	-73.91823339

	Date	Borough	Precinct	Age Group	Sex	Race	Offense	latitude	longitude
0	2018-01-01T00:00:00.000	Brooklyn	66	25-44	M	ASIAN / PACIFIC ISLANDER	FELONY ASSAULT	40.62572501600005	-73.99104937999994
1	2018-01-01T00:00:00.000	Manhattan	17	25-44	F	BLACK	DANGEROUS DRUGS	40.752228400000035	-73.97192485799998
2	2018-01-01T00:00:00.000	Staten Island	121	<18	F	BLACK	ASSAULT 3 & RELATED OFFENSES	40.63194388100004	-74.16170488099993
3	2018-01-01T00:00:00.000	Bronx	40	45-64	M	BLACK HISPANIC	INTOXICATED/IMPAIRED DRIVING	40.81553235500007	-73.91861784599996
4	2018-01-01T00:00:00.000	Bronx	46	18-24	M	BLACK HISPANIC	ASSAULT 3 & RELATED OFFENSES	40.84643225400004	-73.91104764699998
...	...	...	...	...	...	...	...	...	...
947079	2022-12-31T00:00:00.000	Manhattan	7	18-24	M	WHITE	ASSAULT 3 & RELATED OFFENSES	40.718911	-73.98939
947080	2022-12-31T00:00:00.000	Brooklyn	73	25-44	M	BLACK	CRIMINAL MISCHIEF & RELATED OF	40.666387	-73.90458
947081	2022-12-31T00:00:00.000	Manhattan	32	25-44	F	BLACK	ASSAULT 3 & RELATED OFFENSES	40.822332	-73.936708
947082	2022-12-31T00:00:00.000	Manhattan	24	25-44	M	WHITE HISPANIC	GRAND LARCENY	40.794146	-73.968443
947083	2022-12-31T00:00:00.000	Manhattan	24	25-44	M	BLACK	MISCELLANEOUS PENAL LAW	40.789701	-73.967972

NYC ARREST DEMOGRAPHICS¶