{ "cells": [ { "attachments": {}, "cell_type": "markdown", "id": "7bc558f7", "metadata": {}, "source": [ "# Join Tutorial\n", "\n", "Join points to closest BBL shapes tutorial." ] }, { "cell_type": "markdown", "id": "difficult-princeton", "metadata": {}, "source": [ "# Importing Libraries" ] }, { "cell_type": "code", "execution_count": 1, "id": "sexual-hungary", "metadata": { "ExecuteTime": { "end_time": "2021-01-12T12:26:47.337270Z", "start_time": "2021-01-12T12:26:40.878024Z" } }, "outputs": [], "source": [ "import os\n", "import glob\n", "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "import geopandas as gpd\n", "import fiona\n", "from fiona.crs import from_epsg\n", "import geoplot\n", "import matplotlib.pyplot as plt\n", "import matplotlib\n", "import matplotlib.dates as mdates\n", "from matplotlib.ticker import FuncFormatter\n", "from mpl_toolkits.axes_grid1 import make_axes_locatable\n", "import datetime\n", "import statsmodels.api as sm\n", "\n", "plt.rcParams['savefig.facecolor'] = 'white'\n", "%matplotlib inline" ] }, { "cell_type": "markdown", "id": "israeli-surname", "metadata": {}, "source": [ "Printing versions of Python modules and packages with **watermark** - the IPython magic extension." 
] }, { "cell_type": "code", "execution_count": 2, "id": "driving-interstate", "metadata": { "ExecuteTime": { "end_time": "2021-01-12T12:26:47.373246Z", "start_time": "2021-01-12T12:26:47.340252Z" } }, "outputs": [], "source": [ "%load_ext watermark" ] }, { "cell_type": "code", "execution_count": 3, "id": "undefined-polyester", "metadata": { "ExecuteTime": { "end_time": "2021-01-12T12:26:47.487466Z", "start_time": "2021-01-12T12:26:47.377365Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Python implementation: CPython\n", "Python version : 3.8.13\n", "IPython version : 8.4.0\n", "\n", "numpy : 1.23.1\n", "pandas : 1.4.3\n", "geopandas : 0.11.1\n", "geoplot : 0.5.1\n", "fiona : 1.8.21\n", "matplotlib.pyplot: unknown\n", "seaborn : 0.11.2\n", "\n" ] } ], "source": [ "%watermark -v -p numpy,pandas,geopandas,geoplot,fiona,matplotlib,seaborn" ] }, { "cell_type": "markdown", "id": "legitimate-gross", "metadata": {}, "source": [ "Documentation for installing watermark: https://github.com/rasbt/watermark" ] }, { "cell_type": "markdown", "id": "drawn-sponsorship", "metadata": {}, "source": [ "# Retrieve Data" ] }, { "cell_type": "code", "execution_count": 4, "id": "0cb3b300", "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(26959, 35)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
unique_keycreated_dateagencyagency_namecomplaint_typedescriptorincident_zipincident_addressstreet_namecross_street_1...intersection_street_1intersection_street_2closed_dateresolution_descriptionresolution_action_updated_datelocation_typelandmarkfacility_typedue_dategeometry
0485422202020-12-31T15:41:00.000DEPDepartment of Environmental ProtectionSewerStreet Flooding (SJ)11420.0117-17 135 STREET135 STREETFOCH BLVD...NaNNaN2021-01-01T00:20:00.000Please call 311 for further information. If yo...2021-01-01T00:20:00.000NaNNaNNaNNaNPOINT (1038500.000 186000.014)
1485364302020-12-31T14:49:00.000DEPDepartment of Environmental ProtectionSewerStreet Flooding (SJ)11357.020-24 150 STREET150 STREET20 AVE...NaNNaN2021-01-04T10:15:00.000The Department of Environment Protection inspe...2021-01-04T10:15:00.000NaNNaNNaNNaNPOINT (1035682.000 223771.014)
2485393612020-12-31T14:03:00.000DEPDepartment of Environmental ProtectionSewerStreet Flooding (SJ)11228.07223 8 AVENUE8 AVENUE72 ST...NaNNaN2021-01-02T11:25:00.000The Department of Environmental Protection has...2021-01-02T11:25:00.000NaNNaNNaNNaNPOINT (979584.000 168255.014)
3485431322020-12-31T13:48:00.000DEPDepartment of Environmental ProtectionSewerStreet Flooding (SJ)10032.0NaNNaNNaN...RIVERSIDE DRIVEWEST 165 STREET2020-12-31T14:50:00.000Please call 311 for further information. If yo...2020-12-31T14:50:00.000NaNNaNNaNNaNPOINT (999557.000 245700.014)
4485364412020-12-31T13:10:00.000DEPDepartment of Environmental ProtectionSewerStreet Flooding (SJ)11234.03123 FILLMORE AVENUEFILLMORE AVENUEE 31 ST...NaNNaN2021-01-03T10:45:00.000The Department of Environmental Protection ins...2021-01-03T10:45:00.000NaNNaNNaNNaNPOINT (1001868.000 161232.014)
\n", "

5 rows × 35 columns

\n", "
" ], "text/plain": [ " unique_key created_date agency \\\n", "0 48542220 2020-12-31T15:41:00.000 DEP \n", "1 48536430 2020-12-31T14:49:00.000 DEP \n", "2 48539361 2020-12-31T14:03:00.000 DEP \n", "3 48543132 2020-12-31T13:48:00.000 DEP \n", "4 48536441 2020-12-31T13:10:00.000 DEP \n", "\n", " agency_name complaint_type \\\n", "0 Department of Environmental Protection Sewer \n", "1 Department of Environmental Protection Sewer \n", "2 Department of Environmental Protection Sewer \n", "3 Department of Environmental Protection Sewer \n", "4 Department of Environmental Protection Sewer \n", "\n", " descriptor incident_zip incident_address street_name \\\n", "0 Street Flooding (SJ) 11420.0 117-17 135 STREET 135 STREET \n", "1 Street Flooding (SJ) 11357.0 20-24 150 STREET 150 STREET \n", "2 Street Flooding (SJ) 11228.0 7223 8 AVENUE 8 AVENUE \n", "3 Street Flooding (SJ) 10032.0 NaN NaN \n", "4 Street Flooding (SJ) 11234.0 3123 FILLMORE AVENUE FILLMORE AVENUE \n", "\n", " cross_street_1 ... intersection_street_1 intersection_street_2 \\\n", "0 FOCH BLVD ... NaN NaN \n", "1 20 AVE ... NaN NaN \n", "2 72 ST ... NaN NaN \n", "3 NaN ... RIVERSIDE DRIVE WEST 165 STREET \n", "4 E 31 ST ... NaN NaN \n", "\n", " closed_date resolution_description \\\n", "0 2021-01-01T00:20:00.000 Please call 311 for further information. If yo... \n", "1 2021-01-04T10:15:00.000 The Department of Environment Protection inspe... \n", "2 2021-01-02T11:25:00.000 The Department of Environmental Protection has... \n", "3 2020-12-31T14:50:00.000 Please call 311 for further information. If yo... \n", "4 2021-01-03T10:45:00.000 The Department of Environmental Protection ins... 
\n", "\n", " resolution_action_updated_date location_type landmark facility_type \\\n", "0 2021-01-01T00:20:00.000 NaN NaN NaN \n", "1 2021-01-04T10:15:00.000 NaN NaN NaN \n", "2 2021-01-02T11:25:00.000 NaN NaN NaN \n", "3 2020-12-31T14:50:00.000 NaN NaN NaN \n", "4 2021-01-03T10:45:00.000 NaN NaN NaN \n", "\n", " due_date geometry \n", "0 NaN POINT (1038500.000 186000.014) \n", "1 NaN POINT (1035682.000 223771.014) \n", "2 NaN POINT (979584.000 168255.014) \n", "3 NaN POINT (999557.000 245700.014) \n", "4 NaN POINT (1001868.000 161232.014) \n", "\n", "[5 rows x 35 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "path = 'https://raw.githubusercontent.com/mebauer/nyc-311-street-flooding/main/data/\\\n", "street-flooding-complaints.csv'\n", "\n", "df = pd.read_csv(path, low_memory=False)\n", "\n", "# longitude/latitude are WGS84 degrees, i.e. EPSG:4326 (EPSG:4263 is an unrelated datum)\n", "gdf = gpd.GeoDataFrame(\n", " df, geometry=gpd.points_from_xy(df.longitude, df.latitude, crs=4326))\n", "\n", "# drop complaints without coordinates, then project to EPSG:2263 (NY State Plane, feet)\n", "gdf = gdf.dropna(subset=['longitude']).reset_index(drop=True)\n", "gdf = gdf.to_crs(2263)\n", "\n", "print(gdf.shape)\n", "gdf.head()" ] }, { "cell_type": "code", "execution_count": 5, "id": "29f15d9c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "gdf.plot()" ] }, { "cell_type": "code", "execution_count": 6, "id": "0adc2eb0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(500, 95)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
BoroughBlockLotCDBCT2020BCTCB2020CT2010CB2010SchoolDistCouncil...FIRM07_FLAGPFIRM15_FLAGVersionDCPEditedLatitudeLongitudeNotesShape_LengShape_Areageometry
0MN11010110005001000500000351000021...1122v3.1None40.688766-74.018682None0.07.478663e+06MULTIPOLYGON (((980783.787 191526.763, 980898....
1MN11011011000100None11001021...None122v3.1None40.689920-74.045337None0.05.018973e+05MULTIPOLYGON (((972409.690 190685.560, 972428....
2MN120110110001001000100100011000021...None122v3.1None40.698188-74.041329None0.01.148539e+06MULTIPOLYGON (((973648.661 193712.461, 973648....
3MN2110110009001000900102291025021...1122v3.1t40.700369-74.012911None0.01.008250e+05MULTIPOLYGON (((980639.785 194217.691, 980609....
4MN2210110009001000900102291025021...1122v3.1None40.700550-74.011588None0.08.724425e+04MULTIPOLYGON (((980915.002 194319.141, 980795....
\n", "

5 rows × 95 columns

\n", "
" ], "text/plain": [ " Borough Block Lot CD BCT2020 BCTCB2020 CT2010 CB2010 SchoolDist \\\n", "0 MN 1 10 101 1000500 10005000003 5 1000 02 \n", "1 MN 1 101 101 1000100 None 1 1001 02 \n", "2 MN 1 201 101 1000100 10001001000 1 1000 02 \n", "3 MN 2 1 101 1000900 10009001022 9 1025 02 \n", "4 MN 2 2 101 1000900 10009001022 9 1025 02 \n", "\n", " Council ... FIRM07_FLAG PFIRM15_FLAG Version DCPEdited Latitude \\\n", "0 1 ... 1 1 22v3.1 None 40.688766 \n", "1 1 ... None 1 22v3.1 None 40.689920 \n", "2 1 ... None 1 22v3.1 None 40.698188 \n", "3 1 ... 1 1 22v3.1 t 40.700369 \n", "4 1 ... 1 1 22v3.1 None 40.700550 \n", "\n", " Longitude Notes Shape_Leng Shape_Area \\\n", "0 -74.018682 None 0.0 7.478663e+06 \n", "1 -74.045337 None 0.0 5.018973e+05 \n", "2 -74.041329 None 0.0 1.148539e+06 \n", "3 -74.012911 None 0.0 1.008250e+05 \n", "4 -74.011588 None 0.0 8.724425e+04 \n", "\n", " geometry \n", "0 MULTIPOLYGON (((980783.787 191526.763, 980898.... \n", "1 MULTIPOLYGON (((972409.690 190685.560, 972428.... \n", "2 MULTIPOLYGON (((973648.661 193712.461, 973648.... \n", "3 MULTIPOLYGON (((980639.785 194217.691, 980609.... \n", "4 MULTIPOLYGON (((980915.002 194319.141, 980795.... \n", "\n", "[5 rows x 95 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "path = '~/Downloads/nyc_mappluto_22v3_1_fgdb/MapPLUTO_22v3_1.gdb'\n", "pluto_gdf = gpd.read_file(path, rows=500)\n", "\n", "pluto_gdf = pluto_gdf.to_crs(2263)\n", "\n", "print(pluto_gdf.shape)\n", "pluto_gdf.head()" ] }, { "cell_type": "code", "execution_count": 7, "id": "1c8d087c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "pluto_gdf.plot()" ] }, { "cell_type": "code", "execution_count": 8, "id": "7ec1828c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "rtree.index.Index(bounds=[971045.2731347084, 188447.4288892746, 985249.228328228, 201211.1846499443], size=70)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pluto_gdf.sindex" ] }, { "cell_type": "code", "execution_count": 9, "id": "addbb9e5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
minxminymaxxmaxy
01.038300e+06185800.0143931.038700e+06186200.014393
11.035482e+06223571.0144171.035882e+06223971.014417
29.793840e+05168055.0143829.797840e+05168455.014382
39.993570e+05245500.0144319.997570e+05245900.014431
41.001668e+06161032.0143771.002068e+06161432.014377
\n", "
" ], "text/plain": [ " minx miny maxx maxy\n", "0 1.038300e+06 185800.014393 1.038700e+06 186200.014393\n", "1 1.035482e+06 223571.014417 1.035882e+06 223971.014417\n", "2 9.793840e+05 168055.014382 9.797840e+05 168455.014382\n", "3 9.993570e+05 245500.014431 9.997570e+05 245900.014431\n", "4 1.001668e+06 161032.014377 1.002068e+06 161432.014377" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "offset = 200\n", "bbox = gdf.bounds + [-offset, -offset, offset, offset]\n", "\n", "bbox.head()" ] }, { "cell_type": "code", "execution_count": 10, "id": "72dee2c9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(26959,)\n" ] }, { "data": { "text/plain": [ "0 []\n", "1 []\n", "2 []\n", "3 []\n", "4 []\n", "dtype: object" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hits = bbox.apply(lambda row: list(pluto_gdf.sindex.intersection(row)), axis=1)\n", "\n", "print(hits.shape)\n", "hits.head()" ] }, { "cell_type": "code", "execution_count": 11, "id": "d44174b0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(721, 2)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pt_idxline_i
0459128.0
1459145.0
2459137.0
3459139.0
4459140.0
\n", "
" ], "text/plain": [ " pt_idx line_i\n", "0 459 128.0\n", "1 459 145.0\n", "2 459 137.0\n", "3 459 139.0\n", "4 459 140.0" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tmp = pd.DataFrame(\n", " {\n", " # index label of each point in gdf, repeated once per candidate lot\n", " \"pt_idx\": np.repeat(hits.index, hits.apply(len)),\n", "\n", " # ordinal position of the candidate lot in pluto_gdf; cast to int because\n", " # concatenating the empty hit lists would otherwise upcast to float\n", " \"line_i\": np.concatenate(hits.values).astype(int)\n", " }\n", ")\n", "\n", "print(tmp.shape)\n", "tmp.head()" ] }, { "cell_type": "code", "execution_count": 12, "id": "bc450473", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pt_idxline_iBoroughBlockLotCDBCT2020BCTCB2020CT2010CB2010...FIRM07_FLAGPFIRM15_FLAGVersionDCPEditedLatitudeLongitudeNotesShape_LengShape_Areageometry
0459128.0MN792710110015021001502100015.021000...NoneNone22v3.1None40.710322-74.008466None0.05670.917438MULTIPOLYGON (((981955.698 198099.048, 981894....
1459145.0MN922310110015011001501101115.011007...NoneNone22v3.1None40.710594-74.007185None0.01702.936740MULTIPOLYGON (((982289.308 198169.543, 982267....
2459137.0MN89310110015011001501101715.011005...NoneNone22v3.1None40.710635-74.007964None0.05632.008015MULTIPOLYGON (((982020.597 198116.096, 981985....
3459139.0MN902310110015011001501100915.011004...NoneNone22v3.1None40.710956-74.007773None0.01021.570578MULTIPOLYGON (((982110.692 198297.452, 982090....
4459140.0MN902410110015011001501100915.011004...NoneNone22v3.1None40.711005-74.007817None0.01242.680263MULTIPOLYGON (((982106.447 198327.138, 982072....
\n", "

5 rows × 97 columns

\n", "
" ], "text/plain": [ " pt_idx line_i Borough Block Lot CD BCT2020 BCTCB2020 CT2010 \\\n", "0 459 128.0 MN 79 27 101 1001502 10015021000 15.02 \n", "1 459 145.0 MN 92 23 101 1001501 10015011011 15.01 \n", "2 459 137.0 MN 89 3 101 1001501 10015011017 15.01 \n", "3 459 139.0 MN 90 23 101 1001501 10015011009 15.01 \n", "4 459 140.0 MN 90 24 101 1001501 10015011009 15.01 \n", "\n", " CB2010 ... FIRM07_FLAG PFIRM15_FLAG Version DCPEdited Latitude \\\n", "0 1000 ... None None 22v3.1 None 40.710322 \n", "1 1007 ... None None 22v3.1 None 40.710594 \n", "2 1005 ... None None 22v3.1 None 40.710635 \n", "3 1004 ... None None 22v3.1 None 40.710956 \n", "4 1004 ... None None 22v3.1 None 40.711005 \n", "\n", " Longitude Notes Shape_Leng Shape_Area \\\n", "0 -74.008466 None 0.0 5670.917438 \n", "1 -74.007185 None 0.0 1702.936740 \n", "2 -74.007964 None 0.0 5632.008015 \n", "3 -74.007773 None 0.0 1021.570578 \n", "4 -74.007817 None 0.0 1242.680263 \n", "\n", " geometry \n", "0 MULTIPOLYGON (((981955.698 198099.048, 981894.... \n", "1 MULTIPOLYGON (((982289.308 198169.543, 982267.... \n", "2 MULTIPOLYGON (((982020.597 198116.096, 981985.... \n", "3 MULTIPOLYGON (((982110.692 198297.452, 982090.... \n", "4 MULTIPOLYGON (((982106.447 198327.138, 982072.... \n", "\n", "[5 rows x 97 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Join back to the lines on line_i; we use reset_index() to \n", "# give us the ordinal position of each line\n", "tmp = tmp.join(pluto_gdf.reset_index(drop=True), on=\"line_i\")\n", "\n", "tmp.head()" ] }, { "cell_type": "code", "execution_count": 13, "id": "70023ecf", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pt_idxline_iBoroughBlockLotCDBCT2020BCTCB2020CT2010CB2010...PFIRM15_FLAGVersionDCPEditedLatitudeLongitudeNotesShape_LengShape_Areageometrypoint
0459128.0MN792710110015021001502100015.021000...None22v3.1None40.710322-74.008466None0.05670.917438MULTIPOLYGON (((981955.698 198099.048, 981894....POINT (982027.000 198304.014)
1459145.0MN922310110015011001501101115.011007...None22v3.1None40.710594-74.007185None0.01702.936740MULTIPOLYGON (((982289.308 198169.543, 982267....POINT (982027.000 198304.014)
2459137.0MN89310110015011001501101715.011005...None22v3.1None40.710635-74.007964None0.05632.008015MULTIPOLYGON (((982020.597 198116.096, 981985....POINT (982027.000 198304.014)
3459139.0MN902310110015011001501100915.011004...None22v3.1None40.710956-74.007773None0.01021.570578MULTIPOLYGON (((982110.692 198297.452, 982090....POINT (982027.000 198304.014)
4459140.0MN902410110015011001501100915.011004...None22v3.1None40.711005-74.007817None0.01242.680263MULTIPOLYGON (((982106.447 198327.138, 982072....POINT (982027.000 198304.014)
\n", "

5 rows × 98 columns

\n", "
" ], "text/plain": [ " pt_idx line_i Borough Block Lot CD BCT2020 BCTCB2020 CT2010 \\\n", "0 459 128.0 MN 79 27 101 1001502 10015021000 15.02 \n", "1 459 145.0 MN 92 23 101 1001501 10015011011 15.01 \n", "2 459 137.0 MN 89 3 101 1001501 10015011017 15.01 \n", "3 459 139.0 MN 90 23 101 1001501 10015011009 15.01 \n", "4 459 140.0 MN 90 24 101 1001501 10015011009 15.01 \n", "\n", " CB2010 ... PFIRM15_FLAG Version DCPEdited Latitude Longitude Notes \\\n", "0 1000 ... None 22v3.1 None 40.710322 -74.008466 None \n", "1 1007 ... None 22v3.1 None 40.710594 -74.007185 None \n", "2 1005 ... None 22v3.1 None 40.710635 -74.007964 None \n", "3 1004 ... None 22v3.1 None 40.710956 -74.007773 None \n", "4 1004 ... None 22v3.1 None 40.711005 -74.007817 None \n", "\n", " Shape_Leng Shape_Area geometry \\\n", "0 0.0 5670.917438 MULTIPOLYGON (((981955.698 198099.048, 981894.... \n", "1 0.0 1702.936740 MULTIPOLYGON (((982289.308 198169.543, 982267.... \n", "2 0.0 5632.008015 MULTIPOLYGON (((982020.597 198116.096, 981985.... \n", "3 0.0 1021.570578 MULTIPOLYGON (((982110.692 198297.452, 982090.... \n", "4 0.0 1242.680263 MULTIPOLYGON (((982106.447 198327.138, 982072.... \n", "\n", " point \n", "0 POINT (982027.000 198304.014) \n", "1 POINT (982027.000 198304.014) \n", "2 POINT (982027.000 198304.014) \n", "3 POINT (982027.000 198304.014) \n", "4 POINT (982027.000 198304.014) \n", "\n", "[5 rows x 98 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Join back to the original points to get their geometry\n", "# rename the point geometry as \"point\"\n", "tmp = tmp.join(gdf.geometry.rename(\"point\"), on=\"pt_idx\")\n", "\n", "# Convert back to a GeoDataFrame, so we can do spatial ops\n", "tmp = gpd.GeoDataFrame(tmp, geometry=\"geometry\", crs=gdf.crs)\n", "\n", "tmp.head()" ] }, { "cell_type": "code", "execution_count": 14, "id": "01b72539", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pt_idxline_iBoroughBlockLotCDBCT2020BCTCB2020CT2010CB2010...VersionDCPEditedLatitudeLongitudeNotesShape_LengShape_Areageometrypointsnap_dist
0459128.0MN792710110015021001502100015.021000...22v3.1None40.710322-74.008466None0.05670.917438MULTIPOLYGON (((981955.698 198099.048, 981894....POINT (982027.000 198304.014)210.186017
1459145.0MN922310110015011001501101115.011007...22v3.1None40.710594-74.007185None0.01702.936740MULTIPOLYGON (((982289.308 198169.543, 982267....POINT (982027.000 198304.014)244.527951
2459137.0MN89310110015011001501101715.011005...22v3.1None40.710635-74.007964None0.05632.008015MULTIPOLYGON (((982020.597 198116.096, 981985....POINT (982027.000 198304.014)69.911787
3459139.0MN902310110015011001501100915.011004...22v3.1None40.710956-74.007773None0.01021.570578MULTIPOLYGON (((982110.692 198297.452, 982090....POINT (982027.000 198304.014)49.550503
4459140.0MN902410110015011001501100915.011004...22v3.1None40.711005-74.007817None0.01242.680263MULTIPOLYGON (((982106.447 198327.138, 982072....POINT (982027.000 198304.014)30.673000
\n", "

5 rows × 99 columns

\n", "
" ], "text/plain": [ " pt_idx line_i Borough Block Lot CD BCT2020 BCTCB2020 CT2010 \\\n", "0 459 128.0 MN 79 27 101 1001502 10015021000 15.02 \n", "1 459 145.0 MN 92 23 101 1001501 10015011011 15.01 \n", "2 459 137.0 MN 89 3 101 1001501 10015011017 15.01 \n", "3 459 139.0 MN 90 23 101 1001501 10015011009 15.01 \n", "4 459 140.0 MN 90 24 101 1001501 10015011009 15.01 \n", "\n", " CB2010 ... Version DCPEdited Latitude Longitude Notes Shape_Leng \\\n", "0 1000 ... 22v3.1 None 40.710322 -74.008466 None 0.0 \n", "1 1007 ... 22v3.1 None 40.710594 -74.007185 None 0.0 \n", "2 1005 ... 22v3.1 None 40.710635 -74.007964 None 0.0 \n", "3 1004 ... 22v3.1 None 40.710956 -74.007773 None 0.0 \n", "4 1004 ... 22v3.1 None 40.711005 -74.007817 None 0.0 \n", "\n", " Shape_Area geometry \\\n", "0 5670.917438 MULTIPOLYGON (((981955.698 198099.048, 981894.... \n", "1 1702.936740 MULTIPOLYGON (((982289.308 198169.543, 982267.... \n", "2 5632.008015 MULTIPOLYGON (((982020.597 198116.096, 981985.... \n", "3 1021.570578 MULTIPOLYGON (((982110.692 198297.452, 982090.... \n", "4 1242.680263 MULTIPOLYGON (((982106.447 198327.138, 982072.... \n", "\n", " point snap_dist \n", "0 POINT (982027.000 198304.014) 210.186017 \n", "1 POINT (982027.000 198304.014) 244.527951 \n", "2 POINT (982027.000 198304.014) 69.911787 \n", "3 POINT (982027.000 198304.014) 49.550503 \n", "4 POINT (982027.000 198304.014) 30.673000 \n", "\n", "[5 rows x 99 columns]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tmp[\"snap_dist\"] = tmp.geometry.distance(gpd.GeoSeries(tmp.point))\n", "\n", "tmp.head()" ] }, { "cell_type": "code", "execution_count": 15, "id": "22559048", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pt_idxline_iBoroughBlockLotCDBCT2020BCTCB2020CT2010CB2010...VersionDCPEditedLatitudeLongitudeNotesShape_LengShape_Areageometrypointsnap_dist
44819254340.0MN163101103170410317044000317.044001...22v3.1t40.712618-74.016586None0.02.052775e+06MULTIPOLYGON (((980584.841 200985.007, 980594....POINT (979355.000 197234.014)0.000000
48319475340.0MN163101103170410317044000317.044001...22v3.1t40.712618-74.016586None0.02.052775e+06MULTIPOLYGON (((980584.841 200985.007, 980594....POINT (979355.000 197234.014)0.000000
32415235340.0MN163101103170410317044000317.044001...22v3.1t40.712618-74.016586None0.02.052775e+06MULTIPOLYGON (((980584.841 200985.007, 980594....POINT (979759.000 199686.014)0.000000
51821495455.0MN25750210110009001000900100191000...22v3.1None40.705919-74.010766None0.02.585314e+04MULTIPOLYGON (((981284.442 196374.824, 981236....POINT (981399.000 196469.014)11.267715
5162149547.0MN252710110009001000900100091000...22v3.1None40.705686-74.010287None0.01.625568e+04MULTIPOLYGON (((981492.934 196409.111, 981478....POINT (981399.000 196469.014)11.310903
\n", "

5 rows × 99 columns

\n", "
" ], "text/plain": [ " pt_idx line_i Borough Block Lot CD BCT2020 BCTCB2020 CT2010 \\\n", "448 19254 340.0 MN 16 3 101 1031704 10317044000 317.04 \n", "483 19475 340.0 MN 16 3 101 1031704 10317044000 317.04 \n", "324 15235 340.0 MN 16 3 101 1031704 10317044000 317.04 \n", "518 21495 455.0 MN 25 7502 101 1000900 10009001001 9 \n", "516 21495 47.0 MN 25 27 101 1000900 10009001000 9 \n", "\n", " CB2010 ... Version DCPEdited Latitude Longitude Notes Shape_Leng \\\n", "448 4001 ... 22v3.1 t 40.712618 -74.016586 None 0.0 \n", "483 4001 ... 22v3.1 t 40.712618 -74.016586 None 0.0 \n", "324 4001 ... 22v3.1 t 40.712618 -74.016586 None 0.0 \n", "518 1000 ... 22v3.1 None 40.705919 -74.010766 None 0.0 \n", "516 1000 ... 22v3.1 None 40.705686 -74.010287 None 0.0 \n", "\n", " Shape_Area geometry \\\n", "448 2.052775e+06 MULTIPOLYGON (((980584.841 200985.007, 980594.... \n", "483 2.052775e+06 MULTIPOLYGON (((980584.841 200985.007, 980594.... \n", "324 2.052775e+06 MULTIPOLYGON (((980584.841 200985.007, 980594.... \n", "518 2.585314e+04 MULTIPOLYGON (((981284.442 196374.824, 981236.... \n", "516 1.625568e+04 MULTIPOLYGON (((981492.934 196409.111, 981478.... \n", "\n", " point snap_dist \n", "448 POINT (979355.000 197234.014) 0.000000 \n", "483 POINT (979355.000 197234.014) 0.000000 \n", "324 POINT (979759.000 199686.014) 0.000000 \n", "518 POINT (981399.000 196469.014) 11.267715 \n", "516 POINT (981399.000 196469.014) 11.310903 \n", "\n", "[5 rows x 99 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Discard any lines that are greater than tolerance from points\n", "tmp = tmp.loc[tmp.snap_dist <= offset]\n", "\n", "# Sort on ascending snap distance, so that closest goes to top\n", "tmp = tmp.sort_values(by=[\"snap_dist\"])\n", "\n", "tmp.head()" ] }, { "cell_type": "code", "execution_count": 16, "id": "0e02b9ae", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
line_iBoroughBlockLotCDBCT2020BCTCB2020CT2010CB2010SchoolDist...VersionDCPEditedLatitudeLongitudeNotesShape_LengShape_Areageometrypointsnap_dist
pt_idx
459138.0MN90110110015011001501100915.01100402...22v3.1None40.711176-74.008123None0.06523.195602MULTIPOLYGON (((982041.663 198346.135, 982046....POINT (982027.000 198304.014)13.819315
1096353.0MN143750310110039001003900300739400702...22v3.1t40.717834-74.009967None0.05299.637906MULTIPOLYGON (((981511.400 200756.768, 981461....POINT (981500.000 200877.014)26.688052
1859356.0MN1443110110033001003300201833302202...22v3.1t40.716852-74.008459None0.01353.898306MULTIPOLYGON (((981934.351 200443.725, 981920....POINT (981942.000 200380.014)46.337050
2563386.0MN151110110033001003300300433101002...22v3.1t40.715820-74.005977None0.026217.794000MULTIPOLYGON (((982672.512 200047.743, 982690....POINT (982569.000 199967.014)26.450837
4135212.0MN38171011000700100070070027101202...22v3.1None40.705661-74.006168None0.028946.530441MULTIPOLYGON (((982494.655 196247.048, 982403....POINT (982649.000 196479.014)31.676819
\n", "

5 rows × 98 columns

\n", "
" ], "text/plain": [ " line_i Borough Block Lot CD BCT2020 BCTCB2020 CT2010 CB2010 \\\n", "pt_idx \n", "459 138.0 MN 90 1 101 1001501 10015011009 15.01 1004 \n", "1096 353.0 MN 143 7503 101 1003900 10039003007 39 4007 \n", "1859 356.0 MN 144 31 101 1003300 10033002018 33 3022 \n", "2563 386.0 MN 151 1 101 1003300 10033003004 33 1010 \n", "4135 212.0 MN 38 17 101 1000700 10007007002 7 1012 \n", "\n", " SchoolDist ... Version DCPEdited Latitude Longitude Notes \\\n", "pt_idx ... \n", "459 02 ... 22v3.1 None 40.711176 -74.008123 None \n", "1096 02 ... 22v3.1 t 40.717834 -74.009967 None \n", "1859 02 ... 22v3.1 t 40.716852 -74.008459 None \n", "2563 02 ... 22v3.1 t 40.715820 -74.005977 None \n", "4135 02 ... 22v3.1 None 40.705661 -74.006168 None \n", "\n", " Shape_Leng Shape_Area \\\n", "pt_idx \n", "459 0.0 6523.195602 \n", "1096 0.0 5299.637906 \n", "1859 0.0 1353.898306 \n", "2563 0.0 26217.794000 \n", "4135 0.0 28946.530441 \n", "\n", " geometry \\\n", "pt_idx \n", "459 MULTIPOLYGON (((982041.663 198346.135, 982046.... \n", "1096 MULTIPOLYGON (((981511.400 200756.768, 981461.... \n", "1859 MULTIPOLYGON (((981934.351 200443.725, 981920.... \n", "2563 MULTIPOLYGON (((982672.512 200047.743, 982690.... \n", "4135 MULTIPOLYGON (((982494.655 196247.048, 982403.... 
\n", "\n", " point snap_dist \n", "pt_idx \n", "459 POINT (982027.000 198304.014) 13.819315 \n", "1096 POINT (981500.000 200877.014) 26.688052 \n", "1859 POINT (981942.000 200380.014) 46.337050 \n", "2563 POINT (982569.000 199967.014) 26.450837 \n", "4135 POINT (982649.000 196479.014) 31.676819 \n", "\n", "[5 rows x 98 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# tmp is sorted by ascending snap_dist, so the first row found for each\n", "# pt_idx is that point's closest shape. drop_duplicates keeps that row whole;\n", "# GroupBy.first() would instead take the first non-null value per column and\n", "# could mix in attributes of a farther shape where the closest one has nulls.\n", "closest = tmp.drop_duplicates(subset=\"pt_idx\", keep=\"first\")\n", "\n", "# index by pt_idx (sorted), the same layout groupby(\"pt_idx\") produced, so the\n", "# index-based join back to the points keeps working\n", "closest = closest.set_index(\"pt_idx\").sort_index()\n", "\n", "# construct a GeoDataFrame of the closest shapes\n", "closest = gpd.GeoDataFrame(closest, geometry=\"geometry\")\n", "\n", "closest.head()" ] }, { "cell_type": "code", "execution_count": 17, "id": "e4d24365", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
unique_keycreated_dateagencyagency_namecomplaint_typedescriptorincident_zipincident_addressstreet_namecross_street_1...PFIRM15_FLAGVersionDCPEditedLatitudeLongitudeNotesShape_LengShape_Areapointsnap_dist
0485422202020-12-31T15:41:00.000DEPDepartment of Environmental ProtectionSewerStreet Flooding (SJ)11420.0117-17 135 STREET135 STREETFOCH BLVD...NaNNaNNaNNaNNaNNaNNaNNaNNoneNaN
1485364302020-12-31T14:49:00.000DEPDepartment of Environmental ProtectionSewerStreet Flooding (SJ)11357.020-24 150 STREET150 STREET20 AVE...NaNNaNNaNNaNNaNNaNNaNNaNNoneNaN
2485393612020-12-31T14:03:00.000DEPDepartment of Environmental ProtectionSewerStreet Flooding (SJ)11228.07223 8 AVENUE8 AVENUE72 ST...NaNNaNNaNNaNNaNNaNNaNNaNNoneNaN
3485431322020-12-31T13:48:00.000DEPDepartment of Environmental ProtectionSewerStreet Flooding (SJ)10032.0NaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNoneNaN
4485364412020-12-31T13:10:00.000DEPDepartment of Environmental ProtectionSewerStreet Flooding (SJ)11234.03123 FILLMORE AVENUEFILLMORE AVENUEE 31 ST...NaNNaNNaNNaNNaNNaNNaNNaNNoneNaN
\n", "

5 rows × 132 columns

\n", "
" ], "text/plain": [ " unique_key created_date agency \\\n", "0 48542220 2020-12-31T15:41:00.000 DEP \n", "1 48536430 2020-12-31T14:49:00.000 DEP \n", "2 48539361 2020-12-31T14:03:00.000 DEP \n", "3 48543132 2020-12-31T13:48:00.000 DEP \n", "4 48536441 2020-12-31T13:10:00.000 DEP \n", "\n", " agency_name complaint_type \\\n", "0 Department of Environmental Protection Sewer \n", "1 Department of Environmental Protection Sewer \n", "2 Department of Environmental Protection Sewer \n", "3 Department of Environmental Protection Sewer \n", "4 Department of Environmental Protection Sewer \n", "\n", " descriptor incident_zip incident_address street_name \\\n", "0 Street Flooding (SJ) 11420.0 117-17 135 STREET 135 STREET \n", "1 Street Flooding (SJ) 11357.0 20-24 150 STREET 150 STREET \n", "2 Street Flooding (SJ) 11228.0 7223 8 AVENUE 8 AVENUE \n", "3 Street Flooding (SJ) 10032.0 NaN NaN \n", "4 Street Flooding (SJ) 11234.0 3123 FILLMORE AVENUE FILLMORE AVENUE \n", "\n", " cross_street_1 ... PFIRM15_FLAG Version DCPEdited Latitude Longitude \\\n", "0 FOCH BLVD ... NaN NaN NaN NaN NaN \n", "1 20 AVE ... NaN NaN NaN NaN NaN \n", "2 72 ST ... NaN NaN NaN NaN NaN \n", "3 NaN ... NaN NaN NaN NaN NaN \n", "4 E 31 ST ... NaN NaN NaN NaN NaN \n", "\n", " Notes Shape_Leng Shape_Area point snap_dist \n", "0 NaN NaN NaN None NaN \n", "1 NaN NaN NaN None NaN \n", "2 NaN NaN NaN None NaN \n", "3 NaN NaN NaN None NaN \n", "4 NaN NaN NaN None NaN \n", "\n", "[5 rows x 132 columns]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Attach the attributes of each point's closest shape to the original points.\n", "# This is an index-on-index left join, so every point is kept and points with\n", "# no shape within the tolerance get NaN in the joined columns. The shape\n", "# geometry is dropped so the result keeps the points' own geometry.\n", "updated_points = gdf.join(closest.drop(\"geometry\", axis=\"columns\"), how=\"left\")\n", "\n", "updated_points.head()" ] }, { "cell_type": "code", "execution_count": 18, "id": "b0ef4791", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
unique_keyincident_zipbblx_coordinate_state_planey_coordinate_state_planelatitudelongitudelocation_typelandmarkfacility_type...BBLCondoNoXCoordYCoordAPPBBLLatitudeLongitudeShape_LengShape_Areasnap_dist
count2.695900e+0426959.0000001.615900e+042.695900e+0426959.00000026959.00000026959.0000000.00.00.0...8.600000e+0145.00000086.00000086.0000006.100000e+0186.00000086.00000086.08.600000e+0186.000000
mean3.254573e+0710969.3844363.699429e+091.005498e+06186184.60744140.677615-73.923323NaNNaNNaN...1.000691e+091692.355556981860.232558197816.5348841.000704e+0940.709637-74.0086200.01.464337e+0555.102735
std9.167418e+06536.3774701.075864e+093.505257e+0430576.1507500.0839240.126329NaNNaNNaN...4.924506e+05891.6785281349.5509901455.1571794.997937e+050.0039940.0048680.04.086507e+0541.886663
min1.563993e+0710001.0000000.000000e+009.133570e+05121032.00000040.498628-74.254937NaNNaNNaN...1.000078e+0965.000000979652.000000195554.0000001.000070e+0940.703427-74.0165860.06.592921e+020.000000
25%2.455603e+0710312.0000003.060255e+099.868870e+05161248.00000040.609123-73.990504NaNNaNNaN...1.000210e+09960.000000980937.250000196635.2500001.000250e+0940.706394-74.0119480.06.331987e+0328.184895
50%3.291181e+0711221.0000004.067910e+091.007480e+06183618.00000040.670610-73.916267NaNNaNNaN...1.000610e+091704.000000981720.000000197685.0000001.000710e+0940.709276-74.0091250.01.725629e+0438.098773
75%4.043790e+0711385.0000004.156250e+091.033980e+06206488.50000040.733373-73.820554NaNNaNNaN...1.001055e+092442.000000982535.000000198845.7500001.000970e+0940.712461-74.0061860.04.467984e+0475.728271
max4.854313e+0711697.0000005.080500e+091.067279e+06271876.00000040.912869-73.700384NaNNaNNaN...1.001620e+093035.000000984839.000000200803.0000001.001621e+0940.717834-73.9978750.02.052775e+06165.787876
\n", "

8 rows × 59 columns

\n", "
" ], "text/plain": [ " unique_key incident_zip bbl x_coordinate_state_plane \\\n", "count 2.695900e+04 26959.000000 1.615900e+04 2.695900e+04 \n", "mean 3.254573e+07 10969.384436 3.699429e+09 1.005498e+06 \n", "std 9.167418e+06 536.377470 1.075864e+09 3.505257e+04 \n", "min 1.563993e+07 10001.000000 0.000000e+00 9.133570e+05 \n", "25% 2.455603e+07 10312.000000 3.060255e+09 9.868870e+05 \n", "50% 3.291181e+07 11221.000000 4.067910e+09 1.007480e+06 \n", "75% 4.043790e+07 11385.000000 4.156250e+09 1.033980e+06 \n", "max 4.854313e+07 11697.000000 5.080500e+09 1.067279e+06 \n", "\n", " y_coordinate_state_plane latitude longitude location_type \\\n", "count 26959.000000 26959.000000 26959.000000 0.0 \n", "mean 186184.607441 40.677615 -73.923323 NaN \n", "std 30576.150750 0.083924 0.126329 NaN \n", "min 121032.000000 40.498628 -74.254937 NaN \n", "25% 161248.000000 40.609123 -73.990504 NaN \n", "50% 183618.000000 40.670610 -73.916267 NaN \n", "75% 206488.500000 40.733373 -73.820554 NaN \n", "max 271876.000000 40.912869 -73.700384 NaN \n", "\n", " landmark facility_type ... BBL CondoNo XCoord \\\n", "count 0.0 0.0 ... 8.600000e+01 45.000000 86.000000 \n", "mean NaN NaN ... 1.000691e+09 1692.355556 981860.232558 \n", "std NaN NaN ... 4.924506e+05 891.678528 1349.550990 \n", "min NaN NaN ... 1.000078e+09 65.000000 979652.000000 \n", "25% NaN NaN ... 1.000210e+09 960.000000 980937.250000 \n", "50% NaN NaN ... 1.000610e+09 1704.000000 981720.000000 \n", "75% NaN NaN ... 1.001055e+09 2442.000000 982535.000000 \n", "max NaN NaN ... 
1.001620e+09 3035.000000 984839.000000 \n", "\n", " YCoord APPBBL Latitude Longitude Shape_Leng \\\n", "count 86.000000 6.100000e+01 86.000000 86.000000 86.0 \n", "mean 197816.534884 1.000704e+09 40.709637 -74.008620 0.0 \n", "std 1455.157179 4.997937e+05 0.003994 0.004868 0.0 \n", "min 195554.000000 1.000070e+09 40.703427 -74.016586 0.0 \n", "25% 196635.250000 1.000250e+09 40.706394 -74.011948 0.0 \n", "50% 197685.000000 1.000710e+09 40.709276 -74.009125 0.0 \n", "75% 198845.750000 1.000970e+09 40.712461 -74.006186 0.0 \n", "max 200803.000000 1.001621e+09 40.717834 -73.997875 0.0 \n", "\n", " Shape_Area snap_dist \n", "count 8.600000e+01 86.000000 \n", "mean 1.464337e+05 55.102735 \n", "std 4.086507e+05 41.886663 \n", "min 6.592921e+02 0.000000 \n", "25% 6.331987e+03 28.184895 \n", "50% 1.725629e+04 38.098773 \n", "75% 4.467984e+04 75.728271 \n", "max 2.052775e+06 165.787876 \n", "\n", "[8 rows x 59 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Summary statistics of the joined points; the count row shows how many\n", "# points received attributes from a shape\n", "updated_points.describe()" ] }, { "cell_type": "code", "execution_count": 19, "id": "de00c266", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Sanity check: the join must not have duplicated any original point\n", "updated_points['unique_key'].is_unique" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.0" } }, "nbformat": 4, "nbformat_minor": 5 }