from pathlib import Path
from zipfile import ZipFile
from urllib.request import urlretrieve
# Download and unzip
= "https://open.gishub.org/data/duckdb/nyc_data.db.zip"
url = Path("nyc_data.db.zip")
zip_path = Path("nyc_data.db")
db_path
if not zip_path.exists():
urlretrieve(url, zip_path)
if not db_path.exists():
with ZipFile(zip_path) as zip_file:
"nyc_data.db") zip_file.extract(
Ibis now has support for DuckDB geospatial functions!
This blogpost showcases some examples of the geospatial API for the DuckDB backend. The material is inspired by the “DuckDB: Spatial Relationships” lesson from Dr. Qiusheng Wu’s course “Spatial Data Management” from the Department of Geography & Sustainability at the University of Tennessee, Knoxville.
Installation
Install Ibis with the dependencies needed to work with geospatial data using DuckDB:
$ pip install 'ibis-framework[duckdb,geospatial]'
Data
We are going to be working with data from New York City. The database contains multiple tables with information about subway stations, streets, neighborhood, census data and, homicides. The datasets in the database are in NAD83 / UTM zone 18N projection, EPSG:26918.
Let’s get started
The beauty of spatial databases is that they allow us to both store and compute over geometries.
import ibis
from ibis import _
= True
ibis.options.interactive
= ibis.duckdb.connect("nyc_data.db")
con con.list_tables()
['nyc_census_blocks',
'nyc_homicides',
'nyc_neighborhoods',
'nyc_streets',
'nyc_subway_stations']
We have multiple tables with information about New York City. Following Dr. Wu’s class, we’ll take a look at some spatial relations.
We can start by taking a peek at the nyc_subway_stations
table.
= con.table("nyc_subway_stations")
subway_stations subway_stations
┏━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ OBJECTID ┃ ID ┃ NAME ┃ ALT_NAME ┃ CROSS_ST ┃ LONG_NAME ┃ LABEL ┃ BOROUGH ┃ NGHBHD ┃ ROUTES ┃ TRANSFERS ┃ COLOR ┃ EXPRESS ┃ CLOSED ┃ geom ┃ ┡━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ float64 │ float64 │ string │ string │ string │ string │ string │ string │ string │ string │ string │ string │ string │ string │ geospatial:geometry │ ├──────────┼─────────┼──────────────┼─────────────────┼─────────────────┼─────────────────────────────────────────┼───────────────────────────────────┼───────────┼────────┼────────┼───────────┼──────────────┼─────────┼────────┼──────────────────────────────────┤ │ 1.0 │ 376.0 │ Cortlandt St │ NULL │ Church St │ Cortlandt St (R,W) Manhattan │ Cortlandt St (R,W) │ Manhattan │ NULL │ R,W │ R,W │ YELLOW │ NULL │ NULL │ <POINT (583521.854 4507077.863)> │ │ 2.0 │ 2.0 │ Rector St │ NULL │ NULL │ Rector St (1) Manhattan │ Rector St (1) │ Manhattan │ NULL │ 1 │ 1 │ RED │ NULL │ NULL │ <POINT (583324.487 4506805.373)> │ │ 3.0 │ 1.0 │ South Ferry │ NULL │ NULL │ South Ferry (1) Manhattan │ South Ferry (1) │ Manhattan │ NULL │ 1 │ 1 │ RED │ NULL │ NULL │ <POINT (583304.182 4506069.654)> │ │ 4.0 │ 125.0 │ 138th St │ Grand Concourse │ Grand Concourse │ 138th St / Grand Concourse (4,5) Bronx │ 138th St / Grand Concourse (4,5) │ Bronx │ NULL │ 4,5 │ 4,5 │ GREEN │ NULL │ NULL │ <POINT (590250.106 4518558.02)> │ │ 5.0 │ 126.0 │ 149th St │ Grand Concourse │ Grand Concourse │ 149th St / Grand Concourse (4) Bronx │ 149th St / Grand Concourse (4) │ Bronx │ NULL │ 4 │ 2,4,5 │ GREEN │ express │ NULL │ <POINT (590454.74 4519145.72)> │ │ 6.0 │ 45.0 │ 149th St │ Grand Concourse │ Grand Concourse │ 149th St / Grand Concourse (2,5) Bronx │ 149th St / Grand Concourse (2,5) │ Bronx │ NULL │ 2,5 │ 2,4,5 │ RED-GREEN │ express │ NULL │ <POINT (590465.893 4519168.697)> │ │ 7.0 │ 127.0 │ 161st St │ Yankee Stadium │ River Ave │ 161st St / Yankee Stadium (B,D,4) Bronx │ 161st St / Yankee Stadium (B,D,4) │ Bronx │ NULL │ B,D,4 │ B,D,4 │ GREEN-ORANGE │ NULL │ NULL │ <POINT (590573.169 4520214.766)> │ │ 8.0 │ 208.0 │ 167th St │ NULL │ Grand Concourse │ 167th St (B,D) Bronx │ 167th St (B,D) │ Bronx │ NULL │ B,D │ B,D │ ORANGE │ NULL │ NULL │ <POINT (591252.831 4520950.353)> │ │ 9.0 │ 128.0 │ 167th St │ NULL │ River Ave │ 167th St (4) Bronx │ 167th St (4) │ Bronx │ NULL │ 4 │ 4 │ GREEN │ NULL │ NULL │ <POINT (590946.397 4521077.319)> │ │ 10.0 │ 209.0 │ 170th St │ NULL │ Grand Concourse │ 170th St (B,D) Bronx │ 170th St (B,D) │ Bronx │ NULL │ B,D │ B,D │ ORANGE │ NULL │ NULL │ <POINT (591583.611 4521434.847)> │ │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ … │ └──────────┴─────────┴──────────────┴─────────────────┴─────────────────┴─────────────────────────────────────────┴───────────────────────────────────┴───────────┴────────┴────────┴───────────┴──────────────┴─────────┴────────┴──────────────────────────────────┘
Notice that the last column has a geometry
type, and in this case it contains points that represent the location of each subway station. Let’s grab the entry for the Broad St subway station.
= subway_stations.filter(subway_stations.NAME == "Broad St")
broad_station broad_station
┏━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ OBJECTID ┃ ID ┃ NAME ┃ ALT_NAME ┃ CROSS_ST ┃ LONG_NAME ┃ LABEL ┃ BOROUGH ┃ NGHBHD ┃ ROUTES ┃ TRANSFERS ┃ COLOR ┃ EXPRESS ┃ CLOSED ┃ geom ┃ ┡━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ float64 │ float64 │ string │ string │ string │ string │ string │ string │ string │ string │ string │ string │ string │ string │ geospatial:geometry │ ├──────────┼─────────┼──────────┼──────────┼──────────┼────────────────────────────┼──────────────────┼───────────┼────────┼────────┼───────────┼────────┼─────────┼────────┼──────────────────────────────────┤ │ 332.0 │ 304.0 │ Broad St │ NULL │ Wall St │ Broad St (J,M,Z) Manhattan │ Broad St (J,M,Z) │ Manhattan │ NULL │ J,M,Z │ J,M,Z │ BROWN │ express │ NULL │ <POINT (583571.906 4506714.341)> │ └──────────┴─────────┴──────────┴──────────┴──────────┴────────────────────────────┴──────────────────┴───────────┴────────┴────────┴───────────┴────────┴─────────┴────────┴──────────────────────────────────┘
Then convert it to a scalar subquery:
= broad_station.select("geom").as_scalar() broad_station_subquery
geo_equals
(ST_Equals
)
In DuckDB ST_Equals
returns True
if two geometries are topologically equal. This means that they have the same dimension and identical coordinate values, although the order of the vertices may be different.
The following is a bit redundant but we can check if our "Broad St"
point matches only one point in our data using geo_equals
filter(subway_stations.geom.geo_equals(broad_station_subquery)) subway_stations.
┏━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ OBJECTID ┃ ID ┃ NAME ┃ ALT_NAME ┃ CROSS_ST ┃ LONG_NAME ┃ LABEL ┃ BOROUGH ┃ NGHBHD ┃ ROUTES ┃ TRANSFERS ┃ COLOR ┃ EXPRESS ┃ CLOSED ┃ geom ┃ ┡━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ float64 │ float64 │ string │ string │ string │ string │ string │ string │ string │ string │ string │ string │ string │ string │ geospatial:geometry │ ├──────────┼─────────┼──────────┼──────────┼──────────┼────────────────────────────┼──────────────────┼───────────┼────────┼────────┼───────────┼────────┼─────────┼────────┼──────────────────────────────────┤ │ 332.0 │ 304.0 │ Broad St │ NULL │ Wall St │ Broad St (J,M,Z) Manhattan │ Broad St (J,M,Z) │ Manhattan │ NULL │ J,M,Z │ J,M,Z │ BROWN │ express │ NULL │ <POINT (583571.906 4506714.341)> │ └──────────┴─────────┴──────────┴──────────┴──────────┴────────────────────────────┴──────────────────┴───────────┴────────┴────────┴───────────┴────────┴─────────┴────────┴──────────────────────────────────┘
We can also write this query without using broad_station
as a variable, and with the help of the deferred expressions API, also known as the underscore API.
filter(_.geom.geo_equals(_.filter(_.NAME == "Broad St")[["geom"]].as_scalar())) subway_stations.
┏━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ OBJECTID ┃ ID ┃ NAME ┃ ALT_NAME ┃ CROSS_ST ┃ LONG_NAME ┃ LABEL ┃ BOROUGH ┃ NGHBHD ┃ ROUTES ┃ TRANSFERS ┃ COLOR ┃ EXPRESS ┃ CLOSED ┃ geom ┃ ┡━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ float64 │ float64 │ string │ string │ string │ string │ string │ string │ string │ string │ string │ string │ string │ string │ geospatial:geometry │ ├──────────┼─────────┼──────────┼──────────┼──────────┼────────────────────────────┼──────────────────┼───────────┼────────┼────────┼───────────┼────────┼─────────┼────────┼──────────────────────────────────┤ │ 332.0 │ 304.0 │ Broad St │ NULL │ Wall St │ Broad St (J,M,Z) Manhattan │ Broad St (J,M,Z) │ Manhattan │ NULL │ J,M,Z │ J,M,Z │ BROWN │ express │ NULL │ <POINT (583571.906 4506714.341)> │ └──────────┴─────────┴──────────┴──────────┴──────────┴────────────────────────────┴──────────────────┴───────────┴────────┴────────┴───────────┴────────┴─────────┴────────┴──────────────────────────────────┘
intersect
(ST_Intersect)
Let’s locate the neighborhood of the “Broad Street” subway station using the geospatial intersect
function. The intersect
function returns True
if two geometries have any points in common.
= con.table("nyc_neighborhoods")
boroughs boroughs
┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ BORONAME ┃ NAME ┃ geom ┃ ┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ string │ string │ geospatial:geometry │ ├───────────────┼──────────────────────────┼──────────────────────────────────────────────────────────────────────────────────┤ │ Brooklyn │ Bensonhurst │ <MULTIPOLYGON (((582771.426 4495167.427, 584651.294 4497541.643, 585422.281 ...> │ │ Manhattan │ East Village │ <MULTIPOLYGON (((585508.753 4509691.267, 586826.357 4508984.188, 586726.827 ...> │ │ Manhattan │ West Village │ <MULTIPOLYGON (((583263.278 4509242.626, 583276.82 4509378.825, 583473.971 4...> │ │ The Bronx │ Throggs Neck │ <MULTIPOLYGON (((597640.009 4520272.72, 597647.746 4520617.824, 597805.462 4...> │ │ The Bronx │ Wakefield-Williamsbridge │ <MULTIPOLYGON (((595285.205 4525938.798, 595348.545 4526158.777, 595672 4527...> │ │ Queens │ Auburndale │ <MULTIPOLYGON (((600973.009 4510338.857, 601002.162 4510743.044, 601131.315 ...> │ │ Manhattan │ Battery Park │ <MULTIPOLYGON (((583408.101 4508093.111, 583356.048 4507665.145, 583260.947 ...> │ │ Manhattan │ Carnegie Hill │ <MULTIPOLYGON (((588501.208 4515525.88, 588125.03 4514806.77, 587702.963 451...> │ │ Staten Island │ Mariners Harbor │ <MULTIPOLYGON (((570300.108 4497031.156, 570393.836 4497227.426, 570478.075 ...> │ │ Staten Island │ Rossville │ <MULTIPOLYGON (((564664.957 4489358.427, 564771.457 4489415.865, 564783.746 ...> │ │ … │ … │ … │ └───────────────┴──────────────────────────┴──────────────────────────────────────────────────────────────────────────────────┘
filter(boroughs.geom.intersects(broad_station_subquery)) boroughs.
┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ BORONAME ┃ NAME ┃ geom ┃ ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ string │ string │ geospatial:geometry │ ├───────────┼────────────────────┼──────────────────────────────────────────────────────────────────────────────────┤ │ Manhattan │ Financial District │ <MULTIPOLYGON (((583356.048 4507665.145, 583505.038 4507562.36, 583828.604 4...> │ └───────────┴────────────────────┴──────────────────────────────────────────────────────────────────────────────────┘
d_within
(ST_DWithin)
We can also find the streets near (say, within 10 meters) the Broad St subway station using the d_within
function. The d_within
function returns True if the geometries are within a given distance.
= con.table("nyc_streets")
streets streets
┏━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ ID ┃ NAME ┃ ONEWAY ┃ TYPE ┃ geom ┃ ┡━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ int32 │ string │ string │ string │ geospatial:geometry │ ├───────┼─────────────────┼────────┼───────────────┼──────────────────────────────────────────────────────────────────────────────────┤ │ 1 │ Shore Pky S │ NULL │ residential │ <MULTILINESTRING ((586785.477 4492901.001, 586898.232 4492943.725, 587118.98...> │ │ 2 │ NULL │ NULL │ footway │ <MULTILINESTRING ((586645.007 4504977.75, 586664.225 4504988.544, 586672.151...> │ │ 3 │ Avenue O │ NULL │ residential │ <MULTILINESTRING ((586750.302 4496109.722, 586837.373 4496123.393, 586929.08...> │ │ 4 │ Walsh Ct │ NULL │ residential │ <MULTILINESTRING ((586728.696 4497971.053, 586886.358 4498000.536))> │ │ 5 │ NULL │ NULL │ motorway_link │ <MULTILINESTRING ((586587.053 4510088.25, 586641.734 4510156.835))> │ │ 6 │ Avenue Z │ NULL │ residential │ <MULTILINESTRING ((586792.159 4493279.322, 586978.534 4493308.473, 587056.76...> │ │ 7 │ Dank Ct │ NULL │ residential │ <MULTILINESTRING ((586794.754 4493361.729, 586966.095 4493387.928))> │ │ 8 │ Cumberland Walk │ NULL │ footway │ <MULTILINESTRING ((586657.468 4505324.904, 586692.489 4505320.983, 586707.76...> │ │ 9 │ Cumberland Walk │ NULL │ footway │ <MULTILINESTRING ((586670.712 4505521.567, 586667.915 4505500.551, 586657.46...> │ │ 10 │ NULL │ NULL │ residential │ <MULTILINESTRING ((586598.326 4510424.446, 586602.314 4510430.044, 586604.94...> │ │ … │ … │ … │ … │ … │ └───────┴─────────────────┴────────┴───────────────┴──────────────────────────────────────────────────────────────────────────────────┘
Using the deferred API, we can check which streets are within d=10
meters of distance.
= streets.filter(_.geom.d_within(broad_station_subquery, 10))
sts_near_broad sts_near_broad
┏━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ ID ┃ NAME ┃ ONEWAY ┃ TYPE ┃ geom ┃ ┡━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ int32 │ string │ string │ string │ geospatial:geometry │ ├───────┼───────────┼────────┼──────────────┼──────────────────────────────────────────────────────────────────────────────────┤ │ 17394 │ Wall St │ NULL │ unclassified │ <MULTILINESTRING ((583483.954 4506785.646, 583522.11 4506758.431, 583571.509...> │ │ 17399 │ Broad St │ NULL │ unclassified │ <MULTILINESTRING ((583571.509 4506715.578, 583529.136 4506622.066, 583509.85...> │ │ 17445 │ Nassau St │ NULL │ unclassified │ <MULTILINESTRING ((583571.509 4506715.578, 583610.912 4506780.181, 583641.80...> │ └───────┴───────────┴────────┴──────────────┴──────────────────────────────────────────────────────────────────────────────────┘
In the previous query, streets
and broad_station
are different tables. We use as_scalar()
to generate a scalar subquery from a table with a single column (whose shape is scalar).
To visualize the findings, we will convert the tables to GeoPandas DataFrames.
= broad_station.to_pandas()
broad_station_gdf = "EPSG:26918"
broad_station_gdf.crs
= sts_near_broad.to_pandas()
sts_near_broad_gdf = "EPSG:26918"
sts_near_broad_gdf.crs
= streets.to_pandas()
streets_gdf = "EPSG:26918" streets_gdf.crs
from lonboard import Map, ScatterplotLayer, PathLayer, PolygonLayer
= ScatterplotLayer.from_geopandas(
broad_station_layer ="blue", get_radius=5
broad_station_gdf, get_fill_color
)= PathLayer.from_geopandas(
sts_near_broad_layer ="red", opacity=0.4, get_width=2
sts_near_broad_gdf, get_color
)= PathLayer.from_geopandas(streets_gdf, get_color="grey", opacity=0.3)
streets_layer = Map(
m
[
broad_station_layer,
sts_near_broad_layer,
streets_layer,
],={"longitude": -74.01066, "latitude": 40.7069, "zoom": 16}
view_state
) m
You can zoom in and out, and hover over the map to check on the street names.
buffer
(ST_Buffer)
Next, we’ll take a look at the homicides table and showcase some additional functionality related to polygon handling.
= con.table("nyc_homicides")
homicides homicides
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ INCIDENT_D ┃ BORONAME ┃ NUM_VICTIM ┃ PRIMARY_MO ┃ ID ┃ WEAPON ┃ LIGHT_DARK ┃ YEAR ┃ geom ┃ ┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ date │ string │ string │ string │ int32 │ string │ string │ int32 │ geospatial:geometry │ ├────────────┼───────────────┼────────────┼────────────┼───────┼────────┼────────────┼───────┼──────────────────────────────────┤ │ 2008-01-01 │ Brooklyn │ 1 │ NULL │ 7 │ gun │ D │ 2008 │ <POINT (592158.666 4502210.892)> │ │ 2008-01-04 │ Manhattan │ 1 │ NULL │ 14 │ gun │ D │ 2008 │ <POINT (588654.952 4517855.383)> │ │ 2008-01-05 │ Queens │ 1 │ NULL │ 15 │ gun │ D │ 2008 │ <POINT (605800.815 4505730.608)> │ │ 2008-01-04 │ Queens │ 1 │ NULL │ 16 │ knife │ D │ 2008 │ <POINT (594255.157 4512250.378)> │ │ 2008-01-05 │ Queens │ 1 │ NULL │ 18 │ gun │ D │ 2008 │ <POINT (605498.135 4496052.64)> │ │ 2008-01-07 │ Brooklyn │ 1 │ NULL │ 20 │ gun │ D │ 2008 │ <POINT (592020.999 4505733.647)> │ │ 2008-01-10 │ Manhattan │ 1 │ NULL │ 22 │ gun │ D │ 2008 │ <POINT (584055.518 4511774.724)> │ │ 2008-01-10 │ Manhattan │ 1 │ NULL │ 23 │ gun │ D │ 2008 │ <POINT (587283.748 4516908.39)> │ │ 2008-01-13 │ Staten Island │ 1 │ NULL │ 25 │ gun │ D │ 2008 │ <POINT (570593.125 4498222.78)> │ │ 2008-01-16 │ Queens │ 1 │ NULL │ 27 │ gun │ D │ 2008 │ <POINT (607385.969 4501506.717)> │ │ … │ … │ … │ … │ … │ … │ … │ … │ … │ └────────────┴───────────────┴────────────┴────────────┴───────┴────────┴────────────┴───────┴──────────────────────────────────┘
Let’s use the buffer
method to find homicides near our "Broad St"
station point.
The buffer
method computes a polygon or multipolygon that represents all points whose distance from a geometry is less than or equal to a given distance.
buffer(200) broad_station.geom.
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ GeoBuffer(geom, 200.0) ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ geospatial:geometry │ ├──────────────────────────────────────────────────────────────────────────────────┤ │ <POLYGON ((583771.906 4506714.341, 583768.063 4506675.323, 583756.682 450663...> │ └──────────────────────────────────────────────────────────────────────────────────┘
We can check the area using the area
(ST_Area
) function, and see that is \(~ \pi r^{2}=125664\)
buffer(200).area() broad_station.geom.
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ GeoArea(GeoBuffer(geom, 200.0)) ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ float64 │ ├─────────────────────────────────┤ │ 124857.80609 │ └─────────────────────────────────┘
To find if there were any homicides in that area, we can find where the polygon resulting from adding the 200 meters buffer to our “Broad St” station point intersects with the geometry column in our homicides table.
= homicides.filter(_.geom.intersects(broad_station.select(_.geom.buffer(200)).as_scalar()))
h_near_broad h_near_broad
┏━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ INCIDENT_D ┃ BORONAME ┃ NUM_VICTIM ┃ PRIMARY_MO ┃ ID ┃ WEAPON ┃ LIGHT_DARK ┃ YEAR ┃ geom ┃ ┡━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ date │ string │ string │ string │ int32 │ string │ string │ int32 │ geospatial:geometry │ ├────────────┼───────────┼────────────┼────────────┼───────┼────────┼────────────┼───────┼──────────────────────────────────┤ │ 2009-07-07 │ Manhattan │ 1 │ NULL │ 3544 │ NULL │ NULL │ 2009 │ <POINT (583443.249 4506757.877)> │ └────────────┴───────────┴────────────┴────────────┴───────┴────────┴────────────┴───────┴──────────────────────────────────┘
It looks like there was one homicide within 200 meters from the “Broad St” station, but from this data we can’t tell the street near which it happened. However, we can check if the homicide point is within a small distance of a street.
= streets.filter(_.geom.d_within(h_near_broad.select(_.geom).as_scalar(), 2))
h_street h_street
┏━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ ID ┃ NAME ┃ ONEWAY ┃ TYPE ┃ geom ┃ ┡━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ int32 │ string │ string │ string │ geospatial:geometry │ ├───────┼───────────┼────────┼──────────────┼──────────────────────────────────────────────────────────────────────────────────┤ │ 17296 │ Rector St │ yes │ unclassified │ <MULTILINESTRING ((583184.691 4506868.803, 583257.066 4506835.054, 583324.11...> │ └───────┴───────────┴────────┴──────────────┴──────────────────────────────────────────────────────────────────────────────────┘
Let’s plot this:
= broad_station.mutate(geom=broad_station.geom.buffer(200))
broad_station_zone = broad_station_zone.to_pandas()
broad_station_zone = "EPSG:26918"
broad_station_zone.crs
= h_near_broad.to_pandas()
h_near_broad_gdf = "EPSG:26918"
h_near_broad_gdf.crs
= h_street.to_pandas()
h_street_gdf = "EPSG:26918" h_street_gdf.crs
= ScatterplotLayer.from_geopandas(
broad_station_layer ="orange", get_radius=5
broad_station_gdf, get_fill_color
)
= PolygonLayer.from_geopandas(
broad_station_zone_layer ="orange", opacity=0.1
broad_station_zone, get_fill_color
)
= ScatterplotLayer.from_geopandas(
h_near_broad_layer ="red", get_radius=5
h_near_broad_gdf, get_fill_color
)
= PathLayer.from_geopandas(
h_street_layer ="blue", opacity=0.5, get_width=2
h_street_gdf, get_color
)
= PathLayer.from_geopandas(streets_gdf, get_color="grey", opacity=0.3)
streets_layer
= Map(
mh
[
broad_station_layer,
broad_station_zone_layer,
h_near_broad_layer,
h_street_layer,
streets_layer,
],={"longitude": -74.01066, "latitude": 40.7069, "zoom": 16}
view_state
) mh
Functions supported and next steps
At the moment in Ibis we have support for around thirty geospatial functions in DuckDB and we will add some more (see list here).
We also support reading multiple geospatial formats via read_geo()
.
Here are some resources to learn more about Ibis:
Chat with us on Zulip: