Tutorials
Citibike
import pandas as pd
import geopandas as gpd
# NYC NTA polygons; used further down in the spatial join that attaches
# BoroName / NTAName to each station.
ntas = gpd.read_file("../Data/nyc_ntas_2020.geojson")

# Raw Citi Bike trip records.
# NOTE(review): this is an absolute path on one machine -- point it at your
# own copy of the trip CSV before running.
#
# Station ids mix numeric-looking values (e.g. 2733.03) with codes such as
# "SYS038". Without an explicit dtype pandas emits a DtypeWarning and may parse
# the same id as a float in one chunk and as a string in another, which would
# make a single station look like two different keys when grouping or merging.
# Reading both id columns as text keeps every id in one consistent type.
cb = pd.read_csv(
    "/Users/marioag/Documents/GitHub/citibike-viewer/data/od/202302-citibike-tripdata.csv",
    dtype={"start_station_id": str, "end_station_id": str},
)
/var/folders/g5/b592wl6x12s0tx4jfw9f7_j40000gn/T/ipykernel_17048/4078996454.py:1: DtypeWarning: Columns (5,7) have mixed types. Specify dtype option on import or set low_memory=False.
cb = pd.read_csv(
# Quick look at the raw trip table's (rows, columns) dimensions.
cb.shape
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[1], line 1
----> 1 cb.shape
NameError: name 'cb' is not defined
# Relabel the station-id columns as the two endpoints of a trip edge.
station_id_labels = {"start_station_id": "source", "end_station_id": "target"}
cb.rename(columns=station_id_labels, inplace=True)

# Expose the trip's starting coordinates under the short names used later on.
cb["lat"], cb["lon"] = cb["start_lat"], cb["start_lng"]
# cb.to_csv("../Data/citibike_sample.csv", index=False)
# Count trips per (source, target) pair, carrying the start coordinates and
# start station name along as extra grouping keys; the count lands in "weight".
edge_keys = ["source", "target", "lat", "lon", "start_station_name"]
trips_per_edge = cb.groupby(edge_keys).size()
cb_grouped = trips_per_edge.reset_index(name="weight")
cb_grouped.shape
(834630, 6)
# cb_grouped.to_csv("../Data/citibike_grouped.csv", index=False)
# Attach NTA attributes to every grouped row via a point-in-polygon join.
# Step 1: build a point geometry from each row's lon/lat (EPSG:4326).
start_points = gpd.points_from_xy(
    cb_grouped["lon"],
    cb_grouped["lat"],
    crs="EPSG:4326",
)
stations_gdf = gpd.GeoDataFrame(cb_grouped, geometry=start_points)

# Step 2: left join keeps every grouped row, matched or not, against the NTAs.
stations_w_ntas = gpd.sjoin(
    stations_gdf,
    ntas,
    how="left",
    predicate="intersects",
)
# One row per (station id, station name) with the first coordinates seen for
# it, relabelled to the output column names.
station_column_labels = {
    "source": "id",
    "lat": "latitude",
    "lon": "longitude",
    "start_station_name": "name",
}
stations = cb_grouped[list(station_column_labels)]
stations = stations.drop_duplicates(
    subset=["source", "start_station_name"], keep="first"
)
stations = stations.rename(columns=station_column_labels)
stations
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| | id | latitude | longitude | name |
|---|---|---|---|---|
| 0 | 2733.03 | 40.633349 | -74.016569 | 67 St & Erik Pl |
| 37 | 2782.02 | 40.635679 | -74.020005 | 5 Ave & 66 St |
| 95 | 2821.05 | 40.635560 | -74.012980 | 7 Ave & 62 St |
| 145 | 2832.03 | 40.637033 | -74.022141 | 4 Ave & Shore Road Dr |
| 244 | 2872.02 | 40.637660 | -74.017820 | 63 St & 5 Ave |
| ... | ... | ... | ... | ... |
| 834526 | 8795.03 | 40.878016 | -73.884744 | Grand Concourse & E Mosholu Pkwy S |
| 834540 | 8799.01 | 40.878120 | -73.891920 | Goulden Ave & W 205 St |
| 834574 | 8811.01 | 40.880294 | -73.886065 | Paul Ave & Mosholu Pkwy |
| 834585 | 8841.03 | 40.882178 | -73.886925 | W Mosholu Pkwy S & Sedgwick Ave |
| 834613 | SYS038 | 40.709540 | -73.931514 | Morgan Loading Docks |
3564 rows × 4 columns
# Build a one-row-per-station lookup of borough / NTA names from the spatial
# join result, then left-join it onto the unique station table.
nta_join_keys = ["source", "start_station_name"]
nta_lookup = stations_w_ntas[nta_join_keys + ["BoroName", "NTAName"]]
nta_lookup = nta_lookup.drop_duplicates(subset=nta_join_keys, keep="first")

stations_attr = pd.merge(
    stations,
    nta_lookup,
    how="left",
    left_on=["id", "name"],
    right_on=nta_join_keys,
    suffixes=("", "_nta"),
)  # .shape
stations_attr
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
| | id | latitude | longitude | name | source | start_station_name | BoroName | NTAName |
|---|---|---|---|---|---|---|---|---|
| 0 | 2733.03 | 40.633349 | -74.016569 | 67 St & Erik Pl | 2733.03 | 67 St & Erik Pl | Brooklyn | Bay Ridge |
| 1 | 2782.02 | 40.635679 | -74.020005 | 5 Ave & 66 St | 2782.02 | 5 Ave & 66 St | Brooklyn | Bay Ridge |
| 2 | 2821.05 | 40.635560 | -74.012980 | 7 Ave & 62 St | 2821.05 | 7 Ave & 62 St | Brooklyn | Sunset Park (Central) |
| 3 | 2832.03 | 40.637033 | -74.022141 | 4 Ave & Shore Road Dr | 2832.03 | 4 Ave & Shore Road Dr | Brooklyn | Bay Ridge |
| 4 | 2872.02 | 40.637660 | -74.017820 | 63 St & 5 Ave | 2872.02 | 63 St & 5 Ave | Brooklyn | Sunset Park (Central) |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3559 | 8795.03 | 40.878016 | -73.884744 | Grand Concourse & E Mosholu Pkwy S | 8795.03 | Grand Concourse & E Mosholu Pkwy S | Bronx | Bedford Park |
| 3560 | 8799.01 | 40.878120 | -73.891920 | Goulden Ave & W 205 St | 8799.01 | Goulden Ave & W 205 St | Bronx | Bedford Park |
| 3561 | 8811.01 | 40.880294 | -73.886065 | Paul Ave & Mosholu Pkwy | 8811.01 | Paul Ave & Mosholu Pkwy | Bronx | Bedford Park |
| 3562 | 8841.03 | 40.882178 | -73.886925 | W Mosholu Pkwy S & Sedgwick Ave | 8841.03 | W Mosholu Pkwy S & Sedgwick Ave | Bronx | Bedford Park |
| 3563 | SYS038 | 40.709540 | -73.931514 | Morgan Loading Docks | SYS038 | Morgan Loading Docks | Brooklyn | East Williamsburg |
3564 rows × 8 columns
# Write the station table with its borough / NTA attributes to CSV, leaving out the DataFrame index.
stations_attr.to_csv("../Data/citibike_stations_attr.csv", index=False)