3. Split survey data into lines#
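In this notebook we try to recreate the British Antarctic Survey (BAS) processing steps for splitting survey data into individual lines, using gaps in time, gaps in distance, and changes in bearing.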
[1]:
%load_ext autoreload
%autoreload 2
import geopandas as gpd
import pandas as pd
import plotly.io as pio
import airbornegeo
# Make "notebook" the default Plotly renderer for every figure shown below.
pio.renderers.default = "notebook"
/home/airbornegeo/airbornegeo/.pixi/envs/default/lib/python3.14/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
from .autonotebook import tqdm as notebook_tqdm
3.1. Load data#
This is a subset of the BAS AGAP survey over Antarctica’s Gamburtsev Subglacial Mountains. The file is downloaded and subset in the notebook AGAP_magnetic_survey.
[3]:
# Read the survey subset, keep only the columns used below, and rename the
# existing "line" column to "original_line" because the lines are re-derived
# in this notebook.
data_df = (
    pd.read_csv("data/AGAP_magnetic_survey.csv")[
        ["easting", "northing", "unixtime", "line", "line_name"]
    ]
    .rename(columns={"line": "original_line"})
)
data_df.head()
[3]:
| easting | northing | unixtime | original_line | line_name | |
|---|---|---|---|---|---|
| 0 | 621072.177354 | 159052.962392 | 1.229500e+09 | 1 | DA500_11.0 |
| 1 | 621126.010622 | 159060.533993 | 1.229500e+09 | 1 | DA500_11.0 |
| 2 | 621179.696916 | 159067.801202 | 1.229500e+09 | 1 | DA500_11.0 |
| 3 | 621233.338990 | 159074.801823 | 1.229500e+09 | 1 | DA500_11.0 |
| 4 | 621287.011834 | 159081.682095 | 1.229500e+09 | 1 | DA500_11.0 |
[4]:
# Build point geometries from the projected coordinates, then wrap the table
# in a GeoDataFrame tagged with EPSG:3031.
point_geometry = gpd.points_from_xy(data_df["easting"], data_df["northing"])
data_df = gpd.GeoDataFrame(data_df, geometry=point_geometry, crs="EPSG:3031")
[ ]:
# Plot every 50th point, coloured by the line number shipped with the data.
original_line_subset = data_df.iloc[::50]
airbornegeo.plotly_points(
    original_line_subset,
    color_col="original_line",
    hover_cols=["line_name", "unixtime"],
    robust=False,
    size=3,
)
3.2. Split lines on time gaps#
[6]:
# Split the survey into segments using the "unixtime" column with a threshold
# of 600 (10 minutes, assuming unixtime is in seconds).
# NOTE(review): how min_points_per_segment=100 treats short segments is
# defined by airbornegeo.split_into_segments — confirm against its docs.
data_df["segments_by_time"] = airbornegeo.split_into_segments(
    data_df,
    threshold=60 * 10,  # 10 minutes
    column_name="unixtime",
    min_points_per_segment=100,
)
print(f"{len(data_df.segments_by_time.unique())} segments")

# Distribution of segment sizes; axis labels are set so the figure can be
# read on its own.
ax = data_df.groupby("segments_by_time").size().hist(bins=50)
ax.set_title("Histogram of number of data points in each segment")
ax.set_xlabel("Number of data points in segment")
ax.set_ylabel("Number of segments");
98 segments
[ ]:
# Plot every 50th point, coloured by the time-gap segment it was assigned to.
time_segment_subset = data_df.iloc[::50]
airbornegeo.plotly_points(
    time_segment_subset,
    color_col="segments_by_time",
    hover_cols=["original_line", "unixtime"],
    robust=False,
    size=3,
)
3.3. Split lines on distance gaps#
[8]:
# Compute the "relative_distance" column, then split the survey into segments
# on it with a threshold of 20e3 (20 km, assuming coordinates are in metres).
# NOTE(review): the exact definition of relative_distance and the handling of
# segments shorter than min_points_per_segment live in airbornegeo — confirm.
data_df["relative_distance"] = airbornegeo.relative_distance(data_df)
data_df["segments_by_distance"] = airbornegeo.split_into_segments(
    data_df,
    threshold=20e3,  # 20 km
    column_name="relative_distance",
    min_points_per_segment=100,
)
print(f"{len(data_df.segments_by_distance.unique())} segments")

# Distribution of segment sizes; axis labels are set so the figure can be
# read on its own.
ax = data_df.groupby("segments_by_distance").size().hist(bins=50)
ax.set_title("Histogram of number of data points in each segment")
ax.set_xlabel("Number of data points in segment")
ax.set_ylabel("Number of segments");
168 segments
[ ]:
# Plot every 50th point, coloured by the distance-gap segment it belongs to.
distance_segment_subset = data_df.iloc[::50]
airbornegeo.plotly_points(
    distance_segment_subset,
    color_col="segments_by_distance",
    hover_cols=[
        "original_line",
        "relative_distance",
    ],
    robust=False,
    size=3,
)
3.4. Split lines on bearing changes#
[ ]:
# Store the result of airbornegeo.bearing as a "bearing" column.
# NOTE(review): the meaning of the second positional argument (10) is not
# visible here — confirm against airbornegeo.bearing.
data_df["bearing"] = airbornegeo.bearing(data_df, 10)

# Plot every 50th point, coloured by its bearing value.
bearing_subset = data_df.iloc[::50]
airbornegeo.plotly_points(
    bearing_subset,
    color_col="bearing",
    hover_cols=[
        "original_line",
    ],
    robust=False,
    size=3,
)
[13]:
# Split the survey into segments using the "bearing" column with a threshold
# of 45 (a 45 degree bearing change).
# NOTE(review): whether the threshold accounts for bearing wrap-around
# (e.g. 359 -> 1 degrees) depends on airbornegeo.split_into_segments /
# airbornegeo.bearing — confirm.
data_df["segments_by_bearing"] = airbornegeo.split_into_segments(
    data_df,
    threshold=45,  # 45 degree bearing change
    column_name="bearing",
    min_points_per_segment=100,
)
print(f"{len(data_df.segments_by_bearing.unique())} segments")

# Distribution of segment sizes; axis labels are set so the figure can be
# read on its own.
ax = data_df.groupby("segments_by_bearing").size().hist(bins=50)
ax.set_title("Histogram of number of data points in each segment")
ax.set_xlabel("Number of data points in segment")
ax.set_ylabel("Number of segments");
177 segments
[ ]:
# Plot every 10th point (denser than the earlier plots), coloured by the
# bearing-change segment it belongs to.
bearing_segment_subset = data_df.iloc[::10]
airbornegeo.plotly_points(
    bearing_segment_subset,
    color_col="segments_by_bearing",
    hover_cols=[
        "original_line",
        "bearing",
        "unixtime",
    ],
    robust=False,
    size=3,
)