3. Split survey data into lines#

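We try to recreate the British Antarctic Survey (BAS) processing steps for splitting a continuous survey record into individual lines, using gaps in time, gaps in distance, and changes in bearing.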

[1]:
# Enable IPython's autoreload extension in mode 2, which reloads imported modules
# before each cell is executed so edits to library code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2


import geopandas as gpd
import pandas as pd
import plotly.io as pio

import airbornegeo

# Make "notebook" the default Plotly renderer for every figure shown below.
pio.renderers.default = "notebook"
/home/airbornegeo/airbornegeo/.pixi/envs/default/lib/python3.14/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm

3.1. Load data#

This is a subset of the BAS AGAP survey over Antarctica’s Gamburtsev Subglacial Mountains. The file read below (`data/AGAP_magnetic_survey.csv`) is downloaded and subset in the notebook AGAP_magnetic_survey, so run that notebook first if the file is missing.

[3]:
# Read the survey subset, keep only the columns used in this notebook, and
# rename "line" to "original_line" because the lines are re-derived below and
# the survey's own numbering is kept for comparison.
data_df = (
    pd.read_csv("data/AGAP_magnetic_survey.csv")
    .loc[:, ["easting", "northing", "unixtime", "line", "line_name"]]
    .rename(columns={"line": "original_line"})
)

data_df.head()
[3]:
easting northing unixtime original_line line_name
0 621072.177354 159052.962392 1.229500e+09 1 DA500_11.0
1 621126.010622 159060.533993 1.229500e+09 1 DA500_11.0
2 621179.696916 159067.801202 1.229500e+09 1 DA500_11.0
3 621233.338990 159074.801823 1.229500e+09 1 DA500_11.0
4 621287.011834 159081.682095 1.229500e+09 1 DA500_11.0
[4]:
# Build point geometries from the easting/northing columns and wrap the table
# in a GeoDataFrame tagged with CRS EPSG:3031.
data_df = gpd.GeoDataFrame(
    data_df,
    geometry=gpd.points_from_xy(x=data_df["easting"], y=data_df["northing"]),
    crs="EPSG:3031",
)
[ ]:
# Plot every 50th point, coloured by the survey's original line number.
airbornegeo.plotly_points(
    data_df.iloc[::50],
    color_col="original_line",
    hover_cols=["line_name", "unixtime"],
    robust=False,
    size=3,
)

3.2. Split lines on time gaps#

[6]:
# Label each point with a segment ID derived from the "unixtime" column; the
# threshold is 10 minutes expressed in seconds (60 * 10).
# NOTE(review): presumably a new segment starts where consecutive values jump by
# more than `threshold` — confirm against airbornegeo.split_into_segments.
data_df["segments_by_time"] = airbornegeo.split_into_segments(
    data_df,
    threshold=60 * 10,  # 10 minutes
    column_name="unixtime",
    min_points_per_segment=100,
)

# nunique(dropna=False) counts exactly what len(Series.unique()) counts,
# including a missing-value label if one is present.
print(f"{data_df['segments_by_time'].nunique(dropna=False)} segments")

# Distribution of segment sizes; axis labels let the figure stand on its own.
ax = data_df.groupby("segments_by_time").size().hist(bins=50)
ax.set(
    title="Histogram of number of data points in each segment",
    xlabel="Data points per segment",
    ylabel="Number of segments",
);
98 segments
_images/split_survey_into_lines_7_1.png
[ ]:
# Plot every 50th point, coloured by the time-gap segment it was assigned to.
airbornegeo.plotly_points(
    data_df.iloc[::50],
    color_col="segments_by_time",
    hover_cols=["original_line", "unixtime"],
    robust=False,
    size=3,
)

3.3. Split lines on distance gaps#

[8]:
# Store the output of airbornegeo.relative_distance as a new column.
# NOTE(review): presumably the distance from each point to its predecessor, in
# the CRS units (metres) — confirm against airbornegeo.relative_distance.
data_df["relative_distance"] = airbornegeo.relative_distance(data_df)

# Label each point with a segment ID derived from "relative_distance", using a
# 20 km threshold.
data_df["segments_by_distance"] = airbornegeo.split_into_segments(
    data_df,
    threshold=20e3,  # 20 km
    column_name="relative_distance",
    min_points_per_segment=100,
)

# nunique(dropna=False) counts exactly what len(Series.unique()) counts,
# including a missing-value label if one is present.
print(f"{data_df['segments_by_distance'].nunique(dropna=False)} segments")

# Distribution of segment sizes; axis labels let the figure stand on its own.
ax = data_df.groupby("segments_by_distance").size().hist(bins=50)
ax.set(
    title="Histogram of number of data points in each segment",
    xlabel="Data points per segment",
    ylabel="Number of segments",
);
168 segments
_images/split_survey_into_lines_10_1.png
[ ]:
# Plot every 50th point, coloured by the distance-gap segment it was assigned to.
airbornegeo.plotly_points(
    data_df.iloc[::50],
    color_col="segments_by_distance",
    hover_cols=[
        "original_line",
        "relative_distance",
    ],
    robust=False,
    size=3,
)

3.4. Split lines on bearing changes#

[ ]:
# Store the output of airbornegeo.bearing as a new column.
# NOTE(review): the meaning of the second positional argument (10) is not
# visible here — presumably a window or lag in points; confirm against
# airbornegeo.bearing and consider passing it by keyword.
data_df["bearing"] = airbornegeo.bearing(data_df, 10)

# Plot every 50th point, coloured by the computed bearing.
airbornegeo.plotly_points(
    data_df.iloc[::50],
    color_col="bearing",
    hover_cols=[
        "original_line",
    ],
    robust=False,
    size=3,
)
[13]:
# Label each point with a segment ID derived from the "bearing" column, using a
# 45 degree threshold.
# NOTE(review): if bearings are expressed in [0, 360), a wrap between ~359 and
# ~1 degrees could register as a large change and split a straight line —
# confirm how airbornegeo.split_into_segments / airbornegeo.bearing handle this.
data_df["segments_by_bearing"] = airbornegeo.split_into_segments(
    data_df,
    threshold=45,  # 45 degree bearing change
    column_name="bearing",
    min_points_per_segment=100,
)

# nunique(dropna=False) counts exactly what len(Series.unique()) counts,
# including a missing-value label if one is present.
print(f"{data_df['segments_by_bearing'].nunique(dropna=False)} segments")

# Distribution of segment sizes; axis labels let the figure stand on its own.
ax = data_df.groupby("segments_by_bearing").size().hist(bins=50)
ax.set(
    title="Histogram of number of data points in each segment",
    xlabel="Data points per segment",
    ylabel="Number of segments",
);
177 segments
_images/split_survey_into_lines_14_1.png
[ ]:
# Plot every 10th point (denser than the earlier overview plots), coloured by
# the bearing-change segment it was assigned to.
airbornegeo.plotly_points(
    data_df.iloc[::10],
    color_col="segments_by_bearing",
    hover_cols=[
        "original_line",
        "bearing",
        "unixtime",
    ],
    robust=False,
    size=3,
)