2. Change sampling frequency#
Sometimes survey data are provided at a very high sampling frequency. Here we show how to reduce this frequency to allow faster processing of the data. For this we use a block reduction.
[1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
import plotly.io as pio
import airbornegeo
# use plotly's "notebook" renderer by default for every figure shown below
pio.renderers.default = "notebook"
/home/airbornegeo/airbornegeo/.pixi/envs/default/lib/python3.14/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
from .autonotebook import tqdm as notebook_tqdm
[2]:
# Read the processed survey CSV and keep only the columns this notebook uses.
survey_columns = [
    "easting",
    "northing",
    "line",
    "unixtime",
    "distance_along_line",
    "grav_disturbance_filt",
]
data_df = pd.read_csv("data/AGAP_gravity_survey_processed.csv")[survey_columns]
data_df.head()
[2]:
| easting | northing | line | unixtime | distance_along_line | grav_disturbance_filt | |
|---|---|---|---|---|---|---|
| 0 | 1.000024e+06 | 226237.330771 | 1 | 1.229507e+09 | 0.000000 | 49.38 |
| 1 | 1.000083e+06 | 226246.631269 | 1 | 1.229507e+09 | 59.842447 | 49.45 |
| 2 | 1.000142e+06 | 226255.809132 | 1 | 1.229507e+09 | 119.693401 | 49.52 |
| 3 | 1.000201e+06 | 226264.969079 | 1 | 1.229507e+09 | 179.545645 | 49.58 |
| 4 | 1.000260e+06 | 226274.156809 | 1 | 1.229507e+09 | 239.285174 | 49.65 |
2.1. Block-reduce a single line#
[3]:
# pull out the rows of one survey line (line number 8) via a boolean mask
is_line_8 = data_df["line"] == 8
line_df = data_df.loc[is_line_8]
line_df
[3]:
| easting | northing | line | unixtime | distance_along_line | grav_disturbance_filt | |
|---|---|---|---|---|---|---|
| 21812 | 1.064733e+06 | 327333.974379 | 8 | 1.230574e+09 | 0.000000 | 57.67 |
| 21813 | 1.064800e+06 | 327344.354097 | 8 | 1.230574e+09 | 67.710542 | 57.43 |
| 21814 | 1.064867e+06 | 327354.384562 | 8 | 1.230574e+09 | 135.361302 | 57.20 |
| 21815 | 1.064934e+06 | 327364.065720 | 8 | 1.230574e+09 | 202.954030 | 56.97 |
| 21816 | 1.065001e+06 | 327373.504034 | 8 | 1.230574e+09 | 270.586284 | 56.74 |
| ... | ... | ... | ... | ... | ... | ... |
| 25273 | 1.323004e+06 | 373241.037284 | 8 | 1.230578e+09 | 262363.009770 | -0.05 |
| 25274 | 1.323069e+06 | 373253.374345 | 8 | 1.230578e+09 | 262429.891228 | 0.01 |
| 25275 | 1.323135e+06 | 373265.681016 | 8 | 1.230578e+09 | 262496.663302 | 0.07 |
| 25276 | 1.323201e+06 | 373278.155554 | 8 | 1.230578e+09 | 262563.532253 | 0.13 |
| 25277 | 1.323266e+06 | 373290.745039 | 8 | 1.230578e+09 | 262630.390948 | 0.19 |
3466 rows × 6 columns
2.1.1. Block-reduce by distance#
By setting reduce_by to ‘distance_along_line’ and spacing to 500, we can block reduce the data to have 1 point every 500 meters.
[4]:
# block-reduce the single line along its distance column, using the median as the reducer
blocked_line = airbornegeo.block_reduce(
    line_df,
    np.median,  # reduction function passed to block_reduce
    spacing=500,  # block size; per the notebook text this yields 1 point every 500 meters
    reduce_by="distance_along_line",  # column along which the blocks are defined
)
blocked_line
[4]:
| distance_along_line | easting | northing | line | unixtime | grav_disturbance_filt | |
|---|---|---|---|---|---|---|
| 0 | 236.770157 | 1.064967e+06 | 327368.784877 | 8.0 | 1.230574e+09 | 56.855 |
| 1 | 742.914133 | 1.065468e+06 | 327442.189898 | 8.0 | 1.230574e+09 | 55.240 |
| 2 | 1250.218179 | 1.065967e+06 | 327531.292779 | 8.0 | 1.230574e+09 | 53.730 |
| 3 | 1759.418288 | 1.066468e+06 | 327626.014830 | 8.0 | 1.230575e+09 | 52.290 |
| 4 | 2238.340303 | 1.066938e+06 | 327715.929722 | 8.0 | 1.230575e+09 | 51.000 |
| ... | ... | ... | ... | ... | ... | ... |
| 464 | 260392.116461 | 1.321058e+06 | 372931.568246 | 8.0 | 1.230578e+09 | -0.445 |
| 465 | 260893.139426 | 1.321555e+06 | 372994.943503 | 8.0 | 1.230578e+09 | -0.630 |
| 466 | 261395.547382 | 1.322052e+06 | 373064.214044 | 8.0 | 1.230578e+09 | -0.660 |
| 467 | 261896.075947 | 1.322545e+06 | 373153.437659 | 8.0 | 1.230578e+09 | -0.430 |
| 468 | 262396.450499 | 1.323036e+06 | 373247.205814 | 8.0 | 1.230578e+09 | -0.020 |
469 rows × 6 columns
[5]:
# plot the blocked line data: grav_disturbance_filt against distance_along_line
airbornegeo.plotly_profiles(
    blocked_line,
    x="distance_along_line",
    y=["grav_disturbance_filt"],
)
[6]:
# plot the original (un-reduced) line data for comparison with the blocked profile
airbornegeo.plotly_profiles(
    line_df,
    x="distance_along_line",
    y=["grav_disturbance_filt"],
)
2.1.2. Block-reduce by time#
By setting reduce_by to ‘unixtime’ and spacing to 10, we can block reduce the data to have 1 point every 10 seconds along the flight.
[7]:
# block-reduce the same line again, this time in blocks of unixtime instead of distance
# NOTE: this rebinds blocked_line, replacing the distance-based result computed earlier
blocked_line = airbornegeo.block_reduce(
    line_df,
    np.median,  # reduction function passed to block_reduce
    spacing=10,  # block size in the units of the unixtime column (presumably seconds -- confirm)
    reduce_by="unixtime",  # column along which the blocks are defined
)
blocked_line
[7]:
| unixtime | easting | northing | line | distance_along_line | grav_disturbance_filt | |
|---|---|---|---|---|---|---|
| 0 | 1.230574e+09 | 1.065034e+06 | 327378.169311 | 8.0 | 304.354592 | 56.625 |
| 1 | 1.230574e+09 | 1.065701e+06 | 327481.819946 | 8.0 | 979.375748 | 54.525 |
| 2 | 1.230574e+09 | 1.066367e+06 | 327607.820028 | 8.0 | 1657.253139 | 52.575 |
| 3 | 1.230575e+09 | 1.067039e+06 | 327735.174930 | 8.0 | 2341.393820 | 50.740 |
| 4 | 1.230575e+09 | 1.067710e+06 | 327855.417977 | 8.0 | 3023.307113 | 49.205 |
| ... | ... | ... | ... | ... | ... | ... |
| 343 | 1.230578e+09 | 1.320333e+06 | 372835.166847 | 8.0 | 259660.826489 | -0.140 |
| 344 | 1.230578e+09 | 1.320992e+06 | 372923.282886 | 8.0 | 260325.522162 | -0.415 |
| 345 | 1.230578e+09 | 1.321655e+06 | 373007.321750 | 8.0 | 260993.661862 | -0.655 |
| 346 | 1.230578e+09 | 1.322316e+06 | 373110.902922 | 8.0 | 261662.773749 | -0.565 |
| 347 | 1.230578e+09 | 1.322971e+06 | 373234.837226 | 8.0 | 262329.628091 | -0.075 |
348 rows × 6 columns
[8]:
# plot the blocked line data: grav_disturbance_filt against unixtime
airbornegeo.plotly_profiles(
    blocked_line,
    x="unixtime",
    y=["grav_disturbance_filt"],
)
[9]:
# plot the original (un-reduced) line data against unixtime for comparison
airbornegeo.plotly_profiles(
    line_df,
    x="unixtime",
    y=["grav_disturbance_filt"],
)
2.2. Block-reduce all lines in a survey#
By supplying ‘line’ as the groupby_column, the block-reduction is performed on one line at a time. This means that for lines that are closer together than the spacing, or where lines cross, the values from other lines within the same block are not included in the block-reduction.
[10]:
# block-reduce the full survey, treating each line separately
blocked_survey = airbornegeo.block_reduce(
    data_df,
    np.median,  # reduction function passed to block_reduce
    spacing=5000,  # 5 km
    reduce_by="distance_along_line",
    groupby_column="line",  # group by survey line so values from different lines are not combined
)
blocked_survey
Segments: 100%|██████████| 100/100 [00:01<00:00, 74.02it/s]
[10]:
| distance_along_line | easting | northing | unixtime | grav_disturbance_filt | line | |
|---|---|---|---|---|---|---|
| 0 | 2504.019094 | 1.002497e+06 | 226630.494762 | 1.229507e+09 | 51.060 | 1 |
| 1 | 7547.392786 | 1.007466e+06 | 227482.464546 | 1.229507e+09 | 54.350 | 1 |
| 2 | 12582.823047 | 1.012422e+06 | 228357.635239 | 1.229507e+09 | 60.105 | 1 |
| 3 | 17584.178229 | 1.017334e+06 | 229278.897099 | 1.229507e+09 | 66.250 | 1 |
| 4 | 22626.203376 | 1.022305e+06 | 230114.462205 | 1.229507e+09 | 80.275 | 1 |
| ... | ... | ... | ... | ... | ... | ... |
| 4328 | 50855.877560 | 1.583041e+06 | 524116.951654 | 1.230381e+09 | -25.920 | 100 |
| 4329 | 55703.602546 | 1.583903e+06 | 519346.896658 | 1.230381e+09 | -8.400 | 100 |
| 4330 | 60519.728408 | 1.584750e+06 | 514606.912024 | 1.230381e+09 | -0.410 | 100 |
| 4331 | 65387.035058 | 1.585659e+06 | 509826.551110 | 1.230381e+09 | -0.890 | 100 |
| 4332 | 70239.511090 | 1.586449e+06 | 505040.740376 | 1.230382e+09 | -4.165 | 100 |
4333 rows × 6 columns
[11]:
# plot the block-reduced survey points, colored by the grav_disturbance_filt column
airbornegeo.plotly_points(
    blocked_survey,
    color_col="grav_disturbance_filt",
    robust=True,
    size=2,
)
[12]:
# plot the original (un-reduced) survey points with the same settings, for comparison
airbornegeo.plotly_points(
    data_df,
    color_col="grav_disturbance_filt",
    robust=True,
    size=2,
)