Skip to content

Commit

Permalink
Refactor: Fixed-distance statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
kcwongaz committed Aug 28, 2022
1 parent 1ad5ef3 commit 63a36fe
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 0 deletions.
56 changes: 56 additions & 0 deletions air_traffic/trajectory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from geopy import distance


def find_first_landed(df):
    """Return the first landed row (altitude zero) after the take-off stage.

    Leading rows with altitude zero are treated as the aircraft still being
    on the ground before take-off and are skipped.  If every row has
    altitude zero, the whole record is treated as already landed and the
    very first row is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Trajectory with an "altitude" column.  Rows are scanned in their
        stored order (presumably chronological -- confirm against the
        caller).

    Returns
    -------
    pandas.Series or None
        The first row with altitude zero after the take-off stage, or None
        if the record is empty or never returns to altitude zero.
    """
    airborne = (df["altitude"] != 0).to_numpy()

    # Get through the initial take-off stage (leading rows at altitude zero).
    # If no row is airborne at all, the whole flight record is already
    # landed, so the search starts from the very beginning.
    start = int(airborne.argmax()) if airborne.any() else 0

    # Rows at altitude zero in the remaining part of the trajectory
    landed = df.iloc[start:]
    landed = landed.loc[landed["altitude"] == 0]

    # No landing record (or empty input): report it with None, the same way
    # find_first_in_range reports a missing point, instead of raising
    # IndexError from .iloc[0]
    if landed.empty:
        return None

    # The first row that has altitude zero is when the flight landed
    return landed.iloc[0]


def find_first_in_range(df, d_min, d_max):
    """Return the first row of ``df`` that lies in the ring around HKIA.

    Rows are scanned in order until one is at most ``d_max`` km away from
    HKIA (as measured by ``distance_hkia``).  That row is returned only if
    it is also at least ``d_min`` km away.

    Parameters
    ----------
    df : pandas.DataFrame
        Trajectory whose rows are accepted by ``distance_hkia``.
    d_min, d_max : float
        Inner and outer radius of the ring, in km.

    Returns
    -------
    pandas.Series or None
        The first row within ``d_max``, or None when that row is already
        closer than ``d_min`` (the trajectory skipped over the ring), when
        no row ever comes within ``d_max``, or when ``df`` is empty.
    """
    # Bounded scan: an unbounded "while d > d_max" walks off the end of the
    # frame (IndexError) when the flight never gets within d_max
    for n in range(len(df)):
        row = df.iloc[n]
        d = distance_hkia(row)

        if d > d_max:
            continue

        # First point inside the outer radius.
        # Return None if there is no point within the ring
        return None if d < d_min else row

    return None


def distance(row1, row2):
    """Return the distance in km between two trajectory rows.

    Parameters
    ----------
    row1, row2 : mapping
        Objects indexable by "latitude" and "longitude" (e.g. DataFrame
        rows).

    Returns
    -------
    float
        Distance between the two points in km, as computed by
        ``geopy.distance.distance``.
    """
    # Import locally under an alias: this function is itself named
    # `distance`, so at module level that name refers to this function and
    # not to geopy's distance module; `distance.distance(...)` would raise
    # AttributeError.
    from geopy import distance as geopy_distance

    point1 = (row1["latitude"], row1["longitude"])
    point2 = (row2["latitude"], row2["longitude"])

    return geopy_distance.distance(point1, point2).km


def distance_hkia(row):
    """Return the distance in km between a trajectory row and HKIA.

    Parameters
    ----------
    row : mapping
        Object indexable by "latitude" and "longitude" (e.g. a DataFrame
        row).

    Returns
    -------
    float
        Distance from the row's position to the fixed HKIA reference point
        in km, as computed by ``geopy.distance.distance``.
    """
    # Import locally under an alias: the module-level name `distance` is
    # rebound to the sibling function distance(row1, row2), so
    # `distance.distance(...)` would raise AttributeError here.
    from geopy import distance as geopy_distance

    # (latitude, longitude) used as the HKIA reference point
    hkia = (22.308046, 113.918480)
    point = (row["latitude"], row["longitude"])

    return geopy_distance.distance(point, hkia).km
90 changes: 90 additions & 0 deletions pipeline/3_stat_fixed_distance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import pandas as pd
import os
import numpy as np

from air_traffic.trajectory import *


# Inner and outer radius (km from HKIA) of the ring in which the entry point
# of each flight is searched
d_min = 145
d_max = 165

datadir = "../data/cleaned"
savedir = "../data/results"
savename = f"stat_fixed_distance_{d_min}-{d_max}.csv"

# --------------------------------------------------------------------------- #
# One list per output column; one entry is appended per processed flight
data = {"date": [],           # Date
        "callsign": [],       # Callsign
        "lat_i": [],          # Entry lat
        "lon_i": [],          # Entry lon
        "lat_f": [],          # Final lat
        "lon_f": [],          # Final lon
        "t_i": [],            # Entry timestamp
        "t_f": [],            # Final timestamp
        "r_i_km": [],         # Entry distance from HKIA in km
        "delta_r_km": [],     # Distance between entry and final point in km
        "delta_t_sec": []}    # Time difference in second


for subdir, dirs, files in os.walk(datadir):

    # Process in sorted order for easy tracking
    dirs.sort()
    files.sort()

    for file in files:
        fname = os.path.join(subdir, file)

        if not fname.endswith(".csv"):
            continue
        print(fname)

        # Keep only the points inside a fixed latitude/longitude box
        df = pd.read_csv(fname, header=0)
        df = df.loc[(df["latitude"] > 19) & (df["latitude"] < 25.5) &
                    (df["longitude"] > 111) & (df["longitude"] < 117.5)]

        # Skip if there is no useable data
        if len(df) == 0:
            continue

        # Find the first landing point and the first point in the fixed
        # ring.  A flight with no landing record, or one that never comes
        # within d_max, is skipped rather than aborting the whole run; the
        # helpers signal this either with IndexError or by returning None.
        try:
            last = find_first_landed(df)
            first = find_first_in_range(df, d_min, d_max)
        except IndexError:
            continue
        if last is None or first is None:
            continue

        # TODO: check this later -- skip flights whose landing point is not
        # within about 0.25 degrees (lat/lon) of HKIA

        # Get the date at HK time
        # (assumes "time" is a Unix timestamp in seconds, UTC -- confirm)
        date = pd.Timestamp(first["time"] + 8*3600, unit="s").strftime("%Y-%m-%d")

        # Identifiers; the callsign is the file name without ".csv"
        data["date"].append(date)
        data["callsign"].append(file[:-4])

        # Positional data
        data["lat_i"].append(first["latitude"])
        data["lon_i"].append(first["longitude"])
        data["lat_f"].append(last["latitude"])
        data["lon_f"].append(last["longitude"])
        data["delta_r_km"].append(distance(first, last))
        data["r_i_km"].append(distance_hkia(first))

        # Compute time difference
        data["t_i"].append(first["time"])
        data["t_f"].append(last["time"])
        data["delta_t_sec"].append(last["time"] - first["time"])


# Make sure the output directory exists so the results of the run are not
# lost to a missing folder at the very last step
os.makedirs(savedir, exist_ok=True)

df_master = pd.DataFrame(data=data)
df_master.to_csv(f"{savedir}/{savename}", index=False)

0 comments on commit 63a36fe

Please sign in to comment.