Commit 91b97ef6 authored by Fabian Kovac's avatar Fabian Kovac
Browse files

[f] added INCA RR for start and end of links

parent bad77c5b
......@@ -2,27 +2,29 @@
# Title: Data Preparation for LINK Configs and Transmissions
# Author: Fabian Kovac <ds191008@fhstp.ac.at>
# Team: University of Applied Sciences St. Pölten
# Version: 1.0
# Last changed: 2021-06-15
# Version: 1.1
# Last changed: 2021-06-21
#
import sys
import gzip
import pathlib
import argparse
import datetime
import numpy as np
import pandas as pd
def parse_arguments() -> argparse.Namespace:
"""Parses provided commandline arguments for LINK config file and transmissions
"""Parses provided commandline arguments for LINK
Returns:
args (argparse.Namespace): object with paths to provided config- and transmissions-file
args (argparse.Namespace): object with paths to provided files
"""
# create argument parser with description
desc = '# Data Preparation for LINK Configs and Transmissions\n'
desc = '# Data Preparation for LINK\n'
desc += '-'*64 + '\n'
desc += 'Script outputs the same files with a "_clean" suffix.\n'
desc += 'Existing clean versions are automatically overwritten!'
......@@ -40,6 +42,7 @@ def parse_arguments() -> argparse.Namespace:
required_args = parser.add_argument_group('required arguments')
required_args.add_argument('-c', '--config', type = str, required = True, help = 'Path to Config-File')
required_args.add_argument('-t', '--transmissions', type = str, required = True, help = 'Path to Config-File')
required_args.add_argument('-i', '--inca', type = str, required = True, help = 'Path to Inca-Dir')
# parse arguments
args = parser.parse_args()
......@@ -66,15 +69,58 @@ def _log(msg: str) -> None:
print(msg)
def get_distance(lat_a: pd.Series, lon_a: pd.Series, lat_b: pd.Series, lon_b: pd.Series) -> np.array:
def load_inca_file(file_inca: pathlib.Path) -> np.array:
"""Loads loads gzipped INCA data to a given file
Parameters:
file_inca (pathlib.Path): Path to INCA file
Returns:
x (np.array): Matrix with shape (401, 701)
"""
# open zipped file and bring data to right shape (resolution x,y: 701x401 km2)
with gzip.open(file_inca, 'rb') as file:
x = file.read()
x = np.fromstring(x, sep = ' ')
x = np.reshape(x, (401, 701))
return x
def load_inca_data(dir_inca: pathlib.Path) -> np.array:
"""Loads inca files to a given inca dir
Parameters:
dir_inca (pathlib.Path): Directory for INCA files
Returns:
inca_Data (np.array): Tensor with shape (96, 401, 701)
"""
# initialize tensor with 96 15min intervals
inca_data = np.zeros((96, 401, 701))
# load inca dates from inca dir
for i, file_inca in enumerate(sorted([file for file in dir_inca.iterdir() if file.is_file()])):
# load zipped ascii data
data = load_inca_file(file_inca)
# update inca tensor
inca_data[i] = data
return inca_data
def get_distance(lon_a: np.array, lat_a: np.array, lon_b: np.array, lat_b: np.array) -> np.array:
"""Calculcates distance between two coordinates in km
using a rotation-ellipsoid in cartesian coordinates out of polar coordiantes
Parameters:
lat_a (pd.Series): Latitudes of point A
lon_a (pd.Series): Longitudes of point A
lat_b (pd.Series): Latitudes of point B
lon_b (pd.Series): Longitudes of point B
lon_a (np.array): Longitudes of point A
lat_a (np.array): Latitudes of point A
lon_b (np.array): Longitudes of point B
lat_b (np.array): Latitudes of point B
Returns:
length (np.array): Vector with distances in km (can directly be assigned a pandas column)
......@@ -105,12 +151,111 @@ def get_distance(lat_a: pd.Series, lon_a: pd.Series, lat_b: pd.Series, lon_b: pd
return length
def prep(file_config: pathlib.Path, file_trans: pathlib.Path) -> None:
def utm_to_lambert(lon: np.array, lat: np.array) -> tuple:
"""Convert WGS 84 UTM coordinates to a Lambert Conic Conformal Projection
Parameters:
lon (np.array): Vector containing longitudes
lat (np.array): Vector containing latitudes
Returns:
x, y (tuple(np.array, np.array)): tuple of two vectors with coordinates
"""
# convert utm coordinates (angles of degree) to radians
lon = np.radians(lon)
lat = np.radians(lat)
# define standard parallels according to EPSG:31287 - MGI/Austria Lambert
# --> see https://epsg.io/31287
lat1 = np.radians(49)
lat2 = np.radians(46)
# point of reference:
# lon: 13°20'E
# lat: 47°30'N
lon0 = np.radians(13.33333333333333)
lat0 = np.radians(47.5)
# INCA grid: 701x401 km
# compensate for point of reference is in the middle of the grid
# false easting: half of 701km = 350500m
# false northing: half of 401km = 200500m
x0 = 350500
y0 = 200500
# volumetric mean radius of the earth in m
R = 6371000
# lambert conformal conic projection:
# --> see https://mathworld.wolfram.com/LambertConformalConicProjection.html
n = np.log(np.cos(lat1) * (1 / np.cos(lat2))) / np.log(np.tan(np.pi/4 + lat2/2) * (np.cos(np.pi/4 + lat1/2) / np.sin(np.pi/4 + lat1/2)))
F = (np.cos(lat1) * np.tan(np.pi/4 + lat1/2)**n) / n
p = R * F * (np.cos(np.pi/4 + lat/2) / np.sin(np.pi/4 + lat/2))**n
p0 = R * F * (np.cos(np.pi/4 + lat0/2) / np.sin(np.pi/4 + lat0/2))**n
# calculate lambert conic conformal x and y
x = p * np.sin(n * (lon - lon0)) + x0
y = p0 - p * np.cos(n * (lon - lon0)) + y0
return x, y
def lambert_to_inca_coords(x: np.array, y: np.array) -> tuple:
"""Convert x and y of Lambert Conic Conformal Projection to INCA coordinates
(rounded lambert coordinates)
Parameters:
x (np.array): Vector containing x values in meter of Lambert Conic Conformal Projection
y (np.array): Vector containing y values in meter of in Lambert Conic Conformal Projection
Returns:
ix, iy (tuple(np.array, np.array)): tuple of indices for INCA data
"""
return np.round(x/1000, decimals = 0).astype(int), np.round(y/1000, decimals = 0).astype(int)
def get_inca_indices(datetimes: np.array, x: np.array, y: np.array) -> np.array:
"""Get indices of INCA RR data based on Lamber Conic Conformal Coordinates
Parameters:
datetimes (np.array): Vector containing datetimes of transmissions
x (np.array): Vector containing x values of LINK
y (np.array): Vector containing y values of LINK
Returns:
idx_times, idx, idy (np.array): Three vectors containing indices to INCA RR data
"""
# convert utm coordinates to lambert conic conformal projection
lccX, lccY = utm_to_lambert(x, y)
# convert lambert coordinates to INCA indices
idx, idy = lambert_to_inca_coords(lccX, lccY)
# generate times of day in 15min (window) intervals
window = 15
inca_times = sorted([str(i * datetime.timedelta(seconds = window))[2:] for i in range(24*60//window)])
# generate times of LINK data
link_times = datetimes.map(lambda x: f'{x[-4:-2]}:{x[-2:]}')
# get LINK indices of INCA times
idx_times = np.searchsorted(inca_times, link_times)
# return INCA data based on time indices and lambert coordinates
return idx_times, idx, idy
def prep() -> None:
"""Data preparation for LINK config and transmissions
Parameters:
file_config (pathlib.Path): Config File
file_trans (pathlib.Path): Transmissions File
dir_inca (pathlib.Path): INCA Directory
"""
_log('\n******************************** READ FILES ********************************')
......@@ -120,49 +265,49 @@ def prep(file_config: pathlib.Path, file_trans: pathlib.Path) -> None:
_log(f'Read config file with shape {df_config.shape}')
df_trans = pd.read_csv(file_trans, sep = ';')
_log(f'Read transmissions file with shape {df_trans.shape}')
_log('\n******************************** BASIC PREP ********************************')
# remove test-link with link id 1
df_config = df_config[df_config['LINKID'] != 1]
df_trans = df_trans[df_trans['RADIOLINKID'] != 1]
_log('Removed all entries of test-link with linkid 1')
# drop links that are officially not in use ('na' in CAPACITYINTERFACE and/or FREQUENCY)
# --> see Q&A Phillip Scheffknecht (05 Feb 2021)
df_config = df_config.dropna(axis = 0, subset = ['CAPACITYINTERFACE', 'FREQUENCY'])
_log('Dropped configs with NA in CAPACITYINTERFACE and/or FREQUENCY (links officially not in use)')
# delete rows with unused link ids
# get link ids of config and transmissions
config_ids = set(df_config['LINKID'].unique().tolist())
trans_ids = set(df_trans['RADIOLINKID'].unique().tolist())
# delete link ids in transmissions without config
unused_trans_ids = trans_ids - config_ids
df_trans = df_trans[~df_trans['RADIOLINKID'].isin(list(unused_trans_ids))]
_log('Removed all links in transmissions where no config is present')
# delete link ids in config without transmissions
unused_config_ids = config_ids - trans_ids
df_config = df_config[~df_config['LINKID'].isin(list(unused_config_ids))]
_log('Removed all links in config where no transmission is present')
# delete duplicates in config (same values, different link ids), where corresponding link ids are not used in transmissions
# gather duplicated rows in config file
col_subset = ['LINKTYPE', 'SITEID_A', 'LATITUDE_A', 'LONGITUDE_A', 'SITEID_B', 'LATITUDE_B', 'LONGITUDE_B', 'CAPACITYINTERFACE', 'FREQUENCY']
duplicated_config_ids = set(df_config[df_config.duplicated(subset = col_subset)]['LINKID'].unique().tolist())
# gather duplicated link ids of config file in transmissions file
found_trans_ids = set(df_trans[df_trans['RADIOLINKID'].isin(duplicated_config_ids)]['RADIOLINKID'].unique().tolist())
# calculate unused duplicated ids in config file
duplicated_used_ids = duplicated_config_ids - found_trans_ids
# delete rows with unused duplicated link ids in config file
df_config = df_config[~df_config['LINKID'].isin(list(duplicated_used_ids))]
_log('Removed duplicated links which are not in use')
......@@ -192,9 +337,9 @@ def prep(file_config: pathlib.Path, file_trans: pathlib.Path) -> None:
df_config = df_config.drop(['TEMP_LOC_TUPLE'], axis = 1)
df_trans = df_trans.drop(['TEMP_LOC_TUPLE'], axis = 1)
# calculate LENGTH in km between links
df_config['LENGTH'] = get_distance(df_config['LATITUDE_A'], df_config['LONGITUDE_A'], df_config['LATITUDE_B'], df_config['LONGITUDE_B'])
df_config['LENGTH'] = get_distance(df_config['LONGITUDE_A'], df_config['LATITUDE_A'], df_config['LONGITUDE_B'], df_config['LATITUDE_B'])
_log('Calculated distances between sites using a WGS84 ellipsoid')
......@@ -210,10 +355,9 @@ def prep(file_config: pathlib.Path, file_trans: pathlib.Path) -> None:
df_config['TXFREQUENCY'] = df_config['TXFREQUENCY'].astype('float')
df_config['TXFREQUENCY'] = df_config['TXFREQUENCY']/1000
_log('Converted RXFREQUENCY and TXFREQUENCY to float and GHz')
# TODO: drop transmissions with (operational) status unequal 1?
# check occurences with: df_trans[(df_trans['STATUS'] != 1) | (df_trans['OPERATIONALSTATUS'] != 1)].shape
# drop transmissions with (operational) status unequal 1
df_trans = df_trans[df_trans['STATUS'] == 1]
df_trans = df_trans[df_trans['OPERATIONALSTATUS'] == 1]
_log('Removed transmissions with STATUS and/or OPERATIONALSTATUS unequal 1')
......@@ -224,20 +368,20 @@ def prep(file_config: pathlib.Path, file_trans: pathlib.Path) -> None:
# copy transmissions dataframe to link dataframe
df_link = df_trans.copy()
_log('Copy transmissions dataframe to link dataframe')
# convert begintime to utc
df_link['BEGINTIME'] = pd.to_datetime(df_link['BEGINTIME'], format = '%Y-%m-%d %H:%M:%S')
df_link['BEGINTIME'] = df_link['BEGINTIME'].dt.tz_localize('Europe/Vienna').dt.tz_convert('UTC').dt.tz_localize(None)
_log('Converted BEGINTIME to UTC')
# copy REMOTERXLEVEL to PMIN and PMAX (for aggregation in 15min window conversion)
df_link['PMIN'] = df_link['REMOTERXLEVEL']
df_link['PMAX'] = df_link['REMOTERXLEVEL']
_log('Created PMIN and PMAX of REMOTERXLEVEL')
# convert 3min windows to 15min windows
group_cols = [df_link['BEGINTIME'].dt.floor('15Min'), 'RADIOLINKID']
agg_cols = {'TXLEVEL' : 'mean', 'REMOTERXLEVEL' : 'mean', 'PMIN' : 'min', 'PMAX' : 'max'}
......@@ -248,8 +392,8 @@ def prep(file_config: pathlib.Path, file_trans: pathlib.Path) -> None:
# convert BEGINTIME to RAINLINK format
df_link['BEGINTIME'] = df_link['BEGINTIME'].dt.strftime('%Y%m%d%H%M')
_log('Converted BEGINTIME to RAINLINK format "%Y%m%d%H%M"')
# build df with differences of sending and receiving levels
df_diff = df_link[['RADIOLINKID', 'TXLEVEL', 'REMOTERXLEVEL']].copy()
df_diff['MEANLINKDIFFLEVEL'] = df_diff['TXLEVEL'] - df_diff['REMOTERXLEVEL']
......@@ -258,13 +402,13 @@ def prep(file_config: pathlib.Path, file_trans: pathlib.Path) -> None:
# get mean of differences
df_diff = df_diff.groupby(['RADIOLINKID']).agg({'MEANLINKDIFFLEVEL' : 'mean'}).reset_index()
_log('Merged mean link difference levels back to link dataframe')
# merge differences to transmission dataframe
df_link = pd.merge(df_link, df_diff, how = 'inner', left_on = 'RADIOLINKID', right_on = 'RADIOLINKID')
df_link['DIFFLEVEL'] = df_link['TXLEVEL'] - df_link['REMOTERXLEVEL'] - df_link['MEANLINKDIFFLEVEL']
_log('Calculated DIFFLEVEL as TXLEVEL - REMOTERXLEVEL - MEANLINKDIFFLEVEL')
# merge config and link dataframe
drop_cols = ['RADIOLINKID', 'LINKTYPE', 'SITEID_A', 'SITEID_B', 'CAPACITYINTERFACE']
df_link = pd.merge(df_link, df_config, how = 'inner', left_on = 'RADIOLINKID', right_on = 'LINKID').drop(drop_cols, axis = 1)
......@@ -299,6 +443,21 @@ def prep(file_config: pathlib.Path, file_trans: pathlib.Path) -> None:
_log('Converted link dataframe to RAINLINK format')
_log('\n******************************** MERGE INCA ********************************')
# load inca data
inca_data = load_inca_data(dir_inca)
_log(f'Loaded INCA data from {str(dir_inca).split("/")[-1]}')
idx_times, IxStart, IyStart = get_inca_indices(df_link['DateTime'], df_link['XStart'], df_link['YStart'])
_, IxEnd, IyEnd = get_inca_indices(df_link['DateTime'], df_link['XEnd'], df_link['YEnd'])
_log('Calculated INCA RR indices based on lambert coordinates and datetimes')
df_link['RRStart'] = inca_data[idx_times, IyStart, IxStart]
df_link['RREnd'] = inca_data[idx_times, IyEnd, IxEnd]
_log('Set INCA RR data based on lambert coordinates')
_log('\n******************************** SAVE FILES ********************************')
# build path for clean config and transmissions destination files
......@@ -308,9 +467,9 @@ def prep(file_config: pathlib.Path, file_trans: pathlib.Path) -> None:
# build path for clean link destination file (same folder, date and extension as transmissions file)
date = str(file_trans.stem).split('_')[-1]
dest_link = pathlib.Path(dest_trans.parents[0], f'LINK_{date}_clean{file_trans.suffix}')
# save cleaned files
# save clean files
df_config.to_csv(dest_config, sep = ';', header = True, index = False)
_log(f'Saved clean config file with shape {df_config.shape} to "{str(dest_config)}"')
df_trans.to_csv(dest_trans, sep = ';', header = True, index = False)
......@@ -329,6 +488,7 @@ if __name__ == '__main__':
# convert config and transmissions arguments to paths
file_config = pathlib.Path(args.config)
file_trans = pathlib.Path(args.transmissions)
dir_inca = pathlib.Path(args.inca)
# check if config files exists
if not file_config.exists():
......@@ -337,11 +497,16 @@ if __name__ == '__main__':
# check if transmissions file exists
if not file_trans.exists():
_log('% Invalid path for transmissions file!')
_log('Invalid path for transmissions file!')
start_prep = False
# chec if inca dir exists
if not dir_inca.exists():
_log('Invalid path for inca directory!')
start_prep = False
# start prep if flag is True, otherwise exit with code 2
if start_prep:
prep(file_config, file_trans)
prep()
else:
sys.exit(2)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment