Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
oeigner
LINK - Forschungsprojekt Repo
Commits
91b97ef6
Commit
91b97ef6
authored
Jun 22, 2021
by
Fabian Kovac
Browse files
[f] added INCA RR for start and end of links
parent
bad77c5b
Changes
1
Hide whitespace changes
Inline
Side-by-side
FHSTP/prep.py
View file @
91b97ef6
...
...
@@ -2,27 +2,29 @@
# Title: Data Preparation for LINK Configs and Transmissions
# Author: Fabian Kovac <ds191008@fhstp.ac.at>
# Team: University of Applied Sciences St. Pölten
# Version: 1.
0
# Last changed: 2021-06-1
5
# Version: 1.
1
# Last changed: 2021-06-
2
1
#
import
sys
import
gzip
import
pathlib
import
argparse
import
datetime
import
numpy
as
np
import
pandas
as
pd
def
parse_arguments
()
->
argparse
.
Namespace
:
"""Parses provided commandline arguments for LINK
config file and transmissions
"""Parses provided commandline arguments for LINK
Returns:
args (argparse.Namespace): object with paths to provided
config- and transmissions-
file
args (argparse.Namespace): object with paths to provided file
s
"""
# create argument parser with description
desc
=
'# Data Preparation for LINK
Configs and Transmissions
\n
'
desc
=
'# Data Preparation for LINK
\n
'
desc
+=
'-'
*
64
+
'
\n
'
desc
+=
'Script outputs the same files with a "_clean" suffix.
\n
'
desc
+=
'Existing clean versions are automatically overwritten!'
...
...
@@ -40,6 +42,7 @@ def parse_arguments() -> argparse.Namespace:
required_args
=
parser
.
add_argument_group
(
'required arguments'
)
required_args
.
add_argument
(
'-c'
,
'--config'
,
type
=
str
,
required
=
True
,
help
=
'Path to Config-File'
)
# path to the transmissions file (help text previously said "Config-File" by copy-paste mistake)
required_args.add_argument('-t', '--transmissions', type=str, required=True, help='Path to Transmissions-File')
required_args
.
add_argument
(
'-i'
,
'--inca'
,
type
=
str
,
required
=
True
,
help
=
'Path to Inca-Dir'
)
# parse arguments
args
=
parser
.
parse_args
()
...
...
@@ -66,15 +69,58 @@ def _log(msg: str) -> None:
print
(
msg
)
def
get_distance
(
lat_a
:
pd
.
Series
,
lon_a
:
pd
.
Series
,
lat_b
:
pd
.
Series
,
lon_b
:
pd
.
Series
)
->
np
.
array
:
def load_inca_file(file_inca: pathlib.Path, shape: tuple = (401, 701)) -> np.ndarray:
    """Loads gzipped INCA ASCII data from a given file

    Parameters:
    file_inca (pathlib.Path): Path to gzipped INCA file
    shape (tuple): Expected grid shape (rows, columns), defaults to (401, 701)

    Returns:
    x (np.array): Matrix with the given shape (default: (401, 701))

    Raises:
    ValueError: if the number of values in the file does not match the shape
    """

    # read the complete zipped file as raw bytes
    with gzip.open(file_inca, 'rb') as file:
        raw = file.read()

    # parse the whitespace separated ascii values into a flat vector
    x = np.fromstring(raw, sep=' ')

    # fail with a readable message instead of an opaque reshape error
    expected = int(np.prod(shape))
    if x.size != expected:
        raise ValueError(
            f'INCA file "{file_inca}" contains {x.size} values, '
            f'expected {expected} for shape {tuple(shape)}'
        )

    # bring data to right shape (default grid: 401 rows x 701 columns)
    return np.reshape(x, shape)
def load_inca_data(dir_inca: pathlib.Path) -> np.ndarray:
    """Loads all INCA files of a given INCA directory into one tensor

    Files are processed in sorted path order and written to consecutive
    time slots, so slot i holds the i-th file of the directory.
    NOTE(review): this presumably relies on file names sorting chronologically - confirm.
    Slots without a corresponding file keep their initial value of zero.

    Parameters:
    dir_inca (pathlib.Path): Directory for INCA files

    Returns:
    inca_data (np.array): Tensor with shape (96, 401, 701)

    Raises:
    IndexError: if the directory contains more files than time slots (96)
    """

    # one day consists of 96 15min intervals
    n_intervals = 96

    # gather all regular files of the inca dir in sorted order
    files = sorted(file for file in dir_inca.iterdir() if file.is_file())

    # fail fast with a readable message instead of after loading 96 files
    if len(files) > n_intervals:
        raise IndexError(
            f'Found {len(files)} files in "{dir_inca}", '
            f'but only {n_intervals} 15min intervals are available'
        )

    # initialize tensor with 96 15min intervals
    inca_data = np.zeros((n_intervals, 401, 701))

    for i, file_inca in enumerate(files):
        # load zipped ascii data and update inca tensor
        inca_data[i] = load_inca_file(file_inca)

    return inca_data
def
get_distance
(
lon_a
:
np
.
array
,
lat_a
:
np
.
array
,
lon_b
:
np
.
array
,
lat_b
:
np
.
array
)
->
np
.
array
:
"""Calculcates distance between two coordinates in km
using a rotation-ellipsoid in cartesian coordinates out of polar coordinates
Parameters:
l
at
_a (
pd.Series
): L
at
itudes of point A
l
on
_a (
pd.Series
): L
ong
itudes of point A
l
at
_b (
pd.Series
): L
at
itudes of point B
l
on
_b (
pd.Series
): L
ong
itudes of point B
l
on
_a (
np.array
): L
ong
itudes of point A
l
at
_a (
np.array
): L
at
itudes of point A
l
on
_b (
np.array
): L
ong
itudes of point B
l
at
_b (
np.array
): L
at
itudes of point B
Returns:
length (np.array): Vector with distances in km (can directly be assigned a pandas column)
...
...
@@ -105,12 +151,111 @@ def get_distance(lat_a: pd.Series, lon_a: pd.Series, lat_b: pd.Series, lon_b: pd
return
length
def
prep
(
file_config
:
pathlib
.
Path
,
file_trans
:
pathlib
.
Path
)
->
None
:
def utm_to_lambert(lon: np.array, lat: np.array) -> tuple:
    """Project longitudes/latitudes onto a Lambert Conic Conformal plane

    The inputs are angles in degrees (they are converted to radians first).
    The result is shifted so that the point of reference (13°20'E, 47°30'N)
    maps to (350500, 200500).

    Parameters:
    lon (np.array): Vector containing longitudes
    lat (np.array): Vector containing latitudes

    Returns:
    x, y (tuple(np.array, np.array)): tuple of two vectors with coordinates
    """

    def _cot_term(angle):
        # cot(pi/4 + angle/2), written as cos/sin
        half = np.pi / 4 + angle / 2
        return np.cos(half) / np.sin(half)

    # volumetric mean radius of the earth in m
    R = 6371000

    # false easting/northing: the point of reference sits in the middle of the
    # INCA grid (701x401 km), i.e. half of 701km and half of 401km in meters
    x0, y0 = 350500, 200500

    # standard parallels according to EPSG:31287 - MGI/Austria Lambert
    # --> see https://epsg.io/31287
    lat1, lat2 = np.radians(49), np.radians(46)

    # point of reference: lon 13°20'E, lat 47°30'N
    lon0, lat0 = np.radians(13.33333333333333), np.radians(47.5)

    # input angles of degree --> radians
    lam = np.radians(lon)
    phi = np.radians(lat)

    # lambert conformal conic projection:
    # --> see https://mathworld.wolfram.com/LambertConformalConicProjection.html
    numerator = np.log(np.cos(lat1) * (1 / np.cos(lat2)))
    denominator = np.log(np.tan(np.pi / 4 + lat2 / 2) * _cot_term(lat1))
    n = numerator / denominator
    F = (np.cos(lat1) * np.tan(np.pi / 4 + lat1 / 2) ** n) / n

    # radii for the requested points and for the point of reference
    p = R * F * _cot_term(phi) ** n
    p0 = R * F * _cot_term(lat0) ** n

    # lambert conic conformal x and y
    theta = n * (lam - lon0)
    x = p * np.sin(theta) + x0
    y = p0 - p * np.cos(theta) + y0

    return x, y
def lambert_to_inca_coords(x: np.array, y: np.array) -> tuple:
    """Turn Lambert Conic Conformal x/y values into INCA coordinates

    Both vectors are divided by 1000, rounded to zero decimals
    and cast to integers.

    Parameters:
    x (np.array): Vector containing x values in meter of Lambert Conic Conformal Projection
    y (np.array): Vector containing y values in meter of Lambert Conic Conformal Projection

    Returns:
    ix, iy (tuple(np.array, np.array)): tuple of indices for INCA data
    """

    # scale by 1000 and round to the nearest whole number
    ix = np.round(x / 1000, decimals=0)
    iy = np.round(y / 1000, decimals=0)

    # indices have to be integers
    return ix.astype(int), iy.astype(int)
def get_inca_indices(datetimes: np.array, x: np.array, y: np.array) -> tuple:
    """Get indices of INCA RR data based on Lambert Conic Conformal Coordinates

    Parameters:
    datetimes (np.array): Vector containing datetimes of transmissions as strings
        whose last four characters are the time of day "HHMM"
        (a pandas Series works as well)
    x (np.array): Vector containing x values of LINK (passed on as longitudes)
    y (np.array): Vector containing y values of LINK (passed on as latitudes)

    Returns:
    idx_times, idx, idy (tuple): Three vectors containing indices to INCA RR data
    """

    # convert coordinates to lambert conic conformal projection
    lcc_x, lcc_y = utm_to_lambert(x, y)

    # convert lambert coordinates to INCA indices
    idx, idy = lambert_to_inca_coords(lcc_x, lcc_y)

    # generate times of day ("HH:MM") in 15min (window) intervals
    # zero padding keeps the list sorted lexicographically, as needed by searchsorted
    window = 15
    inca_times = [f'{minute // 60:02d}:{minute % 60:02d}' for minute in range(0, 24 * 60, window)]

    # generate times of LINK data ("HH:MM" out of the trailing "HHMM")
    # wrapping in a Series makes this work for plain arrays too, not only for pandas objects
    link_times = pd.Series(datetimes).map(lambda stamp: f'{stamp[-4:-2]}:{stamp[-2:]}')

    # get LINK indices of INCA times
    idx_times = np.searchsorted(inca_times, link_times)

    # return time indices and INCA grid indices
    return idx_times, idx, idy
def
prep
()
->
None
:
"""Data preparation for LINK config and transmissions
Parameters:
file_config (pathlib.Path): Config File
file_trans (pathlib.Path): Transmissions File
dir_inca (pathlib.Path): INCA Directory
"""
_log
(
'
\n
******************************** READ FILES ********************************'
)
...
...
@@ -120,49 +265,49 @@ def prep(file_config: pathlib.Path, file_trans: pathlib.Path) -> None:
_log
(
f
'Read config file with shape
{
df_config
.
shape
}
'
)
df_trans
=
pd
.
read_csv
(
file_trans
,
sep
=
';'
)
_log
(
f
'Read transmissions file with shape
{
df_trans
.
shape
}
'
)
_log
(
'
\n
******************************** BASIC PREP ********************************'
)
# remove test-link with link id 1
df_config
=
df_config
[
df_config
[
'LINKID'
]
!=
1
]
df_trans
=
df_trans
[
df_trans
[
'RADIOLINKID'
]
!=
1
]
_log
(
'Removed all entries of test-link with linkid 1'
)
# drop links that are officially not in use ('na' in CAPACITYINTERFACE and/or FREQUENCY)
# --> see Q&A Phillip Scheffknecht (05 Feb 2021)
df_config
=
df_config
.
dropna
(
axis
=
0
,
subset
=
[
'CAPACITYINTERFACE'
,
'FREQUENCY'
])
_log
(
'Dropped configs with NA in CAPACITYINTERFACE and/or FREQUENCY (links officially not in use)'
)
# delete rows with unused link ids
# get link ids of config and transmissions
config_ids
=
set
(
df_config
[
'LINKID'
].
unique
().
tolist
())
trans_ids
=
set
(
df_trans
[
'RADIOLINKID'
].
unique
().
tolist
())
# delete link ids in transmissions without config
unused_trans_ids
=
trans_ids
-
config_ids
df_trans
=
df_trans
[
~
df_trans
[
'RADIOLINKID'
].
isin
(
list
(
unused_trans_ids
))]
_log
(
'Removed all links in transmissions where no config is present'
)
# delete link ids in config without transmissions
unused_config_ids
=
config_ids
-
trans_ids
df_config
=
df_config
[
~
df_config
[
'LINKID'
].
isin
(
list
(
unused_config_ids
))]
_log
(
'Removed all links in config where no transmission is present'
)
# delete duplicates in config (same values, different link ids), where corresponding link ids are not used in transmissions
# gather duplicated rows in config file
col_subset
=
[
'LINKTYPE'
,
'SITEID_A'
,
'LATITUDE_A'
,
'LONGITUDE_A'
,
'SITEID_B'
,
'LATITUDE_B'
,
'LONGITUDE_B'
,
'CAPACITYINTERFACE'
,
'FREQUENCY'
]
duplicated_config_ids
=
set
(
df_config
[
df_config
.
duplicated
(
subset
=
col_subset
)][
'LINKID'
].
unique
().
tolist
())
# gather duplicated link ids of config file in transmissions file
found_trans_ids
=
set
(
df_trans
[
df_trans
[
'RADIOLINKID'
].
isin
(
duplicated_config_ids
)][
'RADIOLINKID'
].
unique
().
tolist
())
# calculate unused duplicated ids in config file
duplicated_used_ids
=
duplicated_config_ids
-
found_trans_ids
# delete rows with unused duplicated link ids in config file
df_config
=
df_config
[
~
df_config
[
'LINKID'
].
isin
(
list
(
duplicated_used_ids
))]
_log
(
'Removed duplicated links which are not in use'
)
...
...
@@ -192,9 +337,9 @@ def prep(file_config: pathlib.Path, file_trans: pathlib.Path) -> None:
df_config
=
df_config
.
drop
([
'TEMP_LOC_TUPLE'
],
axis
=
1
)
df_trans
=
df_trans
.
drop
([
'TEMP_LOC_TUPLE'
],
axis
=
1
)
# calculate LENGTH in km between links
df_config
[
'LENGTH'
]
=
get_distance
(
df_config
[
'L
AT
ITUDE_A'
],
df_config
[
'L
ONG
ITUDE_A'
],
df_config
[
'L
AT
ITUDE_B'
],
df_config
[
'L
ONG
ITUDE_B'
])
df_config
[
'LENGTH'
]
=
get_distance
(
df_config
[
'L
ONG
ITUDE_A'
],
df_config
[
'L
AT
ITUDE_A'
],
df_config
[
'L
ONG
ITUDE_B'
],
df_config
[
'L
AT
ITUDE_B'
])
_log
(
'Calculated distances between sites using a WGS84 ellipsoid'
)
...
...
@@ -210,10 +355,9 @@ def prep(file_config: pathlib.Path, file_trans: pathlib.Path) -> None:
df_config
[
'TXFREQUENCY'
]
=
df_config
[
'TXFREQUENCY'
].
astype
(
'float'
)
df_config
[
'TXFREQUENCY'
]
=
df_config
[
'TXFREQUENCY'
]
/
1000
_log
(
'Converted RXFREQUENCY and TXFREQUENCY to float and GHz'
)
# TODO: drop transmissions with (operational) status unequal 1?
#
check occurences with: df_trans[(df_trans['STATUS'] != 1) | (df_trans['OPERATIONALSTATUS'] != 1)].shape
#
drop transmissions with (operational) status unequal 1
df_trans
=
df_trans
[
df_trans
[
'STATUS'
]
==
1
]
df_trans
=
df_trans
[
df_trans
[
'OPERATIONALSTATUS'
]
==
1
]
_log
(
'Removed transmissions with STATUS and/or OPERATIONALSTATUS unequal 1'
)
...
...
@@ -224,20 +368,20 @@ def prep(file_config: pathlib.Path, file_trans: pathlib.Path) -> None:
# copy transmissions dataframe to link dataframe
df_link
=
df_trans
.
copy
()
_log
(
'Copy transmissions dataframe to link dataframe'
)
# convert begintime to utc
df_link
[
'BEGINTIME'
]
=
pd
.
to_datetime
(
df_link
[
'BEGINTIME'
],
format
=
'%Y-%m-%d %H:%M:%S'
)
df_link
[
'BEGINTIME'
]
=
df_link
[
'BEGINTIME'
].
dt
.
tz_localize
(
'Europe/Vienna'
).
dt
.
tz_convert
(
'UTC'
).
dt
.
tz_localize
(
None
)
_log
(
'Converted BEGINTIME to UTC'
)
# copy REMOTERXLEVEL to PMIN and PMAX (for aggregation in 15min window conversion)
df_link
[
'PMIN'
]
=
df_link
[
'REMOTERXLEVEL'
]
df_link
[
'PMAX'
]
=
df_link
[
'REMOTERXLEVEL'
]
_log
(
'Created PMIN and PMAX of REMOTERXLEVEL'
)
# convert 3min windows to 15min windows
group_cols
=
[
df_link
[
'BEGINTIME'
].
dt
.
floor
(
'15Min'
),
'RADIOLINKID'
]
agg_cols
=
{
'TXLEVEL'
:
'mean'
,
'REMOTERXLEVEL'
:
'mean'
,
'PMIN'
:
'min'
,
'PMAX'
:
'max'
}
...
...
@@ -248,8 +392,8 @@ def prep(file_config: pathlib.Path, file_trans: pathlib.Path) -> None:
# convert BEGINTIME to RAINLINK format
df_link
[
'BEGINTIME'
]
=
df_link
[
'BEGINTIME'
].
dt
.
strftime
(
'%Y%m%d%H%M'
)
_log
(
'Converted BEGINTIME to RAINLINK format "%Y%m%d%H%M"'
)
# build df with differences of sending and receiving levels
df_diff
=
df_link
[[
'RADIOLINKID'
,
'TXLEVEL'
,
'REMOTERXLEVEL'
]].
copy
()
df_diff
[
'MEANLINKDIFFLEVEL'
]
=
df_diff
[
'TXLEVEL'
]
-
df_diff
[
'REMOTERXLEVEL'
]
...
...
@@ -258,13 +402,13 @@ def prep(file_config: pathlib.Path, file_trans: pathlib.Path) -> None:
# get mean of differences
df_diff
=
df_diff
.
groupby
([
'RADIOLINKID'
]).
agg
({
'MEANLINKDIFFLEVEL'
:
'mean'
}).
reset_index
()
_log
(
'Merged mean link difference levels back to link dataframe'
)
# merge differences to transmission dataframe
df_link
=
pd
.
merge
(
df_link
,
df_diff
,
how
=
'inner'
,
left_on
=
'RADIOLINKID'
,
right_on
=
'RADIOLINKID'
)
df_link
[
'DIFFLEVEL'
]
=
df_link
[
'TXLEVEL'
]
-
df_link
[
'REMOTERXLEVEL'
]
-
df_link
[
'MEANLINKDIFFLEVEL'
]
_log
(
'Calculated DIFFLEVEL as TXLEVEL - REMOTERXLEVEL - MEANLINKDIFFLEVEL'
)
# merge config and link dataframe
drop_cols
=
[
'RADIOLINKID'
,
'LINKTYPE'
,
'SITEID_A'
,
'SITEID_B'
,
'CAPACITYINTERFACE'
]
df_link
=
pd
.
merge
(
df_link
,
df_config
,
how
=
'inner'
,
left_on
=
'RADIOLINKID'
,
right_on
=
'LINKID'
).
drop
(
drop_cols
,
axis
=
1
)
...
...
@@ -299,6 +443,21 @@ def prep(file_config: pathlib.Path, file_trans: pathlib.Path) -> None:
_log
(
'Converted link dataframe to RAINLINK format'
)
_log
(
'
\n
******************************** MERGE INCA ********************************'
)
# load inca data
inca_data
=
load_inca_data
(
dir_inca
)
_log
(
f
'Loaded INCA data from
{
str
(
dir_inca
).
split
(
"/"
)[
-
1
]
}
'
)
idx_times
,
IxStart
,
IyStart
=
get_inca_indices
(
df_link
[
'DateTime'
],
df_link
[
'XStart'
],
df_link
[
'YStart'
])
_
,
IxEnd
,
IyEnd
=
get_inca_indices
(
df_link
[
'DateTime'
],
df_link
[
'XEnd'
],
df_link
[
'YEnd'
])
_log
(
'Calculated INCA RR indices based on lambert coordinates and datetimes'
)
df_link
[
'RRStart'
]
=
inca_data
[
idx_times
,
IyStart
,
IxStart
]
df_link
[
'RREnd'
]
=
inca_data
[
idx_times
,
IyEnd
,
IxEnd
]
_log
(
'Set INCA RR data based on lambert coordinates'
)
_log
(
'
\n
******************************** SAVE FILES ********************************'
)
# build path for clean config and transmissions destination files
...
...
@@ -308,9 +467,9 @@ def prep(file_config: pathlib.Path, file_trans: pathlib.Path) -> None:
# build path for clean link destination file (same folder, date and extension as transmissions file)
date
=
str
(
file_trans
.
stem
).
split
(
'_'
)[
-
1
]
dest_link
=
pathlib
.
Path
(
dest_trans
.
parents
[
0
],
f
'LINK_
{
date
}
_clean
{
file_trans
.
suffix
}
'
)
# save clean
ed
files
# save clean files
df_config
.
to_csv
(
dest_config
,
sep
=
';'
,
header
=
True
,
index
=
False
)
_log
(
f
'Saved clean config file with shape
{
df_config
.
shape
}
to "
{
str
(
dest_config
)
}
"'
)
df_trans
.
to_csv
(
dest_trans
,
sep
=
';'
,
header
=
True
,
index
=
False
)
...
...
@@ -329,6 +488,7 @@ if __name__ == '__main__':
# convert config and transmissions arguments to paths
file_config
=
pathlib
.
Path
(
args
.
config
)
file_trans
=
pathlib
.
Path
(
args
.
transmissions
)
dir_inca
=
pathlib
.
Path
(
args
.
inca
)
# check if config files exists
if
not
file_config
.
exists
():
...
...
@@ -337,11 +497,16 @@ if __name__ == '__main__':
# check if transmissions file exists
if
not
file_trans
.
exists
():
_log
(
'% Invalid path for transmissions file!'
)
_log
(
'Invalid path for transmissions file!'
)
start_prep
=
False
# check if inca dir exists
if
not
dir_inca
.
exists
():
_log
(
'Invalid path for inca directory!'
)
start_prep
=
False
# start prep if flag is True, otherwise exit with code 2
if
start_prep
:
prep
(
file_config
,
file_trans
)
prep
()
else
:
sys
.
exit
(
2
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment