-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexternal_file_data.py
More file actions
112 lines (89 loc) · 3.76 KB
/
external_file_data.py
File metadata and controls
112 lines (89 loc) · 3.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
"""
ExternalFileData class to read data from an external file using pandas.
"""
import logging
import pandas as pd
class ExternalFileData:
"""
Class to read data from an external file using pandas.
"""
def __init__(self, file_path: str, parameters: dict):
"""
Initialize the ExternalFileData class.
:param file_path: Path to the external file.
:param parameters: Parameters for reading the file (e.g., delimiter, header). Check pd.read_csv for details.
"""
self.__file_path: str = file_path
self.__parameters: dict = parameters
self.__df: pd.DataFrame | None = None
self.__log: logging.Logger = logging.getLogger(__name__)
self.__units: list[str] | None = None
self.__names: list[str] | None = None
self.__descriptions: list[str] | None = None
def close(self):
"""
Close the file and release resources.
"""
if self.__df is not None:
self.__log.info("Closing file: %s", self.__file_path)
self.__df = None
def not_my_file(self) -> bool:
"""
Check if the file should be read with this plugin.
:return: True if the file should not be read with this plugin, False otherwise.
"""
if self.data() is None:
return True
return ExternalFileData._not_my_file(self.__names, self.__descriptions, self.__units)
@staticmethod
def _not_my_file(names, descriptions, units) -> bool:
if names is None or descriptions is None or units is None:
return True
nr_of_channels = len(names)
nr_of_descriptions = len(descriptions)
nr_of_units = len(units)
if nr_of_channels < 2 or nr_of_channels != nr_of_descriptions or nr_of_channels != nr_of_units:
return True
# first column in dydaqlog CSV file is "Sequence Number"
if nr_of_descriptions == 0 or "sequence number" != descriptions[0].lower():
return True
return False
def data(self) -> pd.DataFrame:
"""
Read the data from the file and return it as a pandas DataFrame.
:return: DataFrame containing the data from the file.
"""
if self.__df is None:
self.__log.info("Reading file: %s", self.__file_path)
# The first three rows are containing the metadata
with open(self.__file_path, "r", encoding="utf-8-sig") as file:
self.__names = [item.strip().strip('"') for item in file.readline().split(";")]
self.__descriptions = [item.strip().strip('"') for item in file.readline().split(";")]
self.__units = [item.strip().strip('"') for item in file.readline().split(";")]
if not ExternalFileData._not_my_file(self.__names, self.__descriptions, self.__units):
# Read the data from the file - skip the header (already read)
df = pd.read_csv(self.__file_path, sep=";", decimal=",", header=None, skiprows=3)
df[2] = pd.to_datetime(df[2] + "T" + df[3] + "." + df[4].astype(str))
self.__df = df
return self.__df
def column_names(self) -> list[str] | None:
"""
Overwrite the column names of the dataframe.
"""
if self.__names is None:
self.data()
return self.__names
def column_units(self) -> list[str] | None:
"""
Return column units of the dataframe.
"""
if self.__units is None:
self.data()
return self.__units
def column_descriptions(self) -> list[str] | None:
"""
Return column descriptions of the dataframe.
"""
if self.__descriptions is None:
self.data()
return self.__descriptions