-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata.py
More file actions
101 lines (80 loc) · 3.03 KB
/
data.py
File metadata and controls
101 lines (80 loc) · 3.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from datetime import datetime
import pandas as pd
class Data:
    """Read, query, edit and write the data file.

    The rows live in ``self.df``, a pandas DataFrame loaded from the CSV at
    ``dataset['file']``.  The methods below use the columns ``date``,
    ``type``, ``location`` and ``count``.
    """

    def __init__(self, dataset):
        """Load ``dataset['file']`` into ``self.df``.

        Args:
            dataset: Mapping with a ``'file'`` entry that ``pandas.read_csv``
                can open.  The CSV must contain ``date`` and ``count``
                columns.
        """
        self.dataset = dataset
        df = pd.read_csv(dataset['file'])
        # Dates are parsed value by value so every entry is interpreted on
        # its own rather than against a format inferred for the whole column.
        df['date'] = df['date'].apply(pd.to_datetime)
        df['count'] = pd.to_numeric(df['count'])
        self.df = df

    def clearAll(self):
        """Remove every row from ``self.df`` in place; the columns remain."""
        self.df.drop(self.df.index, inplace=True)

    def getLatestDate(self):
        """Return the largest ``date`` value, converted with ``.date()``."""
        return self.df['date'].max().date()

    def removeDates(self, dates):
        """Drop every row whose ``date`` is one of ``dates``.

        Args:
            dates: List-like of values accepted by ``pandas.to_datetime``.
        """
        unwanted = self.df['date'].isin(pd.to_datetime(dates))
        # .copy() makes the result an independent frame, so later in-place
        # edits (see clearAll) are not treated as writes to a slice.
        self.df = self.df[~unwanted].copy()

    def removeDateTypes(self, *dts):
        """Drop the rows matching each given date/type pair.

        Args:
            *dts: Mappings with a ``'date'`` entry (anything
                ``pandas.to_datetime`` accepts) and a ``'type'`` entry.
        """
        for dt in dts:
            when = pd.to_datetime(dt['date'])
            match = (self.df['date'] == when) & (self.df['type'] == dt['type'])
            # Filter by position with a boolean mask instead of dropping by
            # index label: labels are not guaranteed to be unique, and a
            # label-based drop would also remove unrelated rows.
            self.df = self.df[~match].copy()

    def haveDataFor(self, dt):
        """Do we already have data for the given date and type?

        Args:
            dt: Mapping with ``'date'`` and ``'type'`` entries.  The date is
                normalised with ``pandas.to_datetime`` before comparing, the
                same way ``removeDateTypes`` does.

        Returns:
            ``True`` if at least one row matches both values, else ``False``.
        """
        when = pd.to_datetime(dt['date'])
        match = (self.df['type'] == dt['type']) & (self.df['date'] == when)
        return bool(match.any())

    def toCsv(self, path):
        """Save the data as CSV to ``path``.

        Rows containing a missing value in any column are left out, and the
        remaining rows are written sorted by ``date``, ``type`` and
        ``location`` in descending order, without the index.  ``self.df``
        itself is not modified.
        """
        out = self.df.dropna().sort_values(
            by=['date', 'type', 'location'], ascending=False)
        out.to_csv(path, index=False)

    def to_df(self, data):
        """Build a DataFrame from ``data`` with a numeric ``count`` column.

        Args:
            data: Anything the ``pandas.DataFrame`` constructor accepts; it
                must yield a ``count`` column.
        """
        df = pd.DataFrame(data)
        df['count'] = pd.to_numeric(df['count'])
        return df

    def append(self, data):
        """Append the given data from the driver to the data we already have.

        Empty ``data`` is ignored.  The combined frame gets a fresh
        0..n-1 index so that index labels stay unique.
        """
        # len() rather than truthiness: it also works if data is a DataFrame.
        if len(data) > 0:
            self.df = pd.concat(
                [self.df, self.to_df(data)], ignore_index=True)
class Directory:
    """Directory CSV joined with the rows held by a ``Data`` object.

    ``self.df`` is the frame read from ``dataset['directory']`` with its
    ``location`` names normalised by ``mapDirNames`` and then left-merged
    with ``self.data.df`` on ``location``.
    """

    df = None
    # Normalised directory name -> replacement name used for the merge
    # (presumably the spelling found in the data file; verify against it).
    nameMap = {
        'LAKE COMO K-8': 'LAKE COMO SCHOOL',
        'AUDUBON PARK K-8': 'AUDUBON PARK SCHOOL',
        'APOPKA MEMORIAL MIDDLE': 'MEMORIAL MIDDLE',
        'WHEATLEY ELEMENTARY': 'PHILLIS WHEATLEY ELEMENTARY',
        'DR. PHILLIPS HIGH': 'DR PHILLIPS HIGH',
        'DILLARD ST. ELEMENTARY': 'DILLARD STREET ELEMENTARY',
        'NORTHLAKE PARK COMMUNITY': 'NORTHLAKE PARK COMMUNITY ELEMENTARY',
        'WINTER PARK 9TH GRADE CENTER': 'WINTER PARK HIGH 9TH GRADE CENTER',
    }

    def __init__(self, dataset, data=None):
        """Read ``dataset['directory']`` and merge it with ``data``.

        Args:
            dataset: Mapping with a ``'directory'`` entry that
                ``pandas.read_csv`` can open.
            data: Object exposing the data rows as ``.df``.  When omitted, a
                ``Data`` is built from ``dataset``.
        """
        if data is None:
            data = Data(dataset)
        self.dataset = dataset
        self.data = data
        directory_df = pd.read_csv(dataset['directory'])
        self.df = self.mapDataToDirectory(directory_df)

    def mapDataToDirectory(self, data_df):
        """Normalise ``location`` names and left-merge with ``self.data.df``.

        Args:
            data_df: DataFrame with a ``location`` column of strings.  It is
                not modified; the normalised names go into a new frame.

        Returns:
            A DataFrame holding every row of ``data_df`` (``how='left'``),
            joined on ``location`` with the matching rows of
            ``self.data.df``.
        """
        normalised = data_df.assign(
            location=data_df['location'].apply(self.mapDirNames))
        return normalised.merge(self.data.df, how='left', on='location')

    def mapDirNames(self, name):
        """Return the normalised form of a directory location name.

        The name is upper-cased; parentheses, the text `` SCHOOL`` and the
        ``’`` character are removed; surrounding whitespace is stripped.  If
        the result is a key of ``nameMap`` the mapped value is returned,
        otherwise the result itself.
        """
        name = name.upper()
        for fragment in ('(', ')', ' SCHOOL', '’'):
            name = name.replace(fragment, '')
        # Strip before the lookup so stray surrounding whitespace cannot
        # make a name miss its nameMap entry.
        name = name.strip()
        return self.nameMap.get(name, name)