-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstats.py
More file actions
executable file
·119 lines (100 loc) · 2.95 KB
/
stats.py
File metadata and controls
executable file
·119 lines (100 loc) · 2.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env python3
# print statistics of the floating point data presented at stdin
import sys
from math import sqrt
class Stats:
    """Accumulate running summary statistics over a sequence of numbers.

    Only running totals (count, sum, sum of squares, max, min) are stored,
    so memory use does not grow with the amount of data -- except for the
    optional set of distinct values (see ``trackUniques``).
    """

    # When trackUniques is False, distinct values are still collected until
    # more than this many have been seen; after that the set is discarded.
    MAX_UNIQUES_UNFLAGGED = 16384

    def __init__(self, initialData=(), trackUniques=False):
        """Create an accumulator, optionally seeded with ``initialData``.

        Args:
            initialData: iterable of numbers added immediately via addAll().
            trackUniques: when True, keep every distinct value regardless of
                how many there are; when False, distinct values are dropped
                once their number exceeds MAX_UNIQUES_UNFLAGGED.
        """
        self.dataCount = 0
        self.dataSum = 0
        self.dataSquaredSum = 0
        self.dataMax = None
        self.dataMin = None
        self.trackUniques = trackUniques
        # Set to None once unique tracking has been abandoned.
        self.dataUniques = set()
        self.addAll(initialData)

    def add(self, datum):
        """Fold a single number into the running totals."""
        self.dataCount += 1
        self.dataSum += datum
        self.dataSquaredSum += datum * datum

        # The first datum seeds both extremes; an explicit None check is used
        # so that unrelated errors are not silently swallowed.
        if self.dataMax is None or self.dataMin is None:
            self.dataMax = datum
            self.dataMin = datum
        else:
            if self.dataMax < datum:
                self.dataMax = datum
            if self.dataMin > datum:
                self.dataMin = datum

        if self.dataUniques is not None:
            self.dataUniques.add(datum)
            # Without explicit opt-in, stop tracking once the set gets large.
            if (not self.trackUniques
                    and len(self.dataUniques) > self.MAX_UNIQUES_UNFLAGGED):
                self.dataUniques = None

    def addAll(self, data):
        """Add every number yielded by the iterable ``data``."""
        for datum in data:
            self.add(datum)

    def avg(self):
        """Return the mean of the data, or the string "undef" if empty."""
        if self.dataCount == 0:
            return "undef"
        return self.dataSum / self.dataCount

    def avg2(self):
        """Return the mean of the squared data, or "undef" if empty."""
        if self.dataCount == 0:
            return "undef"
        return self.dataSquaredSum / self.dataCount

    def sum(self):
        """Return the sum of all data added so far."""
        return self.dataSum

    def max(self):
        """Return the largest datum seen, or None if no data was added."""
        return self.dataMax

    def min(self):
        """Return the smallest datum seen, or None if no data was added."""
        return self.dataMin

    def N(self):
        """Return the number of data items added."""
        return self.dataCount

    def uniqueCount(self):
        """Return the number of distinct values, or a hint string.

        The hint string is returned when unique tracking was abandoned
        because too many distinct values were seen without trackUniques.
        """
        if self.dataUniques is None:
            return "enable with -u or --unique, or Stats(trackUniques=True) (requires more memory)"
        return len(self.dataUniques)

    def var(self):
        """Return the population variance (divides by N), or "undef" if empty.

        Computed as mean(x**2) - mean(x)**2. Floating-point rounding can make
        that difference slightly negative for (nearly) constant data, so
        negative results are clamped to 0.0.
        """
        if self.dataCount == 0:
            return "undef"
        variance = self.avg2() - self.avg() ** 2
        if variance < 0:
            variance = 0.0
        return variance

    def std(self):
        """Return the population standard deviation, or "undef" if empty."""
        if self.dataCount == 0:
            return "undef"
        # var() never returns a negative number, so sqrt cannot raise a
        # math domain error here.
        return sqrt(self.var())

    def print(self):
        """Print a summary of all statistics to standard output."""
        print(" number of items: " + str(self.N()))
        print("number of unique items: " + str(self.uniqueCount()))
        print(" sum: " + str(self.sum()))
        print(" mean: " + str(self.avg()))
        print(" max: " + str(self.max()))
        print(" min: " + str(self.min()))
        print(" standard deviation: " + str(self.std()))
if __name__ == '__main__':
    # Command-line entry point: read whitespace-separated tokens from stdin,
    # feed every token that parses as a float into a Stats accumulator, and
    # print the summary. Unique values are tracked without limit only when
    # -u / --unique is given on the command line.
    stats = Stats(trackUniques="--unique" in sys.argv or "-u" in sys.argv)
    # Iterate over stdin lazily instead of readlines(), so the whole input
    # is never held in memory at once.
    for line in sys.stdin:
        for item in line.split():
            try:
                value = float(item)
            except ValueError:
                # Tokens that are not numbers are skipped.
                continue
            stats.add(value)
    stats.print()
class Freq:
    """Tally how many times each item has been added."""

    def __init__(self, initialItems = ()):
        """Start with an empty tally, then add each of ``initialItems``."""
        # Maps item -> number of times it has been added.
        self.items = {}
        for entry in initialItems:
            self.add(entry)

    def add(self, item):
        """Increase the count recorded for ``item`` by one."""
        self.items[item] = self.items.get(item, 0) + 1

    def byFrequency(self):
        """Return (item, count) pairs sorted by ascending count."""
        def count_of(pair):
            return pair[1]

        pairs = list(self.items.items())
        pairs.sort(key=count_of)
        return pairs

    def byValues(self):
        """Return (item, count) pairs sorted by ascending item."""
        def item_of(pair):
            return pair[0]

        pairs = list(self.items.items())
        pairs.sort(key=item_of)
        return pairs