-
Notifications
You must be signed in to change notification settings - Fork 172
/
Copy pathStatistics.carp
194 lines (173 loc) · 6.24 KB
/
Statistics.carp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
(use Int)
(use Double)
(use Array)
(doc Statistics "is a module for providing various statistical analyses on a
collection of values.")
(defmodule Statistics
(deftype Summary [
sum Double,
min Double,
max Double,
mean Double,
median Double,
var Double,
stdev Double,
stdev-pct Double,
median-abs-dev Double,
median-abs-dev-pct Double,
quartiles (Array Double),
iqr Double
])
(hidden Summary)
(doc mean "Compute the mean of the samples data.")
(defn mean [data]
(/ (Array.sum data) (from-int (Array.length data))))
(hidden _pp)
(defn _pp [a mean]
(let [sum 0.0]
(do
(for [i 0 (Array.length a)]
(let [tmp (Double.- (Double.copy (Array.unsafe-nth a i)) mean)]
(set! sum (Double.* tmp tmp))))
sum)))
(hidden _xx)
(defn _xx [a mean]
(let [sum 0.0]
(do
(for [i 0 (Array.length a)]
(set! sum (Double.- (Double.copy (Array.unsafe-nth a i)) mean)))
sum)))
(hidden _ss)
(defn _ss [data]
(let [m (mean data)
tmp (_xx data m)]
(Double.- (_pp data m)
(Double./ (Double.* tmp tmp)
(Double.from-int (Array.length data))))))
(doc median "Compute the median of the samples data.")
(defn median [data]
(let [n (Array.length data)
sorted (Array.sorted data)]
(cond (= n 0) 0.0
(= (mod n 2) 1) @(Array.unsafe-nth data (/ n 2))
(let [mid (/ n 2)] ; else
(/ (+ (the Double @(Array.unsafe-nth data (dec mid)))
@(Array.unsafe-nth data mid))
2.0)))))
(doc low-median "Compute the low median of the samples data.")
(defn low-median [data]
(let [n (Array.length data)
sorted (Array.sorted data)]
(cond (= n 0) 0.0
(= (mod n 2) 1) @(Array.unsafe-nth data (/ n 2))
@(Array.unsafe-nth data (dec (/ n 2)))))) ; else
(doc high-median "Compute the high median of the samples data.")
(defn high-median [data]
(let [n (Array.length data)
sorted (Array.sorted data)]
(if (= n 0)
0.0
@(Array.unsafe-nth data (/ n 2)))))
(doc grouped-median "Compute the grouped median of the samples data.")
(defn grouped-median [data interval]
(let [n (Array.length data)
sorted (Array.sorted data)]
(cond (= n 0) 0.0
(= n 1) @(Array.unsafe-nth data 0)
(let [x @(Array.unsafe-nth data (/ n 2)) ; else
l (- x (/ (from-int interval) 2.0))
cf (Maybe.from (Array.index-of data &x) -1)
f (Array.element-count data &x)]
(+ l (/ (* (from-int interval) (- (/ (from-int n) 2.0) (from-int cf)))
(from-int f)))))))
(doc variance "Compute the variance of the samples data.")
(defn variance [data]
(let [n (Array.length data)
ss (_ss data)]
(/ ss (from-int (dec n)))))
(doc pvariance "Compute the population variance of the samples data.")
(defn pvariance [data]
(let [n (Array.length data)
ss (_ss data)]
(/ ss (from-int n))))
(doc stdev "Compute the standard deviation of the samples data.")
(defn stdev [data]
(Double.sqrt (variance data)))
(doc pstdev "Compute the population standard deviation of the samples data.")
(defn pstdev [data]
(Double.sqrt (pvariance data)))
(doc pstdev-pct "Compute the standard deviation of the samples data as a percentile.")
(defn stdev-pct [data]
(* (/ (stdev data) (mean data)) 100.0))
(hidden median-abs-dev)
(defn median-abs-dev [data]
(let [med (median data)
zero 0.0
abs-devs (Array.replicate (Array.length data) &zero)
n 1.4826] ; taken from Rust and R, because that’s how it’s done apparently
(do
(for [i 0 (Array.length data)]
(Array.aset! &abs-devs i (abs (- med @(Array.unsafe-nth data i)))))
(* (median &abs-devs) n))))
(hidden median-abs-dev-pct)
(defn median-abs-dev-pct [data]
(* (/ (median-abs-dev data) (median data)) 100.0))
(hidden percentile-of-sorted)
(defn percentile-of-sorted [sorted pct]
(cond
(Int.= 0 (Array.length sorted)) -1.0 ; should abort here
(Double.< pct 0.0) -1.0 ; should abort here
(Double.> pct 100.0) -1.0 ; should abort here
(Int.= 1 (Array.length sorted)) @(Array.unsafe-nth sorted 0)
(Double.= 100.0 pct) @(Array.unsafe-nth sorted (Int.dec (Array.length sorted)))
(let [len (Int.dec (Array.length sorted))
rank (Double.* (Double./ pct 100.0) (Double.from-int len))
lrank (Double.floor rank)
d (Double.- rank lrank)
n (Double.to-int lrank)
lo @(Array.unsafe-nth sorted n)
hi @(Array.unsafe-nth sorted (Int.inc n))]
(Double.+ lo (Double.* d (Double.- hi lo))))))
(doc quartiles "Compute the quartiles of the samples data.")
(defn quartiles [data]
(let [tmp (Array.sorted data)
first 25.0
second 50.0
third 75.0
a (percentile-of-sorted &tmp first)
b (percentile-of-sorted &tmp second)
c (percentile-of-sorted &tmp third)]
[a b c]))
(doc iqr "Compute the interquartile range.")
(defn iqr [data]
(let [s &(quartiles data)]
(the Double (- @(Array.unsafe-nth s 2) @(Array.unsafe-nth s 0)))))
(hidden winsorize)
(defn winsorize [samples pct]
(let [tmp &(Array.sorted samples)
lo (Statistics.percentile-of-sorted tmp pct)
hi (Statistics.percentile-of-sorted tmp (Double.- 100.0 pct))]
(do
(for [i 0 (Array.length tmp)]
(let [samp @(Array.unsafe-nth tmp i)]
(cond
(> samp hi) (Array.aset! tmp i hi)
(< samp lo) (Array.aset! tmp i lo)
())))
(Array.copy tmp))))
(doc summary "Compute a variety of statistical values from a list of samples.")
(defn summary [samples]
(Summary.init
(Array.sum samples)
(Maybe.from (Array.minimum samples) 0.0)
(Maybe.from (Array.maximum samples) 0.0)
(mean samples)
(median samples)
(variance samples)
(stdev samples)
(stdev-pct samples)
(median-abs-dev samples)
(median-abs-dev-pct samples)
(quartiles samples)
(iqr samples)))
)