module stats

import math

// freq calculates the Measure of Occurrence
// Frequency of a given number: how many elements of `data` compare equal to `val`.
// Returns 0 for an empty array.
// Based on
// https://www.mathsisfun.com/data/frequency-distribution.html
pub fn freq[T](data []T, val T) int {
	if data.len == 0 {
		return 0
	}
	mut count := 0
	for v in data {
		if v == val {
			count++
		}
	}
	return count
}

// mean calculates the average
// of the given input array, sum(data)/data.len
// Returns T(0) for an empty array. The division is carried out in T.
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn mean[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	mut sum := T(0)
	for v in data {
		sum += v
	}
	return T(sum / data.len)
}

// geometric_mean calculates the central tendency
// of the given input array, product(data)**1/data.len
// Returns T(0) for an empty array.
// Based on
// https://www.mathsisfun.com/numbers/geometric-mean.html
pub fn geometric_mean[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	mut product := T(1)
	for v in data {
		product *= v
	}
	$if T is f64 {
		return math.pow(product, f64(1.0) / data.len)
	} $else {
		// use f32 for f32/int/...
		return T(math.powf(product, f32(1.0) / data.len))
	}
}

// harmonic_mean calculates the reciprocal of the average of reciprocals
// of the given input array
// Returns T(0) for an empty array.
// Based on
// https://www.mathsisfun.com/numbers/harmonic-mean.html
pub fn harmonic_mean[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	$if T is f64 {
		mut sum := f64(0)
		for v in data {
			sum += f64(1.0) / v
		}
		return f64(f64(data.len) / sum)
	} $else {
		// use f32 for f32/int/...
		mut sum := f32(0)
		for v in data {
			sum += f32(1.0) / f32(v)
		}
		return T(f32(data.len) / sum)
	}
}

// median returns the middlemost value of the given input array ( input array is assumed to be sorted )
// For an even number of elements, the two middle values are averaged (the division is done in T).
// Returns T(0) for an empty array.
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn median[T](sorted_data []T) T {
	if sorted_data.len == 0 {
		return T(0)
	}
	if sorted_data.len % 2 == 0 {
		mid := (sorted_data.len / 2) - 1
		return (sorted_data[mid] + sorted_data[mid + 1]) / T(2)
	} else {
		return sorted_data[((sorted_data.len - 1) / 2)]
	}
}

// mode calculates the highest occurring value of the given input array
// When several values share the highest frequency, the one that appears first in `data` is returned.
// Returns T(0) for an empty array.
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn mode[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	mut best_index := 0
	mut best_count := 0
	for i, v in data {
		count := freq(data, v)
		// strict comparison keeps the earliest element among ties
		if count > best_count {
			best_count = count
			best_index = i
		}
	}
	return data[best_index]
}

// rms, Root Mean Square, calculates the sqrt of the mean of the squares of the given input array
// Returns T(0) for an empty array.
// Based on
// https://en.wikipedia.org/wiki/Root_mean_square
pub fn rms[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	$if T is f64 {
		mut sum := f64(0)
		for v in data {
			sum += math.pow(v, 2)
		}
		return math.sqrt(sum / data.len)
	} $else {
		// use f32 for f32/int/...
		mut sum := f32(0)
		for v in data {
			sum += math.powf(v, 2)
		}
		return T(math.sqrtf(sum / data.len))
	}
}

// population_variance is the Measure of Dispersion / Spread
// of the given input array
// The mean is computed from `data` first. Returns T(0) for an empty array.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn population_variance[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	data_mean := mean[T](data)
	return population_variance_mean[T](data, data_mean)
}

// population_variance_mean is the Measure of Dispersion / Spread
// of the given input array, with the provided mean
// It is the sum of squared deviations from `mean`, divided by data.len.
// Returns T(0) for an empty array.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn population_variance_mean[T](data []T, mean T) T {
	if data.len == 0 {
		return T(0)
	}
	mut sum := T(0)
	for v in data {
		sum += T((v - mean) * (v - mean))
	}
	return T(sum / data.len)
}

// sample_variance calculates the spread of dataset around the mean
// The mean is computed from `data` first. Returns T(0) for an empty array.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn sample_variance[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	data_mean := mean[T](data)
	return sample_variance_mean[T](data, data_mean)
}

// sample_variance_mean calculates the spread of dataset around the provided mean
// It is the sum of squared deviations from `mean`, divided by (data.len - 1).
// Returns T(0) when `data` has fewer than two elements, since the divisor
// would otherwise be zero (or negative for an empty array).
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn sample_variance_mean[T](data []T, mean T) T {
	if data.len < 2 {
		return T(0)
	}
	mut sum := T(0)
	for v in data {
		sum += T((v - mean) * (v - mean))
	}
	return T(sum / (data.len - 1))
}

// population_stddev calculates how spread out the dataset is
// It is the square root of population_variance. Returns T(0) for an empty array.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn population_stddev[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	$if T is f64 {
		return math.sqrt(population_variance[T](data))
	} $else {
		return T(math.sqrtf(population_variance[T](data)))
	}
}

// population_stddev_mean calculates how spread out the dataset is, with the provided mean
// It is the square root of population_variance_mean. Returns T(0) for an empty array.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn population_stddev_mean[T](data []T, mean T) T {
	if data.len == 0 {
		return T(0)
	}
	$if T is f64 {
		return math.sqrt(population_variance_mean[T](data, mean))
	} $else {
		return T(math.sqrtf(population_variance_mean[T](data, mean)))
	}
}

// sample_stddev is a Measure of Dispersion / Spread
// Sample Standard Deviation of the given input array
// It is the square root of sample_variance. Returns T(0) for an empty array.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn sample_stddev[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	$if T is f64 {
		return math.sqrt(sample_variance[T](data))
	} $else {
		return T(math.sqrtf(sample_variance[T](data)))
	}
}

// sample_stddev_mean is a Measure of Dispersion / Spread
// Sample Standard Deviation of the given input array, with the provided mean
// It is the square root of sample_variance_mean. Returns T(0) for an empty array.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
@[inline]
pub fn sample_stddev_mean[T](data []T, mean T) T {
	if data.len == 0 {
		return T(0)
	}
	$if T is f64 {
		return math.sqrt(sample_variance_mean[T](data, mean))
	} $else {
		return T(math.sqrtf(sample_variance_mean[T](data, mean)))
	}
}

// absdev calculates the average distance between each data point and the mean
// The mean is computed from `data` first. Returns T(0) for an empty array.
// Based on
// https://en.wikipedia.org/wiki/Average_absolute_deviation
@[inline]
pub fn absdev[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	data_mean := mean[T](data)
	return absdev_mean[T](data, data_mean)
}

// absdev_mean calculates the average distance between each data point and the provided mean
// Returns T(0) for an empty array.
// Based on
// https://en.wikipedia.org/wiki/Average_absolute_deviation
pub fn absdev_mean[T](data []T, mean T) T {
	if data.len == 0 {
		return T(0)
	}
	mut sum := T(0)
	for v in data {
		sum += math.abs(v - mean)
	}
	return T(sum / data.len)
}

// tss, Sum of squares, calculates the sum over all squared differences between values and overall mean
// The mean is computed from `data` first. Returns T(0) for an empty array.
@[inline]
pub fn tss[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	data_mean := mean[T](data)
	return tss_mean[T](data, data_mean)
}

// tss_mean, Sum of squares, calculates the sum over all squared differences between values and the provided mean
// Returns T(0) for an empty array.
pub fn tss_mean[T](data []T, mean T) T {
	if data.len == 0 {
		return T(0)
	}
	mut tss := T(0)
	for v in data {
		tss += T((v - mean) * (v - mean))
	}
	return tss
}

// min finds the minimum value from the dataset
// Returns T(0) for an empty array.
pub fn min[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	mut min := data[0]
	for v in data {
		if v < min {
			min = v
		}
	}
	return min
}

// max finds the maximum value from the dataset
// Returns T(0) for an empty array.
pub fn max[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	mut max := data[0]
	for v in data {
		if v > max {
			max = v
		}
	}
	return max
}

// minmax finds the minimum and maximum value from the dataset
// The result is returned in the order (min, max); it is (T(0), T(0)) for an empty array.
pub fn minmax[T](data []T) (T, T) {
	if data.len == 0 {
		return T(0), T(0)
	}
	mut max := data[0]
	mut min := data[0]
	for v in data[1..] {
		if v > max {
			max = v
		}
		if v < min {
			min = v
		}
	}
	return min, max
}

// min_index finds the first index of the minimum value
// Returns 0 for an empty array.
pub fn min_index[T](data []T) int {
	if data.len == 0 {
		return 0
	}
	mut min := data[0]
	mut min_index := 0
	for i, v in data {
		// strict comparison keeps the first occurrence among equal minima
		if v < min {
			min = v
			min_index = i
		}
	}
	return min_index
}

// max_index finds the first index of the maximum value
// Returns 0 for an empty array.
pub fn max_index[T](data []T) int {
	if data.len == 0 {
		return 0
	}
	mut max := data[0]
	mut max_index := 0
	for i, v in data {
		// strict comparison keeps the first occurrence among equal maxima
		if v > max {
			max = v
			max_index = i
		}
	}
	return max_index
}

// minmax_index finds the first index of the minimum and maximum value
// The result is returned in the order (min_index, max_index); it is (0, 0) for an empty array.
pub fn minmax_index[T](data []T) (int, int) {
	if data.len == 0 {
		return 0, 0
	}
	mut min := data[0]
	mut max := data[0]
	mut min_index := 0
	mut max_index := 0
	for i, v in data {
		if v < min {
			min = v
			min_index = i
		}
		if v > max {
			max = v
			max_index = i
		}
	}
	return min_index, max_index
}

// range calculates the difference between the min and max
// Range ( Maximum - Minimum ) of the given input array
// Returns T(0) for an empty array.
// Based on
// https://www.mathsisfun.com/data/range.html
pub fn range[T](data []T) T {
	if data.len == 0 {
		return T(0)
	}
	min, max := minmax[T](data)
	return max - min
}

// covariance calculates directional association between datasets
// positive value denotes variables move in same direction and negative denotes variables move in opposite directions
@[inline]
pub fn covariance[T](data1 []T, data2 []T) T {
	// each mean is taken over its own full array
	mean1 := mean[T](data1)
	mean2 := mean[T](data2)
	return covariance_mean[T](data1, data2, mean1, mean2)
}

// covariance_mean computes the covariance of a dataset with means provided
// the recurrence relation
// Only the first min(data1.len, data2.len) pairs are used.
// Returns T(0) when that count is zero.
pub fn covariance_mean[T](data1 []T, data2 []T, mean1 T, mean2 T) T {
	n := int(math.min(data1.len, data2.len))
	if n == 0 {
		return T(0)
	}
	mut covariance := T(0)
	for i in 0 .. n {
		delta1 := data1[i] - mean1
		delta2 := data2[i] - mean2
		// running average of delta1 * delta2 over the first i + 1 pairs
		covariance += T((delta1 * delta2 - covariance) / (T(i) + T(1)))
	}
	return covariance
}

// lag1_autocorrelation calculates the correlation between values that are one time period apart
// of a dataset, based on the mean
// The mean is computed from `data` first.
@[inline]
pub fn lag1_autocorrelation[T](data []T) T {
	data_mean := mean[T](data)
	return lag1_autocorrelation_mean[T](data, data_mean)
}

// lag1_autocorrelation_mean calculates the correlation between values that are one time period apart
// of a dataset, using
// the recurrence relation
// Returns T(0) for an empty array.
pub fn lag1_autocorrelation_mean[T](data []T, mean T) T {
	if data.len == 0 {
		return T(0)
	}
	// q: running average of products of neighbouring deviations
	mut q := T(0)
	// v: running average of squared deviations, seeded with the first element's
	// squared deviation so that data[0] is included in the denominator
	mut v := (data[0] - mean) * (data[0] - mean)
	for i := 1; i < data.len; i++ {
		delta0 := data[i - 1] - mean
		delta1 := data[i] - mean
		d01 := delta0 * delta1
		d11 := delta1 * delta1
		ti1 := T(i) + T(1)
		q += T((d01 - q) / ti1)
		v += T((d11 - v) / ti1)
	}
	return T(q / v)
}

// kurtosis calculates the measure of the 'tailedness' of the data by finding mean and standard deviation
// The mean and the population standard deviation are computed from `data` first.
@[inline]
pub fn kurtosis[T](data []T) T {
	data_mean := mean[T](data)
	sd := population_stddev_mean[T](data, data_mean)
	return kurtosis_mean_stddev[T](data, data_mean, sd)
}

// kurtosis_mean_stddev calculates the measure of the 'tailedness' of the data
// using the fourth moment the deviations, normalized by the sd
// The result has 3 subtracted from the averaged fourth power.
// Returns T(0) for an empty array. `sd` is used as a divisor and is not checked for zero.
pub fn kurtosis_mean_stddev[T](data []T, mean T, sd T) T {
	if data.len == 0 {
		return T(0)
	}
	mut avg := T(0)
	// find the fourth moment the deviations, normalized by the sd
	// we use a recurrence relation to stably update a running value so
	// there aren't any large sums that can overflow
	for i, v in data {
		x := (v - mean) / sd
		x4 := x * x * x * x
		ti1 := (T(i) + T(1))
		avg += T((x4 - avg) / ti1)
	}
	return avg - T(3)
}

// skew calculates the mean and standard deviation to find the skew from the data
// The mean and the population standard deviation are computed from `data` first.
@[inline]
pub fn skew[T](data []T) T {
	data_mean := mean[T](data)
	sd := population_stddev_mean[T](data, data_mean)
	return skew_mean_stddev[T](data, data_mean, sd)
}

// skew_mean_stddev calculates the skewness of data
// using the provided mean and standard deviation.
// Returns T(0) for an empty array. `sd` is used as a divisor and is not checked for zero.
pub fn skew_mean_stddev[T](data []T, mean T, sd T) T {
	if data.len == 0 {
		return T(0)
	}
	mut skew := T(0)
	// find the sum of the cubed deviations, normalized by the sd.
	// we use a recurrence relation to stably update a running value so
	// there aren't any large sums that can overflow
	for i, v in data {
		x := (v - mean) / sd
		x3 := x * x * x
		skew += T((x3 - skew) / (T(i) + T(1)))
	}
	return skew
}

// quantile calculates quantile points
// `sorted_data` is the input array and `f` is the fraction; the position
// f * (sorted_data.len - 1) is linearly interpolated between its two neighbouring elements.
// Returns T(0) for an empty array, and an error when the integer part of that
// position falls outside the array.
// for more reference
// https://en.wikipedia.org/wiki/Quantile
pub fn quantile[T](sorted_data []T, f T) !T {
	if sorted_data.len == 0 {
		return T(0)
	}
	index := f * (sorted_data.len - 1)
	lhs := int(index)
	if lhs < 0 || lhs >= sorted_data.len {
		return error('index out of range')
	}
	if lhs == sorted_data.len - 1 {
		// the position is on the last element: nothing to interpolate with
		return sorted_data[lhs]
	}
	// here 0 <= lhs < sorted_data.len - 1, so lhs + 1 is a valid index
	delta := index - T(lhs)
	return T((1 - delta) * sorted_data[lhs] + delta * sorted_data[(lhs + 1)])
}