moments
Full source code
C++
// -*- mode: c++ -*-
// $Id
// http://www.bagley.org/~doug/shootout/
// Calculate statistical moments of a region, from Bill Lear
// Modified by Tamás Benkõ
// Further modified by Tom Hyer
#include <vector>
#include <numeric>
#include <iterator>
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cmath>
using namespace std;
template <class T>
struct moments {
public:
template <class InputIterator>
moments(InputIterator begin, InputIterator end)
: median(0.0), mean(0.0), average_deviation(0.0),
standard_deviation(0.0), variance(0.0),
skew(0.0), kurtosis(0.0)
{
T sum = accumulate(begin, end, 0.0);
size_t N = end - begin;
mean = sum / N;
for (InputIterator i = begin; i != end; ++i) {
T deviation = *i - mean;
average_deviation += fabs(deviation);
T temp = deviation * deviation;
variance += temp;
temp *= deviation;
skew += temp;
kurtosis += temp * deviation;
}
average_deviation /= N;
variance /= (N - 1);
standard_deviation = sqrt(variance);
if (variance) {
skew /= (N * variance * standard_deviation);
kurtosis = kurtosis/(N * variance * variance) - 3.0;
}
InputIterator mid = begin+N/2;
nth_element(begin, mid, end);
if (N % 2 == 0) {
InputIterator next_biggest = max_element(begin, mid);
median = (*mid+*next_biggest)/2;
}
else
median = *mid;
}
T median;
T mean;
T average_deviation;
T standard_deviation;
T variance;
T skew;
T kurtosis;
};
int main(int argc, char**argv) {
#ifdef SMALL_PROBLEM_SIZE
#define LENGTH 500000
#else
#define LENGTH 5000000
#endif
int n = ((argc == 2) ? atoi(argv[1]) : LENGTH);
vector<double> v;
double d;
for (unsigned i = 0; i != n; ++i) v.push_back(i);
moments<double> m(v.begin(), v.end());
printf("n: %d\n", v.end() - v.begin());
printf("median: %f\n", m.median);
printf("mean: %f\n", m.mean);
printf("average_deviation: %f\n", m.average_deviation);
printf("standard_deviation: %f\n", m.standard_deviation);
printf("variance: %f\n", m.variance);
printf("skew: %f\n", m.skew);
printf("kurtosis: %f\n", m.kurtosis);
return 0;
}
Rust
// Adapted from https://github.com/llvm/llvm-test-suite and // http://www.bagley.org/~doug/shootout/ // Calculate statistical moments of a region, from Bill Lear // Modified by Tamás Benkõ // Further modified by Tom Hyer use std::env; #[cfg(feature = "small_problem_size")] const LENGTH: i32 = 500000; #[cfg(not(feature = "small_problem_size"))] const LENGTH: i32 = 5000000; struct Moments<T> { median: T, mean: T, average_deviation: T, standard_deviation: T, variance: T, skew: T, kurtosis: T, } impl Moments<f64> { fn new(slice: &mut [f64]) -> Moments<f64> { let mut m = Moments::<f64> { median: 0.0, mean: 0.0, average_deviation: 0.0, standard_deviation: 0.0, variance: 0.0, skew: 0.0, kurtosis: 0.0, }; let sum: f64 = slice.iter().sum(); let n = slice.iter().count(); m.mean = sum / (n as f64); for e in slice.iter() { let deviation = *e - m.mean; m.average_deviation += deviation.abs(); let mut temp = deviation * deviation; m.variance += temp; temp *= deviation; m.skew += temp; m.kurtosis += temp * deviation; } m.average_deviation /= n as f64; m.variance /= (n - 1) as f64; m.standard_deviation = m.variance.sqrt(); if m.variance != 0.0 { m.skew /= (n as f64) * m.variance * m.standard_deviation; m.kurtosis = m.kurtosis / ((n as f64) * m.variance * m.variance) - 3.0; } let (before, mid, _after) = slice.select_nth_unstable_by(n / 2, |a, b| a.partial_cmp(b).unwrap()); if n % 2 == 0 { let next_biggest = before.iter().max_by(|a, b| a.partial_cmp(b).unwrap()); m.median = (*mid + *next_biggest.unwrap()) / 2.0; } else { m.median = *mid; } return m; } } fn main() { let mut args = env::args(); let n = if args.len() == 2 { args.nth(1).unwrap().parse::<i32>().unwrap() } else { LENGTH }; let mut v = Vec::<f64>::new(); for i in 0..n { v.push(i as f64); } let m = Moments::<f64>::new(&mut v[..]); println!("n: {}", v.iter().count()); println!("median: {:.6}", m.median); println!("mean: {:.6}", m.mean); println!("average_deviation: {:.6}", m.average_deviation); println!("standard_deviation: {:.6}", m.standard_deviation); println!("variance: {:.6}", m.variance); println!("skew: {:.6}", m.skew); println!("kurtosis: {:.6}", m.kurtosis); }
Porting notes
Passing the input vector using a slice, as opposed to C++ iterators
C++
template <class InputIterator>
moments(InputIterator begin, InputIterator end)
: median(0.0), mean(0.0), average_deviation(0.0),
standard_deviation(0.0), variance(0.0),
skew(0.0), kurtosis(0.0)
{
...
}
Rust
#![allow(unused)] fn main() { fn new(slice: &mut [f64]) -> Moments<f64> { let mut m = Moments::<f64> { median: 0.0, mean: 0.0, average_deviation: 0.0, standard_deviation: 0.0, variance: 0.0, skew: 0.0, kurtosis: 0.0, }; ... } }
The C++ version passes the input vector using its begin
and end
iterators to the moments
constructor.
In constract, the Rust version passes a mutable slice of the input vector. &mut [f64]
represents a mutable slice of type f64
. &mut v[..]
is the syntax that takes a mutable slice of a vector v
. If it were an immutable slice, the syntax would be &[f64]
and &v[..]
, respectively.
Slices are safer than the C++ iterators in that they are bounds-checked and, being single entities, they are less error-prone, avoiding potential mistakes like mixing up iterators from different containers or passing them in a wrong order.
The Rust version uses a slice instead of an iterator because the select_nth_unstable_by
method that is used to implement the C++ std::nth_element
below is available for a slice but not for an iterator.
Explicitly converting types
C++
mean = sum / N;
Rust
#![allow(unused)] fn main() { m.mean = sum / (n as f64); }
In the C++ version, the division implicit converts the N
of type size_t
to double
.
In Rust, implicit conversions are not supported for safety reasons and types need to be explicitly converted. The Rust version uses the as
operator as in (n as f64)
to convert from usize
to f64
.
Implementing std::nth_element
with select_nth_unstable_by
, multiple return values as a tuple, explicitly handling 64-bit floating point not having a total order
C++
InputIterator mid = begin+N/2;
nth_element(begin, mid, end);
Rust
#![allow(unused)] fn main() { let (before, mid, _after) = slice.select_nth_unstable_by(n / 2, |a, b| a.partial_cmp(b).unwrap()); }
The C++ version uses the std::nth_element
function to find the median value.
The Rust version uses the select_nth_unstable_by
method as an equivalent.
select_nth_unstable_by
uses a tuple return type to return multiple values. let (before, mid, _after) = ...
declares and assigns each element of the returned tuple as local variables conveniently in a single line.
The 64-bit floating point type (double
or f64
) doesn't technically have a total order because of NaN
. So there could be subtle undefined or implementation-dependent behaviors where strict ordering is expected. The Rust version makes it explicit by not letting f64
implement the cmp
method. The closure |a, b| a.partial_cmp(b).unwrap()
makes it explicit that it would panic if NaN
is encountered, though it doesn't occur in this program.