This is largely untested, so use it with care. Since I have a bad memory, I checked the algorithm (the incremental weighted mean/variance update) against Wikipedia. I'm not aware of an algorithm that calculates the median from a stream of numbers without storing them, but that doesn't mean there isn't one.
#!/usr/bin/perl
use strict;
use warnings;
use MooseX::Declare;
# SimpleStats - single-pass weighted statistics over a stream of numbers.
#
# Feed samples with add($x, $w); the running minimum, maximum, weighted mean
# and weighted sum of squared deviations are updated incrementally, so the
# samples themselves are never stored.  Weights are expected to be
# non-negative; negative weights are not handled specially.
class SimpleStats {
    # Smallest / largest value seen so far, starting at +Inf / -Inf so the
    # first sample always replaces them.  No `isa` here: stricter Num checks
    # in newer Moose releases reject infinite values (NOTE(review): confirm
    # against the installed Moose), and the only writer, add(), has already
    # validated the value as Num.
    has 'min' => (is => 'rw', default => 9**9**9);
    has 'max' => (is => 'rw', default => -9**9**9);

    # Running weighted mean.
    has 'A' => (is => 'rw', isa => 'Num', default => 0);

    # Running weighted sum of squared deviations from the mean.
    has 'Q' => (is => 'rw', isa => 'Num', default => 0);

    # Number of samples added, and how many of those had a non-zero weight.
    has 'n'         => (is => 'rw', isa => 'Int', default => 0);
    has 'n_nonzero' => (is => 'rw', isa => 'Int', default => 0);

    # Sum of all weights.  Must be Num rather than Int because add() accepts
    # fractional weights.
    has 'sum_w' => (is => 'rw', isa => 'Num', default => 0);

    # Add one sample $x with weight $w (default 1).  Returns nothing.
    method add (Num $x, Num $w = 1) {
        $self->min($x) if $x < $self->min;
        $self->max($x) if $x > $self->max;

        my $n = $self->n;
        if ($n == 0) {
            # First sample: the mean is the sample itself, Q stays 0.
            $self->A($x);
            $self->sum_w($w);
        }
        else {
            my $mean_before = $self->A;
            my $total_w     = $self->sum_w + $w;
            $self->sum_w($total_w);

            # A zero-weight sample changes neither the mean nor Q, so it can
            # be skipped; skipping also avoids dividing by a total weight
            # that is still zero.
            if ($w != 0 && $total_w != 0) {
                my $mean_after
                    = $mean_before + ($x - $mean_before) * $w / $total_w;
                $self->A($mean_after);
                $self->Q(
                    $self->Q + $w * ($x - $mean_before) * ($x - $mean_after)
                );
            }
        }

        $self->n($n + 1);
        $self->n_nonzero($self->n_nonzero + 1) if $w != 0;
        return;
    }

    # Weighted mean of the samples added so far (0 before the first sample).
    method mean () {
        return $self->A;
    }

    # Sample variance: Q * k / ((k - 1) * sum_w), where k is the number of
    # samples with non-zero weight.  Returns nothing (undef in scalar
    # context) when fewer than two such samples exist or the total weight is
    # zero, instead of dying with a division by zero.
    method sample_variance () {
        my $k       = $self->n_nonzero;
        my $total_w = $self->sum_w;
        return if $k < 2 || $total_w == 0;
        return $self->Q * $k / (($k - 1) * $total_w);
    }

    # Population variance: Q / sum_w.  Returns nothing (undef in scalar
    # context) while the total weight is zero.
    method std_variance () {
        my $total_w = $self->sum_w;
        return if $total_w == 0;
        return $self->Q / $total_w;
    }

    # Square root of std_variance; returns nothing when that is undefined.
    method std_dev () {
        my $variance = $self->std_variance;
        return if !defined $variance;
        return sqrt($variance);
    }

    # Put every statistic back to its initial value.  The defaults are taken
    # from a fresh instance and copied through the accessors, rather than
    # overwriting the object's underlying hash.
    method reset () {
        my $fresh = __PACKAGE__->new();
        for my $attr (qw(min max A Q n n_nonzero sum_w)) {
            $self->$attr($fresh->$attr);
        }
        return;
    }
}
package main;
use Scalar::Util qw(looks_like_number);

# Read "value [weight]" records from STDIN, one per line, and print summary
# statistics.  Fields are separated by whitespace; anything after the second
# field is ignored.
my $stats = SimpleStats->new;

LINE:
while (my $line = <STDIN>) {
    # split ' ' discards leading and trailing whitespace, newline included.
    my ($x, $w) = split ' ', $line;

    # A blank line carries no sample; skip it instead of passing undef on.
    next LINE if !defined $x;

    # Fail with the offending line number rather than an opaque
    # type-constraint error from inside add().
    if (!looks_like_number($x) || (defined $w && !looks_like_number($w))) {
        chomp $line;
        die "Line $.: expected 'value [weight]', got '$line'\n";
    }

    if (defined $w) {
        $stats->add($x, $w);
    }
    else {
        $stats->add($x);
    }
}

# Print one "Label: value" line.  A statistic that is undefined for the data
# seen (for example the sample variance of fewer than two samples) may come
# back as undef or die with a division by zero; either way it is reported
# as "n/a" so the rest of the report is still printed.
my $report = sub {
    my ($label, $method) = @_;
    my $value = eval { $stats->$method() };
    $value = 'n/a' if !defined $value;
    print "$label: $value\n";
    return;
};

$report->('Mean',       'mean');
$report->('Sample var', 'sample_variance');
$report->('Std var',    'std_variance');
$report->('Std dev',    'std_dev');
$report->('Entries',    'n');
$report->('Min',        'min');
$report->('Max',        'max');