views:

1334

answers:

5

I'm still working on groking the F# thing - trying to work out how to 'think' in F# rather than just translating from other languages I know.

I've recently been thinking about the cases where you don't have a 1:1 map between before and after. Cases where List.map falls down.

One example of this is moving averages, where typically you will have len-n+1 results for a list of length len when averaging over n items.

For the gurus out there, is this a good way to do it (using queue pinched from Jomo Fisher)?

//Immutable queue, with added Length member
type Fifo<'a> =
    new()={xs=[];rxs=[]}
    new(xs,rxs)={xs=xs;rxs=rxs}

    val xs: 'a list;
    val rxs: 'a list;

    static member Empty() = new Fifo<'a>()
    member q.IsEmpty = (q.xs = []) && (q.rxs = [])
    member q.Enqueue(x) = Fifo(q.xs,x::q.rxs)
    member q.Length() = (List.length q.xs) + (List.length q.rxs)
    member q.Take() =
        if q.IsEmpty then failwith "fifo.Take: empty queue"
        else match q.xs with
                | [] -> (Fifo(List.rev q.rxs,[])).Take()
                | y::ys -> (Fifo(ys, q.rxs)),y

//List module, add function to split one list into two parts (not safe if n > lst length)
module List =
    let splitat n lst =
        let rec loop acc n lst =
            if List.length acc = n then
                (List.rev acc, lst)
            else
                loop (List.hd lst :: acc) n (List.tl lst)
        loop [] n lst

//Return list with moving average accross len elements of lst
let MovingAverage (len:int) (lst:float list) = 
    //ugly mean - including this in Fifo kills genericity
    let qMean (q:Fifo<float>) = ((List.sum q.xs) + (List.sum q.rxs))/(float (q.Length()))

    //get first part of list to initialise queue
    let (init, rest) = List.splitat len lst

    //initialise queue with first n items
    let q = new Fifo<float>([], init)

    //loop through input list, use fifo to push/pull values as they come
    let rec loop (acc:float list) ls (q:Fifo<float>) =
        match ls with
        | [] -> List.rev acc
        | h::t -> 
            let nq = q.Enqueue(h) //enqueue new value
            let (nq, _) = nq.Take() //drop old value
            loop ((qMean nq)::acc) t nq //tail recursion

    loop [qMean q] rest q

//Example usage    
MovingAverage 3 [1.;1.;1.;1.;1.;2.;2.;2.;2.;2.]

(Maybe a better way would be to implement a MovingAverageQueue by inheriting from Fifo?)

A: 

As far as I can see, your code is full of let statements. I'm not familiar with F# but did do some Haskell. The functional paradigm means not thinking about "how" but about "what": you think Fifo, but you should actually just specify the semantics of the moving average.

-- the limited average of a list
limitedaverage 0 _ = 0
limited verage n (x:xs) = (x/n) + ( limited average (n-1) xs )

-- a list, transformed into a sequence of moving averages of 
movingaverages n [] = []
movingaverages n (x:xs) = ( movingaverage n (x:xs) : movingaverages n xs )
xtofl
Eh, tested?I'm not a Haskellite, but limitedaverage should divide by the initial n each time, otherwise the result is wrong.Should movingaverages read (limitedaverage n (x:xs) : limitedaverage n xs)?
Benjol
+1  A: 

Here's a (corrected, I hope) F# version of the Haskell solution proposed here.

EDIT: Now tail-recursive, not any faster, but doesn't explode with n = 50000. (see edit history for non-tail-recursive version)

let LimitedAverage n ls = 
    let rec loop acc i n ls = 
        match i with
        | 0 -> acc //i counts down from n to 0, when we hit 0 we stop
        | _ -> match ls with
               | [] -> acc //if we hit empty list before end of n, we stop too
               | x::xs -> (loop (acc + (x / float n)) (i - 1) n xs) //divide this value by n, perform average on 'rest' of list
    loop 0. n n ls

LimitedAverage 50000 (List.map float [1..9999999])
//>val it : float = 25000.5

let rec MovingAverage3 n ls = 
    let rec acc loop i n ls = 
        match i with 
        | 0 -> List.rev acc //i counts down from n to 0, when we hit 0 we stop
        | _ -> match ls with
                | [] -> List.rev acc //if we hit empty list before end of n, we stop too
                | x::xs -> loop (LimitedAverage2 n ls :: acc) (i - 1) n xs // limited average on whole list, then moving average on tail
    loop [] (n + 1) n ls 

MovingAverage3 50000 (List.map float [1..9999999])
//>val it : float list = [25000.5; 25001.5; 25002.5; ...]
Benjol
@Jon Harrop, I see you :). Where's my downvote comment?
Benjol
+4  A: 

If you don't care too much about performance, here is a very simple solution:

#light

let MovingAverage n s =
   Seq.windowed n s
   |> Seq.map Array.average

let avgs = MovingAverage 5000 (Seq.map float [|1..999999|])

for avg in avgs do
    printfn "%f" avg
    System.Console.ReadKey() |> ignore

This recomputes the average of each 'window' from scratch, so it is poor if the windows are large.

In any case, check out Seq.windowed:

http://research.microsoft.com/projects/cambridge/fsharp/manual/FSharp.Core/Microsoft.FSharp.Collections.Seq.html

as it's handy to have in your back pocket for such things.

Brian
Excellent, this is the kind of answer which helps me 'grow' - i.e. discovering stuff that already exists instead of re-inventing the wheel!
Benjol
dead link, I guess all docs were moved to msdn by now so a similar page would be http://msdn.microsoft.com/en-us/library/dd233209(VS.100).aspx or http://msdn.microsoft.com/en-us/library/ee353635(VS.100).aspx
Mauricio Scheffer
A: 

If you do care about performance, then you can calculate a moving average efficiently using something like this (assuming we're calculating a moving average over a 3-day window)

Numbers[n]    Running Total[n]
---------     ---------------
n[0] = 7       7 = Numbers[0]
n[1] = 1       8 = RunningTotal[1-1] + Numbers[1]
n[2] = 2      10 = RunningTotal[2-1] + Numbers[2]
n[3] = 8      11 = RunningTotal[3-1] + Numbers[3] - Numbers[3-3]
n[4] = 4      14 = RunningTotal[4-1] + Numbers[4] - Numbers[4-3]
n[5] = 1      13 = RunningTotal[5-1] + Numbers[5] - Numbers[5-3] 
n[6] = 9      14 = RunningTotal[6-1] + Numbers[6] - Numbers[6-3]
...
N             RunningTotal[N] = RunningTotal[N-1] + Numbers[N] - Numbers[N-3]

The hard part about this is holding on your previous running total and NumberN-window. I came up with the following code:

let movingAverage days l =
    seq {
        let queue = new Queue<_>(days : int)
        let divisor = decimal days

        let total = ref 0m
        for cur in l do
            queue.Enqueue(cur)
            total := !total + cur
            if queue.Count < days then
                yield (cur, 0m)
            else
                yield (cur, !total / divisor)
                total := !total - (queue.Dequeue())
    }

This version isn't as nice looking as the Haskell code, but it should avoid performance problems associated with recomputing your "window" on each run. It keeps a running total and holds previously used numbers in a queue, so it should be very fast.

Just for fun, I wrote a simple benchmark:

#light
open System
open System.Collections.Generic
open System.Diagnostics;

let windowAverage days (l : #seq<decimal>) = Seq.windowed days l |> Seq.map (Seq.average)

let princessAverage days l =
    seq {
        let queue = new Queue<_>(days : int)
        let divisor = decimal days

        let total = ref 0m
        for cur in l do
            queue.Enqueue(cur)
            total := !total + cur
            if queue.Count < days then
                yield (cur, 0m)
            else
                yield (cur, !total / divisor)
                total := !total - (queue.Dequeue())
    }

let testData =
    let rnd = new System.Random()
    seq { for a in 1 .. 1000000 -> decimal (rnd.Next(1000)) }

let benchmark msg f iterations =
    let rec loop = function
        | 0 -> ()
        | n -> f 3 testData |> ignore; loop (n - 1)

    let stopWatch = Stopwatch.StartNew()
    loop iterations
    stopWatch.Stop()
    printfn "%s: %f" msg stopWatch.Elapsed.TotalMilliseconds

let _ =
    let iterations = 10000000
    benchmark "princessAverage" princessAverage iterations
    benchmark "windowAverage" windowAverage iterations
    printfn "Done"

Results:

princessAverage: 1670.791800
windowAverage: 2986.146900

My version is ~1.79x faster.

Juliet
Love it, please check out all my other old questions too!
Benjol
John's answer is 3 times faster again, based on my single test...
Benjol
You're relying on fixed-point arithmetic to avoid accumulating round-off error?
Jon Harrop
+1  A: 

Ok, this is a very late entry.

I created another sequence, so that I essentially have 2 generators, one generating the number to add to running total, while the other generating the number to subtract from total. It is theoretically fast and memory efficient, as no list/array is created to keep track of numbers, and no recalculation occurs.

Please help me improve it, as I don't know how to do it without using reference.

let MovingAverage size seq =
    let sum = ref 0.0
    let day = ref 0.0
    let seqprev = seq |> Seq.append (Seq.map (fun x -> 0.0) [|1..size|])
    Seq.map2 (fun prev next ->
        day := if !day >= float size then !day else !day + 1.0
        sum := !sum - prev + next
        !sum / !day) seqprev seq

let avgs = MovingAverage 5000 (Seq.map float [|1..999999|])
Agreed, mutability is not very functional, but this is much faster than the other solutions, as far as I can tell. Doesn't seem to give the same answer as Princess's solution though? Haven't got time to check right now.
Benjol
I am assuming the solution should be:day 1 = 1 / 1day 2 = (1 + 2) / 2day 3 = (1 + 2 + 3) / 3etc., until it hits full window size, then we start removing days.I realized that this solution only works if the sequence remains consistent over successive runs. (i.e. random generator won't work)
-1: You're accumulating round-off error forever. One big number and every subsequent result will be wrong...
Jon Harrop