views:

233

answers:

6

I have the following F# function which makes use of a ref variable to seed and keep track of a running total, something tells me this isn't in the spirit of fp or even particular clear on its own. I'd like some direction on the clearest (possible fp, but if an imperative approach is clearer I'd be open to that) way to express this in F#. Note that selectItem implements a random weighted selection algorithm.

type WeightedItem(id: int, weight: int) =
    member self.id = id
    member self.weight = weight

let selectItem (items: WeightedItem list) (rand:System.Random) =
    let totalWeight = List.sumBy (fun (item: WeightedItem) -> item.weight) items
    let selection = rand.Next(totalWeight) + 1
    let runningWeight = ref 0
    List.find 
        (fun (item: WeightedItem) ->
            runningWeight := !runningWeight + item.weight
            !runningWeight >= selection)
        items

let items = [new WeightedItem(1,100); new WeightedItem(2,50); new WeightedItem(3,25)]
let selection = selectItem items (new System.Random())
+1  A: 

Hm, here's one way to do it with a fold, but it feels inelegant and always traverses the whole list...

type WeightedItem(id: int, weight: int) = 
    member self.id = id 
    member self.weight = weight 

let selectItem (items: WeightedItem list) (rand:System.Random) = 
    let totalWeight = List.sumBy (fun (item: WeightedItem) -> item.weight) items 
    let selection = rand.Next(totalWeight) + 1 
    List.fold 
        (fun (runningWeight,found) (item: WeightedItem) -> 
            if not found then
                let newRunningWeight = runningWeight + item.weight 
                newRunningWeight, newRunningWeight >= selection
            else
                (runningWeight,found)) 
        (0,false)
        items 
    |> fst

let items = [new WeightedItem(1,100)
             new WeightedItem(2,50)
             new WeightedItem(3,25)] 
let selection = selectItem items (new System.Random()) 
Brian
Thanks, I was thinking fold might be appropriate too (seed and accumulate!), but alas, I can't stomach traversing the whole list.
Stephen Swensen
+2  A: 

Hm, here's one with Seq.scan, but it also feels very ugly...

type WeightedItem(id: int, weight: int) = 
    member self.id = id 
    member self.weight = weight 

let selectItem (items: WeightedItem list) (rand:System.Random) = 
    let totalWeight = List.sumBy (fun (item: WeightedItem) -> item.weight) items 
    let selection = rand.Next(totalWeight) + 1 
    Seq.scan 
        (fun (runningWeight,found,itemO) (item: WeightedItem) -> 
            if not found then
                let newRunningWeight = runningWeight + item.weight 
                newRunningWeight, newRunningWeight >= selection, Some(item)
            else
                (runningWeight,found,itemO)) 
        (0,false,None)
        items 
    |> Seq.find (fun (rw,f,i) -> f)
    |> (fun (rw,f,i) -> i.Value)

let items = [new WeightedItem(1,100)
             new WeightedItem(2,50)
             new WeightedItem(3,25)] 
let selection = selectItem items (new System.Random()) 
Brian
Whoa! Yeah, that's a bit much. But I definitely appreciate seeing an alternate approach for its own sake!
Stephen Swensen
+1  A: 

Hm, here's some mutables and a loop; still traverses the whole list though...

type WeightedItem(id: int, weight: int) = 
    member self.id = id 
    member self.weight = weight 

let selectItem (items: WeightedItem list) (rand:System.Random) = 
    let totalWeight = List.sumBy (fun (item: WeightedItem) -> item.weight) items 
    let selection = rand.Next(totalWeight) + 1 
    let mutable runningWeight = 0
    let mutable found = None
    for item in items do
        match found with
        | None ->
            runningWeight <- runningWeight + item.weight 
            if runningWeight >= selection then
                found <- Some(item)
        | _ -> ()
    found.Value

let items = [new WeightedItem(1,100)
             new WeightedItem(2,50)
             new WeightedItem(3,25)] 
let selection = selectItem items (new System.Random()) 

This is my favorite of the three. I look forward to the day that F# adds break. Of course you can call GetEnumerator and take full control, but that is ugly too.

Brian
Right, I was playing with for / in but no break to be found. I'm starting to think I wasn't so far off base with my original implementation.
Stephen Swensen
+4  A: 

Here is a version of the search algorithm using a recursive function. My F# is very rusty and I don't know what to return when we can't find anything:

let rec find list item total =
    match list with
    | h::t -> if h > total then h else find t item total+h
    | [] -> 0 //<-- return some sort of default to say can't find the item

EDIT

Full code:

type WeightedItem(id: int, weight: int) = 
    member self.id = id 
    member self.weight = weight 

let selectItem (items: WeightedItem list) (rand:System.Random) = 
    let totalWeight = List.sumBy (fun (item: WeightedItem) -> item.weight) items 
    let selection = rand.Next(totalWeight) + 1 
    let rec find runningWeight ((h:WeightedItem)::t) =
        let newRunningWeight = runningWeight + h.weight
        if newRunningWeight >= selection then
            h
        else
            find newRunningWeight t
    find 0 items

let items = [new WeightedItem(1,100)
             new WeightedItem(2,50)
             new WeightedItem(3,25)] 
let selection = selectItem items (new System.Random()) 
Igor Zevaka
Indeed, a local tail-recursive function would work well here.
Brian
Beautiful. This is exactly what I was going for.
Stephen Swensen
+2  A: 

Igor's answer is probably the best one for items stored in a list in terms of efficiency, but since Brian's scan approach is representative of a recurrent sequence manipulation pattern, I suggest a slightly more compact variation :

let selectItem (items: WeightedItem list) (rand:System.Random) =
    let totalWeight = List.sumBy (fun (item: WeightedItem) -> item.weight) items
    let selection = rand.Next(totalWeight) + 1
    items
    |> Seq.scan (fun acc (item : WeightedItem) -> acc + item.weight) 0
    |> Seq.skip 1 |> Seq.zip items
    |> Seq.find (fun (i, rw) -> rw >= selection) |> fst
+1  A: 

Use Seq.unfold to build an on-demand sequence that accumulates runningWeight and then search it for the first element that had a sufficiently large runningWeight using Seq.pick:

let gen = function
  | _, [] -> None
  | runningWeight, item::items ->
      let runningWeight = runningWeight + item.weight
      Some((if runningWeight >= selection then Some item else None), (runningWeight, items))
Seq.unfold gen (0, xs) |> Seq.pick id
Jon Harrop
Thanks Jon, I learned a lot of new things figuring this one out.
Stephen Swensen
But in order to get type inference working and satisfy the compiler, and I needed to do (0, items) |> Seq.unfold ((*gen body here*))|> Seq.pick id
Stephen Swensen
Ah, that's because you defined `weightedItem` as a class so F# could not infer the type of `item.weight`. In my testing, I defined it as a record type `type weightedItem = {id: int; weight: int}` so F# did infer the type of `item.weight`.
Jon Harrop