views:

197

answers:

1

I'm trying to write a merge sorting algorithm in Perl and I've attempted to copy the pseudo code from Wikipedia.

So this is what I have:

sub sort_by_date {
    my $self        = shift;
    my $collection  = shift;

    print STDERR "\$collection = ";
    print STDERR Dumper $collection;

    if ( @$collection <= 1 ) {
        return $collection;
    }

    my ( $left, $right, $result );

    my $middle = ( @$collection / 2 ) - 1;

    my $x = 0;
    for ( $x; $x <= $middle; $x++ ) {
        push( @$left,$collection->[$x] );
    }

    $x = $middle + 1;
    for ( $x; $x < @$collection; $x++  ) {
        push( @$right,$collection->[$x] );
    }

    $left = $self->sort_by_date( $left );
    $right = $self->sort_by_date( $right );

    print STDERR '$left = ';
    print STDERR Dumper $left;
    print STDERR '$right = ';
    print STDERR Dumper $right;

    print STDERR '$self->{\'files\'}{$left->[@$left-1]} = ';
    print STDERR Dumper $self->{'files'}{$left->[@$left-1]};
    print STDERR '$self->{\'files\'}{$right->[0]} = ';
    print STDERR Dumper $self->{'files'}{$right->[0]};

    if ( $self->{'files'}{$left->[@$left-1]}{'modified'} > $self->{'files'}{$right->[0]}{'modified'} ) {
        $result = $self->merge_sort( $left,$right );
    }
    else {
        $result = [ @$left, @$right ];
    }

    return $result;
}

## We're merge sorting two lists together
sub merge_sort {
    my $self  = shift;
    my $left  = shift;
    my $right = shift;

    my @result;

    while ( @$left > 0 && @$right > 0 ) {
        if ( $self->{'files'}{$left->[0]}{'modified'} <= $self->{'files'}{$right->[0]}{'modified'} ) {
            push( @result,$left->[0] );
            shift( @$left );
        }
        else {
            push( @result,$right->[0] );
            shift( @$right );
        }
    }

    print STDERR "\@$left = @$left\n";
    print STDERR "\@$right = @$right\n";

    if ( @$left > 0 ) {
        push( @result,@$left );
    }
    else {
        push( @result,@$right );
    }

    print STDERR "\@result = @result\n";

    return @result;
}

The error I'm getting + the output from my debugging print statements is as follows:

$collection = $VAR1 = [
      'dev/css/test.css',
      'dev/scripts/out.tmp',
      'dev/scripts/taxonomy.csv',
      'dev/scripts/wiki.cgi',
      'dev/scripts/wiki.cgi.back',
      'dev/templates/convert-wiki.tpl',
      'dev/templates/includes/._menu.tpl',
      'dev/templates/test.tpl'
    ];
$collection = $VAR1 = [
      'dev/css/test.css',
      'dev/scripts/out.tmp',
      'dev/scripts/taxonomy.csv',
      'dev/scripts/wiki.cgi'
    ];
$collection = $VAR1 = [
      'dev/css/test.css',
      'dev/scripts/out.tmp'
    ];
$collection = $VAR1 = [
      'dev/css/test.css'
    ];
$collection = $VAR1 = [
      'dev/scripts/out.tmp'
    ];
$left = $VAR1 = [
      'dev/css/test.css'
    ];
$right = $VAR1 = [
      'dev/scripts/out.tmp'
    ];
$self->{'files'}{$left->[@$left-1]} = $VAR1 = {
      'type' => 'file',
      'modified' => '0.764699074074074'
    };
$self->{'files'}{$right->[0]} = $VAR1 = {
      'type' => 'file',
      'modified' => '340.851956018519'
    };
$collection = $VAR1 = [
      'dev/scripts/taxonomy.csv',
      'dev/scripts/wiki.cgi'
    ];
$collection = $VAR1 = [
      'dev/scripts/taxonomy.csv'
    ];
$collection = $VAR1 = [
      'dev/scripts/wiki.cgi'
    ];
$left = $VAR1 = [
      'dev/scripts/taxonomy.csv'
    ];
$right = $VAR1 = [
      'dev/scripts/wiki.cgi'
    ];
$self->{'files'}{$left->[@$left-1]} = $VAR1 = {
      'type' => 'file',
      'modified' => '255.836377314815'
    };
$self->{'files'}{$right->[0]} = $VAR1 = {
      'type' => 'file',
      'modified' => '248.799166666667'
    };
@ARRAY(0x8226b2c) = dev/scripts/taxonomy.csv
@ARRAY(0x8f95178) = 
@result = dev/scripts/wiki.cgi dev/scripts/taxonomy.csv
$left = $VAR1 = [
      'dev/css/test.css',
      'dev/scripts/out.tmp'
    ];
$right = $VAR1 = 2;
$self->{'files'}{$left->[@$left-1]} = $VAR1 = {
      'type' => 'file',
      'modified' => '340.851956018519'
    };
$self->{'files'}{$right->[0]} = [Tue Sep 22 13:47:19 2009] [error] [Tue Sep 22 13:47:19 2009] null: Can't use string ("2") as an ARRAY ref while "strict refs" in use at ../lib/Master/ProductVersion.pm line 690.\n

Now the added complexity you see in the code is that for each item in the $collection array_ref passed in there is also a hash entry for that item containing item => { type => 'file', modified => 'date-last-modified' } and I'm trying to sort on the date last modified of each file.

My brain just basically can't cope with recursion and I can't figure out where I'm going wrong - it's probably obvious and/or horrendously wrong. Any help would be much appreciated... or I'm rewriting as insertion sort!

Thanks

+4  A: 

Why are you not using the sort function?

my @sorted = sort { $a->{modified} <=> $b->{modified} } @unsorted;

Just for the record, here is an inefficient implementation of merge sort in Perl:

#!/usr/bin/perl

use strict;
use warnings;

sub merge {
    my ($cmp, $left, $right) = @_;
    my @merged;

    while (@$left && @$right) {
        if ($cmp->($left->[0], $right->[0]) <= 0) {
            push @merged, shift @$left;
        } else {
            push @merged, shift @$right;
        }
    }
    if (@$left) {
        push @merged, @$left;
    } else {
        push @merged, @$right;
    }
    return @merged;
}

sub merge_sort {
    my ($cmp, $array) = @_;

    return @$array if @$array <= 1;

    my $mid = @$array/2 - 1;

    my @left  = merge_sort($cmp, [@{$array}[0 .. $mid]]);
    my @right = merge_sort($cmp, [@{$array}[$mid+1 .. $#{$array}]]);

    if ($left[-1] > $right[0]) {
        @left = merge $cmp, \@left, \@right;
    } else {
        push @left, @right;
    }
    return @left;    
}

my $cmp = sub {
    my ($x, $y) = @_;
    return $x <=> $y;
};

print join(", ", merge_sort $cmp, [qw/1 3 4 2 5 4 7 8 1/]), "\n";
Chas. Owens
yes I have just also been recommended that in IRC so I will. Thanks
Adam Taylor
If you really want mergesort (and not quicksort) you can force it by saying `use sort '_mergesort'`
Michael Carman
Yeah, but the only reason to do that is if you know in advance that the data is likely to be pathological for quicksort. There are valid reasons to use a custom built version of mergesort (e.g. low memory systems that need to use files instead of memory). Of course, there are modules in CPAN that already do most of the heavy lifting for you.
Chas. Owens
A good quicksort implementation, like Perl's, avoids most of the pathological cases.
Schwern