views:

621

answers:

1

A rather odd experience. Using the latest PDK (v7.3) from ActiveState, I used perlctrl to build a COM DLL. Perlctrl ran without a hitch. OLEView read the typelib okay. RegSvr32 registered it okay. However ... there's no sign of it in registry, and anything that tries to use it fails. I hunted for the various UIDs using RegEdit and they're just not there.

The code is below. It's a wrapping of Lingua::ZH::WordSegmenter, but with the encoding changed to utf8 rather than gbk.

It's probably something obvious ...

package ZHWordSeg;

use strict;
use warnings;
use utf8;
use ws;

use Encode;

use constant STX => chr( 2 ); #[
use constant ETX => chr( 3 ); #]
use constant FS => chr( 28 ); #^
use constant RS => chr( 30 ); #~

use constant TAB_SEPARATOR => 0;
use constant CARET_SEPARATOR => 1;
use constant FS_SEPARATOR => 2;
use constant SPACE_SEPARATOR => 3;
use constant AS_ARRAY => 4;

use feature 'switch';

our $segmenter; 

sub ZHWordSeg_Setup {
  my $dic = shift;
  my $dic_encoding = shift;
  my $separator = shift;
  my $verbose = shift;

  $dic_encoding = 'utf8' unless defined( $dic_encoding );
  $separator = " " unless defined( $separator );
  $verbose = 0 unless defined( $verbose );

  if ( defined( $dic ) ) {
    $segmenter = ws->new( dic => $dic, dic_encoding => $dic_encoding, seperator => $separator, verbose => $verbose );
  } else {
    $segmenter = ws->new( dic_encoding => $dic_encoding, seperator => $separator, verbose => $verbose );
  }
}

sub ZHWordSeg {
  my $source = shift;
  print STDERR $source;
  my $sepcode = shift;
  $source = encode("utf8",$source);
  my $stringres = $segmenter->seg($source);
  my @arrayres;

  given ($sepcode) {
    when (TAB_SEPARATOR) { 
      $stringres =~ tr/ /\t/;
      return $stringres;
    }
    when (CARET_SEPARATOR) { 
      $stringres =~ tr/ /^/;
      $stringres .= "^";
      return $stringres;
    }
    when (FS_SEPARATOR) { 
      $stringres =~ s/ /FS/eg;
      $stringres .= FS;
      return $stringres;
    }
    when (SPACE_SEPARATOR) { 
      return $stringres;
    }
    default { 
      @arrayres = split( / /, $stringres );
      return \@arrayres;
    }
  }
}

sub SetDictionary {
  my ($source) = shift;
  my $res = set_dic($source);
  return $res;
}

1;

=pod

=begin PerlCtrl

%TypeLib = (
  PackageName     => 'ZHWordSeg',
  DocString       => 'Chinese word segmentation',
  HelpContext     => 1,
  TypeLibGUID     => '{F6C9BD66-7CA1-4610-B77F-E219A7122C18}', # do NOT edit this line
  ControlGUID     => '{45D47C6A-2B9A-4D62-9CFD-F18C95DC00C5}', # do NOT edit this line either
  DispInterfaceIID=> '{007E4E7A-3B75-4DC3-864C-7746860941B3}', # or this one
  ControlName     => 'BOCWS',
  ControlVer      => 2,  # increment if new object with same ProgID
  # create new GUIDs as well
  ProgID          => 'ZHWordSeg.BOCWS',
  LCID            => 0,
  DefaultMethod   => 'ChineseWordSegmenter',
  Methods         => {
      'ChineseWordSegmenter' => {
        RetType             =>  VT_VARIANT,
        TotalParams         =>  2,
        NumOptionalParams   =>  1,
        ParamList           =>  
          [ 'source' => VT_BSTR,
            'sepcode' => VT_I4
          ] 
      },
      'ChineseWordSegmenter_Setup' => {
        RetType             =>  VT_VARIANT,
        TotalParams         =>  4,
        NumOptionalParams   =>  4,
        ParamList           =>  
          [ 'dic' => VT_BSTR,
            'dic_encoding' => VT_BSTR,
            'separator' => VT_BSTR,
            'verbose' => VT_BSTR
          ] 
      }
  },  # end of 'Methods'
  Properties        => {
    TAB_SEPARATOR => {
      DocString  => "Separate items with TAB (0x0)",
      Type => VT_I4, 
      DispID => 3,
      ReadOnly => 1,
    },
    CARET_SEPARATOR => {
      DocString  => "Separate items with ^ (0x1)",
      Type => VT_I4, 
      DispID => 4,
      ReadOnly => 1,
    },
    FS_SEPARATOR => {
      DocString  => "Separate items with ascii 28 (0x2)",
      Type => VT_I4, 
      DispID => 5,
      ReadOnly => 1,
    },
    SPACE_SEPARATOR => {
      DocString  => "Separate items with space (0x3)",
      Type => VT_I4, 
      DispID => 6,
      ReadOnly => 1,
    },
    AS_ARRAY => {
      DocString  => "Separate items as array (0x4)",
      Type => VT_I4, 
      DispID => 7,
      ReadOnly => 1,
    }
  },  # end of 'Properties'
);  # end of %TypeLib

=end PerlCtrl

=cut

This is the .perlctrl file, in case it matters:

#!C:\Program Files\ActiveState Perl Dev Kit 7.3 Deployment\bin\lib\pai.exe
PAP-Version: 1.0
Packer: C:\Program Files\ActiveState Perl Dev Kit 7.3 Deployment\bin\perlctrl.exe
Script: ZHWordSeg.ctrl
Cwd: P:\BOCWS
Byref: 0
Clean: 0
Date: 2008-10-24 18:05:42
Debug: 127.0.0.1:2000
Dependent: 0
Dyndll: 0
Exe: BOCWS.dll
Force: 1
Gui: 0
Hostname: xi
No-Compress: 0
No-Gui: 0
No-Logo: 0
Runlib: 
Shared: none
Singleton: 0
Tmpdir: 
Verbose: 0
Version-Comments: 
Version-CompanyName: 
Version-FileDescription: Wrapper of Lingua::ZH::WordSegmenter.pm
Version-FileVersion: 1.0
Version-InternalName: ZHWordSeg
Version-LegalCopyright: 
Version-LegalTrademarks: 
Version-OriginalFilename: ZHWordSeg.ctrl
Version-ProductName: BOChineseWordSegmenter
Version-ProductVersion: 1.0
Warnings: 0
Xclude: 1
+1  A: 

The only "solution" that I've found was suggested over on news:comp.os.ms-windows.programmer.win32

i am not a PDK user but from experience i can tell you, that you should check the DllRegister exports code and what it internally does, since this is what the regsvr32 calls and this code is creating the registry keys for your com server/proxy, etc, ...

You can track the registry for changes with sysinternal tools like regmon or procmon, just to make sure!

Ultimately, I gave up and went back to a previous version that worked and tweaked it.

boost