A rather odd experience. Using the latest PDK (v7.3) from ActiveState, I used perlctrl to build a COM DLL. Perlctrl ran without a hitch. OLEView read the typelib okay. RegSvr32 registered it okay. However ... there's no sign of it in registry, and anything that tries to use it fails. I hunted for the various UIDs using RegEdit and they're just not there.
The code is below. It's a wrapping of Lingua::ZH::WordSegmenter, but with the encoding changed to utf8 rather than gbk.
It's probably something obvious ...
package ZHWordSeg;
use strict;
use warnings;
use utf8;
use ws;
use Encode;
use constant STX => chr( 2 ); #[
use constant ETX => chr( 3 ); #]
use constant FS => chr( 28 ); #^
use constant RS => chr( 30 ); #~
use constant TAB_SEPARATOR => 0;
use constant CARET_SEPARATOR => 1;
use constant FS_SEPARATOR => 2;
use constant SPACE_SEPARATOR => 3;
use constant AS_ARRAY => 4;
use feature 'switch';
our $segmenter;
sub ZHWordSeg_Setup {
my $dic = shift;
my $dic_encoding = shift;
my $separator = shift;
my $verbose = shift;
$dic_encoding = 'utf8' unless defined( $dic_encoding );
$separator = " " unless defined( $separator );
$verbose = 0 unless defined( $verbose );
if ( defined( $dic ) ) {
$segmenter = ws->new( dic => $dic, dic_encoding => $dic_encoding, seperator => $separator, verbose => $verbose );
} else {
$segmenter = ws->new( dic_encoding => $dic_encoding, seperator => $separator, verbose => $verbose );
}
}
sub ZHWordSeg {
my $source = shift;
print STDERR $source;
my $sepcode = shift;
$source = encode("utf8",$source);
my $stringres = $segmenter->seg($source);
my @arrayres;
given ($sepcode) {
when (TAB_SEPARATOR) {
$stringres =~ tr/ /\t/;
return $stringres;
}
when (CARET_SEPARATOR) {
$stringres =~ tr/ /^/;
$stringres .= "^";
return $stringres;
}
when (FS_SEPARATOR) {
$stringres =~ s/ /FS/eg;
$stringres .= FS;
return $stringres;
}
when (SPACE_SEPARATOR) {
return $stringres;
}
default {
@arrayres = split( / /, $stringres );
return \@arrayres;
}
}
}
sub SetDictionary {
my ($source) = shift;
my $res = set_dic($source);
return $res;
}
1;
=pod
=begin PerlCtrl
%TypeLib = (
PackageName => 'ZHWordSeg',
DocString => 'Chinese word segmentation',
HelpContext => 1,
TypeLibGUID => '{F6C9BD66-7CA1-4610-B77F-E219A7122C18}', # do NOT edit this line
ControlGUID => '{45D47C6A-2B9A-4D62-9CFD-F18C95DC00C5}', # do NOT edit this line either
DispInterfaceIID=> '{007E4E7A-3B75-4DC3-864C-7746860941B3}', # or this one
ControlName => 'BOCWS',
ControlVer => 2, # increment if new object with same ProgID
# create new GUIDs as well
ProgID => 'ZHWordSeg.BOCWS',
LCID => 0,
DefaultMethod => 'ChineseWordSegmenter',
Methods => {
'ChineseWordSegmenter' => {
RetType => VT_VARIANT,
TotalParams => 2,
NumOptionalParams => 1,
ParamList =>
[ 'source' => VT_BSTR,
'sepcode' => VT_I4
]
},
'ChineseWordSegmenter_Setup' => {
RetType => VT_VARIANT,
TotalParams => 4,
NumOptionalParams => 4,
ParamList =>
[ 'dic' => VT_BSTR,
'dic_encoding' => VT_BSTR,
'separator' => VT_BSTR,
'verbose' => VT_BSTR
]
}
}, # end of 'Methods'
Properties => {
TAB_SEPARATOR => {
DocString => "Separate items with TAB (0x0)",
Type => VT_I4,
DispID => 3,
ReadOnly => 1,
},
CARET_SEPARATOR => {
DocString => "Separate items with ^ (0x1)",
Type => VT_I4,
DispID => 4,
ReadOnly => 1,
},
FS_SEPARATOR => {
DocString => "Separate items with ascii 28 (0x2)",
Type => VT_I4,
DispID => 5,
ReadOnly => 1,
},
SPACE_SEPARATOR => {
DocString => "Separate items with space (0x3)",
Type => VT_I4,
DispID => 6,
ReadOnly => 1,
},
AS_ARRAY => {
DocString => "Separate items as array (0x4)",
Type => VT_I4,
DispID => 7,
ReadOnly => 1,
}
}, # end of 'Properties'
); # end of %TypeLib
=end PerlCtrl
=cut
This is the .perlctrl file, in case it matters:
#!C:\Program Files\ActiveState Perl Dev Kit 7.3 Deployment\bin\lib\pai.exe
PAP-Version: 1.0
Packer: C:\Program Files\ActiveState Perl Dev Kit 7.3 Deployment\bin\perlctrl.exe
Script: ZHWordSeg.ctrl
Cwd: P:\BOCWS
Byref: 0
Clean: 0
Date: 2008-10-24 18:05:42
Debug: 127.0.0.1:2000
Dependent: 0
Dyndll: 0
Exe: BOCWS.dll
Force: 1
Gui: 0
Hostname: xi
No-Compress: 0
No-Gui: 0
No-Logo: 0
Runlib:
Shared: none
Singleton: 0
Tmpdir:
Verbose: 0
Version-Comments:
Version-CompanyName:
Version-FileDescription: Wrapper of Lingua::ZH::WordSegmenter.pm
Version-FileVersion: 1.0
Version-InternalName: ZHWordSeg
Version-LegalCopyright:
Version-LegalTrademarks:
Version-OriginalFilename: ZHWordSeg.ctrl
Version-ProductName: BOChineseWordSegmenter
Version-ProductVersion: 1.0
Warnings: 0
Xclude: 1