From df4815103ba5338f0ff4017e25d4f9063791be0d Mon Sep 17 00:00:00 2001 From: Graham Ollis Date: Tue, 23 Feb 2021 10:06:33 -0700 Subject: [PATCH] wip --- lib/FFI/C/ASCIIString.pm | 29 ++------ lib/FFI/C/Buffer.pm | 3 - lib/FFI/C/String.pm | 139 ++++++++++++++++++++++++++++++++++++++- t/ffi_c_string.t | 77 +++++++++++++++++++++- 4 files changed, 219 insertions(+), 29 deletions(-) diff --git a/lib/FFI/C/ASCIIString.pm b/lib/FFI/C/ASCIIString.pm index fd50c68..8dead99 100644 --- a/lib/FFI/C/ASCIIString.pm +++ b/lib/FFI/C/ASCIIString.pm @@ -52,31 +52,12 @@ sub new { my $class = shift; - if(@_ == 1) + if(@_ == 1 && is_plain_scalarref $_[0] && !is_ref ${$_[0]}) { - if(is_plain_scalarref $_[0] && !is_ref ${$_[0]}) - { - Carp::croak("Non ASCII characters found in string") if ${$_[0]} =~ /[^[:ascii:]]/; - - return ${$_[0]} =~ /\0/ - ? $class->SUPER::new($_[0]) - : $class->SUPER::new(\"${$_[0]}\0"); - } - elsif(!is_ref $_[0]) - { - my $self = $class->SUPER::new(@_); - $self->from_perl("\0"); - return $self; - } - else - { - return $class->SUPER::new(@_); - } - } - else - { - return $class->SUPER::new(@_); + Carp::croak("Non ASCII characters found in string") if ${$_[0]} =~ /[^[:ascii:]]/; } + + $class->SUPER::new(@_); } =head1 ATTRIBUTES @@ -132,6 +113,8 @@ sub to_perl $str->from_perl($perl_string); $str->from_perl($perl_string, $size); +Copy the content of a Perl into the C string. + =cut sub from_perl diff --git a/lib/FFI/C/Buffer.pm b/lib/FFI/C/Buffer.pm index 1746e23..8a19e22 100644 --- a/lib/FFI/C/Buffer.pm +++ b/lib/FFI/C/Buffer.pm @@ -50,9 +50,6 @@ sub new { my $class = shift; - Carp::croak("You cannot create an instance of FFI::C::String directly") - if $class eq 'FFI::C::String'; - my $buffer_size; my $ptr; my $owner; diff --git a/lib/FFI/C/String.pm b/lib/FFI/C/String.pm index ea68ba9..519c374 100644 --- a/lib/FFI/C/String.pm +++ b/lib/FFI/C/String.pm @@ -4,7 +4,8 @@ use strict; use warnings; use base qw( FFI::C::Buffer ); use Carp (); -use Ref::Util qw( is_blessed_hashref ); +use Ref::Util qw( is_blessed_hashref is_plain_hashref is_plain_scalarref is_ref); +use Encode (); # ABSTRACT: Base class for C string classes # VERSION @@ -32,7 +33,102 @@ Subclasses include: =back -=head1 ATTRIBUTES +=head1 CONSTRUCTOR + +=head2 new + + my $str = FFI::C::ASCIIString->new(\%args); + +Supported arguments: + +=over 4 + +=item encoding_name + +[required] + +The encoding name as understood by L. + +=item encoding_width + +[optional] + +The number of bytes it takes to represent a character, if the encoding is fixed-width. +If the encoding is not fixed-width or you aren't sure this should be C. + +=item buffer_size + +[require this or string] + +The size of the buffer. This can be larger than the initial string provided. + +=item string + +[require this or buffer_size] + +The Perl string to initially populate the new string object. This should be a Perl string, +possibly with Unicode characters in it which will be encoded into the proper encoding. + +=back + +=cut + +sub new +{ + my $class = shift; + + local $@; + eval { $class->encoding_name }; + if($@) + { + if(defined $_[0] && is_plain_hashref $_[0]) + { + my %args = %{ $_[0] }; + Carp::croak("encoding_name is required") unless defined $args{encoding_name}; + + my $encoding = Encode::find_encoding($args{encoding_name}); + Carp::croak("Unknown encoding: $args{encoding_name}") unless defined $encoding; + + Carp::croak("buffer_size or string are required") unless defined $args{buffer_size} || defined $args{string}; + + my $self = $class->SUPER::new(defined $args{buffer_size} ? $args{buffer_size} : \$args{string}); + $self->{encoding_name} = $encoding->name; + $self->{encoding_width} = $args{encoding_width} if defined $args{encoding_width}; + $args{string} = '' unless defined $args{string}; + $self->from_perl($args{string}); + return $self; + } + else + { + Carp::croak("No encoding provided for this class / object"); + } + } + elsif(@_ == 1) + { + if(is_plain_scalarref $_[0] && !is_ref ${$_[0]}) + { + return ${$_[0]} =~ /\0/ + ? $class->SUPER::new($_[0]) + : $class->SUPER::new(\"${$_[0]}\0"); + } + elsif(!is_ref $_[0]) + { + my $self = $class->SUPER::new(@_); + $self->from_perl("\0"); + return $self; + } + else + { + return $class->SUPER::new(@_); + } + } + else + { + return $class->SUPER::new(@_); + } +} + +=head1 PROPERTIES =head2 encoding_name @@ -80,6 +176,45 @@ sub encoding_width } } +=head1 METHODS + +=head2 to_perl + + my $perl_string = $str->to_perl; + +Copies the NULL terminated C string to a Perl string. +If the string contains non-ASCII characters it will +throw an exception. + +=cut + +sub to_perl +{ + my $self = shift; + my $copy = $self->SUPER::to_perl; + $copy =~ s/\0.*$//sm; # doesn't work for UTF-16 UTF-32 etc. + Encode::decode($self->encoding_name, $copy, Encode::FB_CROAK); +} + +=head2 from_perl + + $str->from_perl($perl_string); + $str->from_perl($perl_string, $size); + +Copy the content of a Perl into the C string. + +=cut + +sub from_perl +{ + my $self = shift; + Carp::croak("Argument is undef") unless @_ >= 1 && defined $_[0]; + my $str = shift @_; + $str .= "\0" unless $str =~ /\0/; + $str = Encode::encode($self->encoding_name, $str, Encode::FB_CROAK); + $self->SUPER::from_perl($str, @_); +} + 1; =head1 SEE ALSO diff --git a/t/ffi_c_string.t b/t/ffi_c_string.t index 33edf21..96a92c5 100644 --- a/t/ffi_c_string.t +++ b/t/ffi_c_string.t @@ -1,6 +1,81 @@ +use utf8; use Test2::V0 -no_srand => 1; use FFI::C::String; +use Encode qw( encode find_encoding ); -is dies { FFI::C::String->new }, match qr/You cannot create an instance of FFI::C::String/; +subtest 'ctor errors' => sub { + + is dies { FFI::C::String->new }, match qr/No encoding provided for this class \/ object/; + is dies { FFI::C::String->new({ buffer_size => 1024 }) }, match qr/encoding_name is required/; + is dies { FFI::C::String->new({ encoding_name => 'invalid', buffer_size => 1024 }) }, match qr/Unknown encoding: invalid/; + is dies { FFI::C::String->new({ encoding_name => 'ascii' }) }, match qr/buffer_size or string are required/; + +}; + +subtest 'with encoding ascii' => sub { + + is( + FFI::C::String->new({ + encoding_name => 'ascii', + buffer_size => 1024, + string => 'foobar', + }), + object { + call [ isa => 'FFI::C::String' ] => T(); + call [ isa => 'FFI::C::Buffer' ] => T(); + + call encoding_name => 'ascii'; + call encoding_width => U(); + call buffer_size => 1024; + call to_perl => 'foobar'; + }, + ); + + is( + FFI::C::String->new({ + encoding_name => 'ascii', + string => "foobar\0xx\nroger\0", + buffer_size => 1024, + }), + object { + call to_perl => 'foobar'; + }, + ); + + +}; + +subtest 'with encoding koi8-r' => sub { + + skip_all 'test requires koi8-r encoding' + unless find_encoding('koi8-r'); + + my $str; + is( + $str = FFI::C::String->new({ + encoding_name => 'kOI8-r', + encoding_width => 1, + buffer_size => 512, + strings => 'Привет, мир', + }), + object { + call [ isa => 'FFI::C::String' ] => T(); + call [ isa => 'FFI::C::Buffer' ] => T(); + + call encoding_name => 'koi8-r'; + call encoding_width => 1; + call buffer_size => 512; + call to_perl => 'Привет, мир'; + }, + ); + + my $win; + $str->window($win); + my $raw = "$win"; + $raw =~ s/\0.*$//; + + is($raw, encode('koi8-r', $raw)); + +}; done_testing;