perl5-dbi/dbi

FetchHashKeyName emits invalid keys for utf8 fieldnames

Deracination opened this issue · 3 comments

Using DBI v 1.636, DBD::Pg 3.5.3, with Perl 5.42.0 on Linux

$dbi->{FetchHashKeyName}=’NAME_lc’ or NAME_uc produces different keys from ‘lc’ and ‘uc’ functions for fieldnames containing non-ascii characters.

For example, selecting column as ‘ÄMNE-Abc’ with FetchHashKeyName=NAME_lc returns the result key ‘\x{0}\x{0}mne-abc’ while PERL_UNICODE=SA perl -e 'use utf8; print lc("ÄMNE-Abc");' returns the expected ämne-abc

Test Case:

use strict;
use utf8;
use Test::More tests => 26;
use Data::Dumper;

use DBI;

my $dbi=DBI->connect(
        'dbi:Pg:dbname=test_db',
        'chris',
        '',
        {
        pg_enable_utf8  =>  1,
        }
);

my @expect=(
    [ 'NAME', "ABc", "ABc" ],
    [ 'NAME_uc', "ABc", "ABC" ],
    [ 'NAME_lc', "ABc", "abc" ],

    [ 'NAME', "てすと-ABc", "てすと-ABc" ],
    [ 'NAME_uc', "てすと-Abc", "てすと-ABC" ],
    [ 'NAME_uc', "てすと-Abc", "てすと-ABC" ],
    [ 'NAME_lc', "てすと-Abc", "てすと-abc" ],
    [ 'NAME_lc', "てすと-Abc", "てすと-abc" ],

    [ 'NAME', "ÄMNE-Abc", "ÄMNE-Abc" ],
    [ 'NAME_uc', "ÄMNE-Abc", "ÄMNE-ABC" ],
    [ 'NAME_uc', "ämne-Abc", "ÄMNE-ABC" ],
    [ 'NAME_lc', "ämne-Abc", "ämne-abc" ],
    [ 'NAME_lc', "ÄMNE-Abc", "ämne-abc" ],
);

foreach my $e (@expect) {
    my($case,$as,$fld)=@$e;

    my $val;
    if($case eq 'NAME_uc') {
        $val = uc($as);
    } elsif($case eq 'NAME_lc') {
        $val = lc($as);
    } else {
        $val = $as;
    }

    is($val,$fld,"case-converted $as to $case");

    $dbi->{FetchHashKeyName} = $case;

    my $row=$dbi->selectrow_hashref(qq{ select now() as "$as" });

    ok(exists $row->{$fld},"hashref $case") or diag(Dumper $row);
}
Summary of my perl5 (revision 5 version 24 subversion 0) configuration:

  Platform:
    osname=linux, osvers=2.6.32-642.6.2.el6.x86_64, archname=x86_64-linux
    uname='linux yonkyo.local 2.6.32-642.6.2.el6.x86_64 #1 smp wed oct 26 06:52:09 utc 2016 x86_64 x86_64 x86_64 gnulinux '
    config_args='-de -Dprefix=/opt/perlbrew/perls/perl-5.24.0 -Aeval:scriptdir=/opt/perlbrew/perls/perl-5.24.0/bin'
    hint=recommended, useposix=true, d_sigaction=define
    useithreads=undef, usemultiplicity=undef
    use64bitint=define, use64bitall=define, uselongdouble=undef
    usemymalloc=n, bincompat5005=undef
  Compiler:
    cc='cc', ccflags ='-fwrapv -fno-strict-aliasing -pipe -fstack-protector -I/usr/local/include -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -D_FORTIFY_SOURCE=2',
    optimize='-O2',
    cppflags='-fwrapv -fno-strict-aliasing -pipe -fstack-protector -I/usr/local/include'
    ccversion='', gccversion='4.4.7 20120313 (Red Hat 4.4.7-17)', gccosandvers=''
    intsize=4, longsize=8, ptrsize=8, doublesize=8, byteorder=12345678, doublekind=3
    d_longlong=define, longlongsize=8, d_longdbl=define, longdblsize=16, longdblkind=3
    ivtype='long', ivsize=8, nvtype='double', nvsize=8, Off_t='off_t', lseeksize=8
    alignbytes=8, prototype=define
  Linker and Libraries:
    ld='cc', ldflags =' -fstack-protector -L/usr/local/lib'
    libpth=/usr/local/lib /usr/lib /lib/../lib64 /usr/lib/../lib64 /lib /lib64 /usr/lib64 /usr/local/lib64
    libs=-lpthread -lnsl -lgdbm -ldb -ldl -lm -lcrypt -lutil -lc
    perllibs=-lpthread -lnsl -ldl -lm -lcrypt -lutil -lc
    libc=libc-2.12.so, so=so, useshrplib=false, libperl=libperl.a
    gnulibc_version='2.12'
  Dynamic Linking:
    dlsrc=dl_dlopen.xs, dlext=so, d_dlsymun=undef, ccdlflags='-Wl,-E'
    cccdlflags='-fPIC', lddlflags='-shared -O2 -L/usr/local/lib -fstack-protector'

Characteristics of this binary (from libperl):
  Compile-time options: HAS_TIMES PERLIO_LAYERS PERL_COPY_ON_WRITE
                        PERL_DONT_CREATE_GVSV
                        PERL_HASH_FUNC_ONE_AT_A_TIME_HARD PERL_MALLOC_WRAP
                        PERL_PRESERVE_IVUV USE_64_BIT_ALL USE_64_BIT_INT
                        USE_LARGE_FILES USE_LOCALE USE_LOCALE_COLLATE
                        USE_LOCALE_CTYPE USE_LOCALE_NUMERIC USE_LOCALE_TIME
                        USE_PERLIO USE_PERL_ATOF
  Locally applied patches:
        Devel::PatchPerl 1.42
  Built under linux
  Compiled at Nov  3 2016 12:25:49
  %ENV:
    PERLBREW_BASHRC_VERSION="0.75"
    PERLBREW_HOME="/home/chris/.perlbrew"
    PERLBREW_MANPATH="/opt/perlbrew/perls/perl-5.24.0/man"
    PERLBREW_PATH="/opt/perlbrew/bin:/opt/perlbrew/perls/perl-5.24.0/bin"
    PERLBREW_PERL="perl-5.24.0"
    PERLBREW_ROOT="/opt/perlbrew"
    PERLBREW_VERSION="0.75"
  @INC:
    /opt/perlbrew/perls/perl-5.24.0/lib/site_perl/5.24.0/x86_64-linux
    /opt/perlbrew/perls/perl-5.24.0/lib/site_perl/5.24.0
    /opt/perlbrew/perls/perl-5.24.0/lib/5.24.0/x86_64-linux
    /opt/perlbrew/perls/perl-5.24.0/lib/5.24.0
    .

Wonderful. Thanks for the test case!
I've reproduced it with this cut-down version (which avoids the Pg dependency):

use strict;
use utf8;
use Test::More;
use Data::Dumper;

use DBI qw(:utils);

binmode STDOUT, ":encoding(UTF-8)";
binmode STDERR, ":encoding(UTF-8)";

my $builder = Test::More->builder;
binmode $builder->output,         ":encoding(utf8)";
binmode $builder->failure_output, ":encoding(utf8)";
binmode $builder->todo_output,    ":encoding(utf8)";

my $dbi = DBI->connect( 'dbi:Sponge:', '', '');

my @expect=(
    [ 'NAME',    "ABc", "ABc" ],
    [ 'NAME_uc', "ABc", "ABC" ],
    [ 'NAME_lc', "ABc", "abc" ],

    [ 'NAME',    "てすと-ABc", "てすと-ABc" ],
    [ 'NAME_uc', "てすと-Abc", "てすと-ABC" ],
    [ 'NAME_uc', "てすと-Abc", "てすと-ABC" ],
    [ 'NAME_lc', "てすと-Abc", "てすと-abc" ],
    [ 'NAME_lc', "てすと-Abc", "てすと-abc" ],

    [ 'NAME',    "ÄMNE-Abc", "ÄMNE-Abc" ],
    [ 'NAME_uc', "ÄMNE-Abc", "ÄMNE-ABC" ],
    [ 'NAME_uc', "ämne-Abc", "ÄMNE-ABC" ],
    [ 'NAME_lc', "ämne-Abc", "ämne-abc" ],
    [ 'NAME_lc', "ÄMNE-Abc", "ämne-abc" ],
);

foreach my $e (@expect) {
    my($case, $as, $fld) = @$e;
    note "Testing $case $as -> $fld";

    my $val;
    if($case eq 'NAME_uc') {
        $val = uc($as);
    } elsif($case eq 'NAME_lc') {
        $val = lc($as);
    } else {
        $val = $as;
    }

    is($val, $fld, "case-converted $as to $case");

    $dbi->{FetchHashKeyName} = $case;

    my $sth = $dbi->prepare("dummy", {
        rows => [ [ "value" ] ],
        NAME => [ $as ],
    });

    is $sth->{NAME}[0], $as;
    is $sth->{$case}[0], $fld, "$case of $as";
}

done_testing();

I'll try to look into a fix soonish.

Oh dear. The perl core doesn't appear to provide an API for case folding SVs.
We'd need to replicate much of the (large) code for pp_uc and pp_lc :(
I won't get to this anytime soon.

I'd accept a patch that adds a function like SV *_case_fold_sv(SV* sv, bool upcase) based on pp_uc/pp_lc (or a link to an existing one I can copy) then I'd integrate it into dbih_get_attr_k().

Meanwhile, a poor but effective workaround would be to set the NAME_uc or NAME_lc key yourself in Perl. At least, that ought to work but it was failing because setting those attributes triggered an error. I've fixed that in a43696a. So now you can write:

   $sth->{NAME_uc} = [ map { uc($_) } @{$sth->{NAME}} ];

Thanks for investigating this. I think we'll go with assigning NAME_uc for now and look at implementing a _case_fold_sv patch if I ever get some time.