Skip to content

Commit 25d7b7a

Browse files
ntyni authored and khwilliamson committed
Fix POSIX::mblen mbstate_t initialization on threaded perls with glibc
As reported in https://bugs.launchpad.net/bugs/1818953, POSIX::mblen() is broken on threaded perls with glibc:

    % perl -MPOSIX=mblen -e 'mblen("a", 1)'
    perl: mbrtowc.c:105: __mbrtowc: Assertion `__mbsinit (data.__statep)' failed.
    zsh: abort (core dumped)  perl -MPOSIX=mblen -e 'mblen("a", 1)'

This broke in v5.27.8-134-g6c9ff7e96e, which made the function use mbrlen(3) under the hood on threaded perls.

The problem is the initialization of the shift state with

    mbrlen(NULL, 0, &ps);

The glibc documentation for mbrlen(3) at https://www.gnu.org/software/libc/manual/html_node/Converting-a-Character.html#Converting-a-Character does not mention initialization by passing in a null pointer for the string, only a pointer to a NUL wide character:

    If the next multibyte character corresponds to the NUL wide character, the return value is 0. If the next n bytes form a valid multibyte character, the number of bytes belonging to this multibyte character byte sequence is returned.

Use memset(3) instead for mbstate_t initialization, as suggested in https://www.gnu.org/software/libc/manual/html_node/Keeping-the-state.html, with the hope that this is more portable.

While at it, add a few basic test cases. These are in a new file because they need fresh_perl_is() from test.pl, while the existing ones use Test::More (and conversion of at least posix.t looks way too involved).

Bug-Ubuntu: https://bugs.launchpad.net/bugs/1818953
1 parent e7a474c commit 25d7b7a

File tree

4 files changed

+50
-2
lines changed

4 files changed

+50
-2
lines changed

MANIFEST

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4182,6 +4182,7 @@ ext/POSIX/POSIX.xs POSIX extension external subroutines
41824182
ext/POSIX/t/export.t Test @EXPORT and @EXPORT_OK
41834183
ext/POSIX/t/iscrash See if POSIX isxxx() crashes with threads on Win32
41844184
ext/POSIX/t/math.t Basic math tests for POSIX
4185+
ext/POSIX/t/mb.t Multibyte function tests for POSIX
41854186
ext/POSIX/t/posix.t See if POSIX works
41864187
ext/POSIX/t/sigaction.t See if POSIX::sigaction works
41874188
ext/POSIX/t/sigset.t See if POSIX::SigSet works

ext/POSIX/POSIX.xs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3329,7 +3329,7 @@ mblen(s, n)
33293329
#endif
33303330
CODE:
33313331
#if defined(USE_ITHREADS) && defined(HAS_MBRLEN)
3332-
PERL_UNUSED_RESULT(mbrlen(NULL, 0, &ps)); /* Initialize state */
3332+
memset(&ps, 0, sizeof(ps)); /* Initialize state */
33333333
RETVAL = mbrlen(s, n, &ps); /* Prefer reentrant version */
33343334
#else
33353335
RETVAL = mblen(s, n);

ext/POSIX/lib/POSIX.pm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use warnings;
44

55
our ($AUTOLOAD, %SIGRT);
66

7-
our $VERSION = '1.87';
7+
our $VERSION = '1.88';
88

99
require XSLoader;
1010

ext/POSIX/t/mb.t

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!./perl
2+
3+
# These tests are in a separate file, because they use fresh_perl_is()
4+
# from test.pl.
5+
6+
# The mb* functions use the "underlying locale" that is not affected by
7+
# the Perl one. So we run the tests in a separate "fresh_perl" process
8+
# with the correct LC_CTYPE set in the environment.
9+
10+
BEGIN {
11+
require Config; import Config;
12+
if ($^O ne 'VMS' and $Config{'extensions'} !~ /\bPOSIX\b/) {
13+
print "1..0\n";
14+
exit 0;
15+
}
16+
unshift @INC, "../../t";
17+
require 'loc_tools.pl';
18+
require 'test.pl';
19+
}
20+
21+
plan tests => 3;
22+
23+
use POSIX qw();
24+
25+
SKIP: {
26+
skip("mblen() not present", 3) unless $Config{d_mblen};
27+
28+
is(&POSIX::mblen("a", &POSIX::MB_CUR_MAX), 1, 'mblen() basically works');
29+
30+
skip("LC_CTYPE locale support not available", 2)
31+
unless locales_enabled('LC_CTYPE');
32+
33+
my $utf8_locale = find_utf8_ctype_locale();
34+
skip("no utf8 locale available", 2) unless $utf8_locale;
35+
36+
local $ENV{LC_CTYPE} = $utf8_locale;
37+
local $ENV{LC_ALL};
38+
delete $ENV{LC_ALL};
39+
40+
fresh_perl_is(
41+
'use POSIX; print &POSIX::mblen("\x{c3}\x{28}", &POSIX::MB_CUR_MAX)',
42+
-1, {}, 'mblen() recognizes invalid multibyte characters');
43+
44+
fresh_perl_is(
45+
'use POSIX; print &POSIX::mblen("\N{GREEK SMALL LETTER SIGMA}", &POSIX::MB_CUR_MAX)',
46+
2, {}, 'mblen() works on UTF-8 characters');
47+
}

0 commit comments

Comments
 (0)