Всем здравствуйте.
Разобрался, почему Java 1.3 несовместима с современными Glibc (серия 1, серия 2).
С целью выяснения канонического пути до rt.jar процесс JVM вызывает функцию canonicalize(), определённую в файле canonicalize_md.c и находящуюся в libjava.so. Ниже фрагмент исходников Java 1.4.1 (ещё даже не GPL). Более ранних у меня нет, но между 1.3 и 1.4 конкретно этот код вряд ли менялся:
/*
* @(#)canonicalize_md.c 1.35 01/12/03
*
* Copyright 2002 Sun Microsystems, Inc. All rights reserved.
* SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
*/
/*
* Pathname canonicalization for Unix file systems
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <errno.h>
#include <limits.h>
#include <alloca.h>
/* Note: The comments in this file use the terminology
defined in the java.io.File class */
/* Decide whether a slash-separated name sequence is worth collapsing.
   The sequence is scanned once, name by name; every name is counted and a
   flag is raised as soon as one of them is exactly "." or "..".
   Returns the number of names when at least one "." or ".." was seen,
   and zero when there is nothing to collapse. */
static int
collapsible(char *names)
{
    int count = 0;
    int sawDots = 0;
    char *cur;

    for (cur = names; *cur != '\0'; count++) {
        /* cur sits on the first character of a name: is the name "." or ".."? */
        if (cur[0] == '.') {
            char after = cur[1];
            if (after == '.') {
                after = cur[2];
            }
            if (after == '\0' || after == '/') {
                sawDots = 1;
            }
        }

        /* Advance to the start of the next name, stepping over one slash */
        while (*cur != '\0' && *cur != '/') {
            cur++;
        }
        if (*cur == '/') {
            cur++;
        }
    }

    if (!sawDots) {
        return 0;
    }
    return count;
}
/* Break a name sequence into separate strings, in place.
   Each slash that ends a name is overwritten with a null, and the start of
   every name is recorded in ix[0], ix[1], ... in order of appearance.
   The caller must supply an index array with one slot per name. */
static void
splitNames(char *names, char **ix)
{
    int slot = 0;
    char *cur = names;

    while (*cur != '\0') {
        /* Record the name; its first character is never tested for '/' */
        ix[slot] = cur;
        slot++;
        cur++;

        /* Run to the end of this name and cut it off at the slash, if any */
        while (*cur != '\0' && *cur != '/') {
            cur++;
        }
        if (*cur == '/') {
            *cur = '\0';
            cur++;
        }
    }
}
/* Join the names in the given name sequence back into one path, in place.
   names - start of the buffer that splitNames() cut into separate strings
   nc    - number of entries in ix
   ix    - per-name start pointers; a cleared (null) entry means the name
           was removed and must be skipped
   Surviving names are packed towards the front of the buffer, separated by
   single slashes.  If no name survives, the result is the empty string.

   Writes stay inside the original string: the separator is stored only in
   front of a name that follows an already emitted one (never at names[-1]),
   and nothing is written past the original terminator. */
static void
joinNames(char *names, int nc, char **ix)
{
    int i;
    char *p = names;    /* write cursor: one past the null of the last name emitted */

    for (i = 0; i < nc; i++) {
        char *q = ix[i];
        if (!q) continue;
        if (p > names) {
            /* A name was already emitted; its terminating null becomes the separator */
            p[-1] = '/';
        }
        if (p == q) {
            /* Name is already in place; just step over it and its null */
            p += strlen(p) + 1;
        } else {
            /* Slide the name (including its null) down to the write cursor */
            while ((*p++ = *q++) != '\0') {
            }
        }
    }
    if (p == names) {
        /* Every name was removed.  Otherwise the last emitted name already
           carries its own terminator at p[-1]. */
        *p = '\0';
    }
}
/* Remove "." and ".." names from the given path, in place, wherever that
   can be done purely syntactically.  A "." name is always dropped; a ".."
   name is dropped together with the nearest surviving name before it, and
   is left alone when no such name exists.  No filesystem queries are made,
   so this is only meant as a cleanup step after realpath().
   Paths with fewer than two names, or without any "." / ".." name, are
   returned untouched.

   NOTE(review): the "nearest surviving name" may itself be a ".." that was
   kept earlier, so a relative "../.." collapses to the empty string.
   Confirm whether that is intended. */
static void
collapse(char *path)
{
    /* Keep a leading '/' out of the name sequence so it is always preserved */
    char *seq = (path[0] == '/') ? path + 1 : path;
    int count = collapsible(seq);
    char **index;
    int cur;

    if (count < 2) {
        return;                         /* Nothing to do */
    }

    /* One index slot per name, on the stack */
    index = (char **)alloca(count * sizeof(char *));
    splitNames(seq, index);

    for (cur = 0; cur < count; cur++) {
        /* Entries at or after cur have not been cleared yet, so name is non-null */
        const char *name = index[cur];
        int isDot = (name[0] == '.') && (name[1] == '\0');
        int isDotDot = (name[0] == '.') && (name[1] == '.') && (name[2] == '\0');

        if (isDot) {
            /* Remove this instance of "." */
            index[cur] = NULL;
        } else if (isDotDot) {
            /* Look backwards for the closest name that is still present */
            int prev = cur - 1;
            while (prev >= 0 && index[prev] == NULL) {
                prev--;
            }
            if (prev >= 0) {
                /* Cancel that name against this ".." */
                index[prev] = NULL;
                index[cur] = NULL;
            }
            /* Otherwise there is nothing to cancel; the ".." stays */
        }
    }

    joinNames(seq, count, index);
}
/* Convert a pathname to canonical form.  The input path is assumed to contain
   no duplicate slashes.  On Solaris we can use realpath() to do most of the
   work, though once that's done we still must collapse any remaining "." and
   ".." names by hand.

   original - null-terminated input path
   resolved - output buffer that receives the canonical path
   len      - size of resolved in bytes; must be at least PATH_MAX

   Returns 0 on success.  Returns -1 with errno set on failure: EINVAL when
   len is smaller than PATH_MAX, ENAMETOOLONG when the input or the result
   does not fit, otherwise whatever realpath() reported (errors other than
   ENOENT, ENOTDIR and EACCES abort the lookup).

   NOTE(review): this global symbol shares its name with the
   canonicalize(double *, const double *) function that glibc 2.25 added to
   libm, so on such systems the dynamic linker may bind callers to the wrong
   one.  The name is kept here because callers depend on it. */
int
canonicalize(char *original, char *resolved, int len)
{
    size_t olen;

    if (len < PATH_MAX) {
        errno = EINVAL;
        return -1;
    }
    olen = strlen(original);
    if (olen > PATH_MAX) {
        errno = ENAMETOOLONG;
        return -1;
    }

    /* First try realpath() on the entire path */
    if (realpath(original, resolved)) {
        /* That worked, so return it */
        collapse(resolved);
        return 0;
    }
    else {
        /* Something's bogus in the original path, so remove names from the end
           until either some subpath works or we run out of names */
        char *p, *end, *r = NULL;
        char path[PATH_MAX + 1];

        /* olen <= PATH_MAX was checked above, so the copy (with its null) fits */
        memcpy(path, original, olen + 1);
        end = path + olen;

        for (p = end; p > path;) {
            /* Skip last element */
            while ((--p > path) && (*p != '/')) {
            }
            if (p == path) break;

            /* Try realpath() on this subpath; p is on a '/', cut there and
               put the slash back afterwards */
            *p = '\0';
            r = realpath(path, resolved);
            *p = '/';

            if (r != NULL) {
                /* The subpath has a canonical path */
                break;
            }
            else if (errno == ENOENT || errno == ENOTDIR || errno == EACCES) {
                /* If the lookup of a particular subpath fails because the file
                   does not exist, because it is of the wrong type, or because
                   access is denied, then remove its last name and try again.
                   Other I/O problems cause an error return. */
                continue;
            }
            else {
                return -1;
            }
        }

        if (r != NULL) {
            /* Append unresolved subpath to resolved subpath */
            size_t rn = strlen(r);
            if (rn + strlen(p) >= (size_t)len) {
                /* Buffer overflow */
                errno = ENAMETOOLONG;
                return -1;
            }
            if ((rn > 0) && (r[rn - 1] == '/') && (*p == '/')) {
                /* Avoid duplicate slashes */
                p++;
            }
            strcpy(r + rn, p);
            collapse(r);
            return 0;
        }

        /* Nothing resolved, so just return the original path.  It may be up
           to PATH_MAX characters long while resolved is only guaranteed to
           hold PATH_MAX bytes, so make sure it fits including its null. */
        if (olen >= (size_t)len) {
            errno = ENAMETOOLONG;
            return -1;
        }
        strcpy(resolved, path);
        collapse(resolved);
        return 0;
    }
}
И код прекрасен всем, кроме одного: в 2017-м году в Glibc 2.25 появилась стандартная функция с тем же именем, хоть и с другой сигнатурой (коммит eaf5ad0bc4a67bf40999e22db6f583ebc3a806ba):
TS 18661-1 defines canonicalize functions to produce a canonical
version of a floating-point representation. This patch implements
these functions for glibc.
As with the iscanonical macro, these functions are oriented to the
decimal floating-point case, where some values have both canonical and
noncanonical representations. However, the functions have a return
value that says whether they succeeded in storing a canonical result;
thus, they can fail for the case of an invalid representation (while
still not making any particular choice from among multiple equally
canonical valid representations of the same value). Since no
floating-point formats in glibc actually have noncanonical valid
representations, a type-generic implementation of these functions can
be used that expects iscanonical to return 0 only for invalid
representations. Now that iscanonical is used within libm.so,
libm_hidden_proto / libm_hidden_def are added for __iscanonicall.
The definition of these functions is intended to correspond to a
convertFormat operation to the same floating-point format. Thus, they
convert signaling NaNs to quiet NaNs, raising the «invalid» exception.
Such a conversion «should» produce «the canonical version of that
signaling NaN made quiet».
Удивительным образом JVM не валится с segfault, хотя количество аргументов и различается, но, так или иначе, canonicalize(char*, char*, int) из libjava.so не вызывается уж боле.
Вот стектрейс здорового человека (Glibc 2.24, смотрим на фрейм 0):
#0 0xf79c304a in canonicalize () from /usr/lib/jvm/java-1.3.1_20-sun-i386/jre/lib/i386/libjava.so
#1 0xf79bec50 in Canonicalize () from /usr/lib/jvm/java-1.3.1_20-sun-i386/jre/lib/i386/libjava.so
#2 0xf7d05c4b in ClassLoader::get_canonical_path(char *, char *, int) () from /usr/lib/jvm/java-1.3.1_20-sun-i386/jre/lib/x86_64/client/libjvm.so
#3 0xf7d0506f in ClassLoader::setup_bootstrap_search_path(void) () from /usr/lib/jvm/java-1.3.1_20-sun-i386/jre/lib/x86_64/client/libjvm.so
#4 0xf7d05cad in classLoader_init(void) () from /usr/lib/jvm/java-1.3.1_20-sun-i386/jre/lib/x86_64/client/libjvm.so
#5 0xf7d1bad3 in init_globals(void) () from /usr/lib/jvm/java-1.3.1_20-sun-i386/jre/lib/x86_64/client/libjvm.so
#6 0xf7de14ea in Threads::create_vm(JavaVMInitArgs *) () from /usr/lib/jvm/java-1.3.1_20-sun-i386/jre/lib/x86_64/client/libjvm.so
#7 0xf7d4b620 in JNI_CreateJavaVM () from /usr/lib/jvm/java-1.3.1_20-sun-i386/jre/lib/x86_64/client/libjvm.so
#8 0x08049bca in InitializeJVM ()
#9 0x08048fd0 in main ()
А вот стектрейс курильщика (фрейм 1 и далее общие, фрейм 0 другой):
#0 __canonicalize (cx=0x8050d50, x=0xffff948c) at ./s_canonicalize_template.c:24
#1 0xf7fa9c50 in Canonicalize () from /usr/lib/jvm/java-1.3.1_20-sun-i386/jre/lib/i386/libjava.so
#2 0xf7c8dc4b in ClassLoader::get_canonical_path(char *, char *, int) () from /usr/lib/jvm/java-1.3.1_20-sun-i386/jre/lib/x86_64/client/libjvm.so
#3 0xf7c8d06f in ClassLoader::setup_bootstrap_search_path(void) () from /usr/lib/jvm/java-1.3.1_20-sun-i386/jre/lib/x86_64/client/libjvm.so
#4 0xf7c8dcad in classLoader_init(void) () from /usr/lib/jvm/java-1.3.1_20-sun-i386/jre/lib/x86_64/client/libjvm.so
#5 0xf7ca3ad3 in init_globals(void) () from /usr/lib/jvm/java-1.3.1_20-sun-i386/jre/lib/x86_64/client/libjvm.so
#6 0xf7d694ea in Threads::create_vm(JavaVMInitArgs *) () from /usr/lib/jvm/java-1.3.1_20-sun-i386/jre/lib/x86_64/client/libjvm.so
#7 0xf7cd3620 in JNI_CreateJavaVM () from /usr/lib/jvm/java-1.3.1_20-sun-i386/jre/lib/x86_64/client/libjvm.so
#8 0x08049bca in InitializeJVM ()
#9 0x08048fd0 in main ()
Здесь значения cx и x – видимо, исходные значения указателей на строки путей (char *), интерпретированные как double *.
Собственно, вопрос.
Я решил, что я, типа, умный, и сейчас я возьму, соберу пресловутый сановский исходник в 32-разрядную библиотеку (gcc -m32 ...) и положу её в LD_PRELOAD.
Ага, щаз.
При запуске наблюдаю множественные сообщения вида
ERROR: ld.so: object '.../libcanonicalize.so' from LD_PRELOAD cannot be preloaded (wrong ELF class: ELFCLASS32): ignored.
за которыми следует радостный Segmentation fault. Причём если я явно запускаю 32-разрядный динамический интерпретатор:
LD_PRELOAD='...' /lib/i386-linux-gnu/ld-linux.so.2 java -version
— то результат ровно тот же.
ЧЯДНТ?