Skip to content

Commit 0f51d2a

Browse files
authored
HADOOP-14451. Deadlock in NativeIO (apache#6632)
1 parent b25b28e commit 0f51d2a

File tree

3 files changed

+191
-48
lines changed

3 files changed

+191
-48
lines changed

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java

+16-10
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,9 @@ public long getLength() {
220220
}
221221
}
222222

223+
/** Initialize the JNI method ID and class ID cache. */
224+
private static native void initNativePosix(boolean doThreadsafeWorkaround);
225+
223226
/**
224227
* JNI wrapper of persist memory operations.
225228
*/
@@ -331,11 +334,11 @@ public boolean verifyCanMlock() {
331334
if (NativeCodeLoader.isNativeCodeLoaded()) {
332335
try {
333336
Configuration conf = new Configuration();
334-
workaroundNonThreadSafePasswdCalls = conf.getBoolean(
335-
WORKAROUND_NON_THREADSAFE_CALLS_KEY,
336-
WORKAROUND_NON_THREADSAFE_CALLS_DEFAULT);
337+
boolean workaroundNonThreadSafePasswdCalls = conf.getBoolean(
338+
WORKAROUND_NON_THREADSAFE_CALLS_KEY,
339+
WORKAROUND_NON_THREADSAFE_CALLS_DEFAULT);
337340

338-
initNative();
341+
initNativePosix(workaroundNonThreadSafePasswdCalls);
339342
nativeLoaded = true;
340343

341344
cacheTimeout = conf.getLong(
@@ -679,9 +682,6 @@ public static native void munmap(long addr, long length)
679682
throws IOException;
680683
}
681684

682-
private static boolean workaroundNonThreadSafePasswdCalls = false;
683-
684-
685685
public static class Windows {
686686
// Flags for CreateFile() call on Windows
687687
public static final long GENERIC_READ = 0x80000000L;
@@ -833,7 +833,9 @@ public static boolean access(String path, AccessRight desiredAccess)
833833
static {
834834
if (NativeCodeLoader.isNativeCodeLoaded()) {
835835
try {
836-
initNative();
836+
initNativeWindows(false);
837+
// As of now there is no change between initNative()
838+
// and initNativeWindows() native impls.
837839
nativeLoaded = true;
838840
} catch (Throwable t) {
839841
// This can happen if the user has an older version of libhadoop.so
@@ -843,6 +845,10 @@ public static boolean access(String path, AccessRight desiredAccess)
843845
}
844846
}
845847
}
848+
849+
/** Initialize the JNI method ID and class ID cache. */
850+
private static native void initNativeWindows(
851+
boolean doThreadsafeWorkaround);
846852
}
847853

848854
private static final Logger LOG = LoggerFactory.getLogger(NativeIO.class);
@@ -852,7 +858,7 @@ public static boolean access(String path, AccessRight desiredAccess)
852858
static {
853859
if (NativeCodeLoader.isNativeCodeLoaded()) {
854860
try {
855-
initNative();
861+
initNative(false);
856862
nativeLoaded = true;
857863
} catch (Throwable t) {
858864
// This can happen if the user has an older version of libhadoop.so
@@ -871,7 +877,7 @@ public static boolean isAvailable() {
871877
}
872878

873879
/** Initialize the JNI method ID and class ID cache */
874-
private static native void initNative();
880+
private static native void initNative(boolean doThreadsafeWorkaround);
875881

876882
/**
877883
* Get the maximum number of bytes that can be locked into memory at any

hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/NativeIO.c

+88-38
Original file line numberDiff line numberDiff line change
@@ -103,24 +103,6 @@ extern void throw_ioe(JNIEnv* env, int errnum);
103103
static ssize_t get_pw_buflen();
104104
#endif
105105

106-
/**
107-
* Returns non-zero if the user has specified that the system
108-
* has non-threadsafe implementations of getpwuid_r or getgrgid_r.
109-
**/
110-
static int workaround_non_threadsafe_calls(JNIEnv *env, jclass clazz) {
111-
jboolean result;
112-
jfieldID needs_workaround_field = (*env)->GetStaticFieldID(
113-
env, clazz,
114-
"workaroundNonThreadSafePasswdCalls",
115-
"Z");
116-
PASS_EXCEPTIONS_RET(env, 0);
117-
assert(needs_workaround_field);
118-
119-
result = (*env)->GetStaticBooleanField(
120-
env, clazz, needs_workaround_field);
121-
return result;
122-
}
123-
124106
/**
125107
* Sets a static boolean field to the specified value.
126108
*/
@@ -201,10 +183,9 @@ static void consts_init(JNIEnv *env) {
201183
}
202184
#endif
203185

204-
static void stat_init(JNIEnv *env, jclass nativeio_class) {
186+
static void stat_init(JNIEnv *env) {
205187
jclass clazz = NULL;
206-
jclass obj_class = NULL;
207-
jmethodID obj_ctor = NULL;
188+
if (stat_ctor2 != NULL) return; //Already inited
208189
// Init Stat
209190
clazz = (*env)->FindClass(env, NATIVE_IO_STAT_CLASS);
210191
if (!clazz) {
@@ -224,6 +205,20 @@ static void stat_init(JNIEnv *env, jclass nativeio_class) {
224205
if (!stat_ctor2) {
225206
return; // exception has been raised
226207
}
208+
}
209+
210+
static void stat_deinit(JNIEnv *env) {
211+
if (stat_clazz != NULL) {
212+
(*env)->DeleteGlobalRef(env, stat_clazz);
213+
stat_clazz = NULL;
214+
}
215+
}
216+
217+
static void workaround_non_threadsafe_calls_init(JNIEnv *env){
218+
jclass obj_class = NULL;
219+
jmethodID obj_ctor = NULL;
220+
if (pw_lock_object != NULL) return; // Already inited
221+
227222
obj_class = (*env)->FindClass(env, "java/lang/Object");
228223
if (!obj_class) {
229224
return; // exception has been raised
@@ -233,28 +228,21 @@ static void stat_init(JNIEnv *env, jclass nativeio_class) {
233228
if (!obj_ctor) {
234229
return; // exception has been raised
235230
}
236-
237-
if (workaround_non_threadsafe_calls(env, nativeio_class)) {
238-
pw_lock_object = (*env)->NewObject(env, obj_class, obj_ctor);
239-
PASS_EXCEPTIONS(env);
240-
pw_lock_object = (*env)->NewGlobalRef(env, pw_lock_object);
241-
242-
PASS_EXCEPTIONS(env);
243-
}
231+
pw_lock_object = (*env)->NewObject(env, obj_class, obj_ctor);
232+
PASS_EXCEPTIONS(env);
233+
pw_lock_object = (*env)->NewGlobalRef(env, pw_lock_object);
234+
PASS_EXCEPTIONS(env);
244235
}
245236

246-
static void stat_deinit(JNIEnv *env) {
247-
if (stat_clazz != NULL) {
248-
(*env)->DeleteGlobalRef(env, stat_clazz);
249-
stat_clazz = NULL;
250-
}
237+
static void workaround_non_threadsafe_calls_deinit(JNIEnv *env){
251238
if (pw_lock_object != NULL) {
252239
(*env)->DeleteGlobalRef(env, pw_lock_object);
253240
pw_lock_object = NULL;
254241
}
255242
}
256243

257244
static void nioe_init(JNIEnv *env) {
245+
if (nioe_ctor != NULL) return; // Already inited
258246
// Init NativeIOException
259247
nioe_clazz = (*env)->FindClass(
260248
env, "org/apache/hadoop/io/nativeio/NativeIOException");
@@ -349,17 +337,53 @@ static void pmem_region_deinit(JNIEnv *env) {
349337
*/
350338
JNIEXPORT void JNICALL
351339
Java_org_apache_hadoop_io_nativeio_NativeIO_initNative(
352-
JNIEnv *env, jclass clazz) {
340+
JNIEnv *env, jclass clazz, jboolean doThreadsafeWorkaround) {
341+
nioe_init(env);
342+
PASS_EXCEPTIONS_GOTO(env, error);
343+
fd_init(env);
344+
PASS_EXCEPTIONS_GOTO(env, error);
345+
#ifdef UNIX
346+
errno_enum_init(env);
347+
PASS_EXCEPTIONS_GOTO(env, error);
348+
#endif
349+
if (doThreadsafeWorkaround) {
350+
workaround_non_threadsafe_calls_init(env);
351+
PASS_EXCEPTIONS_GOTO(env, error);
352+
}
353+
return;
354+
error:
355+
// these are all idempotent and safe to call even if the
356+
// class wasn't inited yet
357+
nioe_deinit(env);
358+
fd_deinit(env);
359+
#ifdef UNIX
360+
errno_enum_deinit(env);
361+
#endif
362+
if (doThreadsafeWorkaround) {
363+
workaround_non_threadsafe_calls_deinit(env);
364+
}
365+
}
366+
367+
/*
368+
* private static native void initNativePosix();
369+
*/
370+
JNIEXPORT void JNICALL
371+
Java_org_apache_hadoop_io_nativeio_NativeIO_00024POSIX_initNativePosix(
372+
JNIEnv *env, jclass clazz, jboolean doThreadsafeWorkaround) {
353373
#ifdef UNIX
354374
consts_init(env);
355375
PASS_EXCEPTIONS_GOTO(env, error);
356376
#endif
357-
stat_init(env, clazz);
377+
stat_init(env);
358378
PASS_EXCEPTIONS_GOTO(env, error);
359379
nioe_init(env);
360380
PASS_EXCEPTIONS_GOTO(env, error);
361381
fd_init(env);
362382
PASS_EXCEPTIONS_GOTO(env, error);
383+
if (doThreadsafeWorkaround) {
384+
workaround_non_threadsafe_calls_init(env);
385+
PASS_EXCEPTIONS_GOTO(env, error);
386+
}
363387
#ifdef UNIX
364388
errno_enum_init(env);
365389
PASS_EXCEPTIONS_GOTO(env, error);
@@ -373,17 +397,43 @@ Java_org_apache_hadoop_io_nativeio_NativeIO_initNative(
373397
error:
374398
// these are all idempodent and safe to call even if the
375399
// class wasn't initted yet
376-
#ifdef UNIX
377400
stat_deinit(env);
378401
#ifdef HADOOP_PMDK_LIBRARY
379402
pmem_region_deinit(env);
380-
#endif
381403
#endif
382404
nioe_deinit(env);
383405
fd_deinit(env);
384406
#ifdef UNIX
385407
errno_enum_deinit(env);
386408
#endif
409+
if (doThreadsafeWorkaround) {
410+
workaround_non_threadsafe_calls_deinit(env);
411+
}
412+
}
413+
414+
/*
415+
* private static native void initNativeWindows();
416+
*/
417+
JNIEXPORT void JNICALL
418+
Java_org_apache_hadoop_io_nativeio_NativeIO_00024Windows_initNativeWindows(
419+
JNIEnv *env, jclass clazz, jboolean doThreadsafeWorkaround) {
420+
nioe_init(env);
421+
PASS_EXCEPTIONS_GOTO(env, error);
422+
fd_init(env);
423+
PASS_EXCEPTIONS_GOTO(env, error);
424+
if (doThreadsafeWorkaround) {
425+
workaround_non_threadsafe_calls_init(env);
426+
PASS_EXCEPTIONS_GOTO(env, error);
427+
}
428+
return;
429+
error:
430+
// these are all idempodent and safe to call even if the
431+
// class wasn't initted yet
432+
nioe_deinit(env);
433+
fd_deinit(env);
434+
if (doThreadsafeWorkaround) {
435+
workaround_non_threadsafe_calls_deinit(env);
436+
}
387437
}
388438

389439
/*
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
* <p>
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
* <p>
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.io.nativeio;
19+
20+
import static org.junit.Assume.assumeTrue;
21+
22+
import java.io.IOException;
23+
24+
import org.apache.hadoop.fs.Path;
25+
import org.junit.Test;
26+
27+
/**
28+
* Separate class to ensure forked Tests load the static blocks again.
29+
*/
30+
public class TestNativeIoInit {
31+
32+
/**
33+
* Refer HADOOP-14451
34+
* Scenario:
35+
* 1. One thread calls a static method of NativeIO, which loads static block
36+
* of NativeIo.
37+
* 2. Second thread calls a static method of NativeIo.POSIX, which loads a
38+
* static block of NativeIO.POSIX class
39+
* <p>
40+
* Expected: Loading these two static blocks separately should not result in
41+
* deadlock.
42+
*/
43+
@Test(timeout = 10000)
44+
public void testDeadlockLinux() throws Exception {
45+
Thread one = new Thread() {
46+
@Override
47+
public void run() {
48+
NativeIO.isAvailable();
49+
}
50+
};
51+
Thread two = new Thread() {
52+
@Override
53+
public void run() {
54+
NativeIO.POSIX.isAvailable();
55+
}
56+
};
57+
two.start();
58+
one.start();
59+
one.join();
60+
two.join();
61+
}
62+
63+
@Test(timeout = 10000)
64+
public void testDeadlockWindows() throws Exception {
65+
assumeTrue("Expected windows", Path.WINDOWS);
66+
Thread one = new Thread() {
67+
@Override
68+
public void run() {
69+
NativeIO.isAvailable();
70+
}
71+
};
72+
Thread two = new Thread() {
73+
@Override
74+
public void run() {
75+
try {
76+
NativeIO.Windows.extendWorkingSetSize(100);
77+
} catch (IOException e) {
78+
//igored
79+
}
80+
}
81+
};
82+
two.start();
83+
one.start();
84+
one.join();
85+
two.join();
86+
}
87+
}

0 commit comments

Comments
 (0)