Skip to content

Commit a4e6a0c

Browse files
committed
Enhancement Request 37123353 - [36572474->24.09.1] ENH: Add service guardian property to enable reducing the frequency of thread dumps in log (ce-main->ce-v24.09)
Remote remote.full on coherence-ce/release/coherence-ce-v24.09 success, changes 111714, synced @111714, job.9.20241001231636.42 [git-p4: depot-paths = "//dev/coherence-ce/release/coherence-ce-v24.09/": change = 111718]
1 parent aeafb3a commit a4e6a0c

File tree

2 files changed

+150
-6
lines changed

2 files changed

+150
-6
lines changed

prj/coherence-core/src/main/java/com/tangosol/net/GuardSupport.java

Lines changed: 76 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
11
/*
2-
* Copyright (c) 2000, 2020, Oracle and/or its affiliates.
2+
* Copyright (c) 2000, 2024, Oracle and/or its affiliates.
33
*
44
* Licensed under the Universal Permissive License v 1.0 as shown at
5-
* http://oss.oracle.com/licenses/upl.
5+
* https://oss.oracle.com/licenses/upl.
66
*/
77

88
package com.tangosol.net;
99

1010

11+
import com.oracle.coherence.common.base.Logger;
1112
import com.oracle.coherence.common.util.Threads;
13+
14+
import com.tangosol.coherence.config.Config;
15+
1216
import com.tangosol.net.Guardian.GuardContext;
1317

1418
import com.tangosol.util.Base;
@@ -842,10 +846,10 @@ public static void logStackTraces()
842846
AtomicLong atomicLogTime = s_atomicLogTime;
843847
long ldtNow = getSafeTimeMillis();
844848
long ldtLast = atomicLogTime.get();
845-
long ldtNext = ldtNow + GUARDIAN_LOG_INTERVAL;
849+
long ldtNext = ldtNow + LOG_THREADDUMP_INTERVAL_MS;
846850

847851
// COH-3131: Prevent concurrent or too frequent thread dumps
848-
if (ldtNow >= ldtLast + GUARDIAN_LOG_INTERVAL
852+
if (ldtNow >= ldtLast + LOG_THREADDUMP_INTERVAL_MS
849853
&& atomicLogTime.compareAndSet(ldtLast, ldtNext))
850854
{
851855
Base.err(getThreadDump());
@@ -862,6 +866,29 @@ public static String getThreadDump()
862866
return Threads.getThreadDump();
863867
}
864868

869+
/**
870+
* Configure guardian log thread dump interval using property {@link #PROP_LOG_THREADDUMP_INTERVAL} set to
871+
* a time duration, e.g. {@code 15m} or {@code 2h}. The maximum allowed duration is {@link #MAX_LOG_THREADDUMP_INTERVAL}; any value
872+
* provided above this value is treated as the max allowed log thread dump interval.
873+
* The default duration of {@link #DEFAULT_LOG_THREADDUMP_INTERVAL} is used when the property is not set or is set to an invalid time duration.
874+
*
875+
* @return the computed guardian log thread dump interval in milliseconds.
876+
*
877+
* @since 25.03
878+
*/
879+
protected static long getLogThreaddumpIntervalMs()
880+
{
881+
long ldtInterval = Config.getDuration(PROP_LOG_THREADDUMP_INTERVAL, new Duration(DEFAULT_LOG_THREADDUMP_INTERVAL)).as(Duration.Magnitude.MILLI);
882+
883+
if (ldtInterval > MAX_LOG_THREADDUMP_INTERVAL_MS)
884+
{
885+
Logger.warn("Property \"" + PROP_LOG_THREADDUMP_INTERVAL + "\" is set to a duration that exceeds maximum " +
886+
MAX_LOG_THREADDUMP_INTERVAL + " duration, adjusting to maximum value allowed.");
887+
ldtInterval = MAX_LOG_THREADDUMP_INTERVAL_MS;
888+
}
889+
return ldtInterval;
890+
}
891+
865892
// ----- constants ----------------------------------------------------
866893

867894
/**
@@ -884,10 +911,53 @@ public static String getThreadDump()
884911
protected static final long GUARDIAN_EARLY_THRESHOLD = 500L;
885912

886913
/**
887-
* The minimum interval at which the Guardian should log thread dumps.
914+
* Property for configuring minimum Guardian log thread dump interval as a time duration.
915+
* Configure this property to reduce the number of Guardian log thread dumps in logs when
916+
* too many are occurring over a short duration of time.
917+
*
918+
* The format is documented for the {@code String} parameter of {@link Duration#Duration(String)}.
919+
*
920+
* @see #DEFAULT_LOG_THREADDUMP_INTERVAL
921+
* @see #MAX_LOG_THREADDUMP_INTERVAL
922+
*
923+
* @since 25.03
924+
*/
925+
protected static final String PROP_LOG_THREADDUMP_INTERVAL = "coherence.guardian.log.threaddump.interval";
926+
927+
/**
928+
* Default guardian log thread dump interval as a time duration.
929+
*
930+
* @since 25.03
888931
*/
889-
protected static final long GUARDIAN_LOG_INTERVAL = 3000L;
932+
protected static final String DEFAULT_LOG_THREADDUMP_INTERVAL = "3s";
890933

934+
/**
935+
* Max guardian log thread dump interval as a time duration. This is the ceiling value for {@link #PROP_LOG_THREADDUMP_INTERVAL}.
936+
* Since the thread dumps are needed to analyze what caused a guardian hard timeout, one is not allowed to disable guardian
937+
* log thread dumps, only to reduce the frequency of the thread dumps.
938+
*
939+
* @since 25.03
940+
*/
941+
protected static final String MAX_LOG_THREADDUMP_INTERVAL = "3h";
942+
943+
/**
944+
* Max interval in milliseconds at which the Guardian log thread dump interval can be configured.
945+
*
946+
* @since 25.03
947+
*/
948+
protected static final long MAX_LOG_THREADDUMP_INTERVAL_MS = new Duration(MAX_LOG_THREADDUMP_INTERVAL).as(Duration.Magnitude.MILLI);
949+
950+
/**
951+
* The minimum interval at which the Guardian should log thread dumps in milliseconds.
952+
*
953+
* Override default {@link #DEFAULT_LOG_THREADDUMP_INTERVAL} by setting property
954+
* {@link #PROP_LOG_THREADDUMP_INTERVAL} to a time duration.
955+
*
956+
* @see #MAX_LOG_THREADDUMP_INTERVAL
957+
*
958+
* @since 25.03
959+
*/
960+
protected static final long LOG_THREADDUMP_INTERVAL_MS = getLogThreaddumpIntervalMs();
891961

892962
// ----- data members -------------------------------------------------
893963

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* Copyright (c) 2000, 2024, Oracle and/or its affiliates.
3+
*
4+
* Licensed under the Universal Permissive License v 1.0 as shown at
5+
* https://oss.oracle.com/licenses/upl.
6+
*/
7+
package com.tangosol.net;
8+
9+
import com.oracle.coherence.common.util.Duration;
10+
11+
import com.oracle.coherence.testing.SystemPropertyResource;
12+
13+
import org.junit.AfterClass;
14+
import org.junit.BeforeClass;
15+
import org.junit.Test;
16+
17+
import static org.hamcrest.CoreMatchers.is;
18+
import static org.hamcrest.Matchers.lessThan;
19+
import static org.hamcrest.MatcherAssert.assertThat;
20+
import static com.oracle.coherence.common.util.Duration.Magnitude.MILLI;
21+
import static com.tangosol.net.GuardSupport.PROP_LOG_THREADDUMP_INTERVAL;
22+
23+
/**
24+
* Unit tests for GuardSupport.
25+
*
26+
* @since 25.03
27+
* @author jf 2024.10.1
28+
*/
29+
public class GuardSupportTest
30+
{
31+
@Test
32+
public void testOverrideLogThreadDumpInterval()
33+
{
34+
final String TEST_LOG_THREADDUMP_INTERVAL = "1h";
35+
36+
try (SystemPropertyResource resource = new SystemPropertyResource(PROP_LOG_THREADDUMP_INTERVAL, TEST_LOG_THREADDUMP_INTERVAL))
37+
{
38+
assertThat(GuardSupport.getLogThreaddumpIntervalMs(), is(new Duration(TEST_LOG_THREADDUMP_INTERVAL).as(MILLI)));
39+
}
40+
}
41+
42+
@Test
43+
public void testDefaultLogThreadDumpInterval()
44+
{
45+
System.clearProperty(PROP_LOG_THREADDUMP_INTERVAL);
46+
assertThat(GuardSupport.getLogThreaddumpIntervalMs(), is(new Duration(GuardSupport.DEFAULT_LOG_THREADDUMP_INTERVAL).as(MILLI)));
47+
}
48+
49+
@Test
50+
public void testCeilingForLogThreadDumpInterval()
51+
{
52+
final String TEST_LOG_THREADDUMP_INTERVAL = "5d";
53+
54+
assertThat(GuardSupport.MAX_LOG_THREADDUMP_INTERVAL_MS, lessThan(new Duration(TEST_LOG_THREADDUMP_INTERVAL).as(MILLI)));
55+
try (SystemPropertyResource resource = new SystemPropertyResource(PROP_LOG_THREADDUMP_INTERVAL, TEST_LOG_THREADDUMP_INTERVAL))
56+
{
57+
long ldtInterval = GuardSupport.getLogThreaddumpIntervalMs();
58+
59+
assertThat(ldtInterval, is(GuardSupport.MAX_LOG_THREADDUMP_INTERVAL_MS));
60+
assertThat(ldtInterval, lessThan(new Duration(TEST_LOG_THREADDUMP_INTERVAL).as(MILLI)));
61+
}
62+
}
63+
64+
@Test
65+
public void testDefaultOnInvalidValueForLogThreadDumpInterval()
66+
{
67+
final String TEST_LOG_THREADDUMP_INTERVAL = "invalidDuration";
68+
69+
try (SystemPropertyResource resource = new SystemPropertyResource(PROP_LOG_THREADDUMP_INTERVAL, TEST_LOG_THREADDUMP_INTERVAL))
70+
{
71+
assertThat(GuardSupport.getLogThreaddumpIntervalMs(), is(new Duration(GuardSupport.DEFAULT_LOG_THREADDUMP_INTERVAL).as(MILLI)));
72+
}
73+
}
74+
}

0 commit comments

Comments
 (0)