Skip to content

Commit 43b5183

Browse files
authored
YARN-11816. Fix flaky test: TestCapacitySchedulerMultiNodes#testCheckRequestOnceForUnsatisfiedRequest. (#7659) Contributed by Tao Yang.
Signed-off-by: Shilun Fan <[email protected]>
1 parent f347b64 commit 43b5183

File tree

1 file changed

+29
-21
lines changed
  • hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity

1 file changed

+29
-21
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacitySchedulerMultiNodes.java

Lines changed: 29 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,6 @@
2323
import static org.junit.jupiter.api.Assertions.assertEquals;
2424
import static org.junit.jupiter.api.Assertions.assertNotNull;
2525
import static org.junit.jupiter.api.Assertions.assertNull;
26-
import static org.junit.jupiter.api.Assertions.assertTrue;
2726
import static org.junit.jupiter.api.Assertions.fail;
2827
import static org.mockito.Mockito.when;
2928

@@ -35,8 +34,10 @@
3534
import java.util.Map;
3635
import java.util.Set;
3736
import java.util.concurrent.ConcurrentMap;
37+
import java.util.concurrent.TimeoutException;
3838
import java.util.concurrent.atomic.AtomicBoolean;
3939

40+
import org.apache.hadoop.test.GenericTestUtils;
4041
import org.apache.hadoop.thirdparty.com.google.common.collect.Iterators;
4142

4243
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
@@ -625,7 +626,6 @@ public void testCheckRequestOnceForUnsatisfiedRequest() throws Exception {
625626
// mock node tracker with 2000 nodes
626627
// to simulate the scenario where there are many nodes in the cluster
627628
List<FiCaSchedulerNode> mockNodes = new ArrayList<>();
628-
long ss = System.currentTimeMillis();
629629
for (int i = 0; i < 2000; i++) {
630630
FiCaSchedulerNode node =
631631
TestUtils.getMockNode("host" + i + ":1234", "", 0, 10 * GB, 10);
@@ -660,26 +660,34 @@ public List<FiCaSchedulerNode> getNodesPerPartition(String partition) {
660660
// create an unsatisfied request which will reach the headroom
661661
am1.allocate("*", 2 * GB, 10, new ArrayList<>());
662662

663-
// verify that when headroom is reached for an unsatisfied request,
664-
// scheduler should only check the request once before checking all nodes.
665-
CandidateNodeSet<FiCaSchedulerNode> candidates =
666-
new SimpleCandidateNodeSet<>(Collections.emptyMap(), "");
667-
int numSchedulingCycles = 10;
668-
long startTime = System.currentTimeMillis();
669-
for (int i = 0; i < numSchedulingCycles; i++) {
670-
spyCs.allocateContainersToNode(candidates, false);
663+
List<Long> elapsedMsLst = new ArrayList<>();
664+
try {
665+
GenericTestUtils.waitFor(() -> {
666+
// verify that when headroom is reached for an unsatisfied request,
667+
// scheduler should only check the request once before checking all nodes.
668+
CandidateNodeSet<FiCaSchedulerNode> candidates =
669+
new SimpleCandidateNodeSet<>(Collections.emptyMap(), "");
670+
int numSchedulingCycles = 10;
671+
long startTime = System.currentTimeMillis();
672+
for (int i = 0; i < numSchedulingCycles; i++) {
673+
spyCs.allocateContainersToNode(candidates, false);
674+
}
675+
long avgElapsedMs =
676+
(System.currentTimeMillis() - startTime) / numSchedulingCycles;
677+
LOG.info("Average elapsed time for a scheduling cycle: {} ms",
678+
avgElapsedMs);
679+
680+
elapsedMsLst.add(avgElapsedMs);
681+
// verify that the scheduling cycle is less than 10ms,
682+
// ideally the latency should be less than 2ms.
683+
return avgElapsedMs < 10;
684+
}, 500, 3000);
685+
} catch (TimeoutException e) {
686+
fail("Scheduling cycle expected to be less than 10ms, " +
687+
"but took too long, elapsedMs:" + elapsedMsLst);
688+
} finally {
689+
rm.stop();
671690
}
672-
long avgElapsedMs =
673-
(System.currentTimeMillis() - startTime) / numSchedulingCycles;
674-
LOG.info("Average elapsed time for a scheduling cycle: {} ms",
675-
avgElapsedMs);
676-
// verify that the scheduling cycle is less than 5ms,
677-
// ideally the latency should be less than 2ms.
678-
assertTrue(avgElapsedMs < 5,
679-
String.format("%d ms elapsed in average for a scheduling cycle, " +
680-
"expected to be less than 5ms.", avgElapsedMs));
681-
682-
rm.stop();
683691
}
684692

685693
private static void moveReservation(CapacityScheduler cs,

0 commit comments

Comments
 (0)