Skip to content

Commit c95d31f

Browse files
committed
YARN-5473. Expose per-application over-allocation info in the Resource Manager. Contributed by Haibo Chen.
1 parent 43f51bc commit c95d31f

File tree

60 files changed

+1426
-516
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

60 files changed

+1426
-516
lines changed

hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/TypeConverter.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -527,8 +527,8 @@ public static JobStatus fromYarn(ApplicationReport application,
527527
jobStatus.setNumUsedSlots(resourceUsageReport.getNumUsedContainers());
528528
jobStatus.setReservedMem(
529529
(int)resourceUsageReport.getReservedResources().getMemorySize());
530-
jobStatus.setUsedMem(
531-
(int) resourceUsageReport.getUsedResources().getMemorySize());
530+
jobStatus.setUsedMem((int) resourceUsageReport
531+
.getGuaranteedResourcesUsed().getMemorySize());
532532
}
533533
return jobStatus;
534534
}

hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/TestTypeConverter.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,7 @@ public void testFromYarn() throws Exception {
9494
appUsageRpt.setNumReservedContainers(1);
9595
appUsageRpt.setNumUsedContainers(3);
9696
appUsageRpt.setReservedResources(r);
97-
appUsageRpt.setUsedResources(r);
97+
appUsageRpt.setGuaranteedResourcesUsed(r);
9898
applicationReport.setApplicationResourceUsageReport(appUsageRpt);
9999
JobStatus jobStatus = TypeConverter.fromYarn(applicationReport, "dummy-jobfile");
100100
Assert.assertEquals(appStartTime, jobStatus.getStartTime());
@@ -133,7 +133,7 @@ public void testFromYarnApplicationReport() {
133133
appUsageRpt.setNumReservedContainers(1);
134134
appUsageRpt.setNumUsedContainers(3);
135135
appUsageRpt.setReservedResources(r);
136-
appUsageRpt.setUsedResources(r);
136+
appUsageRpt.setGuaranteedResourcesUsed(r);
137137
when(mockReport.getApplicationResourceUsageReport()).thenReturn(appUsageRpt);
138138
JobStatus status = TypeConverter.fromYarn(mockReport, jobFile);
139139
Assert.assertNotNull("fromYarn returned null status", status);

hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestResourceMgrDelegate.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,7 @@ private ApplicationReport getApplicationReport(
137137
Records.newRecord(Resource.class));
138138
Mockito.when(appResources.getReservedResources()).thenReturn(
139139
Records.newRecord(Resource.class));
140-
Mockito.when(appResources.getUsedResources()).thenReturn(
140+
Mockito.when(appResources.getGuaranteedResourcesUsed()).thenReturn(
141141
Records.newRecord(Resource.class));
142142
Mockito.when(appReport.getApplicationResourceUsageReport()).thenReturn(
143143
appResources);

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationResourceUsageReport.java

Lines changed: 131 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -36,21 +36,27 @@ public abstract class ApplicationResourceUsageReport {
3636
@Private
3737
@Unstable
3838
public static ApplicationResourceUsageReport newInstance(
39-
int numUsedContainers, int numReservedContainers, Resource usedResources,
39+
int numUsedContainers, int numReservedContainers,
40+
Resource guaranteedResourcesUsed,
4041
Resource reservedResources, Resource neededResources,
41-
Map<String, Long> resourceSecondsMap, float queueUsagePerc,
42-
float clusterUsagePerc, Map<String, Long> preemtedResourceSecondsMap) {
42+
Map<String, Long> guaranteedResourceSecondsMap, float queueUsagePerc,
43+
float clusterUsagePerc, Map<String, Long> preemtedResourceSecondsMap,
44+
Resource opportunisticResourcesUsed,
45+
Map<String, Long> opportunisticResourcesSecondsMap) {
46+
4347
ApplicationResourceUsageReport report =
4448
Records.newRecord(ApplicationResourceUsageReport.class);
4549
report.setNumUsedContainers(numUsedContainers);
4650
report.setNumReservedContainers(numReservedContainers);
47-
report.setUsedResources(usedResources);
51+
report.setGuaranteedResourcesUsed(guaranteedResourcesUsed);
4852
report.setReservedResources(reservedResources);
4953
report.setNeededResources(neededResources);
50-
report.setResourceSecondsMap(resourceSecondsMap);
54+
report.setGuaranteedResourceSecondsMap(guaranteedResourceSecondsMap);
5155
report.setQueueUsagePercentage(queueUsagePerc);
5256
report.setClusterUsagePercentage(clusterUsagePerc);
5357
report.setPreemptedResourceSecondsMap(preemtedResourceSecondsMap);
58+
report.setOpportunisticResourcesUsed(opportunisticResourcesUsed);
59+
report.setOpportunisticResourceSecondsMap(opportunisticResourcesSecondsMap);
5460
return report;
5561
}
5662

@@ -87,16 +93,40 @@ public static ApplicationResourceUsageReport newInstance(
8793
public abstract void setNumReservedContainers(int num_reserved_containers);
8894

8995
/**
90-
* Get the used <code>Resource</code>. -1 for invalid/inaccessible reports.
91-
* @return the used <code>Resource</code>
96+
* Get the guaranteed <code>Resource</code> used.
97+
* -1 for invalid/inaccessible reports.
98+
* @return the guaranteed <code>Resource</code> used
9299
*/
93100
@Public
94101
@Stable
102+
@Deprecated
95103
public abstract Resource getUsedResources();
96104

105+
/**
106+
* Get the guaranteed <code>Resource</code> used.
107+
* -1 for invalid/inaccessible reports.
108+
* @return the guaranteed <code>Resource</code> used
109+
*/
110+
@Public
111+
@Unstable
112+
public abstract Resource getGuaranteedResourcesUsed();
113+
114+
@Private
115+
@Unstable
116+
public abstract void setGuaranteedResourcesUsed(Resource resources);
117+
118+
/**
119+
* Get the opportunistic <code>Resource</code> used.
120+
* -1 for invalid/inaccessible reports.
121+
* @return the opportunistic <code>Resource</code> used
122+
*/
123+
@Public
124+
@Unstable
125+
public abstract Resource getOpportunisticResourcesUsed();
126+
97127
@Private
98128
@Unstable
99-
public abstract void setUsedResources(Resource resources);
129+
public abstract void setOpportunisticResourcesUsed(Resource resources);
100130

101131
/**
102132
* Get the reserved <code>Resource</code>. -1 for invalid/inaccessible reports.
@@ -123,41 +153,83 @@ public static ApplicationResourceUsageReport newInstance(
123153
public abstract void setNeededResources(Resource needed_resources);
124154

125155
/**
126-
* Set the aggregated amount of memory (in megabytes) the application has
127-
* allocated times the number of seconds the application has been running.
128-
* @param memory_seconds the aggregated amount of memory seconds
156+
* Set the aggregated amount of guaranteed memory (in megabytes) the
157+
* application has allocated times the number of seconds the application
158+
* has been running.
159+
* @param memorySeconds the aggregated amount of guaranteed memory seconds
129160
*/
130161
@Private
131162
@Unstable
132-
public abstract void setMemorySeconds(long memory_seconds);
163+
public abstract void setGuaranteedMemorySeconds(long memorySeconds);
133164

134165
/**
135-
* Get the aggregated amount of memory (in megabytes) the application has
136-
* allocated times the number of seconds the application has been running.
137-
* @return the aggregated amount of memory seconds
166+
* Get the aggregated amount of guaranteed memory (in megabytes) the
167+
* application has allocated times the number of seconds the application
168+
* has been running.
169+
* @return the aggregated amount of guaranteed memory seconds
170+
*/
171+
@Public
172+
@Unstable
173+
public abstract long getGuaranteedMemorySeconds();
174+
175+
/**
176+
* Get the aggregated amount of guaranteed memory (in megabytes) the
177+
* application has allocated times the number of seconds the application
178+
* has been running.
179+
* @return the aggregated amount of guaranteed memory seconds
138180
*/
139181
@Public
140182
@Unstable
183+
@Deprecated
141184
public abstract long getMemorySeconds();
142185

143186
/**
144-
* Set the aggregated number of vcores that the application has allocated
145-
* times the number of seconds the application has been running.
146-
* @param vcore_seconds the aggregated number of vcore seconds
187+
* Set the aggregated number of guaranteed vcores that the application has
188+
* allocated times the number of seconds the application has been running.
189+
* @param vcoreSeconds the aggregated number of guaranteed vcore seconds
147190
*/
148191
@Private
149192
@Unstable
150-
public abstract void setVcoreSeconds(long vcore_seconds);
193+
public abstract void setGuaranteedVcoreSeconds(long vcoreSeconds);
151194

152195
/**
153-
* Get the aggregated number of vcores that the application has allocated
154-
* times the number of seconds the application has been running.
155-
* @return the aggregated number of vcore seconds
196+
* Get the aggregated number of guaranteed vcores that the application has
197+
* allocated times the number of seconds the application has been running.
198+
* @return the aggregated number of guaranteed vcore seconds
199+
*/
200+
@Public
201+
@Unstable
202+
public abstract long getGuaranteedVcoreSeconds();
203+
204+
/**
205+
* Get the aggregated number of guaranteed vcores that the application has
206+
* allocated times the number of seconds the application has been running.
207+
* @return the aggregated number of guaranteed vcore seconds
156208
*/
157209
@Public
158210
@Unstable
211+
@Deprecated
159212
public abstract long getVcoreSeconds();
160213

214+
/**
215+
* Get the aggregated amount of opportunistic memory (in megabytes) the
216+
* application has allocated times the number of seconds the application
217+
* has been running.
218+
* @return the aggregated amount of opportunistic memory seconds
219+
*/
220+
@Public
221+
@Unstable
222+
public abstract long getOpportunisticMemorySeconds();
223+
224+
/**
225+
* Get the aggregated number of opportunistic vcores that the application
226+
* has allocated times the number of seconds the application has been running.
227+
* @return the aggregated number of opportunistic vcore seconds
228+
*/
229+
@Public
230+
@Unstable
231+
public abstract long getOpportunisticVcoreSeconds();
232+
161233
/**
162234
* Get the percentage of resources of the queue that the app is using.
163235
* @return the percentage of resources of the queue that the app is using.
@@ -231,23 +303,35 @@ public static ApplicationResourceUsageReport newInstance(
231303
public abstract long getPreemptedVcoreSeconds();
232304

233305
/**
234-
* Get the aggregated number of resources that the application has
306+
* Get the aggregated number of guaranteed resources that the application has
235307
* allocated times the number of seconds the application has been running.
236-
* @return map containing the resource name and aggregated resource-seconds
308+
* @return map containing the resource name and aggregated guaranteed
309+
* resource-seconds
237310
*/
238311
@Public
239312
@Unstable
313+
@Deprecated
240314
public abstract Map<String, Long> getResourceSecondsMap();
241315

242316
/**
243-
* Set the aggregated number of resources that the application has
317+
* Get the aggregated number of guaranteed resources that the application has
318+
* allocated times the number of seconds the application has been running.
319+
* @return map containing the resource name and aggregated guaranteed
320+
* resource-seconds
321+
*/
322+
@Public
323+
@Unstable
324+
public abstract Map<String, Long> getGuaranteedResourceSecondsMap();
325+
326+
/**
327+
* Set the aggregated number of guaranteed resources that the application has
244328
* allocated times the number of seconds the application has been running.
245329
* @param resourceSecondsMap map containing the resource name and aggregated
246-
* resource-seconds
330+
* guaranteed resource-seconds
247331
*/
248332
@Private
249333
@Unstable
250-
public abstract void setResourceSecondsMap(
334+
public abstract void setGuaranteedResourceSecondsMap(
251335
Map<String, Long> resourceSecondsMap);
252336

253337

@@ -272,4 +356,24 @@ public abstract void setResourceSecondsMap(
272356
public abstract void setPreemptedResourceSecondsMap(
273357
Map<String, Long> preemptedResourceSecondsMap);
274358

359+
/**
360+
* Get the aggregated number of opportunistic resources that the application
361+
* has allocated times the number of seconds the application has been running.
362+
* @return map containing the resource name and aggregated opportunistic
363+
* resource-seconds
364+
*/
365+
@Public
366+
@Unstable
367+
public abstract Map<String, Long> getOpportunisticResourceSecondsMap();
368+
369+
/**
370+
* Set the aggregated number of opportunistic resources that the application
371+
* has allocated times the number of seconds the application has been running.
372+
* @param opportunisticResourceSecondsMap map containing the resource name
373+
* and aggregated opportunistic resource-seconds
374+
*/
375+
@Private
376+
@Unstable
377+
public abstract void setOpportunisticResourceSecondsMap(
378+
Map<String, Long> opportunisticResourceSecondsMap);
275379
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -251,6 +251,8 @@ message ApplicationResourceUsageReportProto {
251251
optional int64 preempted_vcore_seconds = 11;
252252
repeated StringLongMapProto application_resource_usage_map = 12;
253253
repeated StringLongMapProto application_preempted_resource_usage_map = 13;
254+
optional ResourceProto used_opportunistic_resources = 14;
255+
repeated StringLongMapProto application_opportunistic_resource_usage_map = 15;
254256
}
255257

256258
message ApplicationReportProto {

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -973,14 +973,19 @@ private ApplicationReport printApplicationReport(String applicationId)
973973

974974
private void printResourceUsage(PrintWriter appReportStr,
975975
ApplicationResourceUsageReport usageReport) {
976-
appReportStr.print("\tAggregate Resource Allocation : ");
976+
appReportStr.print("\tAggregate Guaranteed Resource Allocation : ");
977977
if (usageReport != null) {
978-
appReportStr.println(
979-
getResourceSecondsString(usageReport.getResourceSecondsMap()));
978+
appReportStr.println(getResourceSecondsString(
979+
usageReport.getGuaranteedResourceSecondsMap()));
980+
appReportStr.print("\tAggregate Opportunistic Resource Allocation : ");
981+
appReportStr.println(getResourceSecondsString(
982+
usageReport.getOpportunisticResourceSecondsMap()));
980983
appReportStr.print("\tAggregate Resource Preempted : ");
981984
appReportStr.println(getResourceSecondsString(
982985
usageReport.getPreemptedResourceSecondsMap()));
983986
} else {
987+
appReportStr.println("N/A");
988+
appReportStr.print("\tAggregate Opportunistic Resource Allocation : ");
984989
appReportStr.println("N/A");
985990
appReportStr.print("\tAggregate Resource Preempted : ");
986991
appReportStr.println("N/A");

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/TopCLI.java

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -170,13 +170,11 @@ private static class ApplicationInformation {
170170
appReport.getApplicationResourceUsageReport()
171171
.getNumReservedContainers();
172172
displayStringsMap.put(Columns.RCONT, String.valueOf(reservedContainers));
173-
usedVirtualCores =
174-
appReport.getApplicationResourceUsageReport().getUsedResources()
175-
.getVirtualCores();
173+
usedVirtualCores = appReport.getApplicationResourceUsageReport()
174+
.getGuaranteedResourcesUsed().getVirtualCores();
176175
displayStringsMap.put(Columns.VCORES, String.valueOf(usedVirtualCores));
177-
usedMemory =
178-
appReport.getApplicationResourceUsageReport().getUsedResources()
179-
.getMemorySize() / 1024;
176+
usedMemory = appReport.getApplicationResourceUsageReport()
177+
.getGuaranteedResourcesUsed().getMemorySize() / 1024;
180178
displayStringsMap.put(Columns.MEM, String.valueOf(usedMemory) + "G");
181179
reservedVirtualCores =
182180
appReport.getApplicationResourceUsageReport().getReservedResources()
@@ -195,11 +193,11 @@ private static class ApplicationInformation {
195193
progress = appReport.getProgress() * 100;
196194
displayStringsMap.put(Columns.PROGRESS, String.format("%.2f", progress));
197195
// store in GBSeconds
198-
memorySeconds =
199-
appReport.getApplicationResourceUsageReport().getMemorySeconds() / 1024;
196+
memorySeconds = appReport.getApplicationResourceUsageReport()
197+
.getGuaranteedMemorySeconds() / 1024;
200198
displayStringsMap.put(Columns.MEMSECS, String.valueOf(memorySeconds));
201-
vcoreSeconds =
202-
appReport.getApplicationResourceUsageReport().getVcoreSeconds();
199+
vcoreSeconds = appReport.getApplicationResourceUsageReport()
200+
.getGuaranteedVcoreSeconds();
203201
displayStringsMap.put(Columns.VCORESECS, String.valueOf(vcoreSeconds));
204202
}
205203
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -121,18 +121,27 @@ public void testGetApplicationReport() throws Exception {
121121
for (int i = 0; i < 2; ++i) {
122122
ApplicationCLI cli = createAndGetAppCLI();
123123
ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
124-
Map<String, Long> resourceSecondsMap = new HashMap<>();
124+
Map<String, Long> guaranteedResourceSecondsMap = new HashMap<>();
125+
Map<String, Long> opportunisticResourceSecondsMap = new HashMap<>();
125126
Map<String, Long> preemptedResoureSecondsMap = new HashMap<>();
126-
resourceSecondsMap.put(ResourceInformation.MEMORY_MB.getName(), 123456L);
127-
resourceSecondsMap.put(ResourceInformation.VCORES.getName(), 4567L);
127+
guaranteedResourceSecondsMap.put(
128+
ResourceInformation.MEMORY_MB.getName(), 123456L);
129+
guaranteedResourceSecondsMap.put(
130+
ResourceInformation.VCORES.getName(), 4567L);
131+
opportunisticResourceSecondsMap.put(
132+
ResourceInformation.MEMORY_MB.getName(), 123456L);
133+
opportunisticResourceSecondsMap.put(
134+
ResourceInformation.VCORES.getName(), 4567L);
128135
preemptedResoureSecondsMap
129136
.put(ResourceInformation.MEMORY_MB.getName(), 1111L);
130137
preemptedResoureSecondsMap
131138
.put(ResourceInformation.VCORES.getName(), 2222L);
132139
ApplicationResourceUsageReport usageReport = i == 0 ? null :
133140
ApplicationResourceUsageReport
134-
.newInstance(2, 0, null, null, null, resourceSecondsMap, 0, 0,
135-
preemptedResoureSecondsMap);
141+
.newInstance(2, 0, null, null, null,
142+
guaranteedResourceSecondsMap, 0, 0,
143+
preemptedResoureSecondsMap, null,
144+
opportunisticResourceSecondsMap);
136145
ApplicationReport newApplicationReport = ApplicationReport.newInstance(
137146
applicationId, ApplicationAttemptId.newInstance(applicationId, 1),
138147
"user", "queue", "appname", "host", 124, null,
@@ -168,7 +177,9 @@ public void testGetApplicationReport() throws Exception {
168177
pw.println("\tTracking-URL : N/A");
169178
pw.println("\tRPC Port : 124");
170179
pw.println("\tAM Host : host");
171-
pw.println("\tAggregate Resource Allocation : " +
180+
pw.println("\tAggregate Guaranteed Resource Allocation : " +
181+
(i == 0 ? "N/A" : "123456 MB-seconds, 4567 vcore-seconds"));
182+
pw.println("\tAggregate Opportunistic Resource Allocation : " +
172183
(i == 0 ? "N/A" : "123456 MB-seconds, 4567 vcore-seconds"));
173184
pw.println("\tAggregate Resource Preempted : " +
174185
(i == 0 ? "N/A" : "1111 MB-seconds, 2222 vcore-seconds"));

0 commit comments

Comments
 (0)