Hi All.
I have a 4 node cluster on which I run yarn. I created 2 queues "long" and
"short", first with 70% resource allocation, the second with 30%
allocation. Both queues are configured on all available nodes by default.
My memory for yarn per node is ~50GB. Initially I thought that when I run
tasks in the "short" queue, yarn would allocate them across all nodes, using
30% of the memory on every node. So, for example, if I run 20 tasks of 2GB
each (40GB in total) in the "short" queue:
- ~7 first will be scheduled on node1 (14GB total, 30% out of 50GB
available on this node for "short" queue -> 15GB)
- next ~7 tasks will be scheduled on node2
- ~6 remaining tasks will be scheduled on node3
- yarn on node4 will not use any resources assigned to "short" queue.
But this seems not to be the case. At the moment I see that all tasks are
started on node1 and other nodes have no tasks started.
I attached my yarn-site.xml and capacity-scheduler.xml.
Is there a way to force yarn to apply the thresholds configured above (70%
and 30%) per node rather than per cluster as a whole? I would like to get a
configuration in which on every node 30% is always available for the "short"
queue and 70% for the "long" queue, and in which any resources left free by
a particular queue are not used by other queues. Is it possible?
BR,
Rafal.
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!--
Capacity Scheduler queue layout: root has two child queues, "long" and
"short", with capacity 70 and 30 respectively, plus per-node-label
capacities for the labels node1d, node2d, node3d and node4d.
NOTE(review): queue capacity values are presumably percentages of the
parent queue's resources across the whole cluster (or label partition),
not of each individual node; confirm against the Capacity Scheduler docs.
-->
<!-- Scheduler-wide settings. -->
<property>
<name>yarn.scheduler.capacity.maximum-applications</name>
<value>10000</value>
</property>
<!-- NOTE(review): 0.5 presumably lets application masters use up to half of the resources; confirm. -->
<property>
<name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
<value>0.5</value>
</property>
<!-- NOTE(review): DefaultResourceCalculator presumably compares resources by memory only; confirm. -->
<property>
<name>yarn.scheduler.capacity.resource-calculator</name>
<value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
</property>
<!-- Child queues of root. -->
<property>
<name>yarn.scheduler.capacity.root.queues</name>
<value>long,short</value>
</property>
<!-- Queue "long": capacity and maximum-capacity are both 70. -->
<property>
<name>yarn.scheduler.capacity.root.long.capacity</name>
<value>70</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.long.maximum-capacity</name>
<value>70</value>
</property>
<!-- Node labels "long" may access, followed by its capacity and maximum-capacity for each label (70 for every label). -->
<property>
<name>yarn.scheduler.capacity.root.long.accessible-node-labels</name>
<value>node1d,node2d,node3d,node4d</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.long.accessible-node-labels.node1d.capacity</name>
<value>70</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.long.accessible-node-labels.node1d.maximum-capacity</name>
<value>70</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.long.accessible-node-labels.node2d.capacity</name>
<value>70</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.long.accessible-node-labels.node2d.maximum-capacity</name>
<value>70</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.long.accessible-node-labels.node3d.capacity</name>
<value>70</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.long.accessible-node-labels.node3d.maximum-capacity</name>
<value>70</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.long.accessible-node-labels.node4d.capacity</name>
<value>70</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.long.accessible-node-labels.node4d.maximum-capacity</name>
<value>70</value>
</property>
<!-- Queue "short": capacity and maximum-capacity are both 30. -->
<property>
<name>yarn.scheduler.capacity.root.short.capacity</name>
<value>30</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.short.maximum-capacity</name>
<value>30</value>
</property>
<!--
Per-label capacity and maximum-capacity for "short" (30 for every label).
NOTE(review): unlike "long", no accessible-node-labels list is set for
"short"; presumably it is inherited from the parent queue; verify.
-->
<property>
<name>yarn.scheduler.capacity.root.short.accessible-node-labels.node1d.capacity</name>
<value>30</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.short.accessible-node-labels.node1d.maximum-capacity</name>
<value>30</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.short.accessible-node-labels.node2d.capacity</name>
<value>30</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.short.accessible-node-labels.node2d.maximum-capacity</name>
<value>30</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.short.accessible-node-labels.node3d.capacity</name>
<value>30</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.short.accessible-node-labels.node3d.maximum-capacity</name>
<value>30</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.short.accessible-node-labels.node4d.capacity</name>
<value>30</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.short.accessible-node-labels.node4d.maximum-capacity</name>
<value>30</value>
</property>
<!-- User limit factor, state and ACLs: both queues are given identical values. -->
<property>
<name>yarn.scheduler.capacity.root.long.user-limit-factor</name>
<value>1</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.short.user-limit-factor</name>
<value>1</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.long.state</name>
<value>RUNNING</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.short.state</name>
<value>RUNNING</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.long.acl_submit_applications</name>
<value>*</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.short.acl_submit_applications</name>
<value>*</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.long.acl_administer_queue</name>
<value>*</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.short.acl_administer_queue</name>
<value>*</value>
</property>
<!-- NOTE(review): -1 presumably disables the node-locality delay; confirm. -->
<property>
<name>yarn.scheduler.capacity.node-locality-delay</name>
<value>-1</value>
</property>
</configuration>
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!--
YARN site configuration: NodeManager resources, scheduler allocation
limits, ResourceManager location, node labels, log retention and the
localizer cache.
-->
<!-- NODEMANAGER -->
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>32</value>
</property>
<!-- 51200 MB = 50 GB -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>51200</value>
</property>
<!--
Both the virtual (vmem) and physical (pmem) memory checks are set to false.
NOTE(review): the vmem-pmem-ratio of 10 presumably has no effect while the
vmem check is off; confirm.
-->
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>10</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<!-- SCHEDULER -->
<!-- VCores Allocation -->
<!-- Minimum and maximum vcores per allocation are both 1. -->
<property>
<name>yarn.scheduler.minimum-allocation-vcores</name>
<value>1</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>1</value>
</property>
<!-- NOTE(review): the increment-allocation-* properties appear to be Fair Scheduler settings and may be ignored by the CapacityScheduler selected below; confirm. -->
<property>
<name>yarn.scheduler.increment-allocation-vcores</name>
<value>1</value>
</property>
<!-- Memory Allocation -->
<!-- The 51200 MB maximum equals yarn.nodemanager.resource.memory-mb above. -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>512</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>51200</value>
</property>
<property>
<name>yarn.scheduler.increment-allocation-mb</name>
<value>512</value>
</property>
<!-- RESOURCEMANAGER -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>node1d</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
</property>
<!-- LABELS -->
<!-- Node labels are enabled and their store is rooted at a local file: URI. -->
<property>
<name>yarn.node-labels.fs-store.root-dir</name>
<value>file:///data/hadoop/conf</value>
</property>
<property>
<name>yarn.node-labels.enabled</name>
<value>true</value>
</property>
<!-- LOG MANAGEMENT -->
<!-- 345600 s = 4 days, used for both settings below. -->
<property>
<name>yarn.nodemanager.log.retain-seconds</name>
<value>345600</value>
</property>
<property>
<name>yarn.nodemanager.delete.debug-delay-sec</name>
<value>345600</value>
</property>
<!-- LOCALIZED FILES -->
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/data/hadoop/nm-local-dir</value>
</property>
<property>
<name>yarn.nodemanager.localizer.cache.cleanup.interval-ms</name>
<value>3600000</value> <!-- 1h -->
</property>
<property>
<name>yarn.nodemanager.localizer.cache.target-size-mb</name>
<value>20480</value> <!-- 20GB -->
</property>
</configuration>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]