sigram commented on a change in pull request #1100: SOLR-13579: Create resource 
management API
URL: https://github.com/apache/lucene-solr/pull/1100#discussion_r363873784
 
 

 ##########
 File path: 
solr/core/src/java/org/apache/solr/managed/types/CacheManagerPool.java
 ##########
 @@ -0,0 +1,329 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.managed.types;
+
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Function;
+
+import org.apache.solr.managed.ChangeListener;
+import org.apache.solr.managed.ResourceManager;
+import org.apache.solr.managed.ResourceManagerPool;
+import org.apache.solr.metrics.SolrMetricsContext;
+import org.apache.solr.search.SolrCache;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * An implementation of {@link org.apache.solr.managed.ResourceManagerPool} 
specific to
+ * the management of {@link org.apache.solr.search.SolrCache} instances.
+ * <p>This plugin calculates the total size and maxRamMB of all registered 
cache instances
+ * and adjusts each cache's limits so that the aggregated values again fit 
within the pool limits.</p>
+ * <p>In order to avoid thrashing the plugin uses a dead band (by default 
{@link #DEFAULT_DEAD_BAND}),
+ * which can be adjusted using configuration parameter {@link 
#DEAD_BAND_PARAM}. If monitored values don't
+ * exceed the limits +/- the dead band then no forcible adjustment takes 
place.</p>
+ * <p>The management strategy consists of two distinct phases: soft 
optimization phase and then hard limit phase.</p>
+ * <p><b>Soft optimization</b> tries to adjust the resource consumption based 
on the cache hit ratio.
+ * This phase is executed only if there's no total limit exceeded. Also, hit 
ratio is considered a valid monitored
+ * variable only when at least N lookups occurred since the last adjustment 
(default value is {@link #DEFAULT_LOOKUP_DELTA}).
+ * If the hit ratio is higher than a threshold (default value is {@link 
#DEFAULT_TARGET_HITRATIO}) then the size
+ * of the cache can be reduced so that the resource consumption is minimized 
while still keeping acceptable hit
+ * ratio - and vice versa.</p>
+ * <p>This optimization phase can only adjust the limits within a {@link 
#DEFAULT_MAX_ADJUST_RATIO}, i.e. increased
+ * or decreased values may not be larger / smaller than this multiple / 
fraction of the initially configured limit.</p>
+ * <p><b>Hard limit</b> phase follows the soft optimization phase and it 
forcibly reduces resource consumption of all components
+ * if the total usage is still above the pool limit after the first phase has 
completed. Each component's limit is reduced
+ * by the same factor, regardless of the actual population or hit ratio.</p>
+ */
+public class CacheManagerPool extends ResourceManagerPool<SolrCache> {
+  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  public static String TYPE = "cache";
+
+  /** Controller dead-band - changes smaller than this ratio will be ignored. 
*/
+  public static final String DEAD_BAND_PARAM = "deadBand";
+  /** Target hit ratio - high enough to be useful, low enough to avoid 
excessive cache size. */
+  public static final String TARGET_HIT_RATIO_PARAM = "targetHitRatio";
+  /**
+   * Maximum allowed adjustment ratio from the initial configuration value. 
Adjusted value may not be
+   * higher than multiple of this factor, and not lower than divided by this 
factor.
+   */
+  public static final String MAX_ADJUST_RATIO_PARAM = "maxAdjustRatio";
+  /**
+   * Minimum number of lookups since last adjustment to consider the reported 
hitRatio
+   *  to be statistically valid.
+   */
+  public static final String MIN_LOOKUP_DELTA_PARAM = "minLookupDelta";
+  /** Default value of dead band (10%). */
+  public static final double DEFAULT_DEAD_BAND = 0.1;
+  /** Default target hit ratio - a compromise between usefulness and limited 
resource usage. */
+  public static final double DEFAULT_TARGET_HITRATIO = 0.8;
+  /**
+   * Default minimum number of lookups since the last adjustment. This can be 
treated as Bernoulli trials
+   * that give a 5% confidence about the statistical validity of hit ratio 
(<code>0.5 / sqrt(lookups)</code>).
+   */
+  public static final long DEFAULT_LOOKUP_DELTA = 100;
+  /**
+   * Default maximum adjustment ratio from the initially configured values.
+   */
+  public static final double DEFAULT_MAX_ADJUST_RATIO = 2.0;
+
+  protected static final Map<String, Function<Map<String, Object>, Double>> 
controlledToMonitored = new HashMap<>();
+
+  static {
+    controlledToMonitored.put(SolrCache.MAX_RAM_MB_PARAM, values -> {
+      Number ramBytes = (Number) values.get(SolrCache.RAM_BYTES_USED_PARAM);
+      return ramBytes != null ? ramBytes.doubleValue() / SolrCache.MB : 0.0;
+    });
+    controlledToMonitored.put(SolrCache.MAX_SIZE_PARAM, values ->
+        ((Number)values.getOrDefault(SolrCache.SIZE_PARAM, 
-1.0)).doubleValue());
+  }
+
+  // Tunables initialized to defaults; deadBand may be overridden via poolParams in the
+  // constructor - the remaining three currently keep their defaults (see constructor).
+  protected double deadBand = DEFAULT_DEAD_BAND;
+  protected double targetHitRatio = DEFAULT_TARGET_HITRATIO;
+  protected long lookupDelta = DEFAULT_LOOKUP_DELTA;
+  protected double maxAdjustRatio = DEFAULT_MAX_ADJUST_RATIO;
+  // Per-component lookup count recorded at the last adjustment, keyed by component id.
+  protected Map<String, Long> lookups = new HashMap<>();
+  // Limits of each component captured at registration time, keyed by component id.
+  protected Map<String, Map<String, Object>> initialComponentLimits = new 
HashMap<>();
+
+  public CacheManagerPool(String name, String type, ResourceManager 
resourceManager, Map<String, Object> poolLimits, Map<String, Object> 
poolParams) {
+    super(name, type, resourceManager, poolLimits, poolParams);
+    String str = String.valueOf(poolParams.getOrDefault(DEAD_BAND_PARAM, 
DEFAULT_DEAD_BAND));
+    try {
+      deadBand = Double.parseDouble(str);
+    } catch (Exception e) {
+      log.warn("Invalid deadBand parameter value '" + str + "', using default 
" + DEFAULT_DEAD_BAND);
+    }
+  }
+
+  @Override
+  public void registerComponent(SolrCache component) {
+    super.registerComponent(component);
+    initialComponentLimits.put(component.getManagedComponentId().toString(), 
getResourceLimits(component));
+  }
+
+  @Override
+  public boolean unregisterComponent(String componentId) {
+    lookups.remove(componentId);
+    initialComponentLimits.remove(componentId);
+    return super.unregisterComponent(componentId);
+  }
+
+  @Override
+  public Object doSetResourceLimit(SolrCache component, String limitName, 
Object val) {
+    if (!(val instanceof Number)) {
+      try {
+        val = Long.parseLong(String.valueOf(val));
+      } catch (Exception e) {
+        throw new IllegalArgumentException("Unsupported value type (not a 
number) for limit '" + limitName + "': " + val + " (" + 
val.getClass().getName() + ")");
+      }
+    }
+    Number value = (Number)val;
+    if (value.longValue() > Integer.MAX_VALUE) {
+      throw new IllegalArgumentException("Invalid new value for limit '" + 
limitName +"': " + value);
+    }
+    switch (limitName) {
+      case SolrCache.MAX_SIZE_PARAM:
+        component.setMaxSize(value.intValue());
+        break;
+      case SolrCache.MAX_RAM_MB_PARAM:
+        component.setMaxRamMB(value.intValue());
+        break;
+      default:
+        throw new IllegalArgumentException("Unsupported limit name '" + 
limitName + "'");
+    }
+    return value.intValue();
+  }
+
+  @Override
+  public Map<String, Object> getResourceLimits(SolrCache component) {
+    Map<String, Object> limits = new HashMap<>();
+    limits.put(SolrCache.MAX_SIZE_PARAM, component.getMaxSize());
+    limits.put(SolrCache.MAX_RAM_MB_PARAM, component.getMaxRamMB());
+    return limits;
+  }
+
+  @Override
+  public Map<String, Object> getMonitoredValues(SolrCache component) throws 
Exception {
+    Map<String, Object> values = new HashMap<>();
+    values.put(SolrCache.SIZE_PARAM, component.size());
+    values.put(SolrCache.RAM_BYTES_USED_PARAM, component.ramBytesUsed());
+    SolrMetricsContext metricsContext = component.getSolrMetricsContext();
+    if (metricsContext != null) {
+      Map<String, Object> metrics = metricsContext.getMetricsSnapshot();
+      String key = component.getCategory().toString() + "." + 
metricsContext.getScope() + "." + SolrCache.HIT_RATIO_PARAM;
+      values.put(SolrCache.HIT_RATIO_PARAM, metrics.get(key));
+      key = component.getCategory().toString() + "." + 
metricsContext.getScope() + "." + SolrCache.LOOKUPS_PARAM;
+      values.put(SolrCache.LOOKUPS_PARAM, metrics.get(key));
+    }
+    return values;
+  }
+
+  @Override
+  protected void doManage() throws Exception {
+    Map<String, Map<String, Object>> currentValues = getCurrentValues();
+    Map<String, Object> totalValues = aggregateTotalValues(currentValues);
+    // pool limits are defined using controlled tags
+    poolLimits.forEach((poolLimitName, value) -> {
+      // only numeric limits are supported
+      if (value == null || !(value instanceof Number)) {
+        return;
+      }
+      double poolLimitValue = ((Number)value).doubleValue();
+      if (poolLimitValue <= 0) {
+        return;
+      }
+      Function<Map<String, Object>, Double> func = 
controlledToMonitored.get(poolLimitName);
+      if (func == null) {
+        return;
+      }
+      Double totalValue = func.apply(totalValues);
+      if (totalValue.doubleValue() <= 0.0) {
+        return;
+      }
+      double totalDelta = poolLimitValue - totalValue.doubleValue();
+
+      // dead band to avoid thrashing
+      if (Math.abs(totalDelta / poolLimitValue) < deadBand) {
+        return;
+      }
+
+      List<SolrCache> adjustableComponents = new ArrayList<>();
+      components.forEach((name, component) -> {
+        Map<String, Object> resourceLimits = getResourceLimits((SolrCache) 
component);
+        Object limit = resourceLimits.get(poolLimitName);
+        // XXX we could attempt here to control eg. ramBytesUsed by adjusting 
maxSize limit
+        // XXX and vice versa if the current limit is undefined or unsupported
+        if (limit == null || !(limit instanceof Number)) {
+          return;
+        }
+        double currentResourceLimit = ((Number)limit).doubleValue();
+        if (currentResourceLimit <= 0) { // undefined or unsupported
+          return;
+        }
+        adjustableComponents.add(component);
+      });
+      optimize(adjustableComponents, currentValues, poolLimitName, 
poolLimitValue, totalValue.doubleValue());
+    });
+  }
+
+  /**
+   * Manage all eligible components that support this pool limit.
+   */
+  private void optimize(List<SolrCache> components, Map<String, Map<String, 
Object>> currentValues, String limitName,
+                        double poolLimitValue, double totalValue) {
+    // changeRatio > 1.0 means there are available free resources
+    // changeRatio < 1.0 means there's shortage of resources
+    final AtomicReference<Double> changeRatio = new 
AtomicReference<>(poolLimitValue / totalValue);
+
+    // ========================== OPTIMIZATION ==============================
+    // if the situation is not critical (ie. total consumption is less than 
max)
+    // try to proactively optimize by reducing the size of caches with too 
high hitRatio
+    // (because a lower hit ratio is still acceptable if it means saving 
resources) and
+    // expand the size of caches with too low hitRatio
+    final AtomicReference<Double> newTotalValue = new 
AtomicReference<>(totalValue);
+    components.forEach(component -> {
+      long currentLookups = 
((Number)currentValues.get(component.getManagedComponentId().toString()).get(SolrCache.LOOKUPS_PARAM)).longValue();
+      long lastLookups = 
lookups.computeIfAbsent(component.getManagedComponentId().toString(), k -> 0L);
+      if (currentLookups < lastLookups + lookupDelta) {
+        // too little data, skip the optimization
+        return;
+      }
 
 Review comment:
   If the lookups counter has been reset due to a commit we should simply skip any
optimization - because we really don't know how useful the content of the
cache is at the moment.
   
   Using cumulative stats is a double-edged sword, specifically because they
are never reset... Take for example the cumulative hit ratio - it's calculated
using cumulative_lookups and cumulative_hits. This means that short-term
fluctuations are averaged out, but it also means that the short-term effects of
any adjustments are also averaged out (ie. they don't seem to affect the hit
ratio in the short term), which during the next round of optimization will cause
more and more adjustments, and the controller will eventually grossly overshoot
its target. This issue becomes more and more serious as the absolute numbers
increase, because it becomes more and more difficult to sway this average.
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to