Hi guys,

In my SolrCloud (8.3.1) setup I’m using 5 nodes for one collection (say 
“collection1”), one on each node.
Now I would like to add a new collection on the same Solr cluster, but 
the new collection (say “collection2”) should be replicated 
only on nodes with sysprop.channel=mysysprop.

Whole setup runs on GKE (Kubernetes)

So if I scale up and add 5 additional nodes with sysprop.channel correctly 
set, and the cluster gets a new node, autoscaling dies with a NullPointerException 
while trying to fetch metrics from the new node (see logs attached).

This is not an IO issue because the nodes and zookeeper can talk to each other.
And if I call /metrics on these new nodes I also see the right properties.

Also I see no violations in the suggester.

Please help 😊

THX
Anton

Attached files:

  *   
autoscaling.json<https://eco.dev.search.d-p.io/solr-v3/admin/zookeeper?detail=true&path=%2Fautoscaling.json>
  *   Logs



{
  "policies":{"my-channel-policy":[{
        "replica":"<100",
        "shard":"#EACH",
        "collection":"collection2",
        "nodeset":{"sysprop.channel":"mychannel"}
        }]},
  "cluster-preferences":[
    {
      "minimize":"cores",
      "precision":1},
    {"maximize":"freedisk"}],
  "triggers":{
    ".auto_add_replicas":{
      "name":".auto_add_replicas",
      "event":"nodeLost",
      "waitFor":120,
      "enabled":true,
      "actions":[
        {
          "name":"auto_add_replicas_plan",
          "class":"solr.AutoAddReplicasPlanAction"},
        {
          "name":"execute_plan",
          "class":"solr.ExecutePlanAction"}]},
    ".scheduled_maintenance":{
      "name":".scheduled_maintenance",
      "event":"scheduled",
      "startTime":"NOW",
      "every":"+1DAY",
      "enabled":true,
      "actions":[
        {
          "name":"inactive_shard_plan",
          "class":"solr.InactiveShardPlanAction"},
        {
          "name":"inactive_markers_plan",
          "class":"solr.InactiveMarkersPlanAction"},
        {
          "name":"execute_plan",
          "class":"solr.ExecutePlanAction"}]},
    "node_added_trigger":{
      "event":"nodeAdded",
      "waitFor":20,
      "enabled":true,
      "actions":[
        {
          "name":"compute_plan",
          "class":"solr.ComputePlanAction"},
        {
          "name":"execute_plan",
          "class":"solr.ExecutePlanAction"}]}},
  "listeners":{
    ".auto_add_replicas.system":{
      "beforeAction":[],
      "afterAction":[],
      "stage":[
        "STARTED",
        "ABORTED",
        "SUCCEEDED",
        "FAILED",
        "BEFORE_ACTION",
        "AFTER_ACTION",
        "IGNORED"],
      "trigger":".auto_add_replicas",
      "class":"org.apache.solr.cloud.autoscaling.SystemLogListener"},
    ".scheduled_maintenance.system":{
      "beforeAction":[],
      "afterAction":[],
      "stage":[
        "STARTED",
        "ABORTED",
        "SUCCEEDED",
        "FAILED",
        "BEFORE_ACTION",
        "AFTER_ACTION",
        "IGNORED"],
      "trigger":".scheduled_maintenance",
      "class":"org.apache.solr.cloud.autoscaling.SystemLogListener"},
    "node_added_trigger.system":{
      "beforeAction":[],
      "afterAction":[],
      "stage":[
        "STARTED",
        "ABORTED",
        "SUCCEEDED",
        "FAILED",
        "BEFORE_ACTION",
        "AFTER_ACTION",
        "IGNORED"],
      "trigger":"node_added_trigger",
      "class":"org.apache.solr.cloud.autoscaling.SystemLogListener"}},
  "properties":{}}



2020-07-29 14:54:52.372 WARN  (AutoscalingActionExecutor-8-thread-1) [   ] 
o.a.s.c.a.ScheduledTriggers Exception executing actions => 
org.apache.solr.cloud.autoscaling.TriggerActionException: Error processing 
action for trigger event: {
  "id":"ca5c53772b355Teaxi61tbu5rj7uqysz5vrcgll",
org.apache.solr.cloud.autoscaling.TriggerActionException: Error processing 
action for trigger event: {
  "id":"ca5c53772b355Teaxi61tbu5rj7uqysz5vrcgll",
  "source":"node_added_trigger",
  "eventTime":3559966177932117,
  "eventType":"NODEADDED",
  "properties":{
    "eventTimes":[3559966177932117],
    "preferredOperation":"movereplica",
    "_enqueue_time_":3559967181279816,
    
"nodeNames":["solr-sede-v3-1.solr-headless-v3.search-preprod-europe-west4-b.svc.cluster.local:8983_solr"],
    "replicaType":"NRT"}}
at 
org.apache.solr.cloud.autoscaling.ScheduledTriggers.lambda$null$3(ScheduledTriggers.java:327)
 ~[?:?]
at java.util.concurrent.Executors$RunnableAdapter.call(Unknown Source) ~[?:?]
at java.util.concurrent.FutureTask.run(Unknown Source) ~[?:?]
at 
org.apache.solr.common.util.ExecutorUtil$MDCAwareThreadPoolExecutor.lambda$execute$0(ExecutorUtil.java:210)
 ~[?:?]
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source) ~[?:?]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source) ~[?:?]
at java.lang.Thread.run(Unknown Source) [?:?]
Caused by: org.apache.solr.common.SolrException: Unexpected exception while 
processing event: {
  "id":"ca5c53772b355Teaxi61tbu5rj7uqysz5vrcgll",
  "source":"node_added_trigger",
  "eventTime":3559966177932117,
  "eventType":"NODEADDED",
  "properties":{
    "eventTimes":[3559966177932117],
    "preferredOperation":"movereplica",
    "_enqueue_time_":3559967181279816,
    
"nodeNames":["solr-sede-v3-1.solr-headless-v3.search-preprod-europe-west4-b.svc.cluster.local:8983_solr"],
    "replicaType":"NRT"}}
at 
org.apache.solr.cloud.autoscaling.ComputePlanAction.process(ComputePlanAction.java:161)
 ~[?:?]
at 
org.apache.solr.cloud.autoscaling.ScheduledTriggers.lambda$null$3(ScheduledTriggers.java:324)
 ~[?:?]
... 6 more
Caused by: org.apache.solr.common.SolrException: 
org.apache.solr.common.SolrException: Error getting remote info
at 
org.apache.solr.common.cloud.rule.ImplicitSnitch.getTags(ImplicitSnitch.java:78)
 ~[?:?]
at 
org.apache.solr.client.solrj.impl.SolrClientNodeStateProvider.fetchTagValues(SolrClientNodeStateProvider.java:139)
 ~[?:?]
at 
org.apache.solr.client.solrj.impl.SolrClientNodeStateProvider.getNodeValues(SolrClientNodeStateProvider.java:128)
 ~[?:?]
at org.apache.solr.client.solrj.cloud.autoscaling.Row.<init>(Row.java:71) ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.Policy$Session.<init>(Policy.java:575)
 ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.Policy.createSession(Policy.java:396)
 ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.Policy.createSession(Policy.java:358)
 ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper$SessionRef.createSession(PolicyHelper.java:492)
 ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper$SessionRef.get(PolicyHelper.java:457)
 ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper.getSession(PolicyHelper.java:513)
 ~[?:?]
at 
org.apache.solr.cloud.autoscaling.ComputePlanAction.process(ComputePlanAction.java:90)
 ~[?:?]
at 
org.apache.solr.cloud.autoscaling.ScheduledTriggers.lambda$null$3(ScheduledTriggers.java:324)
 ~[?:?]
... 6 more
Caused by: org.apache.solr.common.SolrException: Error getting remote info
at 
org.apache.solr.client.solrj.impl.SolrClientNodeStateProvider$AutoScalingSnitch.getRemoteInfo(SolrClientNodeStateProvider.java:364)
 ~[?:?]
at 
org.apache.solr.common.cloud.rule.ImplicitSnitch.getTags(ImplicitSnitch.java:76)
 ~[?:?]
at 
org.apache.solr.client.solrj.impl.SolrClientNodeStateProvider.fetchTagValues(SolrClientNodeStateProvider.java:139)
 ~[?:?]
at 
org.apache.solr.client.solrj.impl.SolrClientNodeStateProvider.getNodeValues(SolrClientNodeStateProvider.java:128)
 ~[?:?]
at org.apache.solr.client.solrj.cloud.autoscaling.Row.<init>(Row.java:71) ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.Policy$Session.<init>(Policy.java:575)
 ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.Policy.createSession(Policy.java:396)
 ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.Policy.createSession(Policy.java:358)
 ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper$SessionRef.createSession(PolicyHelper.java:492)
 ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper$SessionRef.get(PolicyHelper.java:457)
 ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper.getSession(PolicyHelper.java:513)
 ~[?:?]
at 
org.apache.solr.cloud.autoscaling.ComputePlanAction.process(ComputePlanAction.java:90)
 ~[?:?]
at 
org.apache.solr.cloud.autoscaling.ScheduledTriggers.lambda$null$3(ScheduledTriggers.java:324)
 ~[?:?]
... 6 more
Caused by: java.lang.NullPointerException
at 
org.apache.solr.client.solrj.impl.SolrClientNodeStateProvider$AutoScalingSnitch.getRemoteInfo(SolrClientNodeStateProvider.java:338)
 ~[?:?]
at 
org.apache.solr.common.cloud.rule.ImplicitSnitch.getTags(ImplicitSnitch.java:76)
 ~[?:?]
at 
org.apache.solr.client.solrj.impl.SolrClientNodeStateProvider.fetchTagValues(SolrClientNodeStateProvider.java:139)
 ~[?:?]
at 
org.apache.solr.client.solrj.impl.SolrClientNodeStateProvider.getNodeValues(SolrClientNodeStateProvider.java:128)
 ~[?:?]
at org.apache.solr.client.solrj.cloud.autoscaling.Row.<init>(Row.java:71) ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.Policy$Session.<init>(Policy.java:575)
 ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.Policy.createSession(Policy.java:396)
 ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.Policy.createSession(Policy.java:358)
 ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper$SessionRef.createSession(PolicyHelper.java:492)
 ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper$SessionRef.get(PolicyHelper.java:457)
 ~[?:?]
at 
org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper.getSession(PolicyHelper.java:513)
 ~[?:?]
at 
org.apache.solr.cloud.autoscaling.ComputePlanAction.process(ComputePlanAction.java:90)
 ~[?:?]
at 
org.apache.solr.cloud.autoscaling.ScheduledTriggers.lambda$null$3(ScheduledTriggers.java:324)
 ~[?:?]
... 6 more



Reply via email to