fx19880617 commented on a change in pull request #5741: URL: https://github.com/apache/incubator-pinot/pull/5741#discussion_r459630478
########## File path: pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java ########## @@ -209,93 +210,99 @@ public void run() .get(PLUGINS_INCLUDE_PROPERTY_NAME) : null; final URI finalInputDirURI = inputDirURI; final URI finalOutputDirURI = (stagingDirURI == null) ? outputDirURI : stagingDirURI; - pathRDD.foreach(pathAndIdx -> { - for (PinotFSSpec pinotFSSpec : _spec.getPinotFSSpecs()) { - PinotFSFactory.register(pinotFSSpec.getScheme(), pinotFSSpec.getClassName(), new PinotConfiguration(pinotFSSpec)); - } - PinotFS finalOutputDirFS = PinotFSFactory.create(finalOutputDirURI.getScheme()); - String[] splits = pathAndIdx.split(" "); - String path = splits[0]; - int idx = Integer.valueOf(splits[1]); - // Load Pinot Plugins copied from Distributed cache. - File localPluginsTarFile = new File(PINOT_PLUGINS_TAR_GZ); - if (localPluginsTarFile.exists()) { - File pluginsDirFile = new File(PINOT_PLUGINS_DIR + "-" + idx); - try { - TarGzCompressionUtils.untar(localPluginsTarFile, pluginsDirFile); - } catch (Exception e) { - LOGGER.error("Failed to untar local Pinot plugins tarball file [{}]", localPluginsTarFile, e); - throw new RuntimeException(e); + // Prevent using lambda expression in Spark to avoid potential serialization exceptions, use inner function instead. Review comment: This does not happen in every Spark version/cluster; it is likely a bug in Spark, but we need to accommodate it. We did not hit this issue until some users reported it. I don't have a good way to prevent it apart from code review :( ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org