ludlows commented on code in PR #6760:
URL: https://github.com/apache/iceberg/pull/6760#discussion_r1185665948
##########
spark/v3.3/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkExpressionConverter.scala:
##########
@@ -36,15 +35,30 @@ object SparkExpressionConverter {
SparkFilters.convert(DataSourceStrategy.translateFilter(sparkExpression,
supportNestedPredicatePushdown = true).get)
}
- @throws[AnalysisException]
- def collectResolvedSparkExpression(session: SparkSession, tableName: String,
where: String): Expression = {
+ def collectDeterministicSparkExpression(session: SparkSession,
+ tableName: String, where: String):
Boolean = {
+ // used only to check if a deterministic expression is true or false
+ val tableAttrs = session.table(tableName).queryExecution.analyzed.output
+ val firstColumnName = tableAttrs.head.name
+ val anotherWhere = s"$firstColumnName is not null and $where"
+ val unresolvedExpression =
session.sessionState.sqlParser.parseExpression(anotherWhere)
+ val filter = Filter(unresolvedExpression, DummyRelation(tableAttrs))
+ val optimizedLogicalPlan =
session.sessionState.executePlan(filter).optimizedPlan
+ val option = optimizedLogicalPlan.collectFirst {
+ case filter: Filter => Some(filter.condition)
+ }.getOrElse(Option.empty)
+ if (option.isDefined) true else false
+ }
+
+ def collectResolvedSparkExpressionOption(session: SparkSession,
+ tableName: String, where: String):
Option[Expression] = {
val tableAttrs = session.table(tableName).queryExecution.analyzed.output
val unresolvedExpression =
session.sessionState.sqlParser.parseExpression(where)
val filter = Filter(unresolvedExpression, DummyRelation(tableAttrs))
val optimizedLogicalPlan =
session.sessionState.executePlan(filter).optimizedPlan
optimizedLogicalPlan.collectFirst {
- case filter: Filter => filter.condition
- }.getOrElse(throw new AnalysisException("Failed to find filter
expression"))
+ case filter: Filter => Some(filter.condition)
Review Comment:
Hi @szehon-ho, yes, you and @aokolnychyi are right. The current
version now uses this method to distinguish alwaysTrue, alwaysFalse, and
undetermined. Thanks for your explanation. As I remember, I didn't use this
method previously because I didn't understand the behavior of `collectFirst`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]