martin-g commented on code in PR #1597:
URL:
https://github.com/apache/datafusion-ballista/pull/1597#discussion_r3152483554
##########
ballista/scheduler/src/config.rs:
##########
@@ -237,6 +249,10 @@ pub struct SchedulerConfig {
pub override_create_grpc_client_endpoint: Option<EndpointOverrideFn>,
/// Whether to use TLS when connecting to executors (for flight proxy)
pub use_tls: bool,
+ /// Number of failures attempts before task is considered failed"
+ pub task_max_failures: u64,
+ /// Number of failures attempts before stage is considered failed"
Review Comment:
```suggestion
/// Number of attempts before stage is considered failed
```
##########
ballista/scheduler/src/state/task_manager.rs:
##########
@@ -423,8 +428,8 @@ impl<T: 'static + AsLogicalPlan, U: 'static +
AsExecutionPlan> TaskManager<T, U>
graph.update_task_status(
executor,
statuses,
- TASK_MAX_FAILURES,
- STAGE_MAX_FAILURES,
+ self.task_max_failures.try_into().unwrap(),
Review Comment:
What is the reason to use `u64` in the config if it is only used here as
`usize`?
##########
ballista/scheduler/src/config.rs:
##########
@@ -170,10 +170,23 @@ pub struct Config {
#[arg(
long,
default_value_t = 15,
- help = "Interval, in seconds, to check expired or dead executors (."
+ help = "Interval, in seconds, to check expired or dead executors."
)]
pub expire_dead_executor_interval_seconds: u64,
-
+ /// Number of failures attempts before task is considered failed
+ #[arg(
+ long,
+ default_value_t = 4,
+ help = "Number of failures attempts before task is considered failed."
+ )]
+ pub task_max_failures: u64,
+ /// Number of failures attempts before stage is considered failed
+ #[arg(
+ long,
+ default_value_t = 4,
+ help = "Number of failures attempts before stage is considered failed."
Review Comment:
```suggestion
help = "Number of attempts before stage is considered failed."
```
##########
ballista/scheduler/src/config.rs:
##########
@@ -170,10 +170,23 @@ pub struct Config {
#[arg(
long,
default_value_t = 15,
- help = "Interval, in seconds, to check expired or dead executors (."
+ help = "Interval, in seconds, to check expired or dead executors."
)]
pub expire_dead_executor_interval_seconds: u64,
-
+ /// Number of failures attempts before task is considered failed
+ #[arg(
+ long,
+ default_value_t = 4,
+ help = "Number of failures attempts before task is considered failed."
Review Comment:
```suggestion
help = "Number of attempts before task is considered failed."
```
##########
ballista/scheduler/src/config.rs:
##########
@@ -237,6 +249,10 @@ pub struct SchedulerConfig {
pub override_create_grpc_client_endpoint: Option<EndpointOverrideFn>,
/// Whether to use TLS when connecting to executors (for flight proxy)
pub use_tls: bool,
+ /// Number of failures attempts before task is considered failed"
Review Comment:
```suggestion
/// Number of attempts before task is considered failed
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]