This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
commit 231c0bbe61c07eb237fa4e13457acd88e0038794 Author: beliefer <[email protected]> AuthorDate: Mon Mar 2 15:15:49 2020 +0900 [SPARK-30891][CORE][DOC] Add version information to the configuration of History ### What changes were proposed in this pull request? 1. Add version information to the configuration of `History`. 2. Update the docs of `History`. I sorted out some information, shown below. Item name | Since version | JIRA ID | Commit ID | Note -- | -- | -- | -- | -- spark.history.fs.logDirectory | 1.1.0 | SPARK-1768 | 21ddd7d1e9f8e2a726427f32422c31706a20ba3f#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e | spark.history.fs.safemodeCheck.interval | 1.6.0 | SPARK-11020 | cf04fdfe71abc395163a625cc1f99ec5e54cc07e#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e | spark.history.fs.update.interval | 1.4.0 | SPARK-6046 | 4527761bcd6501c362baf2780905a0018b9a74ba#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e | spark.history.fs.cleaner.enabled | 1.3.0 | SPARK-3562 | 8942b522d8a3269a2a357e3a274ed4b3e66ebdde#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e | Branch branch-1.3 does not exist, exists in branch-1.4, but it is 1.3.0-SNAPSHOT in pom.xml (the code and docs in this change record 1.4.0) spark.history.fs.cleaner.interval | 1.4.0 | SPARK-5933 | 1991337336596f94698e79c2366f065c374128ab#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e | spark.history.fs.cleaner.maxAge | 1.4.0 | SPARK-5933 | 1991337336596f94698e79c2366f065c374128ab#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e | spark.history.fs.cleaner.maxNum | 3.0.0 | SPARK-28294 | bbc2be4f425c4c26450e1bf21db407e81046ce21#diff-6bddeb5e25239974fc13db66266b167b | spark.history.store.path | 2.3.0 | SPARK-20642 | 74daf622de4e534d5a5929b424a6e836850eefad#diff-19f35f981fdc5b0a46f070b879a9a9fc | spark.history.store.maxDiskUsage | 2.3.0 | SPARK-20654 | 8b497046c647a21bbed1bdfbdcb176745a1d5cd5#diff-19f35f981fdc5b0a46f070b879a9a9fc | spark.history.ui.port | 1.0.0 | SPARK-1276 | 9ae80bf9bd3e4da7443af97b41fe26aa5d35d70b#diff-b49b5b9c31ddb36a9061004b5b723058 | spark.history.fs.inProgressOptimization.enabled | 2.4.0 | SPARK-6951 | 
653fe02415a537299e15f92b56045569864b6183#diff-19f35f981fdc5b0a46f070b879a9a9fc | spark.history.fs.endEventReparseChunkSize | 2.4.0 | SPARK-6951 | 653fe02415a537299e15f92b56045569864b6183#diff-19f35f981fdc5b0a46f070b879a9a9fc | spark.history.fs.eventLog.rolling.maxFilesToRetain | 3.0.0 | SPARK-30481 | a2fe73b83c0e7c61d1c83b236565a71e3d005a71#diff-6bddeb5e25239974fc13db66266b167b | spark.history.fs.eventLog.rolling.compaction.score.threshold | 3.0.0 | SPARK-30481 | a2fe73b83c0e7c61d1c83b236565a71e3d005a71#diff-6bddeb5e25239974fc13db66266b167b | spark.history.fs.driverlog.cleaner.enabled | 3.0.0 | SPARK-25118 | 5f11e8c4cb9a5db037ac239b8fcc97f3a746e772#diff-6bddeb5e25239974fc13db66266b167b | spark.history.fs.driverlog.cleaner.interval | 3.0.0 | SPARK-25118 | 5f11e8c4cb9a5db037ac239b8fcc97f3a746e772#diff-6bddeb5e25239974fc13db66266b167b | spark.history.fs.driverlog.cleaner.maxAge | 3.0.0 | SPARK-25118 | 5f11e8c4cb9a5db037ac239b8fcc97f3a746e772#diff-6bddeb5e25239974fc13db66266b167b | spark.history.ui.acls.enable | 1.0.1 | SPARK-1489 | c8dd13221215275948b1a6913192d40e0c8cbadd#diff-b49b5b9c31ddb36a9061004b5b723058 | spark.history.ui.admin.acls | 2.1.1 | SPARK-19033 | 4ca1788805e4a0131ba8f0ccb7499ee0e0242837#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e | spark.history.ui.admin.acls.groups | 2.1.1 | SPARK-19033 | 4ca1788805e4a0131ba8f0ccb7499ee0e0242837#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e | spark.history.fs.numReplayThreads | 2.0.0 | SPARK-13988 | 6fdd0e32a6c3fdce1f3f7e1f8d252af05c419f7b#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e | spark.history.retainedApplications | 1.0.0 | SPARK-1276 | 9ae80bf9bd3e4da7443af97b41fe26aa5d35d70b#diff-b49b5b9c31ddb36a9061004b5b723058 | spark.history.provider | 1.1.0 | SPARK-1768 | 21ddd7d1e9f8e2a726427f32422c31706a20ba3f#diff-a7befb99e7bd7e3ab5c46c2568aa5b3e | spark.history.kerberos.enabled | 1.0.1 | SPARK-1490 | 866b03ef4d27b2160563b58d577de29ba6eb4442#diff-b49b5b9c31ddb36a9061004b5b723058 | spark.history.kerberos.principal | 1.0.1 | SPARK-1490 | 
866b03ef4d27b2160563b58d577de29ba6eb4442#diff-b49b5b9c31ddb36a9061004b5b723058 | spark.history.kerberos.keytab | 1.0.1 | SPARK-1490 | 866b03ef4d27b2160563b58d577de29ba6eb4442#diff-b49b5b9c31ddb36a9061004b5b723058 | spark.history.custom.executor.log.url | 3.0.0 | SPARK-26311 | ae5b2a6a92be4986ef5b8062d7fb59318cff6430#diff-6bddeb5e25239974fc13db66266b167b | spark.history.custom.executor.log.url.applyIncompleteApplication | 3.0.0 | SPARK-26311 | ae5b2a6a92be4986ef5b8062d7fb59318cff6430#diff-6bddeb5e25239974fc13db66266b167b | ### Why are the changes needed? To supplement the configuration with version information. ### Does this PR introduce any user-facing change? No ### How was this patch tested? Existing UT Closes #27751 from beliefer/add-version-to-history-config. Authored-by: beliefer <[email protected]> Signed-off-by: HyukjinKwon <[email protected]> --- .../org/apache/spark/internal/config/History.scala | 28 ++++++++++++++++++++++ docs/monitoring.md | 26 +++++++++++++++++++- 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/History.scala b/core/src/main/scala/org/apache/spark/internal/config/History.scala index 8f99908..581777d 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/History.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/History.scala @@ -26,46 +26,56 @@ private[spark] object History { val DEFAULT_LOG_DIR = "file:/tmp/spark-events" val HISTORY_LOG_DIR = ConfigBuilder("spark.history.fs.logDirectory") + .version("1.1.0") .stringConf .createWithDefault(DEFAULT_LOG_DIR) val SAFEMODE_CHECK_INTERVAL_S = ConfigBuilder("spark.history.fs.safemodeCheck.interval") + .version("1.6.0") .timeConf(TimeUnit.SECONDS) .createWithDefaultString("5s") val UPDATE_INTERVAL_S = ConfigBuilder("spark.history.fs.update.interval") + .version("1.4.0") .timeConf(TimeUnit.SECONDS) .createWithDefaultString("10s") val CLEANER_ENABLED = ConfigBuilder("spark.history.fs.cleaner.enabled") + 
.version("1.4.0") .booleanConf .createWithDefault(false) val CLEANER_INTERVAL_S = ConfigBuilder("spark.history.fs.cleaner.interval") + .version("1.4.0") .timeConf(TimeUnit.SECONDS) .createWithDefaultString("1d") val MAX_LOG_AGE_S = ConfigBuilder("spark.history.fs.cleaner.maxAge") + .version("1.4.0") .timeConf(TimeUnit.SECONDS) .createWithDefaultString("7d") val MAX_LOG_NUM = ConfigBuilder("spark.history.fs.cleaner.maxNum") .doc("The maximum number of log files in the event log directory.") + .version("3.0.0") .intConf .createWithDefault(Int.MaxValue) val LOCAL_STORE_DIR = ConfigBuilder("spark.history.store.path") .doc("Local directory where to cache application history information. By default this is " + "not set, meaning all history information will be kept in memory.") + .version("2.3.0") .stringConf .createOptional val MAX_LOCAL_DISK_USAGE = ConfigBuilder("spark.history.store.maxDiskUsage") + .version("2.3.0") .bytesConf(ByteUnit.BYTE) .createWithDefaultString("10g") val HISTORY_SERVER_UI_PORT = ConfigBuilder("spark.history.ui.port") .doc("Web UI port to bind Spark History Server") + .version("1.0.0") .intConf .createWithDefault(18080) @@ -73,6 +83,7 @@ private[spark] object History { ConfigBuilder("spark.history.fs.inProgressOptimization.enabled") .doc("Enable optimized handling of in-progress logs. This option may leave finished " + "applications that fail to rename their event logs listed as in-progress.") + .version("2.4.0") .booleanConf .createWithDefault(true) @@ -81,6 +92,7 @@ private[spark] object History { .doc("How many bytes to parse at the end of log files looking for the end event. " + "This is used to speed up generation of application listings by skipping unnecessary " + "parts of event log files. It can be disabled by setting this config to 0.") + .version("2.4.0") .bytesConf(ByteUnit.BYTE) .createWithDefaultString("1m") @@ -90,6 +102,7 @@ private[spark] object History { "By default, all event log files will be retained. 
Please set the configuration " + s"and ${EVENT_LOG_ROLLING_MAX_FILE_SIZE.key} accordingly if you want to control " + "the overall size of event log files.") + .version("3.0.0") .intConf .checkValue(_ > 0, "Max event log files to retain should be higher than 0.") .createWithDefault(Integer.MAX_VALUE) @@ -99,54 +112,67 @@ private[spark] object History { .doc("The threshold score to determine whether it's good to do the compaction or not. " + "The compaction score is calculated in analyzing, and being compared to this value. " + "Compaction will proceed only when the score is higher than the threshold value.") + .version("3.0.0") .internal() .doubleConf .createWithDefault(0.7d) val DRIVER_LOG_CLEANER_ENABLED = ConfigBuilder("spark.history.fs.driverlog.cleaner.enabled") + .version("3.0.0") .fallbackConf(CLEANER_ENABLED) val DRIVER_LOG_CLEANER_INTERVAL = ConfigBuilder("spark.history.fs.driverlog.cleaner.interval") + .version("3.0.0") .fallbackConf(CLEANER_INTERVAL_S) val MAX_DRIVER_LOG_AGE_S = ConfigBuilder("spark.history.fs.driverlog.cleaner.maxAge") + .version("3.0.0") .fallbackConf(MAX_LOG_AGE_S) val HISTORY_SERVER_UI_ACLS_ENABLE = ConfigBuilder("spark.history.ui.acls.enable") + .version("1.0.1") .booleanConf .createWithDefault(false) val HISTORY_SERVER_UI_ADMIN_ACLS = ConfigBuilder("spark.history.ui.admin.acls") + .version("2.1.1") .stringConf .toSequence .createWithDefault(Nil) val HISTORY_SERVER_UI_ADMIN_ACLS_GROUPS = ConfigBuilder("spark.history.ui.admin.acls.groups") + .version("2.1.1") .stringConf .toSequence .createWithDefault(Nil) val NUM_REPLAY_THREADS = ConfigBuilder("spark.history.fs.numReplayThreads") + .version("2.0.0") .intConf .createWithDefaultFunction(() => Math.ceil(Runtime.getRuntime.availableProcessors() / 4f).toInt) val RETAINED_APPLICATIONS = ConfigBuilder("spark.history.retainedApplications") + .version("1.0.0") .intConf .createWithDefault(50) val PROVIDER = ConfigBuilder("spark.history.provider") + .version("1.1.0") .stringConf .createOptional 
val KERBEROS_ENABLED = ConfigBuilder("spark.history.kerberos.enabled") + .version("1.0.1") .booleanConf .createWithDefault(false) val KERBEROS_PRINCIPAL = ConfigBuilder("spark.history.kerberos.principal") + .version("1.0.1") .stringConf .createOptional val KERBEROS_KEYTAB = ConfigBuilder("spark.history.kerberos.keytab") + .version("1.0.1") .stringConf .createOptional @@ -156,6 +182,7 @@ private[spark] object History { "some path variables via patterns which can vary on cluster manager. Please check the " + "documentation for your cluster manager to see which patterns are supported, if any. " + "This configuration has no effect on a live application, it only affects the history server.") + .version("3.0.0") .stringConf .createOptional @@ -165,6 +192,7 @@ private[spark] object History { s"${CUSTOM_EXECUTOR_LOG_URL.key}, to incomplete application as well. " + "Even if this is true, this still only affects the behavior of the history server, " + "not running spark applications.") + .version("3.0.0") .booleanConf .createWithDefault(true) } diff --git a/docs/monitoring.md b/docs/monitoring.md index 131cd2a..b72fd02 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -143,13 +143,14 @@ Security options for the Spark History Server are covered more detail in the [Security](security.html#web-ui) page. <table class="table"> - <tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr> + <tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr> <tr> <td>spark.history.provider</td> <td><code>org.apache.spark.deploy.history.FsHistoryProvider</code></td> <td>Name of the class implementing the application history backend. 
Currently there is only one implementation, provided by Spark, which looks for application logs stored in the file system.</td> + <td>1.1.0</td> </tr> <tr> <td>spark.history.fs.logDirectory</td> @@ -160,6 +161,7 @@ Security options for the Spark History Server are covered more detail in the an HDFS path <code>hdfs://namenode/shared/spark-logs</code> or that of an alternative filesystem supported by the Hadoop APIs. </td> + <td>1.1.0</td> </tr> <tr> <td>spark.history.fs.update.interval</td> @@ -171,6 +173,7 @@ Security options for the Spark History Server are covered more detail in the As soon as an update has completed, listings of the completed and incomplete applications will reflect the changes. </td> + <td>1.4.0</td> </tr> <tr> <td>spark.history.retainedApplications</td> @@ -180,6 +183,7 @@ Security options for the Spark History Server are covered more detail in the the oldest applications will be removed from the cache. If an application is not in the cache, it will have to be loaded from disk if it is accessed from the UI. </td> + <td>1.0.0</td> </tr> <tr> <td>spark.history.ui.maxApplications</td> @@ -188,6 +192,7 @@ Security options for the Spark History Server are covered more detail in the The number of applications to display on the history summary page. Application UIs are still available by accessing their URLs directly even if they are not displayed on the history summary page. </td> + <td>2.0.1</td> </tr> <tr> <td>spark.history.ui.port</td> @@ -195,6 +200,7 @@ Security options for the Spark History Server are covered more detail in the <td> The port to which the web interface of the history server binds. </td> + <td>1.0.0</td> </tr> <tr> <td>spark.history.kerberos.enabled</td> @@ -203,6 +209,7 @@ Security options for the Spark History Server are covered more detail in the Indicates whether the history server should use kerberos to login. This is required if the history server is accessing HDFS files on a secure Hadoop cluster. 
</td> + <td>1.0.1</td> </tr> <tr> <td>spark.history.kerberos.principal</td> @@ -210,6 +217,7 @@ Security options for the Spark History Server are covered more detail in the <td> When <code>spark.history.kerberos.enabled=true</code>, specifies kerberos principal name for the History Server. </td> + <td>1.0.1</td> </tr> <tr> <td>spark.history.kerberos.keytab</td> @@ -217,6 +225,7 @@ Security options for the Spark History Server are covered more detail in the <td> When <code>spark.history.kerberos.enabled=true</code>, specifies location of the kerberos keytab file for the History Server. </td> + <td>1.0.1</td> </tr> <tr> <td>spark.history.fs.cleaner.enabled</td> @@ -224,6 +233,7 @@ Security options for the Spark History Server are covered more detail in the <td> Specifies whether the History Server should periodically clean up event logs from storage. </td> + <td>1.4.0</td> </tr> <tr> <td>spark.history.fs.cleaner.interval</td> @@ -236,6 +246,7 @@ Security options for the Spark History Server are covered more detail in the <code>spark.history.fs.cleaner.maxNum</code>, Spark tries to clean up the completed attempts from the applications based on the order of their oldest attempt time. </td> + <td>1.4.0</td> </tr> <tr> <td>spark.history.fs.cleaner.maxAge</td> @@ -243,6 +254,7 @@ Security options for the Spark History Server are covered more detail in the <td> When <code>spark.history.fs.cleaner.enabled=true</code>, job history files older than this will be deleted when the filesystem history cleaner runs. </td> + <td>1.4.0</td> </tr> <tr> <td>spark.history.fs.cleaner.maxNum</td> @@ -253,6 +265,7 @@ Security options for the Spark History Server are covered more detail in the This should be smaller than the underlying file system limit like `dfs.namenode.fs-limits.max-directory-items` in HDFS. 
</td> + <td>3.0.0</td> </tr> <tr> <td>spark.history.fs.endEventReparseChunkSize</td> @@ -262,6 +275,7 @@ Security options for the Spark History Server are covered more detail in the This is used to speed up generation of application listings by skipping unnecessary parts of event log files. It can be disabled by setting this config to 0. </td> + <td>2.4.0</td> </tr> <tr> <td>spark.history.fs.inProgressOptimization.enabled</td> @@ -270,6 +284,7 @@ Security options for the Spark History Server are covered more detail in the Enable optimized handling of in-progress logs. This option may leave finished applications that fail to rename their event logs listed as in-progress. </td> + <td>2.4.0</td> </tr> <tr> <td>spark.history.fs.driverlog.cleaner.enabled</td> @@ -277,6 +292,7 @@ Security options for the Spark History Server are covered more detail in the <td> Specifies whether the History Server should periodically clean up driver logs from storage. </td> + <td>3.0.0</td> </tr> <tr> <td>spark.history.fs.driverlog.cleaner.interval</td> @@ -285,6 +301,7 @@ Security options for the Spark History Server are covered more detail in the When <code>spark.history.fs.driverlog.cleaner.enabled=true</code>, specifies how often the filesystem driver log cleaner checks for files to delete. Files are only deleted if they are older than <code>spark.history.fs.driverlog.cleaner.maxAge</code> </td> + <td>3.0.0</td> </tr> <tr> <td>spark.history.fs.driverlog.cleaner.maxAge</td> @@ -292,6 +309,7 @@ Security options for the Spark History Server are covered more detail in the <td> When <code>spark.history.fs.driverlog.cleaner.enabled=true</code>, driver log files older than this will be deleted when the driver log cleaner runs. </td> + <td>3.0.0</td> </tr> <tr> <td>spark.history.fs.numReplayThreads</td> @@ -299,6 +317,7 @@ Security options for the Spark History Server are covered more detail in the <td> Number of threads that will be used by history server to process event logs. 
</td> + <td>2.0.0</td> </tr> <tr> <td>spark.history.store.maxDiskUsage</td> @@ -307,6 +326,7 @@ Security options for the Spark History Server are covered more detail in the Maximum disk usage for the local directory where the cache application history information are stored. </td> + <td>2.3.0</td> </tr> <tr> <td>spark.history.store.path</td> @@ -316,6 +336,7 @@ Security options for the Spark History Server are covered more detail in the server will store application data on disk instead of keeping it in memory. The data written to disk will be re-used in the event of a history server restart. </td> + <td>2.3.0</td> </tr> <tr> <td>spark.history.custom.executor.log.url</td> @@ -329,6 +350,7 @@ Security options for the Spark History Server are covered more detail in the <p/> For now, only YARN mode supports this configuration </td> + <td>3.0.0</td> </tr> <tr> <td>spark.history.custom.executor.log.url.applyIncompleteApplication</td> @@ -339,6 +361,7 @@ Security options for the Spark History Server are covered more detail in the Please note that incomplete applications may include applications which didn't shutdown gracefully. Even this is set to `true`, this configuration has no effect on a live application, it only affects the history server. </td> + <td>3.0.0</td> </tr> <tr> <td>spark.history.fs.eventLog.rolling.maxFilesToRetain</td> @@ -348,6 +371,7 @@ Security options for the Spark History Server are covered more detail in the all event log files will be retained. The lowest value is 1 for technical reason.<br/> Please read the section of "Applying compaction of old event log files" for more details. </td> + <td>3.0.0</td> </tr> </table> --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
