This is an automated email from the ASF dual-hosted git repository. xxyu pushed a commit to branch doc5.0 in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/doc5.0 by this push: new 2ba5eb5ad4 Add design of metastore 2ba5eb5ad4 is described below commit 2ba5eb5ad44d17dec6f1a309085f20ed29090b5d Author: XiaoxiangYu <x...@apache.org> AuthorDate: Mon Sep 19 16:59:05 2022 +0800 Add design of metastore --- .../write_and_read_persistent_entity.puml | 98 +++++++++++++++ .../write_and_read_persistent_entity_cn.png | Bin 0 -> 272752 bytes .../write_and_read_persistent_entity_cn.puml | 68 ++++++++++ .../class_diagram/metastore_resource_store.png | Bin 0 -> 175673 bytes .../class_diagram/metastore_resource_store.puml | 126 +++++++++++++++++++ .../development/dev_design/metastore_design.md | 139 +++++++++++++++++++++ website/docusaurus.config.js | 1 + website/sidebars.js | 4 + 8 files changed, 436 insertions(+) diff --git a/website/docs/development/dev_design/diagram/activity_diagram/write_and_read_persistent_entity.puml b/website/docs/development/dev_design/diagram/activity_diagram/write_and_read_persistent_entity.puml new file mode 100644 index 0000000000..ce38478968 --- /dev/null +++ b/website/docs/development/dev_design/diagram/activity_diagram/write_and_read_persistent_entity.puml @@ -0,0 +1,98 @@ +@startuml + +start + +if (alreadyInTransaction?) then (yes) + :call process() of inner transaction; + end +endif + +repeat + :Update Transaction Metrics; + :UnitOfWorkParams.getProcessor().preprocess(); + + :UnitOfWork#startTransaction; + partition #lightyellow "startTransaction" { + if (EpochNotOwnedByCurrentProcess?) 
then (yes) + :throw Exception; + end + endif + : checkThreadLock; + : lockThreadLock; + if(isSandboxMode) then (yes) + : prepare thread kylin config; + : prepare thread ResourceStore(ThreadViewResourceStore); + endif + } + + :UnitOfWorkParams.getProcessor().process(); + :process(real business logic)// change is stored in ThreadViewResourceStore; + partition #azure "business logic" { + : copy new PersistentEntity for write; + : update on new PersistentEntity; + : create XXXManager; + : call XXXManager#save(); + partition "XXXManager" { + : some check; + : CachedCrudAssist#save(); + : ResourceStore#checkAndPutResource(); + } + } + + + :UnitOfWork#endTransaction; + partition #powderblue "endTransaction" { + : remove thread kylin config; + if(isSandboxMode) then (yes) + : Fetch change from thread ResourceStore; + endif + + :flushSandboxMeta; + :write_AuditLogStore; + :replay audit log into resource store; + + } + + +repeat while (Transaction failed and not hit max retry times?) is (retry) not (Succeed or not failed) + ->//return an object//; +stop + +'autonumber 1.0 +'doInTransactionWithRetry -> isAlreadyInTransaction +' +'isAlreadyInTransaction --> doInTransactionWithRetry +' +'doInTransactionWithRetry -> updateMetrics +' +'loop "Retry-transaction" [123] +' autonumber 2.0 +' updateMetrics -> preProcess +' +' group doTransaction [] +' autonumber 3.0 +' preProcess -> startTransaction +' +' group startTransaction [hello] +' startTransaction -> checkEpoch +' +' checkEpoch -> fetchLock +' +' fetchLock -> lock +' end +' +' lock -> process : 业务逻辑 +' +' +' process -> flushSandboxMeta +' +' group endTransaction [] +' autonumber 4.0 +' flushSandboxMeta -> write_AuditLogStore +' +' write_AuditLogStore -> write_Metastore +' end +' +'end + +@enduml \ No newline at end of file diff --git a/website/docs/development/dev_design/diagram/activity_diagram/write_and_read_persistent_entity_cn.png 
b/website/docs/development/dev_design/diagram/activity_diagram/write_and_read_persistent_entity_cn.png new file mode 100644 index 0000000000..170f84cfaf Binary files /dev/null and b/website/docs/development/dev_design/diagram/activity_diagram/write_and_read_persistent_entity_cn.png differ diff --git a/website/docs/development/dev_design/diagram/activity_diagram/write_and_read_persistent_entity_cn.puml b/website/docs/development/dev_design/diagram/activity_diagram/write_and_read_persistent_entity_cn.puml new file mode 100644 index 0000000000..a5d1f7e435 --- /dev/null +++ b/website/docs/development/dev_design/diagram/activity_diagram/write_and_read_persistent_entity_cn.puml @@ -0,0 +1,68 @@ +@startuml + +start + +note right + 这个是 Kylin 5.0 **元数据事务** 的提交流程分析 + ====== + 1. 元数据事务的相关代码在 UnitOfWork, 本活动图是对代码的说明; + 2. Epoch 是全局级别的, 项目粒度的元数据写锁, 用于确保同一时刻只有一个进程会修改指定项目下的元数据; + 3. 为了避免获得 Epoch 的进程多线程同时变更元数据, 所以获得 Epoch 的进程, 还需要进行一个进程内的写锁的锁定, 以确保获得 Epoch 的进程对元数据的变更, 是串行的; + 4. 调用 endTransaction 方法前, 元数据变更会发生在一个沙盒模式的 ResourceStore; + 5. 在 endTransaction 方法, 会将元数据变更持久化到 Metastore(RDBMS) 和 AuditLogStore(RDBMS), 这里使用 RDBMS 的事务保证元数据操作的一致性; + 6. 其他 Kylin 进程会定期获取 AuditLogStore 的变更, 重放到自己的 ResourceStore. +end note + +if (是否已经在事务?) then (yes) + :调用"被包含"的子事务的 process 方法并且退出子事务; + end +endif + +repeat + :更新元数据事务相关的 Metrics; + :调用 UnitOfWorkParams.getProcessor().preprocess(), 目前没有太多作用; + + partition #lightyellow "startTransaction" { + + if (当前进程是否拥有 Epoch?) then (no) + :抛出异常并且结束事务; + end + endif + : 获取进程内写锁; + : 对进程内写锁进行锁定; + if(是否启用沙盒模式?) 
then (yes) + : 创建 KylinConfig 的快照并且进行设置到 ThreadLocal; + : 创建 ThreadViewResourceStore, 以保证事务提交前\n事务内的元数据变更, 不会对外部可见; + endif + } + floating note #pink: 开始元数据事务的准备工作, \n相关代码在 UnitOfWork#startTransaction + + partition #azure "process 方法包含的元数据修改逻辑(业务逻辑)" { + : 获取 startTransaction 准备的 KylinConfig; + : 创建 XXXManager 和 PersistentEntity; + : 使用 XXXManager 对变更后的 PersistentEntity 进行保存; + floating note #pink: 调用 UnitOfWorkParams.getProcessor().process()\n, process 方法由 Service 层传递, process 方法包含业务逻辑,\n在这一阶段元数据变更会发生在沙盒内(也就是 ThreadViewResourceStore) + partition "XXXManager" { + : XXXManager 对变更后的 PersistentEntity 进行约束性检查; + : XXXManager 调用 CachedCrudAssist#save() 写入 Cache; + : CachedCrudAssist 调用 ResourceStore#checkAndPutResource() \n写入 ThreadViewResourceStore 的 overlay; + } + } + + partition #powderblue "endTransaction" { + floating note #pink: 调用 UnitOfWork#endTransaction 来将\n ThreadViewResourceStore 的元数据变更提交; + : 清理 ThreadLocal 级别的 KylinConfig; + : 再次检查当前进程是否持有 Epoch; + if(是否启用沙盒模式?) then (yes) + : 从 ThreadViewResourceStore 的 overlay 收集元数据变更, 保存到 UnitMessages; + : 在一个 RDBMS 事务内, 提交 UnitMessages 的变更到\n MetadataStore 和 AuditLogStore 对应的表; + : 通过 MessageSynchronization 将 UnitMessages 缓存的元数据变更\n重放到原先的 ResourceStore(此时对外部可见); + endif + :更新元数据事务的耗时指标; + } + +repeat while (事务失败, 并且事务失败次数尚未超出重试最大次数?) 
is (是, 则触发事务重试) not (否, 那么事务成功, 或者事务失败次数超出阈值) + ->\n//返回 Pair<Boolean, T>, 其中 Boolean 标示事务是否成功, T 是业务逻辑(也就是 process方法)的返回值 //; +stop + +@enduml \ No newline at end of file diff --git a/website/docs/development/dev_design/diagram/class_diagram/metastore_resource_store.png b/website/docs/development/dev_design/diagram/class_diagram/metastore_resource_store.png new file mode 100644 index 0000000000..03ece7d787 Binary files /dev/null and b/website/docs/development/dev_design/diagram/class_diagram/metastore_resource_store.png differ diff --git a/website/docs/development/dev_design/diagram/class_diagram/metastore_resource_store.puml b/website/docs/development/dev_design/diagram/class_diagram/metastore_resource_store.puml new file mode 100644 index 0000000000..b2712369ff --- /dev/null +++ b/website/docs/development/dev_design/diagram/class_diagram/metastore_resource_store.puml @@ -0,0 +1,126 @@ +@startuml + +namespace business_layer { + class DataModelManager #pink { + 'CRUD operation of DataModel // first kind method; + 'complex business logic of DataModel // second kind method; + + private NDataModel saveDataModelDesc(NDataModel dataModelDesc); // metadata write path, level 1 + } + + class CachedCrudAssist<T extends RootPersistentEntity> #pink { + protected Cache<String, T extends RootPersistentEntity> cache; // cache of RootPersistentEntity + ResourceStore resourceStore; + + public T save(T entity); // metadata write path, level 2 + } +} + + +namespace resource_store { + + + class RawResource { + private String resPath; + private ByteSource byteSource; + private long timestamp; + private long mvcc; + } + + class VersionedRawResource { + RawResource resource; + Integer mvcc; + } + + abstract class ResourceStore { + static Cache<KylinConfig, ResourceStore> META_CACHE; + volatile ConcurrentSkipListMap<String, VersionedRawResource> data; // cache of byte array + + public final <T extends RootPersistentEntity> void checkAndPutResource(String resPath, T obj, + Serializer<T> 
serializer); // metadata write path, level 3 + } + + class InMemResourceStore + + class ThreadViewResourceStore { + // ThreadViewResourceStore is like a sandbox: updates on ThreadViewResourceStore are not visible outside.\n ThreadViewResourceStore is used in a transaction (UnitOfWork) only. + + InMemResourceStore overlay; // store local change of an uncommitted transaction + InMemResourceStore underlying; // real ResourceStore + } +} + + +namespace metastore { + abstract class MetadataStore { + void putResource(RawResource res, String unitPath, long epochId); // metadata write path, level 4 + } + + + class JDBCMetadataStore { + DataSourceTransactionManager transactionManager; + JdbcTemplate jdbcTemplate; + RowMapper<RawResource> RAW_RESOURCE_ROW_MAPPER; + } + + class HDFSMetadataStore + class FileMetadataStore + + class JdbcUtil { + + } + + abstract class EpochStore { + + } +} + + +namespace sync { + abstract class AuditLogStore { + void save(UnitMessages unitMessages); // metadata write path, level 5 + } + + class JDBCAuditLogStore +} + + +'---------------------- Class Relation(extends) --------------------- + +resource_store.InMemResourceStore --|> resource_store.ResourceStore : extends + +resource_store.ThreadViewResourceStore --|> resource_store.ResourceStore : extends + +resource_store.VersionedRawResource --|> resource_store.RawResource : extends + + +metastore.JDBCMetadataStore --|> metastore.MetadataStore : extends +metastore.HDFSMetadataStore --|> metastore.MetadataStore : extends +metastore.FileMetadataStore --|> metastore.MetadataStore : extends + +sync.JDBCAuditLogStore --|> sync.AuditLogStore : extends + +'---------------------- Class Relation(contains) --------------------- + + +business_layer.DataModelManager *-- "business_layer.CachedCrudAssist" : contains + +"business_layer.CachedCrudAssist" *-- resource_store.ResourceStore : contains + +resource_store.ResourceStore *-- metastore.MetadataStore : contains + +metastore.MetadataStore *-- 
sync.AuditLogStore : contains + +metastore.MetadataStore *-- metastore.EpochStore : contains + +resource_store.ThreadViewResourceStore *-- resource_store.InMemResourceStore : contains + + + +'---------------------- Class Relation(relate) --------------------- + +resource_store.ResourceStore --> resource_store.VersionedRawResource : store + + + +@enduml \ No newline at end of file diff --git a/website/docs/development/dev_design/metastore_design.md b/website/docs/development/dev_design/metastore_design.md new file mode 100644 index 0000000000..2022f3b9ab --- /dev/null +++ b/website/docs/development/dev_design/metastore_design.md @@ -0,0 +1,139 @@ +--- +title: Metastore Design of Kylin 5 +language: en +sidebar_label: Metastore Design of Kylin 5 +pagination_label: Metastore Design of Kylin 5 +toc_min_heading_level: 2 +toc_max_heading_level: 6 +pagination_prev: null +pagination_next: null +showLastUpdateAuthor: true +showLastUpdateTime: true +keywords: + - dev-design +draft: false +last_update: + date: 09/16/2022 + author: Xiaoxiang Yu +--- + +### Target Audience + +这篇文档是为有以下需求的用户和开发者而准备的: +1. 对 Kylin 5.0 元数据存储, 元数据缓存, 以及节点间元数据同步机制感兴趣的用户和开发者. +2. 在二次开发过程中, 想了解对 Kylin 5.0 进行元数据读写操作的最佳实践和注意事项的开发者. +3. 想对 Kylin 5.0 的元数据进行升级改造的开发者. + +### Terminology + +#### Core Class and Interface + +| Class | Comment | +|-----------------------------|----------------------------------------------------| +| ResourceStore | 用于管理内存中对元数据的操作 | +| InMemResourceStore | ResourceStore 的实现类, 用于绝大部分情况 | +| ThreadViewResourceStore | ResourceStore 的实现类, 作为一个沙盒式的 ResourceStore, 在事务中使用 | +| MetadataStore | 用于管理元数据持久化的操作 | +| AuditLogStore | 用于节点间元数据同步, 以及诊断元数据异常情况 | +| Epoch | 用于保证同时只有一个进程对指定项目下的元数据进行修改操作, 或者提交作业 | +| EpochStore | 用于持久化 Epoch | + + +### Question and Answer + +#### 1. Why Kylin 5.0 need transaction(of metadata) ? +因为用户操作可能同时操作多个元数据, 这些元数据变更必须保持一致性, 要么同时变更成功要么同时变更失败, 不允许出现中间状态. +例如, 如果用户修改 DataModel 的维度和度量, IndexPlan 也需要随之同时变更, 两者必须保持一致性. + +#### 2. How transaction was implemented? 
+检查 [元数据更新流程图](#metadata_write) + +#### 3. How is metadata synced in a Kylin cluster? +对于指定项目, Kylin 节点要获取 Epoch 才能对元数据进行写操作, 所以对于这个项目下面元数据, 没有获取 Epoch 的 Kylin 节点 +将作为 Follower 通过 AuditLogReplayWorker 获取元数据更新. + +Follower 同步元数据变更,通过两个方式,代码在 AuditLogReplayWorker +1. 第一个是 Follower 自己定期调度同步任务,默认间隔是 5s; +2. 第二个方式是元数据变更的节点发送 AuditLogBroadcastEventNotifier 告知所有 Follower, Follower 主动 replay + +按照设计,Follower 元数据的 delay 在 1-2s 左右(被动广播同步),最多 5s(主动定期同步). + +#### 4. How to read meta from metastore? +todo + +#### 5. As a kylin developer, how should I write my code to update metadata? + + +1. 使用 `@org.apache.kylin.rest.aspect.Transaction` 对你的业务方法进行注解, 这个在你的方法比较简短(轻量)的情况比较推荐 + ```java + class JobService{ + + @Transaction(project = 0) + public ExecutableResponse manageJob(String project, ExecutableResponse job, String action) throws IOException { + Preconditions.checkNotNull(project); + Preconditions.checkNotNull(job); + Preconditions.checkArgument(!StringUtils.isBlank(action)); + + if (JobActionEnum.DISCARD == JobActionEnum.valueOf(action)) { + return job; + } + + updateJobStatus(job.getId(), project, action); + return getJobInstance(job.getId()); + } + } + ``` +2. 使用 `EnhancedUnitOfWork.doInTransactionWithCheckAndRetry` 来包含你的业务代码(元数据修改代码) + +```java +class SomeService { + public void renameDataModel() { + // some codes not in transaction + EnhancedUnitOfWork.doInTransactionWithCheckAndRetry(() -> { + // 1. Get XXXManager, such as NDataModelManager + KylinConfig config = KylinConfig.getInstanceFromEnv(); // thread local KylinConfig was created in startTransaction + NDataModelManager modelManager = config.getManager(project, NDataModelManager.class); + // 2. Get RootPersistentEntity by XXXManager and Update XXX + NDataModel nDataModel = modelManager.getDataModelDesc(modelId); + checkAliasExist(modelId, newAlias, project); + nDataModel.setAlias(newAlias); + NDataModel modelUpdate = modelManager.copyForWrite(nDataModel); + // 3. 
Call updateXXX method of XXXManager + modelManager.updateDataModelDesc(modelUpdate); + }, project); + // some more codes not in transaction + } +} +``` + +:::info 元数据操作须知 +1. KylinConfig 和 XXXManager(例如:NDataModelManager) 需要在事务内获取, +原因在于事务开启时准备了一个事务专用的单独的 KylinConfig, 单独的 KylinConfig 绑定了事务需要的 ResourceStore +2. 开启多线程修改元数据需要注意, 原因在于新的线程并不能获取事务开启时准备的 KylinConfig, 而是全局的 KylinConfig +3. 两个事务写的时候别复用同一个对象, 以避免元数据更新时, MVCC 检查失败 +::: + +#### 6. AuditLog replay and clean + +todo + +#### 7. Metadata dumped logic + +todo + +### <span id="metadata_write">Diagram of writing a piece of metadata</span> + +#### About Activity Diagram +1. 元数据事务的相关代码在 `UnitOfWork`, 本活动图是对代码的说明; +2. `Epoch` 是全局级别的, 项目粒度的元数据写锁, 用于确保同一时刻只有一个进程会修改指定项目下的元数据; +3. 为了避免多线程同时变更元数据, 所以获得 Epoch 的进程, 还需要进行一个进程内的写锁的锁定, 以确保获得 `Epoch` 的进程对元数据的变更, 是串行的; +4. 调用 `endTransaction` 方法前, 元数据变更会发生在一个沙盒模式的 `ResourceStore`; +5. 在 `endTransaction` 方法, 会将元数据变更持久化到 `Metastore`(RDBMS) 和 `AuditLogStore`(RDBMS), 这里使用 RDBMS 的事务保证元数据操作的一致性; +6. 关于事务的异常和回滚, 在 `process` 方法发生的异常因为元数据变更是发生在沙盒的, 所以直接舍弃沙盒即可; +在 `endTransaction` 发生的异常, 由 RDBMS 事务保证对 Metastore 和 AuditLogStore 操作的原子性; +7. 其他 Kylin 进程会定期获取 `AuditLogStore` 的变更, 重放到自己的 `ResourceStore`, 来保持节点间的元数据的一致性. + + + +### <span id="class_metastore">Class Diagram of Metastore</span> + diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index 008c020c89..7e640b14f3 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -239,6 +239,7 @@ const config = { Built with Docusaurus.`, }, prism: { + additionalLanguages: ['java'], theme: lightCodeTheme, darkTheme: darkCodeTheme, }, diff --git a/website/sidebars.js b/website/sidebars.js index 10f48ce8e6..94cabbf514 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -970,6 +970,10 @@ const sidebars = { type: 'doc', id: 'development/how_to_subscribe_mailing_list' }, + { + type: 'doc', + id: 'development/dev_design/metastore_design' + }, ], },