bobbai00 commented on code in PR #3715:
URL: https://github.com/apache/texera/pull/3715#discussion_r2395254148
##########
core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/util/LakeFSStorageClient.scala:
##########
@@ -77,22 +77,23 @@ object LakeFSStorageClient {
* Initializes a new repository in LakeFS.
*
* @param repoName Name of the repository.
- * @param defaultBranch Default branch name, usually "main".
*/
def initRepo(
repoName: String
): Repository = {
+ // validate repoName
+ // https://docs.lakefs.io/latest/understand/model/#repository
Review Comment:
merge this comment into one line
##########
core/config/src/main/scala/edu/uci/ics/amber/config/StorageConfig.scala:
##########
@@ -65,15 +65,15 @@ object StorageConfig {
conf.getInt("storage.iceberg.table.commit.retry.max-wait-ms")
// LakeFS specifics
- val lakefsEndpoint: String = conf.getString("storage.lakefs.endpoint")
+ var lakefsEndpoint: String = conf.getString("storage.lakefs.endpoint")
Review Comment:
This change seem to be unnecessary
##########
core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/FileResolver.scala:
##########
@@ -80,7 +80,7 @@ object FileResolver {
*
* The fileName format should be:
/ownerEmail/datasetName/versionName/fileRelativePath
* e.g. /[email protected]/twitterDataset/v1/california/irvine/tw1.csv
- * The output dataset URI format is:
{DATASET_FILE_URI_SCHEME}:///{did}/{versionHash}/file-path
+ * The output dataset URI format is:
{DATASET_FILE_URI_SCHEME}:///{repositoryName}/{versionHash}/fileRelativePath
* e.g. {DATASET_FILE_URI_SCHEME}:///15/adeq233td/some/dir/file.txt
Review Comment:
update this comment also
##########
core/config/src/main/scala/edu/uci/ics/amber/config/StorageConfig.scala:
##########
@@ -65,15 +65,15 @@ object StorageConfig {
conf.getInt("storage.iceberg.table.commit.retry.max-wait-ms")
// LakeFS specifics
- val lakefsEndpoint: String = conf.getString("storage.lakefs.endpoint")
+ var lakefsEndpoint: String = conf.getString("storage.lakefs.endpoint")
Review Comment:
I see that this is for the mock lakeFS and S3. Can you try to set the
environment variable to change this value? i.e. see
`conf/src/main/resources/storage.conf`, each variable has a env var
corresponding to it, which can be used to change the actual value of the config
value
##########
core/config/src/main/scala/edu/uci/ics/amber/config/StorageConfig.scala:
##########
@@ -65,15 +65,15 @@ object StorageConfig {
conf.getInt("storage.iceberg.table.commit.retry.max-wait-ms")
// LakeFS specifics
- val lakefsEndpoint: String = conf.getString("storage.lakefs.endpoint")
+ var lakefsEndpoint: String = conf.getString("storage.lakefs.endpoint")
val lakefsApiSecret: String =
conf.getString("storage.lakefs.auth.api-secret")
val lakefsUsername: String = conf.getString("storage.lakefs.auth.username")
val lakefsPassword: String = conf.getString("storage.lakefs.auth.password")
val lakefsBlockStorageType: String =
conf.getString("storage.lakefs.block-storage.type")
val lakefsBucketName: String =
conf.getString("storage.lakefs.block-storage.bucket-name")
// S3 specifics
- val s3Endpoint: String = conf.getString("storage.s3.endpoint")
+ var s3Endpoint: String = conf.getString("storage.s3.endpoint")
Review Comment:
Ditto
##########
core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/hub/HubResource.scala:
##########
@@ -326,7 +326,7 @@ object HubResource {
dataset = dataset,
accessPrivilege = datasetAccess.getPrivilege,
ownerEmail = ownerEmail,
Review Comment:
Ditto
##########
deployment/k8s/texera-helmchart/files/texera_ddl.sql:
##########
@@ -264,13 +264,14 @@ CREATE TABLE IF NOT EXISTS public_project
-- dataset
CREATE TABLE IF NOT EXISTS dataset
(
- did SERIAL PRIMARY KEY,
- owner_uid INT NOT NULL,
- name VARCHAR(128) NOT NULL,
- is_public BOOLEAN NOT NULL DEFAULT TRUE,
+ did SERIAL PRIMARY KEY,
+ owner_uid INT NOT NULL,
+ name VARCHAR(128) NOT NULL,
+ repository_name VARCHAR(128),
+ is_public BOOLEAN NOT NULL DEFAULT TRUE,
is_downloadable BOOLEAN NOT NULL DEFAULT TRUE,
- description VARCHAR(512) NOT NULL,
- creation_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+ description VARCHAR(512) NOT NULL,
+ creation_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
Review Comment:
revert this unnecessary change
##########
core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/hub/HubResource.scala:
##########
@@ -326,7 +326,7 @@ object HubResource {
dataset = dataset,
Review Comment:
This is correct. `dataset = dataset`, the left side is the parameter name,
and right side is the actual variable
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]