bobbai00 commented on code in PR #3715:
URL: https://github.com/apache/texera/pull/3715#discussion_r2395254148


##########
core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/util/LakeFSStorageClient.scala:
##########
@@ -77,22 +77,23 @@ object LakeFSStorageClient {
     * Initializes a new repository in LakeFS.
     *
     * @param repoName         Name of the repository.
-    * @param defaultBranch    Default branch name, usually "main".
     */
   def initRepo(
       repoName: String
   ): Repository = {
+    // validate repoName
+    // https://docs.lakefs.io/latest/understand/model/#repository

Review Comment:
   merge this comment into one line



##########
core/config/src/main/scala/edu/uci/ics/amber/config/StorageConfig.scala:
##########
@@ -65,15 +65,15 @@ object StorageConfig {
     conf.getInt("storage.iceberg.table.commit.retry.max-wait-ms")
 
   // LakeFS specifics
-  val lakefsEndpoint: String = conf.getString("storage.lakefs.endpoint")
+  var lakefsEndpoint: String = conf.getString("storage.lakefs.endpoint")

Review Comment:
   This change seem to be unnecessary



##########
core/workflow-core/src/main/scala/edu/uci/ics/amber/core/storage/FileResolver.scala:
##########
@@ -80,7 +80,7 @@ object FileResolver {
     *
     * The fileName format should be: 
/ownerEmail/datasetName/versionName/fileRelativePath
     *   e.g. /[email protected]/twitterDataset/v1/california/irvine/tw1.csv
-    * The output dataset URI format is: 
{DATASET_FILE_URI_SCHEME}:///{did}/{versionHash}/file-path
+    * The output dataset URI format is: 
{DATASET_FILE_URI_SCHEME}:///{repositoryName}/{versionHash}/fileRelativePath
     *   e.g. {DATASET_FILE_URI_SCHEME}:///15/adeq233td/some/dir/file.txt

Review Comment:
   update this comment also



##########
core/config/src/main/scala/edu/uci/ics/amber/config/StorageConfig.scala:
##########
@@ -65,15 +65,15 @@ object StorageConfig {
     conf.getInt("storage.iceberg.table.commit.retry.max-wait-ms")
 
   // LakeFS specifics
-  val lakefsEndpoint: String = conf.getString("storage.lakefs.endpoint")
+  var lakefsEndpoint: String = conf.getString("storage.lakefs.endpoint")

Review Comment:
   I see that this is for the mock lakeFS and S3. Can you try to set the 
environment variable to change this value? i.e. see 
`conf/src/main/resources/storage.conf`, each variable has a env var 
corresponding to it, which can be used to change the actual value of the config 
value



##########
core/config/src/main/scala/edu/uci/ics/amber/config/StorageConfig.scala:
##########
@@ -65,15 +65,15 @@ object StorageConfig {
     conf.getInt("storage.iceberg.table.commit.retry.max-wait-ms")
 
   // LakeFS specifics
-  val lakefsEndpoint: String = conf.getString("storage.lakefs.endpoint")
+  var lakefsEndpoint: String = conf.getString("storage.lakefs.endpoint")
   val lakefsApiSecret: String = 
conf.getString("storage.lakefs.auth.api-secret")
   val lakefsUsername: String = conf.getString("storage.lakefs.auth.username")
   val lakefsPassword: String = conf.getString("storage.lakefs.auth.password")
   val lakefsBlockStorageType: String = 
conf.getString("storage.lakefs.block-storage.type")
   val lakefsBucketName: String = 
conf.getString("storage.lakefs.block-storage.bucket-name")
 
   // S3 specifics
-  val s3Endpoint: String = conf.getString("storage.s3.endpoint")
+  var s3Endpoint: String = conf.getString("storage.s3.endpoint")

Review Comment:
   Ditto



##########
core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/hub/HubResource.scala:
##########
@@ -326,7 +326,7 @@ object HubResource {
           dataset = dataset,
           accessPrivilege = datasetAccess.getPrivilege,
           ownerEmail = ownerEmail,

Review Comment:
   Ditto



##########
deployment/k8s/texera-helmchart/files/texera_ddl.sql:
##########
@@ -264,13 +264,14 @@ CREATE TABLE IF NOT EXISTS public_project
 -- dataset
 CREATE TABLE IF NOT EXISTS dataset
 (
-    did            SERIAL PRIMARY KEY,
-    owner_uid      INT NOT NULL,
-    name           VARCHAR(128) NOT NULL,
-    is_public      BOOLEAN NOT NULL DEFAULT TRUE,
+    did             SERIAL PRIMARY KEY,
+    owner_uid       INT NOT NULL,
+    name            VARCHAR(128) NOT NULL,
+    repository_name VARCHAR(128),
+    is_public       BOOLEAN NOT NULL DEFAULT TRUE,
     is_downloadable BOOLEAN NOT NULL DEFAULT TRUE,
-    description    VARCHAR(512) NOT NULL,
-    creation_time  TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    description     VARCHAR(512) NOT NULL,
+    creation_time   TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,

Review Comment:
   revert this unnecessary change



##########
core/amber/src/main/scala/edu/uci/ics/texera/web/resource/dashboard/hub/HubResource.scala:
##########
@@ -326,7 +326,7 @@ object HubResource {
           dataset = dataset,

Review Comment:
   This is correct. `dataset = dataset`, the left side is the parameter name, 
and right side is the actual variable



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to