Re: [PATCH v5 25/27] refs: add LMDB refs storage backend

David Turner Fri, 19 Feb 2016 14:51:03 -0800

On Thu, 2016-02-18 at 15:50 +0700, Duy Nguyen wrote:
> Caveat: I did not study how to use lmdb. I just guessed what it does
> based on function names. I don't know much about refs handling either
> (especially since the transaction thing is introduced)
> 
> > diff --git a/Documentation/technical/refs-lmdb-backend.txt
> > b/Documentation/technical/refs-lmdb-backend.txt
> > new file mode 100644
> > index 0000000..eb81465
> > --- /dev/null
> > +++ b/Documentation/technical/refs-lmdb-backend.txt
> > +Reflog values are in the same format as the original files-based
> > +reflog, including the trailing LF. The date in the reflog value
> > +matches the date in the timestamp field.
> 
> ..except that SHA-1s are stored in raw values instead of hex strings.
> 
> > diff --git a/Documentation/technical/repository-version.txt
> > b/Documentation/technical/repository-version.txt
> > index 00ad379..fca5ecd 100644
> > --- a/Documentation/technical/repository-version.txt
> > +++ b/Documentation/technical/repository-version.txt
> > @@ -86,3 +86,8 @@ for testing format-1 compatibility.
> >  When the config key `extensions.preciousObjects` is set to `true`,
> >  objects in the repository MUST NOT be deleted (e.g., by `git
> > -prune` or
> >  `git repack -d`).
> > +
> > +`refStorage`
> > +~~~~~~~~~~~~
> > +This extension allows the use of alternate ref storage backends. 
> >  The
> > +only defined value is `lmdb`.
> 
> refStorage accepts empty string and `files` as well, should probably
> be worth mentioning.
> 
> > diff --git a/refs/lmdb-backend.c b/refs/lmdb-backend.c
> > +#include "../cache.h"
> > +#include <lmdb.h>
> > +#include "../object.h"
> > +#include "../refs.h"
> > +#include "refs-internal.h"
> > +#include "../tag.h"
> > +#include "../lockfile.h"
> 
> I'm quite sure we don't need "../". We have plenty of source files in
> subdirs and many of them (haven't checked all) just go with
> #include "cache.h".
> 
> > +struct lmdb_transaction transaction;
> 
> static?
> 
> > +
> > +static int in_write_transaction(void)
> > +{
> > +   return transaction.txn && !(transaction.flags &
> > MDB_RDONLY);
> > +}
> > +
> > +static void init_env(MDB_env **env, const char *path)
> > +{
> > +   int ret;
> > +   if (*env)
> > +           return;
> > +
> > +   assert(path);
> > +
> > +   ret = mdb_env_create(env);
> > +   if (ret)
> > +           die("BUG: mdb_env_create failed: %s",
> > mdb_strerror(ret));
> > +   ret = mdb_env_set_maxreaders(*env, 1000);
> > +   if (ret)
> > +           die("BUG: mdb_env_set_maxreaders failed: %s",
> > mdb_strerror(ret));
> > +   ret = mdb_env_set_mapsize(*env, (1<<30));
> > +   if (ret)
> > +           die("BUG: mdb_set_mapsize failed: %s",
> > mdb_strerror(ret));
> > +   ret = mdb_env_open(*env, path, 0 , 0664);
> 
> This permission makes me wonder if we need adjust_shared_perm() here
> and some other places.
> 
> > +   if (ret)
> > +           die("BUG: mdb_env_open (%s) failed: %s", path,
> > +               mdb_strerror(ret));
> > +}
> > +
> > +static int lmdb_init_db(int shared, struct strbuf *err)
> > +{
> > +   /*
> > +    * To create a db, all we need to do is make a directory
> > for
> > +    * it to live in; lmdb will do the rest.
> > +    */
> > +
> > +   if (!db_path)
> > +           db_path =
> > xstrdup(real_path(git_path("refs.lmdb")));
> > +
> > +   if (mkdir(db_path, 0775) && errno != EEXIST) {
> > +           strbuf_addf(err, "%s", strerror(errno));
> 
> maybe strbuf_addstr, unless want to add something more, "mkdir
> failed"?
> 
> > +static int read_per_worktree_ref(const char *submodule, const char
> > *refname,
> > +                            struct MDB_val *val, int
> > *needs_free)
> 
> From what I read, I suspect these _per_worktree functions will be
> identical for the next backend. Should we just hand over the job for
> files backend? For all entry points that may deal with per-worktree
> refs, e.g. lmdb_resolve_ref_unsafe, can we check ref_type() first
> thing, if it's per-worktree we call
> refs_be_files.resolve_ref_unsafe()
> instead?  It could even be done at frontend level,
> e.g. refs.c:resolve_ref_unsafe().
> 
> Though I may be talking rubbish here because I don't know how whether
> it has anything to do with transactions.
> 
> > +{
> > +   struct strbuf sb = STRBUF_INIT;
> > +   struct strbuf path = STRBUF_INIT;
> > +   struct stat st;
> > +   int ret = -1;
> > +
> > +   submodule_path(&path, submodule, refname);
> > +
> > +#ifndef NO_SYMLINK_HEAD
> 
> It started with the compiler warns about unused "st" when this macro
> is defined. Which makes me wonder, should we do something like this
> to
> make sure this code compiles unconditionally?
> 
> +#ifndef NO_SYMLINK_HEAD
> +       int no_symlink_head = 0;
> +#else
> +       int no_symlink_head = 1;
> +#endif
> ...
> +       if (!no_symlink_head) {
> ...
> 
> 
> > +int lmdb_transaction_begin_flags(struct strbuf *err, unsigned int
> > flags)
> 
> static?
> 
> > +#define MAXDEPTH 5
> > +
> > +static const char *parse_ref_data(struct lmdb_transaction
> > *transaction,
> > +                             const char *refname, const char
> > *ref_data,
> > +                             unsigned char *sha1, int
> > resolve_flags,
> > +                             int *flags, int bad_name)
> > +{
> > +   int depth = MAXDEPTH;
> > +   const char *buf;
> > +   static struct strbuf refname_buffer = STRBUF_INIT;
> > +   static struct strbuf refdata_buffer = STRBUF_INIT;
> > +   MDB_val key, val;
> > +   int needs_free = 0;
> > +
> > +   for (;;) {
> > +           if (--depth < 0)
> > +                   return NULL;
> > +
> > +           if (!starts_with(ref_data, "ref:")) {
> > +                   if (get_sha1_hex(ref_data, sha1) ||
> > +                       (ref_data[40] != '\0' &&
> > !isspace(ref_data[40]))) {
> > +                           if (flags)
> > +                                   *flags |= REF_ISBROKEN;
> > +                           errno = EINVAL;
> > +                           return NULL;
> > +                   }
> > +
> > +                   if (bad_name) {
> > +                           hashclr(sha1);
> > +                           if (flags)
> > +                                   *flags |= REF_ISBROKEN;
> > +                   } else if (is_null_sha1(sha1)) {
> > +                           if (flags)
> > +                                   *flags |= REF_ISBROKEN;
> > +                   }
> > +                   return refname;
> > +           }
> > +           if (flags)
> > +                   *flags |= REF_ISSYMREF;
> > +           buf = ref_data + 4;
> > +           while (isspace(*buf))
> > +                   buf++;
> > +           strbuf_reset(&refname_buffer);
> > +           strbuf_addstr(&refname_buffer, buf);
> > +           refname = refname_buffer.buf;
> > +           if (resolve_flags & RESOLVE_REF_NO_RECURSE) {
> > +                   hashclr(sha1);
> > +                   return refname;
> > +           }
> > +           if (check_refname_format(buf,
> > REFNAME_ALLOW_ONELEVEL)) {
> > +                   if (flags)
> > +                           *flags |= REF_ISBROKEN;
> > +
> > +                   if (!(resolve_flags &
> > RESOLVE_REF_ALLOW_BAD_NAME) ||
> > +                       !refname_is_safe(buf)) {
> > +                           errno = EINVAL;
> > +                           return NULL;
> > +                   }
> > +                   bad_name = 1;
> > +           }
> 
> This code looks a lot like near the end of resolve_ref_1(). Maybe we
> could share the code in refs/backend-common.c or something and call
> here instead?


Something like the following?

commit aad6b84fd1869f6e1cf6ed15bcece0c2f6429e9d
Author: David Turner <[email protected]>
Date:   Thu Feb 18 17:09:29 2016 -0500

    refs: break out some functions from resolve_ref_1
    
    A bunch of resolve_ref_1 is not backend-specific, so we can
    break it out into separate internal functions that other
    backends can use.
    
    Signed-off-by: David Turner <[email protected]>

diff --git a/refs.c b/refs.c
index c9fa34d..680c2a5 100644
--- a/refs.c
+++ b/refs.c
@@ -1221,6 +1221,66 @@ int for_each_rawref(each_ref_fn fn, void
*cb_data)
                               DO_FOR_EACH_INCLUDE_BROKEN, cb_data);
 }
 
+int parse_simple_ref(const char *buf, unsigned char *sha1, unsigned
int *flags, int bad_name)
+{
+       /*
+        * Please note that FETCH_HEAD has a second
+        * line containing other data.
+        */
+       if (get_sha1_hex(buf, sha1) ||
+           (buf[40] != '\0' && !isspace(buf[40]))) {
+               if (flags)
+                       *flags |= REF_ISBROKEN;
+               errno = EINVAL;
+               return -1;
+       }
+       if (bad_name) {
+               hashclr(sha1);
+               if (flags)
+                       *flags |= REF_ISBROKEN;
+       }
+       return 0;
+}
+
+int check_bad_refname(const char *refname, int *flags, int
resolve_flags)
+{
+       if (!check_refname_format(refname, REFNAME_ALLOW_ONELEVEL))
+               return 0;
+
+       if (flags)
+               *flags |= REF_BAD_NAME;
+
+       if (!(resolve_flags & RESOLVE_REF_ALLOW_BAD_NAME) ||
+           !refname_is_safe(refname)) {
+               errno = EINVAL;
+               return -1;
+       }
+       /*
+        * dwim_ref() uses REF_ISBROKEN to distinguish between
+        * missing refs and refs that were present but invalid,
+        * to complain about the latter to stderr.
+        *
+        * We don't know whether the ref exists, so don't set
+        * REF_ISBROKEN yet.
+        */
+       return 1;
+}
+
+/*
+ * Parse a refname out of the contents of a symref into a provided
+ * strbuf.  Return a pointer to the strbuf's contents.
+ */
+char *parse_symref_data(const char *buf, struct strbuf *sb_refname)
+{
+       buf += 4;
+       while (isspace(*buf))
+               buf++;
+       strbuf_reset(sb_refname);
+       strbuf_addstr(sb_refname, buf);
+       return sb_refname->buf;
+}
+
+
 /* backend functions */
 int refs_init_db(int shared, struct strbuf *err)
 {
diff --git a/refs/files-backend.c b/refs/files-backend.c
index da06408..52972e6 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1427,25 +1427,9 @@ static const char *resolve_ref_1(const char
*refname,
        if (flags)
                *flags = 0;
 
-       if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) {
-               if (flags)
-                       *flags |= REF_BAD_NAME;
-
-               if (!(resolve_flags & RESOLVE_REF_ALLOW_BAD_NAME) ||
-                   !refname_is_safe(refname)) {
-                       errno = EINVAL;
-                       return NULL;
-               }
-               /*
-                * dwim_ref() uses REF_ISBROKEN to distinguish between
-                * missing refs and refs that were present but
invalid,
-                * to complain about the latter to stderr.
-                *
-                * We don't know whether the ref exists, so don't set
-                * REF_ISBROKEN yet.
-                */
-               bad_name = 1;
-       }
+       bad_name = check_bad_refname(refname, flags, resolve_flags);
+       if (bad_name < 0)
+               return NULL;
        for (;;) {
                const char *path;
                struct stat st;
@@ -1541,47 +1525,20 @@ static const char *resolve_ref_1(const char
*refname,
                 * Is it a symbolic ref?
                 */
                if (!starts_with(sb_contents->buf, "ref:")) {
-                       /*
-                        * Please note that FETCH_HEAD has a second
-                        * line containing other data.
-                        */
-                       if (get_sha1_hex(sb_contents->buf, sha1) ||
-                           (sb_contents->buf[40] != '\0' &&
!isspace(sb_contents->buf[40]))) {
-                               if (flags)
-                                       *flags |= REF_ISBROKEN;
-                               errno = EINVAL;
-                               return NULL;
-                       }
-                       if (bad_name) {
-                               hashclr(sha1);
-                               if (flags)
-                                       *flags |= REF_ISBROKEN;
-                       }
+                       if (parse_simple_ref(sb_contents->buf, sha1,
flags, bad_name))
+                               refname = NULL;
                        return refname;
                }
                if (flags)
                        *flags |= REF_ISSYMREF;
-               buf = sb_contents->buf + 4;
-               while (isspace(*buf))
-                       buf++;
-               strbuf_reset(sb_refname);
-               strbuf_addstr(sb_refname, buf);
-               refname = sb_refname->buf;
+               refname = parse_symref_data(sb_contents->buf,
sb_refname);
                if (resolve_flags & RESOLVE_REF_NO_RECURSE) {
                        hashclr(sha1);
                        return refname;
                }
-               if (check_refname_format(buf, REFNAME_ALLOW_ONELEVEL))
{
-                       if (flags)
-                               *flags |= REF_ISBROKEN;
-
-                       if (!(resolve_flags &
RESOLVE_REF_ALLOW_BAD_NAME) ||
-                           !refname_is_safe(buf)) {
-                               errno = EINVAL;
-                               return NULL;
-                       }
-                       bad_name = 1;
-               }
+               bad_name |= check_bad_refname(refname, flags,
resolve_flags);
+               if (bad_name < 0)
+                       return NULL;
        }
 }
 
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index efdde82..7cdfffe 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -218,6 +218,26 @@ int do_for_each_per_worktree_ref(const char
*submodule, const char *base,
 int do_for_each_ref(const char *submodule, const char *base,
                    each_ref_fn fn, int trim, int flags, void
*cb_data);
 
+/*
+ * Parse a non-symref -- a buf hopefully containing 40 hex characters.
+ * Set errno and flags appropriately.  If the buf can be parsed but
+ * bad_name is set, the ref is broken: zero out sha1.
+ */
+int parse_simple_ref(const char *buf, unsigned char *sha1, unsigned
int *flags,
+                    int bad_name);
+/*
+ * Parse a refname out of the contents of a symref into a provided
+ * strbuf.  Return a pointer to the strbuf's contents.
+ */
+char *parse_symref_data(const char *buf, struct strbuf *sb_refname);
+
+/*
+ * Check the format of refname.  Set flags and errno appropriately.
+ * Returns 0 if the refname is good, -1 if it is bad enough that we
+ * have to stop parsing and 1 if we just have to note that it is bad.
+ */
+int check_bad_refname(const char *refname, int *flags, int
resolve_flags);
+
 /* refs backends */
 typedef int ref_init_db_fn(int shared, struct strbuf *err);
 typedef int ref_transaction_commit_fn(struct ref_transaction
*transaction,




followed by this version of parse_ref_data:

static const char *parse_ref_data(struct lmdb_transaction *transaction,
                                  const char *refname, const char
*ref_data,
                                  unsigned char *sha1, int
resolve_flags,
                                  int *flags, int bad_name)
{
        int depth = MAXDEPTH;
        const char *buf;
        static struct strbuf refname_buffer = STRBUF_INIT;
        static struct strbuf refdata_buffer = STRBUF_INIT;
        MDB_val key, val;
        int needs_free = 0;

        for (;;) {
                if (--depth < 0)
                        return NULL;

                /*
                 * Is it a symbolic ref?
                 */
                if (!starts_with(ref_data, "ref:")) {
                        if (parse_simple_ref(ref_data, sha1, flags,
bad_name))
                                refname = NULL;
                         if (is_null_sha1(sha1) && flags)
                                *flags |= REF_ISBROKEN;
                        return refname;
                }
                if (flags)
                        *flags |= REF_ISSYMREF;

                refname = parse_symref_data(ref_data, &refname_buffer);
                if (resolve_flags & RESOLVE_REF_NO_RECURSE) {
                        hashclr(sha1);
                        return refname;
                }
                bad_name |= check_bad_refname(refname, flags,
resolve_flags);
                if (bad_name < 0)
                        return NULL;

                key.mv_data = (char *)refname;
                key.mv_size = strlen(refname) + 1;
                if (mdb_get_or_die(transaction, &key, &val,
&needs_free)) {
                        hashclr(sha1);
                        if (bad_name) {
                                if (flags)
                                        *flags |= REF_ISBROKEN;
                        }
                        if (resolve_flags & RESOLVE_REF_READING)
                                return NULL;

                        return refname;
                }
                strbuf_reset(&refdata_buffer);
                strbuf_add(&refdata_buffer, val.mv_data, val.mv_size);
                if (needs_free)
                        free(val.mv_data);
                ref_data = refdata_buffer.buf;
        }
        return refname;
}

----------------

I'm not sure I like it, because it breaks out these weird tiny
functions that take a lot of arguments.  But maybe it's worth it?  What
do you think?
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v5 25/27] refs: add LMDB refs storage backend

Reply via email to