On Thu, 2016-02-18 at 15:50 +0700, Duy Nguyen wrote:
> Caveat: I did not study how to use lmdb. I just guessed what it does
> based on function names. I don't know much about refs handling either
> (especially since the transaction thing is introduced)
>
> > diff --git a/Documentation/technical/refs-lmdb-backend.txt
> > b/Documentation/technical/refs-lmdb-backend.txt
> > new file mode 100644
> > index 0000000..eb81465
> > --- /dev/null
> > +++ b/Documentation/technical/refs-lmdb-backend.txt
> > +Reflog values are in the same format as the original files-based
> > +reflog, including the trailing LF. The date in the reflog value
> > +matches the date in the timestamp field.
>
> ..except that SHA-1s are stored in raw values instead of hex strings.
>
> > diff --git a/Documentation/technical/repository-version.txt
> > b/Documentation/technical/repository-version.txt
> > index 00ad379..fca5ecd 100644
> > --- a/Documentation/technical/repository-version.txt
> > +++ b/Documentation/technical/repository-version.txt
> > @@ -86,3 +86,8 @@ for testing format-1 compatibility.
> > When the config key `extensions.preciousObjects` is set to `true`,
> > objects in the repository MUST NOT be deleted (e.g., by `git
> > -prune` or
> > `git repack -d`).
> > +
> > +`refStorage`
> > +~~~~~~~~~~~~
> > +This extension allows the use of alternate ref storage backends.
> > The
> > +only defined value is `lmdb`.
>
> refStorage accepts empty string and `files` as well, should probably
> be worth mentioning.
>
> > diff --git a/refs/lmdb-backend.c b/refs/lmdb-backend.c
> > +#include "../cache.h"
> > +#include <lmdb.h>
> > +#include "../object.h"
> > +#include "../refs.h"
> > +#include "refs-internal.h"
> > +#include "../tag.h"
> > +#include "../lockfile.h"
>
> I'm quite sure we don't need "../". We have plenty of source files in
> subdirs and many of them (haven't checked all) just go with
> #include "cache.h".
>
> > +struct lmdb_transaction transaction;
>
> static?
>
> > +
> > +static int in_write_transaction(void)
> > +{
> > + return transaction.txn && !(transaction.flags &
> > MDB_RDONLY);
> > +}
> > +
> > +static void init_env(MDB_env **env, const char *path)
> > +{
> > + int ret;
> > + if (*env)
> > + return;
> > +
> > + assert(path);
> > +
> > + ret = mdb_env_create(env);
> > + if (ret)
> > + die("BUG: mdb_env_create failed: %s",
> > mdb_strerror(ret));
> > + ret = mdb_env_set_maxreaders(*env, 1000);
> > + if (ret)
> > + die("BUG: mdb_env_set_maxreaders failed: %s",
> > mdb_strerror(ret));
> > + ret = mdb_env_set_mapsize(*env, (1<<30));
> > + if (ret)
> > + die("BUG: mdb_set_mapsize failed: %s",
> > mdb_strerror(ret));
> > + ret = mdb_env_open(*env, path, 0 , 0664);
>
> This permission makes me wonder if we need adjust_shared_perm() here
> and some other places.
>
> > + if (ret)
> > + die("BUG: mdb_env_open (%s) failed: %s", path,
> > + mdb_strerror(ret));
> > +}
> > +
> > +static int lmdb_init_db(int shared, struct strbuf *err)
> > +{
> > + /*
> > + * To create a db, all we need to do is make a directory
> > for
> > + * it to live in; lmdb will do the rest.
> > + */
> > +
> > + if (!db_path)
> > + db_path =
> > xstrdup(real_path(git_path("refs.lmdb")));
> > +
> > + if (mkdir(db_path, 0775) && errno != EEXIST) {
> > + strbuf_addf(err, "%s", strerror(errno));
>
> maybe strbuf_addstr, unless want to add something more, "mkdir
> failed"?
>
> > +static int read_per_worktree_ref(const char *submodule, const char
> > *refname,
> > + struct MDB_val *val, int
> > *needs_free)
>
> From what I read, I suspect these _per_worktree functions will be
> identical for the next backend. Should we just hand over the job for
> files backend? For all entry points that may deal with per-worktree
> refs, e.g. lmdb_resolve_ref_unsafe, can we check ref_type() first
> thing, if it's per-worktree we call
> refs_be_files.resolve_ref_unsafe()
> instead? It could even be done at frontend level,
> e.g. refs.c:resolve_ref_unsafe().
>
> Though I may be talking rubbish here because I don't know how whether
> it has anything to do with transactions.
>
> > +{
> > + struct strbuf sb = STRBUF_INIT;
> > + struct strbuf path = STRBUF_INIT;
> > + struct stat st;
> > + int ret = -1;
> > +
> > + submodule_path(&path, submodule, refname);
> > +
> > +#ifndef NO_SYMLINK_HEAD
>
> It started with the compiler warns about unused "st" when this macro
> is defined. Which makes me wonder, should we do something like this
> to
> make sure this code compiles unconditionally?
>
> +#ifndef NO_SYMLINK_HEAD
> + int no_symlink_head = 0;
> +#else
> + int no_symlink_head = 1;
> +#endif
> ...
> + if (!no_symlink_head) {
> ...
>
>
> > +int lmdb_transaction_begin_flags(struct strbuf *err, unsigned int
> > flags)
>
> static?
>
> > +#define MAXDEPTH 5
> > +
> > +static const char *parse_ref_data(struct lmdb_transaction
> > *transaction,
> > + const char *refname, const char
> > *ref_data,
> > + unsigned char *sha1, int
> > resolve_flags,
> > + int *flags, int bad_name)
> > +{
> > + int depth = MAXDEPTH;
> > + const char *buf;
> > + static struct strbuf refname_buffer = STRBUF_INIT;
> > + static struct strbuf refdata_buffer = STRBUF_INIT;
> > + MDB_val key, val;
> > + int needs_free = 0;
> > +
> > + for (;;) {
> > + if (--depth < 0)
> > + return NULL;
> > +
> > + if (!starts_with(ref_data, "ref:")) {
> > + if (get_sha1_hex(ref_data, sha1) ||
> > + (ref_data[40] != '\0' &&
> > !isspace(ref_data[40]))) {
> > + if (flags)
> > + *flags |= REF_ISBROKEN;
> > + errno = EINVAL;
> > + return NULL;
> > + }
> > +
> > + if (bad_name) {
> > + hashclr(sha1);
> > + if (flags)
> > + *flags |= REF_ISBROKEN;
> > + } else if (is_null_sha1(sha1)) {
> > + if (flags)
> > + *flags |= REF_ISBROKEN;
> > + }
> > + return refname;
> > + }
> > + if (flags)
> > + *flags |= REF_ISSYMREF;
> > + buf = ref_data + 4;
> > + while (isspace(*buf))
> > + buf++;
> > + strbuf_reset(&refname_buffer);
> > + strbuf_addstr(&refname_buffer, buf);
> > + refname = refname_buffer.buf;
> > + if (resolve_flags & RESOLVE_REF_NO_RECURSE) {
> > + hashclr(sha1);
> > + return refname;
> > + }
> > + if (check_refname_format(buf,
> > REFNAME_ALLOW_ONELEVEL)) {
> > + if (flags)
> > + *flags |= REF_ISBROKEN;
> > +
> > + if (!(resolve_flags &
> > RESOLVE_REF_ALLOW_BAD_NAME) ||
> > + !refname_is_safe(buf)) {
> > + errno = EINVAL;
> > + return NULL;
> > + }
> > + bad_name = 1;
> > + }
>
> This code looks a lot like near the end of resolve_ref_1(). Maybe we
> could share the code in refs/backend-common.c or something and call
> here instead?
Something like the following?
commit aad6b84fd1869f6e1cf6ed15bcece0c2f6429e9d
Author: David Turner <[email protected]>
Date: Thu Feb 18 17:09:29 2016 -0500
refs: break out some functions from resolve_ref_1
A bunch of resolve_ref_1 is not backend-specific, so we can
break it out into separate internal functions that other
backends can use.
Signed-off-by: David Turner <[email protected]>
diff --git a/refs.c b/refs.c
index c9fa34d..680c2a5 100644
--- a/refs.c
+++ b/refs.c
@@ -1221,6 +1221,66 @@ int for_each_rawref(each_ref_fn fn, void
*cb_data)
DO_FOR_EACH_INCLUDE_BROKEN, cb_data);
}
+int parse_simple_ref(const char *buf, unsigned char *sha1, unsigned
int *flags, int bad_name)
+{
+ /*
+ * Please note that FETCH_HEAD has a second
+ * line containing other data.
+ */
+ if (get_sha1_hex(buf, sha1) ||
+ (buf[40] != '\0' && !isspace(buf[40]))) {
+ if (flags)
+ *flags |= REF_ISBROKEN;
+ errno = EINVAL;
+ return -1;
+ }
+ if (bad_name) {
+ hashclr(sha1);
+ if (flags)
+ *flags |= REF_ISBROKEN;
+ }
+ return 0;
+}
+
+int check_bad_refname(const char *refname, int *flags, int
resolve_flags)
+{
+ if (!check_refname_format(refname, REFNAME_ALLOW_ONELEVEL))
+ return 0;
+
+ if (flags)
+ *flags |= REF_BAD_NAME;
+
+ if (!(resolve_flags & RESOLVE_REF_ALLOW_BAD_NAME) ||
+ !refname_is_safe(refname)) {
+ errno = EINVAL;
+ return -1;
+ }
+ /*
+ * dwim_ref() uses REF_ISBROKEN to distinguish between
+ * missing refs and refs that were present but invalid,
+ * to complain about the latter to stderr.
+ *
+ * We don't know whether the ref exists, so don't set
+ * REF_ISBROKEN yet.
+ */
+ return 1;
+}
+
+/*
+ * Parse a refname out of the contents of a symref into a provided
+ * strbuf. Return a pointer to the strbuf's contents.
+ */
+char *parse_symref_data(const char *buf, struct strbuf *sb_refname)
+{
+ buf += 4;
+ while (isspace(*buf))
+ buf++;
+ strbuf_reset(sb_refname);
+ strbuf_addstr(sb_refname, buf);
+ return sb_refname->buf;
+}
+
+
/* backend functions */
int refs_init_db(int shared, struct strbuf *err)
{
diff --git a/refs/files-backend.c b/refs/files-backend.c
index da06408..52972e6 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1427,25 +1427,9 @@ static const char *resolve_ref_1(const char
*refname,
if (flags)
*flags = 0;
- if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) {
- if (flags)
- *flags |= REF_BAD_NAME;
-
- if (!(resolve_flags & RESOLVE_REF_ALLOW_BAD_NAME) ||
- !refname_is_safe(refname)) {
- errno = EINVAL;
- return NULL;
- }
- /*
- * dwim_ref() uses REF_ISBROKEN to distinguish between
- * missing refs and refs that were present but
invalid,
- * to complain about the latter to stderr.
- *
- * We don't know whether the ref exists, so don't set
- * REF_ISBROKEN yet.
- */
- bad_name = 1;
- }
+ bad_name = check_bad_refname(refname, flags, resolve_flags);
+ if (bad_name < 0)
+ return NULL;
for (;;) {
const char *path;
struct stat st;
@@ -1541,47 +1525,20 @@ static const char *resolve_ref_1(const char
*refname,
* Is it a symbolic ref?
*/
if (!starts_with(sb_contents->buf, "ref:")) {
- /*
- * Please note that FETCH_HEAD has a second
- * line containing other data.
- */
- if (get_sha1_hex(sb_contents->buf, sha1) ||
- (sb_contents->buf[40] != '\0' &&
!isspace(sb_contents->buf[40]))) {
- if (flags)
- *flags |= REF_ISBROKEN;
- errno = EINVAL;
- return NULL;
- }
- if (bad_name) {
- hashclr(sha1);
- if (flags)
- *flags |= REF_ISBROKEN;
- }
+ if (parse_simple_ref(sb_contents->buf, sha1,
flags, bad_name))
+ refname = NULL;
return refname;
}
if (flags)
*flags |= REF_ISSYMREF;
- buf = sb_contents->buf + 4;
- while (isspace(*buf))
- buf++;
- strbuf_reset(sb_refname);
- strbuf_addstr(sb_refname, buf);
- refname = sb_refname->buf;
+ refname = parse_symref_data(sb_contents->buf,
sb_refname);
if (resolve_flags & RESOLVE_REF_NO_RECURSE) {
hashclr(sha1);
return refname;
}
- if (check_refname_format(buf, REFNAME_ALLOW_ONELEVEL))
{
- if (flags)
- *flags |= REF_ISBROKEN;
-
- if (!(resolve_flags &
RESOLVE_REF_ALLOW_BAD_NAME) ||
- !refname_is_safe(buf)) {
- errno = EINVAL;
- return NULL;
- }
- bad_name = 1;
- }
+ bad_name |= check_bad_refname(refname, flags,
resolve_flags);
+ if (bad_name < 0)
+ return NULL;
}
}
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index efdde82..7cdfffe 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -218,6 +218,26 @@ int do_for_each_per_worktree_ref(const char
*submodule, const char *base,
int do_for_each_ref(const char *submodule, const char *base,
each_ref_fn fn, int trim, int flags, void
*cb_data);
+/*
+ * Parse a non-symref -- a buf hopefully containing 40 hex characters.
+ * Set errno and flags appropriately. If the buf can be parsed but
+ * bad_name is set, the ref is broken: zero out sha1.
+ */
+int parse_simple_ref(const char *buf, unsigned char *sha1, unsigned
int *flags,
+ int bad_name);
+/*
+ * Parse a refname out of the contents of a symref into a provided
+ * strbuf. Return a pointer to the strbuf's contents.
+ */
+char *parse_symref_data(const char *buf, struct strbuf *sb_refname);
+
+/*
+ * Check the format of refname. Set flags and errno appropriately.
+ * Returns 0 if the refname is good, -1 if it is bad enough that we
+ * have to stop parsing and 1 if we just have to note that it is bad.
+ */
+int check_bad_refname(const char *refname, int *flags, int
resolve_flags);
+
/* refs backends */
typedef int ref_init_db_fn(int shared, struct strbuf *err);
typedef int ref_transaction_commit_fn(struct ref_transaction
*transaction,
followed by this version of parse_ref_data:
static const char *parse_ref_data(struct lmdb_transaction *transaction,
const char *refname, const char
*ref_data,
unsigned char *sha1, int
resolve_flags,
int *flags, int bad_name)
{
int depth = MAXDEPTH;
const char *buf;
static struct strbuf refname_buffer = STRBUF_INIT;
static struct strbuf refdata_buffer = STRBUF_INIT;
MDB_val key, val;
int needs_free = 0;
for (;;) {
if (--depth < 0)
return NULL;
/*
* Is it a symbolic ref?
*/
if (!starts_with(ref_data, "ref:")) {
if (parse_simple_ref(ref_data, sha1, flags,
bad_name))
refname = NULL;
if (is_null_sha1(sha1) && flags)
*flags |= REF_ISBROKEN;
return refname;
}
if (flags)
*flags |= REF_ISSYMREF;
refname = parse_symref_data(ref_data, &refname_buffer);
if (resolve_flags & RESOLVE_REF_NO_RECURSE) {
hashclr(sha1);
return refname;
}
bad_name |= check_bad_refname(refname, flags,
resolve_flags);
if (bad_name < 0)
return NULL;
key.mv_data = (char *)refname;
key.mv_size = strlen(refname) + 1;
if (mdb_get_or_die(transaction, &key, &val,
&needs_free)) {
hashclr(sha1);
if (bad_name) {
if (flags)
*flags |= REF_ISBROKEN;
}
if (resolve_flags & RESOLVE_REF_READING)
return NULL;
return refname;
}
strbuf_reset(&refdata_buffer);
strbuf_add(&refdata_buffer, val.mv_data, val.mv_size);
if (needs_free)
free(val.mv_data);
ref_data = refdata_buffer.buf;
}
return refname;
}
----------------
I'm not sure I like it, because it breaks out these weird tiny
functions that take a lot of arguments. But maybe it's worth it? What
do you think?
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html