Ralf Wildenhues wrote: > * lib/fstrcmp.c (EXTRA_CONTEXT_FIELDS): New field edit_count_limit. > (CHECK_CONDITION): New define; check if the bound has been reached. > (fstrcmp_internal): New argument 'bound', used to compute > edit_count_limit. Return zero if the limit has been reached.
> ctxt.edit_count_limit = (1. - bound) * (xvec_length + yvec_length) + 1; I needed to add an epsilon here, to guard against rounding errors, otherwise the testsuite failed. I also optimized the early about condition a bit, and committed this (after combining the xvec_edit_count and yvec_edit_count into a single field). 2008-09-14 Ralf Wildenhues <[EMAIL PROTECTED]> * lib/fstrcmp.c (EXTRA_CONTEXT_FIELDS): Add field 'edit_count_limit'. (EARLY_ABORT): Return true when the edit_count has grown too beyond the limit. (fstrcmp_bounded): Initialize the edit_count_limit. Return 0 when compareseq was aborted. *** lib/fstrcmp.c.orig 2008-09-14 23:17:44.000000000 +0200 --- lib/fstrcmp.c 2008-09-14 22:06:04.000000000 +0200 *************** *** 65,74 **** #define EQUAL(x,y) ((x) == (y)) #define OFFSET int #define EXTRA_CONTEXT_FIELDS \ ! /* The number of elements inserted, plus the number of elements deleted. */ \ int edit_count; #define NOTE_DELETE(ctxt, xoff) ctxt->edit_count++ #define NOTE_INSERT(ctxt, yoff) ctxt->edit_count++ /* We don't need USE_HEURISTIC, since it is unlikely in typical uses of fstrcmp(). */ #include "diffseq.h" --- 65,78 ---- #define EQUAL(x,y) ((x) == (y)) #define OFFSET int #define EXTRA_CONTEXT_FIELDS \ ! /* The number of edits beyond which the computation can be aborted. */ \ ! int edit_count_limit; \ ! /* The number of edits (= number of elements inserted, plus the number of \ ! elements deleted), temporarily minus edit_count_limit. */ \ int edit_count; #define NOTE_DELETE(ctxt, xoff) ctxt->edit_count++ #define NOTE_INSERT(ctxt, yoff) ctxt->edit_count++ + #define EARLY_ABORT(ctxt) ctxt->edit_count > 0 /* We don't need USE_HEURISTIC, since it is unlikely in typical uses of fstrcmp(). */ #include "diffseq.h" *************** *** 175,184 **** ctxt.fdiag = buffer + yvec_length + 1; ctxt.bdiag = ctxt.fdiag + fdiag_len; /* Now do the main comparison algorithm */ ! ctxt.edit_count = 0; ! compareseq (0, xvec_length, 0, yvec_length, 0, ! &ctxt); /* The result is ((number of chars in common) / (average length of the strings)). --- 179,206 ---- ctxt.fdiag = buffer + yvec_length + 1; ctxt.bdiag = ctxt.fdiag + fdiag_len; + /* The edit_count is only ever increased. The computation can be aborted + when + (xvec_length + yvec_length - edit_count) / (xvec_length + yvec_length) + < lower_bound, + or equivalently + edit_count > (xvec_length + yvec_length) * (1 - lower_bound) + or equivalently + edit_count > floor((xvec_length + yvec_length) * (1 - lower_bound)). + We need to add an epsilon inside the floor(...) argument, to neutralize + rounding errors. */ + ctxt.edit_count_limit = + (lower_bound < 1.0 + ? (int) ((xvec_length + yvec_length) * (1.0 - lower_bound + 0.000001)) + : 0); + /* Now do the main comparison algorithm */ ! ctxt.edit_count = - ctxt.edit_count_limit; ! if (compareseq (0, xvec_length, 0, yvec_length, 0, &ctxt)) ! /* The edit_count passed the limit. Hence the result would be ! < lower_bound. We can return any value < lower_bound instead. */ ! return 0.0; ! ctxt.edit_count += ctxt.edit_count_limit; /* The result is ((number of chars in common) / (average length of the strings)).