While using cvsps 2.1 to import the zsh cvs repo into git, Clint Adams noticed that there were a couple files whose data didn't match the originals. Further investigation by me revealed that there were several places in the history where patch sets were applied in a reverse order in the git version of the repo. I looked into the cvsps code and discovered the following bug: The code failed to handle a set of commits where (1) the first one was a ChangeLog commit with a particular log message which was followed by (2) a commit to another file with the same log message, and then (3) a second commit to the same extra file (within the default fuzz factor of 5 minutes) that has the same log message as the other 2 commits. This caused the reverse-scan of the cvs log data to associate the newer revision of the twice-changed file with the ChangeLog commit (which gives the combined changes the patchset time of the ChangeLog change), and it then created a patchset entry for the 2nd, earlier change, but marked it as later in time than the patchset that contained the file's newer change. Here's a simple ASCII graph of the situation: -> committs over time -> 00:00 00:01 00:04 file1: x --------------+ file2: y z Note that file1 "x" and file2 "z" get joined (at time 00:00), while file2 gets a later revision, "y" (at 00:01), which reverses the patch between y and z. My fix causes "z" to be the single-file patchset (at 00:04), with "x" and "y" joined into a patchset (at 00:00). The appended patch fixes the bug in the following way: When the cvs-log data for each is being scanned, we defer doing the call that adds a patchset member to a matching patchset until we know the date of the prior (older) commit in the file. I changed some of the date- parsing code to pass around date values instead of raw date buffers in order to avoid converting a date multiple times. I also changed the re-initializing of the buffers that hold the log-data strings so that they could linger a little longer, making the later association-call nice and efficient. The basic logic for avoiding a wrongful association is pretty simple: instead of having the new entry have both min_date and max_date set to 0 (which flags that this item is the newbie and the other one has the real min/max patchset dates), only max_date is set to 0, and min_date is set to the date of the prior (older) entry in the current file. If at any time we find a potential matching patchset that has its patchset date less than or equal to that of the prior (older) entry, then this later entry can't possibly be in the patchset we're considering, and we don't associate it. This allows the prior (older) change to get associated with the other files in the older patchset, as it should. --Wayne Davison --- cvsps-2.1/cache.c 2005-05-26 03:39:40.000000000 +0000 +++ cache.c 2009-04-05 19:33:33.000000000 +0000 @@ -288,7 +288,7 @@ time_t read_cache() if (strcmp(buff, CACHE_DESCR_BOUNDARY) == 0) { debug(DEBUG_STATUS, "patch set %s %s %s %s", datebuff, authbuff, logbuff, branchbuff); - ps = get_patch_set(datebuff, logbuff, authbuff, branchbuff, NULL); + ps = get_patch_set(convert_date(datebuff), 0, logbuff, authbuff, branchbuff, NULL); /* the tag and tag_flags will be assigned by the resolve_global_symbols code * ps->tag = (strlen(tagbuff)) ? get_string(tagbuff) : NULL; * ps->tag_flags = tag_flags; --- cvsps-2.1/cvsps.c 2005-05-26 03:39:40.000000000 +0000 +++ cvsps.c 2009-04-05 20:15:23.000000000 +0000 @@ -262,7 +262,8 @@ static void load_from_cvs() char buff[BUFSIZ]; int state = NEED_FILE; CvsFile * file = NULL; - PatchSetMember * psm = NULL; + PatchSetMember * pending_psm = NULL, * psm = NULL; + time_t pending_date = 0; char datebuff[20]; char authbuff[AUTH_STR_MAX]; char logbuff[LOG_STR_MAX + 1]; @@ -376,15 +377,6 @@ static void load_from_cvs() */ rev = parse_revision(file, new_rev); - /* - * in the simple case, we are copying rev to psm->pre_rev - * (psm refers to last patch set processed at this point) - * since generally speaking the log is reverse chronological. - * This breaks down slightly when branches are introduced - */ - - assign_pre_revision(psm, rev); - /* * if this is a new revision, it will have no post_psm associated. * otherwise we are (probably?) hitting the overlap in cvsps -u @@ -402,7 +394,16 @@ static void load_from_cvs() * w.r.t this particular file. skip all of the rest * of the info (revs and logs) until we hit the next file */ + if (pending_psm) { + PatchSet * ps = get_patch_set(pending_date, 0, logbuff, authbuff, pending_psm->post_rev->branch, pending_psm); + patch_set_add_member(ps, pending_psm); + assign_pre_revision(pending_psm, rev); + pending_psm = NULL; + } psm = NULL; + logbuff[0] = 0; + loglen = 0; + have_log = 0; state = NEED_EOM; } } @@ -410,10 +411,24 @@ static void load_from_cvs() case NEED_DATE_AUTHOR_STATE: if (strncmp(buff, "date:", 5) == 0) { + time_t this_date; char * p; strncpy(datebuff, buff + 6, 19); datebuff[19] = 0; + this_date = convert_date(datebuff); + + /* We have delayed processing the pending_psm so that we can get this + * date info. Now that we have it, process the pending revision, then + * remember the parsed date value in pending_date. */ + if (pending_psm) { + PatchSet * ps = get_patch_set(pending_date, this_date, logbuff, authbuff, pending_psm->post_rev->branch, pending_psm); + patch_set_add_member(ps, pending_psm); + assign_pre_revision(pending_psm, psm->post_rev); + pending_psm = NULL; + } + + pending_date = this_date; strcpy(authbuff, "unknown"); p = strstr(buff, "author: "); @@ -440,35 +455,27 @@ static void load_from_cvs() psm->post_rev->dead = 1; } + logbuff[0] = 0; + loglen = 0; + have_log = 0; state = NEED_EOM; } break; case NEED_EOM: if (strcmp(buff, CVS_LOG_BOUNDARY) == 0) { - if (psm) - { - PatchSet * ps = get_patch_set(datebuff, logbuff, authbuff, psm->post_rev->branch, psm); - patch_set_add_member(ps, psm); - } - - logbuff[0] = 0; - loglen = 0; - have_log = 0; + pending_psm = psm; state = NEED_REVISION; } else if (strcmp(buff, CVS_FILE_BOUNDARY) == 0) { if (psm) { - PatchSet * ps = get_patch_set(datebuff, logbuff, authbuff, psm->post_rev->branch, psm); + PatchSet * ps = get_patch_set(pending_date, 0, logbuff, authbuff, psm->post_rev->branch, psm); patch_set_add_member(ps, psm); assign_pre_revision(psm, NULL); } - logbuff[0] = 0; - loglen = 0; - have_log = 0; psm = NULL; file = NULL; state = NEED_FILE; @@ -709,7 +716,7 @@ static int parse_args(int argc, char *ar return usage("argument to -d missing", ""); pt = (restrict_date_start == 0) ? &restrict_date_start : &restrict_date_end; - convert_date(pt, argv[i++]); + *pt = convert_date(argv[i++]); continue; } @@ -1154,7 +1161,8 @@ static CvsFile * parse_file(const char * return retval; } -PatchSet * get_patch_set(const char * dte, const char * log, const char * author, const char * branch, PatchSetMember * psm) +PatchSet * get_patch_set(time_t dte, time_t prior_dte, const char * log, + const char * author, const char * branch, PatchSetMember * psm) { PatchSet * retval = NULL, **find = NULL; int (*cmp1)(const void *,const void*) = (bkcvs) ? compare_patch_sets_bk : compare_patch_sets; @@ -1165,7 +1173,13 @@ PatchSet * get_patch_set(const char * dt return NULL; } - convert_date(&retval->date, dte); + /* Put the prior revision's date into the new patchset's min_date, + * leaving max_date 0. The prior date will be used to avoid associating + * a change with a patchset that is older than the prior change. Sadly, + * a branch can get in the way of this check, so if the time is not + * in the right order, ignore it. */ + retval->min_date = prior_dte < dte ? prior_dte : 0; + retval->date = dte; retval->author = get_string(author); retval->descr = xstrdup(log); retval->branch = get_string(branch); @@ -1221,8 +1235,14 @@ PatchSet * get_patch_set(const char * dt } else { - debug(DEBUG_STATUS, "new patch set!"); - debug(DEBUG_STATUS, "%s %s %s", retval->author, retval->descr, dte); + if (debuglvl & DEBUG_STATUS) + { + struct tm * tm = gmtime(&dte); + char dte_str[32]; + strftime(dte_str, 32, "%Y/%m/%d %H:%M:%S", tm); + debug(DEBUG_STATUS, "new patch set!"); + debug(DEBUG_STATUS, "%s %s %s", retval->author, retval->descr, dte_str); + } retval->min_date = retval->date - timestamp_fuzz_factor; retval->max_date = retval->date + timestamp_fuzz_factor; @@ -1617,19 +1637,23 @@ static int compare_patch_sets(const void /* * one of ps1 or ps2 is new. the other should have the min_date - * and max_date set to a window opened by the fuzz_factor + * and max_date set to a window opened by the fuzz_factor. The + * new item may have a min_date that specifies the date of the + * (time-relative) prior revision in the same file (when not 0). */ - if (ps1->min_date == 0) + if (ps1->max_date == 0) { d = ps1->date; min = ps2->min_date; max = ps2->max_date; + diff = ps1->min_date ? ps2->date - ps1->min_date : 1; } - else if (ps2->min_date == 0) + else if (ps2->max_date == 0) { d = ps2->date; min = ps1->min_date; max = ps1->max_date; + diff = ps2->min_date ? ps1->date - ps2->min_date : 1; } else { @@ -1637,7 +1661,7 @@ static int compare_patch_sets(const void exit(1); } - if (min < d && d < max) + if (min < d && d < max && diff > 0) return 0; diff = ps1->date - ps2->date; --- cvsps-2.1/cvsps.h 2005-05-26 03:39:40.000000000 +0000 +++ cvsps.h 2009-04-05 19:33:33.000000000 +0000 @@ -25,7 +25,7 @@ CvsFile * create_cvsfile(); CvsFileRevision * cvs_file_add_revision(CvsFile *, const char *); void cvs_file_add_symbol(CvsFile * file, const char * rev, const char * tag); char * cvs_file_add_branch(CvsFile *, const char *, const char *); -PatchSet * get_patch_set(const char *, const char *, const char *, const char *, PatchSetMember *); +PatchSet * get_patch_set(time_t, time_t, const char *, const char *, const char *, PatchSetMember *); PatchSetMember * create_patch_set_member(); CvsFileRevision * file_get_revision(CvsFile *, const char *); void patch_set_add_member(PatchSet * ps, PatchSetMember * psm); --- cvsps-2.1/util.c 2005-05-26 03:39:40.000000000 +0000 +++ util.c 2009-04-05 19:33:33.000000000 +0000 @@ -170,7 +170,7 @@ static time_t mktime_utc(struct tm * tm) return ret; } -void convert_date(time_t * t, const char * dte) +time_t convert_date(const char * dte) { static regex_t date_re; static int init_re; @@ -207,12 +207,9 @@ void convert_date(time_t * t, const char tm.tm_year -= 1900; tm.tm_mon--; - *t = mktime_utc(&tm); - } - else - { - *t = atoi(dte); + return mktime_utc(&tm); } + return atoi(dte); } static struct timeval start_time; --- cvsps-2.1/util.h 2005-05-26 03:39:40.000000000 +0000 +++ util.h 2009-04-05 19:33:33.000000000 +0000 @@ -18,7 +18,7 @@ char *readfile(char const *filename, cha char *strrep(char *s, char find, char replace); char *get_cvsps_dir(); char *get_string(char const *str); -void convert_date(time_t *, const char *); +time_t convert_date(const char *); void timing_start(); void timing_stop(const char *); int my_system(const char *);