unbatcher out of sync?

Eamonn McManus em at dce.ie
Wed Jan 16 10:34:28 AEST 1991


henry at zoo.toronto.edu (Henry Spencer) writes:
>It means "something's wrong with your batch":  relaynews did not find a
>"#! rnews nnnnn" line where one should have been.  Typically this means
>garbling during preparation or transmission.  One notorious trouble spot
>is that the batch format cannot tolerate transformations of newlines to
>CR-LF pairs; the byte counts in the "#! rnews" lines must be spot-on.

We had `unbatcher out of sync' problems at a site I was involved in, which
was fed its news by mail from a VMS site (ugh).  The VMS mailer (PMDF) got
confused when lines exceeded 256 characters, as References lines often do,
and would make a total hash of the header when this happened.  As a result,
the "#! rnews" count would always be off by a small amount for the affected
article.  C News resyncs at the next "#! rnews" line, but if the count is
too long for the actual article contents it will have missed the start of
the article following the garbled one.

To kludge around this problem I wrote a program `patchbatch' which zips
through a news batch looking for "#! rnews" lines with incorrect counts.
If it finds one, it hunts back and forth a small amount for the next "#!
rnews" line and adjusts the incorrect one to point to it.  This was
surprisingly effective: while it was running I believe it never failed to
correct a munged batch.

I'm including the source of patchbatch in case it is of use to the original
poster, or anyone else.

,
Eamonn

/* patchbatch.c - patch a news batch. */

/* By Eamonn McManus <emcmanus at cs.tcd.ie>, February 1990.
 * This program is not copyrighted.
 *
 * Blast through a news batch checking the offsets after `#! rnews'.
 * If we find that the offset does not lead to another `#! rnews' line
 * or EOF, we search around for the line somewhere in the vicinity.  If
 * it is found, we go back and patch the original offset to point to the
 * correct place.  This is useful for example on systems where long lines
 * get truncated or split in transmission, since in this case the stated
 * offset will be wrong.
 *
 * This is the hackiest program I have written in a long time.
 */

#include <stdio.h>
#include <stdlib.h>	/* For strtol. */
#include <string.h>
#include <unistd.h>	/* For getopt, read, write, lseek, close. */
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/fcntl.h>	/* For O_RDWR. */

/* Full prototype so that calls are type-checked; an empty parameter list
 * declares no argument types (and means "no arguments" from C23 on). */
extern long strtol(const char *nptr, char **endptr, int base);

/* Non-zero when -v was given: report every count that gets rewritten. */
char verbose;
/* Index of the first non-option argument, maintained by getopt(). */
extern int optind;


int patchbatch(char *name);

/*
 * Program entry point.
 *
 * Accepts the single option -v (sets the file-level `verbose' flag) followed
 * by one or more batch file names, and runs patchbatch() on each file.
 *
 * Exit status: 0 if every file was processed without error, 1 if
 * patchbatch() failed for at least one file, 2 on a usage error (unknown
 * option or no file names).
 */
int main(int argc, char **argv)
{
	static const char usage[] = "Usage: patchbatch [-v] file [...]\n";
	int opt, i, status;

	while ((opt = getopt(argc, argv, "v")) != -1) {
		switch (opt) {
		case 'v':
			verbose = 1;
			break;
		default:
			/* getopt() has already complained about the option. */
			fputs(usage, stderr);
			return 2;
		}
	}
	if (optind == argc) {
		fputs(usage, stderr);
		return 2;
	}

	/* Keep going after a failure so that every named file is attempted. */
	status = 0;
	for (i = optind; i < argc; i++) {
		if (patchbatch(argv[i]) < 0)
			status = 1;
	}
	return status;
}


/* Marker that introduces every article in a batch; a decimal byte count
 * and a newline follow it. */
static const char lead[] = "#! rnews ";

/* Signed constants: they take part in file-offset arithmetic, which must
 * not be silently converted to unsigned. */
enum {
	LEADLEN = sizeof lead - 1,	/* marker length without the NUL */
	FUDGE = 2 * sizeof lead,	/* bytes searched before the stated position */
	BUFLEN = 64			/* header/search buffer, including the NUL */
};

/*
 * Read up to len bytes starting at absolute file position pos into buf.
 * Returns the number of bytes read (less than len only when end of file
 * was reached) or -1 if lseek() or read() failed.
 */
static long read_at(int fd, long pos, char *buf, long len)
{
	long got = 0;

	if (lseek(fd, (off_t)pos, SEEK_SET) == (off_t)-1)
		return -1;
	while (got < len) {
		ssize_t n = read(fd, buf + got, (size_t)(len - got));

		if (n < 0)
			return -1;
		if (n == 0)
			break;
		got += (long)n;
	}
	return got;
}

/*
 * Return the index of the first complete marker inside buf[0..len), or -1
 * if there is none.  Works on raw bytes, so NUL bytes in article data do
 * not cut the search short.
 */
static long find_lead(const char *buf, long len)
{
	long i;

	for (i = 0; i + LEADLEN <= len; i++) {
		if (memcmp(buf + i, lead, LEADLEN) == 0)
			return i;
	}
	return -1;
}

/*
 * Overwrite the numsize-byte count field of the header at file position
 * `here' with `offset', zero-padded on the left to exactly numsize bytes so
 * that nothing else in the file moves.
 * Returns 0 if the field was rewritten, 1 if the new value needs more
 * characters than the field has (reported, file untouched), -1 on an I/O
 * error (reported).
 */
static int rewrite_count(int fd, const char *name, long here, int numsize,
			 long offset)
{
	char field[BUFLEN];
	int len;

	len = snprintf(field, sizeof field, "%0*ld", numsize, offset);
	if (len != numsize) {
		fprintf(stderr, "%s: no room to change article \
length to %ld, file offset %ld\n", name, offset, here);
		return 1;
	}
	if (lseek(fd, (off_t)(here + LEADLEN), SEEK_SET) == (off_t)-1 ||
	    write(fd, field, (size_t)numsize) != (ssize_t)numsize) {
		perror(name);
		return -1;
	}
	if (verbose)
		fprintf(stderr, "%s: changed article length to \
%ld, file offset %ld\n", name, offset, here);
	return 0;
}

/*
 * Check, and repair in place, the byte counts of the news batch `name'.
 *
 * Starting at offset 0 the file is walked header by header.  For each
 * header the stated count must lead either to end of file or to another
 * marker.  If it does not:
 *   - a count that runs past end of file is rewritten to end exactly at
 *     end of file;
 *   - otherwise a BUFLEN-1 byte window starting FUDGE bytes before the
 *     stated position (but never before the article's own first byte) is
 *     searched for the marker, and the count is rewritten to lead to the
 *     first one found.  If the window itself runs into end of file the
 *     article is taken to be the last one and its count is rewritten to
 *     end at end of file.
 * A count that cannot be rewritten because the new value is wider than the
 * old field is reported and skipped; processing continues.
 *
 * Returns 0 when the end of the batch was reached, -1 on any error (a
 * message has been written to stderr).  The file is always closed.
 */
int patchbatch(char *name)
{
	char buf[BUFLEN];
	struct stat st;
	long here, size, got;
	int fd, status = -1;

	if ((fd = open(name, O_RDWR)) < 0) {
		perror(name);
		return -1;
	}
	if (fstat(fd, &st) < 0) {
		perror(name);
		goto done;
	}
	size = (long)st.st_size;

	got = read_at(fd, 0L, buf, BUFLEN - 1);
	if (got != BUFLEN - 1) {
		if (got < 0)
			perror(name);
		else
			fprintf(stderr, "%s: too short for a news batch\n",
				name);
		goto done;
	}
	buf[got] = '\0';
	if (strncmp(buf, lead, LEADLEN) != 0) {
		fprintf(stderr, "%s: not a news batch (should start with %s)\n",
			name, lead);
		goto done;
	}

	here = 0;
	for (;;) {
		/* Invariant: buf holds the NUL-terminated header found at
		 * file position `here'. */
		char *end;
		long offset, artstart, remaining, newpos, winstart, rel, at;
		int numsize;

		offset = strtol(buf + LEADLEN, &end, 10);
		if (end == buf + LEADLEN || offset <= 0) {
			fprintf(stderr,
				"%s: bad value after %s, file offset %ld\n",
				name, lead, here);
			goto done;
		}
		numsize = (int)(end - (buf + LEADLEN));
		artstart = here + LEADLEN + numsize + 1/*\n*/;
		remaining = size - artstart;
		if (remaining < 0) {
			/* The header line itself runs off the end of the file. */
			fprintf(stderr, "%s: last article too short\n", name);
			goto done;
		}

		/* Compare against `remaining' rather than forming
		 * artstart + offset first: the stated count may be huge. */
		if (offset == remaining) {
			status = 0;
			goto done;
		}
		if (offset > remaining) {
			/* Count runs past EOF: make it end at EOF. */
			if (rewrite_count(fd, name, here, numsize, remaining) >= 0)
				status = 0;
			goto done;
		}

		newpos = artstart + offset;
		winstart = newpos - FUDGE;
		if (winstart < artstart)	/* never rediscover our own header */
			winstart = artstart;
		got = read_at(fd, winstart, buf, BUFLEN - 1);
		if (got < 0) {
			perror(name);
			goto done;
		}
		if (got < BUFLEN - 1) {
			/* Too close to EOF for another article: treat this
			 * one as the last and make it end at EOF. */
			if (rewrite_count(fd, name, here, numsize, remaining) >= 0)
				status = 0;
			goto done;
		}
		buf[got] = '\0';

		rel = newpos - winstart;
		if (memcmp(buf + rel, lead, LEADLEN) == 0) {
			/* Count is right.  Slide the next header (and the
			 * terminating NUL) to the front of buf; the regions
			 * overlap, hence memmove. */
			memmove(buf, buf + rel, (size_t)(got - rel + 1));
			here = newpos;
			continue;
		}

		at = find_lead(buf, got);
		if (at < 0) {
			fprintf(stderr, "%s: can't find next article \
with offset %ld from file pos %ld\n", name, offset, here);
			goto done;
		}
		offset = winstart + at - artstart;
		if (rewrite_count(fd, name, here, numsize, offset) < 0)
			goto done;

		/* Carry on from the header just located, whether or not
		 * the count could be rewritten. */
		newpos = artstart + offset;
		got = read_at(fd, newpos, buf, BUFLEN - 1);
		if (got < 0) {
			perror(name);
			goto done;
		}
		if (got < BUFLEN - 1) {
			fprintf(stderr, "%s: last article too short\n", name);
			goto done;
		}
		buf[got] = '\0';
		here = newpos;
	}

done:
	if (close(fd) < 0 && status == 0) {
		perror(name);
		status = -1;
	}
	return status;
}



More information about the Alt.sources mailing list