crash dump code for VAX UDA

hakanson at orstcs.UUCP hakanson at orstcs.UUCP
Sat May 3 05:00:00 AEST 1986


[yum!]

The following contains excerpts from two messages I received
from Chris Torek (chris at mimsy.umd.edu) regarding code to do
a crash dump on vaxes with UDA50 disks (ra81's, etc.).

I installed it in the form in which it appears below, and it works
on our vax750 running 4.2.  Note that the second message below
contains code for decoding MSCP error packets from the UDA50,
which some people have requested.  I did not install this part
of the code, but some people may find it useful, so I did include
it in this posting.

A little history about this exchange seems necessary (to me :-).

I installed the RFS kernel enhancements here a while back, and
they immediately started crashing our systems.  I have zero experience
in debugging kernels, and Chris has spent a lot of his time in
helping me work on this problem (and a few others as well) via email.
We still don't have RFS working here, but I am very grateful for
Chris's help, and I think a public "Thank you" is in order for
his support of me and of a lot of others on the net.  I hope
someone else he has helped will at least buy him a drink if they
see him at Usenix this summer -- unfortunately I won't be able
to get there myself.

Again, this code (in the form below) slipped right in and is
working now -- it sure helps us work on this RFS problem (sigh).
Thanks, Chris....

Marion Hakanson         CSnet:  hakanson%oregon-state at csnet-relay
                        UUCP :  {hp-pcd,tektronix}!orstcs!hakanson

> Date: Wed, 16 Apr 86 19:48:41 EST
> From: Chris Torek <chris%mimsy.umd.edu at CSNET-RELAY>
> To: hakanson%oregon-state.csnet at CSNET-RELAY
> 
> 	... we're still running the brain-damaged UDA driver and
> 	don't get core-dumps from crashes.
> 
> I can fix that!  Here is a uddump() routine, and the section from
> udareg.h that you will need to use it.  Let me know if it works; I
> did some on-the-fly editing.  If it does indeed work, feel free to
> post it to net.unix-wizards or wherever.
> 
> Chris

[uda50 dump routines: first, new code for udareg.h: add this at the
end:]

/*
 * Simplified routines (e.g., uddump) reprogram the UDA50 for one command
 * and one response at a time; uda1ca is like udaca except that it provides
 * exactly one command and response descriptor.
 *
 * uddump() gives the controller the Unibus address of ca_rspdsc as part
 * of its initialisation sequence.  NOTE(review): the field order and
 * sizes presumably mirror what the UDA50 expects around that address --
 * do not reorder or resize without checking the controller documentation.
 */
struct uda1ca {
	short	ca_xxx1;	/* not referenced by the dump code; presumably reserved */
	char	ca_xxx2;	/* not referenced by the dump code; presumably reserved */
	char	ca_bdp;		/* not referenced by the dump code; presumably a
				   buffered data path number -- TODO confirm */
	short	ca_cmdint;	/* polled by udadumpcmd(); cleared when seen nonzero */
	short	ca_rspint;	/* polled by udadumpcmd(); nonzero is taken to mean
				   a response packet has arrived */
	long	ca_rspdsc;	/* response descriptor: Unibus address of the
				   response packet, or'ed with UDA_OWN|UDA_INT */
	long	ca_cmddsc;	/* command descriptor: Unibus address of the
				   command packet, or'ed with UDA_OWN|UDA_INT */
};

[Next: uddump() itself.  Add to uda.c, near or at the end, replacing
the old uddump() if any.  If there is no old uddump(), check vax/conf.c
and insert a declaration for uddump() near those for udread() etc.,
and put `uddump,' in the obvious place in the bdevsw table.

You may also need to `#include "../vax/mtpr.h"' near the top of uda.c.]

/*
 * Do a panic dump.  We set up the controller for one command packet
 * and one response packet, for which we use `struct uda1'.
 *
 * The single instance declared here is the area uddump() maps into
 * Unibus I/O space.  uddump() relies on every field of uda1_cmd that it
 * does not set explicitly being zero.
 */
struct	uda1 {
	struct	uda1ca uda1_ca;	/* communications area */
	struct	mscp uda1_rsp;	/* response packet */
	struct	mscp uda1_cmd;	/* command packet */
} uda1;

#define	DBSIZE	32		/* dump 16K at a time */

/*
 * Write physical memory to the disk partition named by `dev', driving
 * the UDA50 directly and waiting for each command by polling.
 *
 * The minor device number is decoded as: low three bits = partition
 * (index into ra_sizes[]), remaining bits = drive unit.
 *
 * maxfree pages are written, DBSIZE pages per transfer, starting at block
 * dumplo within the partition; the count is cut back if it would not fit
 * in the partition.
 *
 * Returns 0 on success; ENXIO if the unit is out of range or the drive
 * was not found alive; EFAULT if the controller could not be initialised,
 * its characteristics set, or the drive brought on line; EINVAL if dumplo
 * is negative; EIO if a write command fails.
 */
uddump(dev)
	dev_t dev;
{
	struct udadevice *udaddr;
	struct uda1 *ud_ubaddr;
	char *start;
	int num, blk, unit, maxsz, blkoff, reg;
	register struct uba_regs *uba;
	register struct uba_device *ui;
	register struct uda1 *ud;
	register struct pte *io;
	register int i;

	/*
	 * Make sure the device is a reasonable place on which to dump.
	 */
	unit = minor(dev) >> 3;
	if (unit >= NRA)
		return (ENXIO);
	/*
	 * phys() clears the top bit of a kernel address and casts the
	 * result.  NOTE(review): presumably this turns a kernel virtual
	 * address into a physical one because the dump runs with memory
	 * mapping off -- confirm.  The macro is not #undef'ed, so it stays
	 * defined for the rest of the file.
	 */
#define	phys(cast, addr)	((cast) ((int) addr & 0x7fffffff))
	ui = phys(struct uba_device *, uddinfo[unit]);
	if (ui == NULL || ui->ui_alive == 0)
		return (ENXIO);

	/*
	 * Find and initialise the UBA; get the physical address of the
	 * device registers, and of communications area and command and
	 * response packet.
	 */
	uba = phys(struct uba_hd *, ui->ui_hd)->uh_physuba;
	ubainit(uba);
	udaddr = (struct udadevice *) ui->ui_physaddr;
	ud = phys(struct uda1 *, &uda1);

	/*
	 * Map the ca+packets into Unibus I/O space so the UDA50 can get
	 * at them.  Use the registers at the end of the Unibus map (since
	 * we will use the registers at the beginning to map the memory
	 * we are dumping).
	 *
	 * One register more than the size alone requires is used;
	 * presumably this covers the structure not starting on a page
	 * boundary.  ud_ubaddr is the resulting Unibus address: the first
	 * map register number in the page-number bits, plus the offset of
	 * the structure within its page.
	 */
	num = btoc(sizeof (struct uda1)) + 1;
	reg = NUBMREG - num;
	io = &uba->uba_map[reg];
	for (i = 0; i < num; i++)
		*(int *)io++ = UBAMR_MRV | (btop(ud) + i);
	ud_ubaddr = (struct uda1 *) (((int) ud & PGOFSET) | (reg << 9));

	/*
	 * Initialise the controller, with one command and one response
	 * packet.
	 *
	 * Each step waits for the controller to raise the corresponding
	 * step bit before the next word is written.  The second and third
	 * words written are the Unibus address of the response descriptor
	 * and that same address shifted right 16 bits.
	 * NOTE(review): the first word written is just UDA_ERR; presumably
	 * that bit is required in the step 1 word and the remaining zero
	 * bits select the one-command/one-response setup -- confirm against
	 * the UDA50 initialisation protocol.
	 */
	udaddr->udaip = 0;
	if (udadumpwait(udaddr, UDA_STEP1))
		return (EFAULT);
	udaddr->udasa = UDA_ERR;
	if (udadumpwait(udaddr, UDA_STEP2))
		return (EFAULT);
	udaddr->udasa = (int) &ud_ubaddr->uda1_ca.ca_rspdsc;
	if (udadumpwait(udaddr, UDA_STEP3))
		return (EFAULT);
	udaddr->udasa = ((int) &ud_ubaddr->uda1_ca.ca_rspdsc) >> 16;
	if (udadumpwait(udaddr, UDA_STEP4))
		return (EFAULT);
	udaddr->udasa = UDA_GO;

	/*
	 * Set up the command and response descriptor, then set the
	 * controller characteristics and bring the drive on line.
	 * Note that all uninitialised locations in uda1_cmd are zero.
	 *
	 * The descriptors are written through `ud' (the address the CPU
	 * uses) but hold addresses computed from `ud_ubaddr' (the address
	 * the controller uses).
	 */
	ud->uda1_ca.ca_rspdsc = (long) &ud_ubaddr->uda1_rsp.mscp_cmdref;
	ud->uda1_ca.ca_cmddsc = (long) &ud_ubaddr->uda1_cmd.mscp_cmdref;
	/* ud->uda1_cmd.mscp_sccc.sccc_ctlrflags = 0; */
	/* ud->uda1_cmd.mscp_sccc.sccc_version = 0; */
	if (udadumpcmd(M_OP_STCON, ud, ui))
		return (EFAULT);
	ud->uda1_cmd.mscp_unit = ui->ui_slave;
	if (udadumpcmd(M_OP_ONLIN, ud, ui))
		return (EFAULT);

	/* size and starting block of the partition being dumped to */
	maxsz = ra_sizes[minor(dev) & 7].nblocks;
	blkoff = ra_sizes[minor(dev) & 7].blkoff;

	/*
	 * Dump all of physical memory, or as much as will fit in the
	 * space provided.
	 */
	start = 0;
	num = maxfree;
	if (dumplo < 0)
		return (EINVAL);
	if (dumplo + num >= maxsz)
		num = maxsz - dumplo;
	blkoff += dumplo;

	/*
	 * Write out memory, DBSIZE pages at a time.
	 * N.B.: this code depends on the fact that the sector
	 * size == the page size.
	 */
	while (num > 0) {
		blk = num > DBSIZE ? DBSIZE : num;
		io = uba->uba_map;
		/*
		 * Map in the pages to write, leaving an invalid entry
		 * at the end to guard against wild Unibus transfers.
		 * Then do the write.
		 */
		for (i = 0; i < blk; i++)
			*(int *) io++ = UBAMR_MRV | (btop(start) + i);
		*(int *) io = 0;
		ud->uda1_cmd.mscp_unit = ui->ui_slave;
		/* page number doubles as sector number (see N.B. above) */
		ud->uda1_cmd.mscp_lbn = btop(start) + blkoff;
		ud->uda1_cmd.mscp_bytecnt = blk << PGSHIFT;
		if (udadumpcmd(M_OP_WRITE, ud, ui))
			return (EIO);
		start += blk << PGSHIFT;
		num -= blk;
	}
	return (0);		/* made it! */
}

/*
 * Wait for some of the bits in `bits' to come on in the controller's
 * udasa register.  Returns 0 as soon as any of them is seen.  If the
 * error bit comes on instead, or ten seconds pass without response,
 * print a diagnostic and return true (error).
 *
 *	udaddr	controller registers
 *	bits	mask of udasa bits to wait for
 *
 * The time limit is measured as elapsed TODR ticks, using unsigned
 * subtraction from the starting value.  A precomputed signed deadline
 * (start + 1000) overflows if the counter is about to cross the signed
 * integer boundary, which makes the comparison true at once and reports
 * a timeout that never happened.
 *
 * udasa is sampled once per pass so that the value tested for the error
 * bit is the value that gets printed.
 */
udadumpwait(udaddr, bits)
	register struct udadevice *udaddr;
	register int bits;
{
	register unsigned start = mfpr(TODR);
	register int sa;

	while (((sa = udaddr->udasa) & bits) == 0) {
		if (sa & UDA_ERR) {
			printf("error, udasa=%x\ndump ", sa);
			return (1);
		}
		if ((unsigned) mfpr(TODR) - start >= 1000) {
			printf("timeout\ndump ");
			return (1);
		}
	}
	return (0);
}

/*
 * Feed a command to the UDA50, wait for its response, and return
 * true iff something went wrong.
 *
 *	op	MSCP opcode to place in the command packet
 *	ud	the uda1 area as addressed by the CPU; the caller has
 *		already filled in any other command fields it needs
 *	ui	the drive, used only to find the controller registers
 *
 * Responses that are not the end packet for `op' are named on the
 * console and ignored, and the response descriptor is re-armed.  A
 * diagnostic is printed for every failure (controller error, timeout,
 * or an end packet whose status is not success).
 *
 * The time limit (1000 TODR ticks, as in udadumpwait) is measured as
 * elapsed ticks using unsigned subtraction.  A precomputed signed
 * deadline overflows if the counter is about to cross the signed integer
 * boundary, which would report a timeout that never happened.
 */
udadumpcmd(op, ud, ui)
	int op;
	register struct uda1 *ud;
	struct uba_device *ui;
{
	register struct udadevice *udaddr;
	register int n;
	register unsigned start;
#define mp (&ud->uda1_rsp)

	udaddr = (struct udadevice *) ui->ui_physaddr;
	ud->uda1_cmd.mscp_opcode = op;
	ud->uda1_cmd.mscp_header.uda_msglen = sizeof (struct mscp);
	ud->uda1_rsp.mscp_header.uda_msglen = sizeof (struct mscp);
	/*
	 * Flag both descriptors with UDA_OWN|UDA_INT (presumably: owned
	 * by the controller, interrupt word requested -- confirm).
	 */
	ud->uda1_ca.ca_rspdsc |= UDA_OWN | UDA_INT;
	ud->uda1_ca.ca_cmddsc |= UDA_OWN | UDA_INT;
	if (udaddr->udasa & UDA_ERR) {
		printf("error, udasa=%x\ndump ", udaddr->udasa);
		return (1);
	}
	/*
	 * The value read is discarded.  NOTE(review): presumably the read
	 * of udaip itself is what prompts the controller to look for the
	 * command -- confirm against the UDA50 documentation.
	 */
	n = udaddr->udaip;
	start = mfpr(TODR);
	for (;;) {
		if ((unsigned) mfpr(TODR) - start > 1000) {
			printf("timeout\ndump ");
			return (1);
		}
		if (ud->uda1_ca.ca_cmdint)
			ud->uda1_ca.ca_cmdint = 0;
		if (ud->uda1_ca.ca_rspint == 0)
			continue;
		ud->uda1_ca.ca_rspint = 0;
		if (mp->mscp_opcode == (op | M_OP_END))
			break;
		/*
		 * Not the end packet we are waiting for: say what kind of
		 * message it was (upper four bits of uda_credits), then
		 * give the response slot back and keep waiting.
		 */
		printf("\n");
		switch (mp->mscp_header.uda_credits & 0xf0) {

		case 0x00:
			printf("sequential");
			break;

		case 0x10:
			/* May want call to uderror() here ??? (RMH)
			mscp_decodeerror(ui->ui_mi, mp);
			*/
			printf("datagram");
			break;

		case 0x20:
			printf("credits");
			break;

		case 0xf0:
			printf("maintenance");
			break;

		default:
			printf("unknown (type 0x%x)",
				mp->mscp_header.uda_credits & 0xf0);
			break;
		}
		printf(" ignored\ndump ");
		ud->uda1_ca.ca_rspdsc |= UDA_OWN | UDA_INT;
	}
	if ((mp->mscp_status & M_ST_MASK) != M_ST_SUCC) {
		printf("error: op 0x%x => 0x%x status 0x%x\ndump ", op,
			mp->mscp_opcode, mp->mscp_status);
		return (1);
	}
	return (0);
#undef mp
}

> Date: Fri, 18 Apr 86 21:22:04 EST
> From: Chris Torek <chris%mimsy.umd.edu at CSNET-RELAY>
> To: hakanson%oregon-state.csnet at CSNET-RELAY
> 
> 	... mscp_decodeerror is undefined
> 
> ....		You could just drop the call to it; or you
> could put in all the MSCP error decoding.  The decoding is quite
> useful with error datagrams encountered during normal operation,
> and you might want to figure out the right place to call the routine,
> so I will include it below.  This is the old one I was using before
> I rewrote the code, so it has a different name, `uderror'.  Of
> course I just now touched it too, so it may have broken (though I
> doubt it).
> 
> Incidentally, I think there are many people looking for a uddump()
> routine: I got another request for something along that line
> yesterday, and I suggested that one might `appear' in unix-wizards
> soon.  So if you get it working, please do post it.
> 
> Chris

/*
 * Process a UDA50 error log message
 *
 * For now, just log the error on the console.
 * Only minimal decoding is done, only "useful"
 * information is printed.  Eventually should
 * send message to an error logger.
 */

uderror(um, mp)
	register struct uba_ctlr *um;
	register struct mslg *mp;
{
	int issoft = mp->mslg_flags & (M_LF_SUCC | M_LF_CONT);
	/*
	 * For bad blocks, mp->mslg_hdr identifies a code and the logical
	 * block number.  Code 0 is a regular block; code 6 is a replacement
	 * block.  The remaining codes are currently undefined.  The code
	 * is in the upper four bits of mslg_hdr (bits 0-27 are the lbn).
	 */
	static char *codemsg[16] = {
		"lbn", "code 1", "code 2", "code 3",
		"code 4", "code 5", "rbn", "code 7",
		"code 8", "code 9", "code 10", "code 11",
		"code 12", "code 13", "code 14", "code 15"
	};
#define BADCODE(h)	(codemsg[(unsigned)(h) >> 28])
#define BADLBN(h)	((h) & 0xfffffff)

	printf("uda%d: %s error datagram%s: ", um->um_ctlr,
		issoft ? "soft" : "hard",
		mp->mslg_flags & M_LF_CONT ? " (continuing)" : "");
	switch (mp->mslg_format & 0377) {

	case M_FM_CNTERR:	/* controller error */
		break;

	case M_FM_BUSADDR:	/* host memory access error */
		printf("memory addr 0x%x: ", *(long *)&mp->mslg_busaddr);
		break;

	case M_FM_DISKTRN:
		printf("unit %d: retry %d count %d, %s %d: ",
			mp->mslg_unit, mp->mslg_group & 0xff,
			(mp->mslg_group >> 8) & 0xff,
			BADCODE(mp->mslg_hdr), BADLBN(mp->mslg_hdr));
		break;

	case M_FM_SDI:
		printf("unit %d: %s %d: ", mp->mslg_unit,
			BADCODE(mp->mslg_hdr), BADLBN(mp->mslg_hdr));
		break;

	case M_FM_SMLDSK:
		printf("unit %d: small disk error, cyl %d: ",
			mp->mslg_unit, mp->mslg_sdecyl);
		break;

	default:
		printf("unit %d: unknown error, format 0x%x: ",
			mp->mslg_unit, mp->mslg_format);
	}
	udputstatus(mp);
#undef BADCODE
#undef BADLBN
}

/*
 * Messages for the various subcodes.  Each table is indexed by subcode
 * number; entries with no message of their own point at unknown_msg.
 */
static char unknown_msg[] = "unknown subcode";

static char *succ_msgs[] = {
	"normal",			/*  0 */
	"spin down ignored",		/*  1 */
	"still connected",		/*  2 */
	unknown_msg,			/*  3 */
	"dup. unit #",			/*  4 */
	unknown_msg,			/*  5 */
	unknown_msg,			/*  6 */
	unknown_msg,			/*  7 */
	"already online",		/*  8 */
	unknown_msg,			/*  9 */
	unknown_msg,			/* 10 */
	unknown_msg,			/* 11 */
	unknown_msg,			/* 12 */
	unknown_msg,			/* 13 */
	unknown_msg,			/* 14 */
	unknown_msg,			/* 15 */
	"still online"			/* 16 */
};

static char *icmd_msgs[] = {
	"invalid msg length"		/*  0 */
};

static char *offl_msgs[] = {
	"unknown drive",		/*  0 */
	"not mounted",			/*  1 */
	"inoperative",			/*  2 */
	unknown_msg,			/*  3 */
	"duplicate",			/*  4 */
	unknown_msg,			/*  5 */
	unknown_msg,			/*  6 */
	unknown_msg,			/*  7 */
	"in diagnosis"			/*  8 */
};

static char *media_fmt_msgs[] = {
	"fct unread - edc",		/*  0 */
	"invalid sector header",	/*  1 */
	"not 512 sectors",		/*  2 */
	"not formatted",		/*  3 */
	"fct ecc"			/*  4 */
};

static char *wrprot_msgs[] = {
	unknown_msg,			/*  0 */
	"software",			/*  1 */
	"hardware"			/*  2 */
};

static char *data_msgs[] = {
	"forced error",			/*  0 */
	unknown_msg,			/*  1 */
	"header compare",		/*  2 */
	"sync timeout",			/*  3 */
	unknown_msg,			/*  4 */
	unknown_msg,			/*  5 */
	unknown_msg,			/*  6 */
	"uncorrectable ecc",		/*  7 */
	"1 symbol ecc",			/*  8 */
	"2 symbol ecc",			/*  9 */
	"3 symbol ecc",			/* 10 */
	"4 symbol ecc",			/* 11 */
	"5 symbol ecc",			/* 12 */
	"6 symbol ecc",			/* 13 */
	"7 symbol ecc",			/* 14 */
	"8 symbol ecc"			/* 15 */
};

static char *host_buffer_msgs[] = {
	unknown_msg,			/*  0 */
	"odd xfer addr",		/*  1 */
	"odd xfer count",		/*  2 */
	"non-exist. memory",		/*  3 */
	"memory parity"			/*  4 */
};

static char *cntlr_msgs[] = {
	unknown_msg,			/*  0 */
	"serdes overrun",		/*  1 */
	"edc",				/*  2 */
	"inconsistant internal data struct"	/*  3 */
};

static char *drive_msgs[] = {
	unknown_msg,			/*  0 */
	"sdi command timeout",		/*  1 */
	"ctlr detected protocol",	/*  2 */
	"positioner",			/*  3 */
	"lost rd/wr ready",		/*  4 */
	"drive clock dropout",		/*  5 */
	"lost recvr ready",		/*  6 */
	"drive detected error",		/*  7 */
	"ctlr detected pulse or parity"	/*  8 */
};

/*
 * The following table correlates message codes with the
 * decoding strings.
 */
struct code_decode {
	char	*cdc_msg;
	int	cdc_nsubcodes;
	char	**cdc_submsgs;
} code_decode[] = {
#define	SC(m)	sizeof (m) / sizeof (m[0]), m
	"- success",			SC(succ_msgs),
	"invalid command",		SC(icmd_msgs),
	"command aborted",		0, 0,
	"- unit offline",		SC(offl_msgs),
	"unit available",		0, 0,
	"media format error",		SC(media_fmt_msgs),
	"write protected",		SC(wrprot_msgs),
	"compare error",		0, 0,
	"data error",			SC(data_msgs),
	"host buffer access error",	SC(host_buffer_msgs),
	"controller error",		SC(cntlr_msgs),
	"drive error",			SC(drive_msgs),
#undef SC
};

/*
 * Split an event word into its parts.  The code is the low five bits.
 * The subcode is an eleven bit field that starts at bit 5, except for
 * code 6 (write protected in code_decode[]) where it starts at bit 12.
 * Both macros evaluate their argument more than once.
 */
#define CODE(c)		((c) & 0x1f)
#define SUBCODE(c)	(((c) >> (CODE(c) == 6 ? 12 : 5)) & 0x7ff)

/*
 * Print the event field of an error log message in words, followed by
 * the numeric code and subcode.  Codes beyond the end of code_decode[]
 * are reported as unknown; a subcode with no message in the code's
 * table gets unknown_msg.
 */
udputstatus(mp)
	struct mslg *mp;
{
	register int event = mp->mslg_event;
	register struct code_decode *entry;
	int code, subcode;
	char *codemsg, *submsg;

	code = CODE(event);
	subcode = SUBCODE(event);
	if (code < sizeof code_decode / sizeof code_decode[0]) {
		entry = &code_decode[code];
		codemsg = entry->cdc_msg;
		if (subcode < entry->cdc_nsubcodes)
			submsg = entry->cdc_submsgs[subcode];
		else
			submsg = unknown_msg;
	} else {
		codemsg = "- unknown code";
		submsg = "??";
	}
	printf("%s %s (code %d, subcode %d)\n", submsg, codemsg,
		code, subcode);
}



More information about the Comp.unix.wizards mailing list