problem with fread/fwrite

Chris Torek chris at mimsy.umd.edu
Tue Nov 13 20:48:37 AEST 1990


In article <2677 at cirrusl.UUCP> dhesi%cirrusl at oliveb.ATC.olivetti.com
(Rahul Dhesi) writes:
>I believe the requirement to call fseek (etc.) when switching arises
>out of the need to make stdio fast.  Due to buffering, alternating
>reads and writes can confuse each other.  The only way the stdio
>library could automatically protect you against this would be for it to
>explicitly test for internal state before every read and write.

Although this is (effectively) the reason the V7 Unix stdio and all its
descendents (and, presumably, whatever predecessor eventually became
the USG stdio and thence the System V stdio, though I have not looked
closer than determining that the SVR3 stdio was absolutely horrid
inside) ... where was I?  Oh yes, the reason most Unix stdios do not
check.  Right.

Your average out-of-the-box Unix stdio has, for efficiency, two
particular state variables in each FILE.  One is a pointer into a
current buffer, and the other is a count.  For `getc' operations, if
the count is positive, one decrements it and fetches through the
pointer, which is then incremented.  For `putc' operations, if the
count is positive, one decrements it and stores through the pointer,
which is then incremented.  This means that buffered I/O, which
typically stores somewhere between 512 and 65536 characters in each
buffer, can handle somewhere between 511 and 65535 `calls' to `getc' or
`putc' within an inline macro expansion.  Unfortunately, it also means
that

	fp = fopen("foo", "w+");
	...
	putc(' ', fp);
	c = getc(fp);

tends to `get' a random value (whatever happened to be in the current
buffer).

This particular `feature' is easy to fix without sacrificing
efficiency.  Instead of carrying one count and one pointer, stdio can
carry *two* counts (and, as it turns out, one pointer).  The current
read or write state is then stored implicitly in the two counts (as
well as explicitly elsewhere, of course).  The following extracts from
my <stdio.h> should give you the idea.

/*
 * Stdio buffers.
 */
struct __sbuf {
	unsigned char *_base;	/* start of the buffer; NULL when there is none */
	int	_size;		/* size of the buffer, in bytes */
};

/*
 * Stdio state variables.
 *
 * The following always hold:
 *
 *	if (_flags&(__SLBF|__SWR)) == (__SLBF|__SWR),
 *		_lbfsize is -_bf._size, else _lbfsize is 0
 *	if _flags&__SRD, _w is 0
 *	if _flags&__SWR, _r is 0
 *
 * This ensures that the getc and putc macros (or inline functions) never
 * try to write to a file that is in `read' mode, or to read from one that
 * is in `write' mode.
 * (Moreover, they can, and do, automatically switch from read mode to
 * write mode, and back, on "r+" and "w+" files.)
 *
 * _lbfsize is used only to make the inline line-buffered output stream
 * code as compact as possible.
 *
 * _ub, _up, and _ur are used when ungetc() pushes back more characters
 * than fit in the current _bf, or when ungetc() pushes back a character
 * that does not match the previous one in _bf.  When this happens,
 * _ub._base becomes non-nil (i.e., a stream has ungetc() data iff
 * _ub._base!=NULL) and _up and _ur save the current values of _p and _r.
 */
typedef	struct __sFILE {
	unsigned char *_p;	/* current position in (some) buffer */
	int	_r;		/* read space left for getc(); 0 whenever __SWR is set */
	int	_w;		/* write space left for putc(); 0 whenever __SRD is set */
	short	_flags;		/* flags, below; this FILE is free if 0 */
	short	_file;		/* fileno, if Unix descriptor, else -1 */
	struct	__sbuf _bf;	/* the buffer (at least 1 byte, if !NULL) */
	int	_lbfsize;	/* 0 or -_bf._size, for inline putc */

	/* operations */
	void	*_cookie;	/* cookie passed to io functions */
	/*
	 * The four I/O hooks each receive _cookie as their first argument.
	 * They are declared with prototypes when the compiler accepts
	 * them, and as old-style declarations otherwise.
	 */
#if __STDC__ || c_plusplus
	int	(*_read)(void *_cookie, char *_buf, int _n);
	int	(*_write)(void *_cookie, const char *_buf, int _n);
	fpos_t	(*_seek)(void *_cookie, fpos_t _offset, int _whence);
	int	(*_close)(void *_cookie);
#else
	int	(*_read)();
	int	(*_write)();
	fpos_t	(*_seek)();
	int	(*_close)();
#endif

	/* separate buffer for long sequences of ungetc() */
	struct	__sbuf _ub;	/* ungetc buffer; _ub._base != NULL iff in use */
	unsigned char *_up;	/* saved _p when _p is doing ungetc data */
	int	_ur;		/* saved _r when _r is counting ungetc data */

	/* tricks to meet minimum requirements even when malloc() fails */
	unsigned char _ubuf[3];	/* guarantee an ungetc() buffer */
	unsigned char _nbuf[1];	/* guarantee a getc() buffer */

	/* separate buffer for fgetline() when line crosses buffer boundary */
	struct	__sbuf _lb;	/* buffer for fgetline() */

	/* Unix stdio files get aligned to block boundaries on fseek() */
	int	_blksize;	/* stat.st_blksize (may be != _bf._size) */
	/*
	 * NOTE(review): _seek takes and returns fpos_t, but the cached
	 * offset below is a plain int -- confirm the two types agree on
	 * every target, or that the narrower type is intended.
	 */
	int	_offset;	/* current lseek offset; trustworthy only if __SOFF */
} FILE;

/* Array of FILE objects defined elsewhere -- presumably the table holding
 * the standard streams; TODO confirm against the library source. */
extern FILE __sF[];

/*
 * Bit values for the _flags member of FILE.  A _flags of 0 marks the
 * FILE as free, so every in-use stream has at least one bit set.
 */
#define	__SLBF	0x0001		/* line buffered */
#define	__SNBF	0x0002		/* unbuffered */
#define	__SRD	0x0004		/* OK to read */
#define	__SWR	0x0008		/* OK to write */
	/* RD and WR are never simultaneously asserted */
#define	__SRW	0x0010		/* open for reading & writing */
#define	__SEOF	0x0020		/* found EOF */
#define	__SERR	0x0040		/* found error */
#define	__SMBF	0x0080		/* buffer is from malloc (presumably _bf._base) */
#define	__SAPP	0x0100		/* fdopen()ed in append mode */
#define	__SSTR	0x0200		/* this is an sprintf/snprintf string */
#define	__SOPT	0x0400		/* do fseek() optimisation */
#define	__SNPT	0x0800		/* do not do fseek() optimisation */
#define	__SOFF	0x1000		/* set iff _offset is in fact correct */
#define	__SMOD	0x2000		/* true => fgetline modified _p text */

	[much deleted]

/*
 * The __sfoo macros are here so that we can
 * define function versions in the C library.
 *
 * __sgetc(p) pre-decrements p->_r.  If the count goes negative the
 * work is handed to __srget(p) (declared elsewhere; presumably it
 * refills the buffer or reports EOF); otherwise the byte at p->_p is
 * returned as an int and the pointer is advanced.  Because _r is kept
 * at 0 while a stream is in write mode, a getc on such a stream always
 * takes the __srget() path.
 *
 * `p' is expanded more than once, so it must have no side effects.
 */
#define	__sgetc(p) (--(p)->_r < 0 ? __srget(p) : (int)(*(p)->_p++))
#ifdef __GNUC__
/*
 * Inline putc: store _c in the stream buffer when that can be done
 * without help, otherwise defer to __swbuf().
 *
 * _w is decremented first.  The slow path is taken only when the count
 * has gone negative AND either it has fallen below _lbfsize or the
 * character is a newline.  _lbfsize is 0 except on line-buffered write
 * streams, where it is -_bf._size; so an ordinary stream goes to
 * __swbuf() as soon as _w runs out, while a line-buffered stream keeps
 * storing until the buffer is exhausted or a '\n' arrives.
 *
 * Returns the stored byte (as an int) on the fast path, or whatever
 * __swbuf() returns on the slow path.
 */
static __inline int __sputc(int _c, FILE *_p) {
	if (--_p->_w < 0 && (_p->_w < _p->_lbfsize || (char)_c == '\n'))
		return (__swbuf(_c, _p));

	*_p->_p = _c;
	return (*_p->_p++);
}
#else
/*
 * This has been tuned to generate reasonable code on the vax using pcc
 *
 * Macro form of __sputc for compilers without inline functions.
 * p->_w is pre-decremented, then:
 *   - count still >= 0: store c at p->_p, yield the stored byte and
 *     advance the pointer;
 *   - count negative but >= p->_lbfsize (line-buffered write stream
 *     with room left): store c, then yield it and advance unless it is
 *     '\n', in which case call __swbuf('\n', p);
 *   - otherwise: call __swbuf((int)(c), p).
 *
 * `c' is evaluated exactly once on every path; `p' is evaluated several
 * times and must have no side effects.
 */
#define	__sputc(c, p) \
	(--(p)->_w < 0 ? \
		(p)->_w >= (p)->_lbfsize ? \
			(*(p)->_p = (c)), *(p)->_p != '\n' ? \
				(int)*(p)->_p++ : \
				__swbuf('\n', p) : \
			__swbuf((int)(c), p) : \
		(*(p)->_p = (c), (int)*(p)->_p++))
#endif
-- 
In-Real-Life: Chris Torek, Univ of MD Comp Sci Dept (+1 301 405 2750)
Domain:	chris at cs.umd.edu	Path:	uunet!mimsy!chris



More information about the Comp.lang.c mailing list