S5R2 "grep" performance improvements aren't

guy at gorodish.UUCP guy at gorodish.UUCP
Thu Jan 29 12:01:09 AEST 1987


In S5R2, "fread" and "fwrite" were changed not to just loop doing
"getc" and "putc".  "fgets" and "fputs" were changed to do the same
thing.  "grep" was changed to use a slightly-different version of
"fgets", called "fgetl", that returned the length of the line, and 0
on EOF or error, rather than a pointer to the line and NULL on EOF or
error.

The theory was, I guess, that this would speed it up.  Unfortunately,
the reality is that it slows it down!  Some tests on a 3B2/400 and a
Sun-3/180 indicate that the old V7 code was faster.

This also casts some doubt on the merits of the changes to "fgets"
and "fputs"; it's worth noting that the 4.3BSD "fread" and "fwrite"
use similar techniques to the S5R2 ones, but the 4.3BSD "fgets" and
"fputs" don't.

Also, the "grep -i" code is dumb; it should use register variables
and pointers, and not call the "tolower" subroutine.  This stuff is
done once per character, guys....

Here's a fix, along with some other improvements (proper casting of
null pointers, reasonable error checking and error messages).

*** /archbeluga/s5r2/usr/src/cmd/grep.c	Mon Jul 25 14:24:55 1983
--- grep.c	Wed Jan 28 17:59:51 1987
***************
*** 121,127 ****
  	compile(*argv, expbuf, &expbuf[ESIZE], '\0');
  
  	if (--argc == 0)
! 		execute(NULL);
  	else
  		while (argc-- > 0)
  			execute(*++argv);
--- 121,127 ----
  	compile(*argv, expbuf, &expbuf[ESIZE], '\0');
  
  	if (--argc == 0)
! 		execute((char *)NULL);
  	else
  		while (argc-- > 0)
  			execute(*++argv);
***************
*** 130,145 ****
  }
  
  execute(file)
! register char *file;
  {
  	register char *lbuf;
! 	register i;
  
  	if (file == NULL)
  		temp = stdin;
  	else if ( (temp = fopen(file, "r")) == NULL) {
! 		if (!sflag)
! 			errmsg("grep: can't open %s\n", file);
  		nsucc = 2;
  		return;
  	}
--- 130,149 ----
  }
  
  execute(file)
! char *file;
  {
+ 	register FILE *temp;
  	register char *lbuf;
! 	register char *p1;
! 	register int c;
  
  	if (file == NULL)
  		temp = stdin;
  	else if ( (temp = fopen(file, "r")) == NULL) {
! 		if (!sflag) {
! 			fprintf(stderr, "grep: ");
! 			perror(file);
! 		}
  		nsucc = 2;
  		return;
  	}
***************
*** 147,175 ****
  	lnum = 0;
  	tln = 0;
  
! 	while((nchars = fgetl(prntbuf, BUFSIZ, temp)) != 0) {
! 		if(nchars == BUFSIZ - 1  &&  prntbuf[nchars-1] != '\n')
! 			continue;
  
- 		if(prntbuf[nchars-1] == '\n') {
- 			nlflag = 1;
- 			prntbuf[nchars-1] = '\0';
- 		} else
- 			nlflag = 0;
- 
  		lnum++;
  
  		if (iflag) {
! 			for(i=0, lbuf=linebuf; i < nchars; i++, lbuf++)
! 				*lbuf = (char)tolower((int)prntbuf[i]);
  			*lbuf = '\0';
  			lbuf = linebuf;
  		} else
  			lbuf = prntbuf;
  
! 		if((step(lbuf, expbuf) ^ vflag) && succeed(file) == 1)
! 			break;	/* lflag only once */
  	}
  	fclose(temp);
  
  	if (cflag) {
--- 151,222 ----
  	lnum = 0;
  	tln = 0;
  
! 	/*
! 	 * This flag will be cleared on the last line if it doesn't contain
! 	 * a newline.
! 	 */
! 	nlflag = 1;
! 	for (;;) {
! 		p1 = prntbuf;
! 		while ((c = getc(temp)) != '\n') {
! 			if (c == EOF) {
! 				if (ferror(temp)) {
! 					fprintf(stderr, "grep: Read error on ");
! 					perror(file ? file : "standard input");
! 					nsucc = 2;
! 					return;
! 				}
! 				if (p1 == prntbuf)
! 					goto out;
! 				nlflag = 0;
! 				break;
! 			}
! 			*p1++ = c;
! 			if (p1 >= &prntbuf[BUFSIZ-1])
! 				break;
! 		}
! 		*p1 = '\0';
! 		nchars = p1 - prntbuf;
  
  		lnum++;
  
  		if (iflag) {
! 			lbuf = linebuf;
! 			p1 = prntbuf;
! 			while ((c = *p1++) != '\0') {
! 				if (isupper(c))
! 					*lbuf++ = _tolower(c);
! 				else
! 					*lbuf++ = c;
! 			}
  			*lbuf = '\0';
  			lbuf = linebuf;
  		} else
  			lbuf = prntbuf;
  
! 		if(step(lbuf, expbuf))
! 			goto found;
! 		if(vflag) {
! 			if(succeed(file, temp))
! 				break;
! 		}
! 		continue;
! 
! 	found:
! 		if(!vflag) {
! 			if(succeed(file, temp))
! 				break;
! 		}
  	}
+ 	if (ferror(temp)) {
+ 		fprintf(stderr, "grep: Read error on ");
+ 		perror(file ? file : "standard input");
+ 		nsucc = 2;
+ 		fclose(temp);
+ 		return;
+ 	}
+ 
+ out:
  	fclose(temp);
  
  	if (cflag) {
***************
*** 180,187 ****
  	return;
  }
  
! succeed(f)
  register char *f;
  {
  	nsucc = (nsucc == 2) ? 2 : 1;
  	if (cflag) {
--- 227,235 ----
  	return;
  }
  
! succeed(f, iop)
  register char *f;
+ FILE *iop;
  {
  	nsucc = (nsucc == 2) ? 2 : 1;
  	if (cflag) {
***************
*** 197,211 ****
  		fprintf(stdout, "%s:", f);
  
  	if (bflag)	/* print block number */
! 		fprintf(stdout, "%ld:", (ftell(temp)-1)/BLKSIZE);
  
  	if (nflag)	/* print line number */
  		fprintf(stdout, "%ld:", lnum);
  
- 	if (nlflag)
- 		prntbuf[nchars-1] = '\n';
- 
  	fwrite(prntbuf, 1, nchars, stdout);
  	return(0);
  }
  
--- 245,258 ----
  		fprintf(stdout, "%s:", f);
  
  	if (bflag)	/* print block number */
! 		fprintf(stdout, "%ld:", (ftell(iop)-1)/BLKSIZE);
  
  	if (nflag)	/* print line number */
  		fprintf(stdout, "%ld:", lnum);
  
  	fwrite(prntbuf, 1, nchars, stdout);
+ 	if (nlflag)
+ 		putchar('\n');
  	return(0);
  }
  
***************
*** 257,306 ****
  
  	errmsg("%s\n", errstr[err]);
  	exit(2);
- }
- 
- /*
-  * The following code is a modified version of the fgets() stdio
-  * routine.  The reason why it is used instead of fgets() is that
-  * we need to know how many characters we read into the buffer.
-  * Thus that value is returned here instead of the value of s1.
-  */
- #define MIN(x, y)	(x < y ? x : y)
- #define _BUFSYNC(iop)	if (_bufend(iop) - iop->_ptr <   \
- 				( iop->_cnt < 0 ? 0 : iop->_cnt ) )  \
- 					_bufsync(iop)
- 
- extern int _filbuf();
- extern char *memccpy();
- 
- fgetl(ptr, size, iop)
- char *ptr;
- register int size;
- register FILE *iop;
- {
- 	char *p, *ptr0 = ptr;
- 	register int n;
- 
- 	for (size--; size > 0; size -= n) {
- 		if (iop->_cnt <= 0) { /* empty buffer */
- 			if (_filbuf(iop) == EOF) {
- 				if (ptr0 == ptr)
- 					return (NULL);
- 				break; /* no more data */
- 			}
- 			iop->_ptr--;
- 			iop->_cnt++;
- 		}
- 		n = MIN(size, iop->_cnt);
- 		if ((p = memccpy(ptr, (char *) iop->_ptr, '\n', n)) != NULL)
- 			n = p - ptr;
- 		ptr += n;
- 		iop->_cnt -= n;
- 		iop->_ptr += n;
- 		_BUFSYNC(iop);
- 		if (p != NULL)
- 			break; /* found '\n' in buffer */
- 	}
- 	*ptr = '\0';
- 	return (ptr-ptr0);
  }
--- 304,307 ----



More information about the Comp.bugs.sys5 mailing list