unc - 68000 disassembler

Sat Mar 15 00:58:27 AEST 1986

Mod.sources:  Volume 4, Issue 30
Submitted by: turner at imagen.UUCP <talcott!topaz!Shasta!imagen!Jim.Turner>

#! /bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #! /bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	README
#	doc
#	doc.out
#	makefile
#	unc.h
#	alloc.c
#	file.c
#	heur.c
# This archive created: Fri Mar 14 09:54:34 1986
export PATH; PATH=/bin:$PATH
echo shar: extracting "'README'" '(806 characters)'
if test -f 'README'
then
	echo shar: will not over-write existing file "'README'"
else
cat << \SHAR_EOF > 'README'
I have gotten many many requests to email this or post it, because of
its size email'ing it screws up most mailers, so i am submitting it to
mod.sources to be posted.  Please note a major caveat with this, it was
written under Unisoft's port of Unix so the a.out file that it uses
more closely resembles the b.out.h file that most cross assemblers
(e.g.  greenhills) use. For the obvious reasons i have not included that
file with the posting. I did not write this nor do i make any claim to
that effect.

    turner at imagen.UUCP <talcott!topaz!Shasta!imagen!Jim.Turner>

----------------------------
This is the 68000 disassembler mentioned on the net.
It is not my final version by any means, but I have found it extremely
useful and it represents several weeks' work.

    John Collins. <jmc at inset.UUCP>
SHAR_EOF
if test 806 -ne "`wc -c < 'README'`"
then
	echo shar: error transmitting "'README'" '(should have been 806 characters)'
fi
fi
echo shar: extracting "'doc'" '(6445 characters)'
if test -f 'doc'
then
	echo shar: will not over-write existing file "'doc'"
else
cat << \SHAR_EOF > 'doc'
.\"/*%	nroff -cm -rL72 %|epson|spr -f plain.a -h uncdoc -w
.nr Hb 7
.nr Hs 3
.ds HF 3 3 3 3 3 3 3
.nr Hu 5
.nr Hc 1
.SA 1
.PH "''A Disassembler''"
.PF "'Issue %I%'- Page \\\\nP -'%G%'"
.H 1 "Introduction"
This document describes the first release of a disassembler for UNIX
executable files.
The key features are:
.AL
.LI
For object files the output can be assembled to generate the same
object module, (apart from minor variations in symbol table ordering) as the
input.
.LI
For stripped executable files object modules and libraries may be scanned,
modules in the main input identified and the appropriate names automatically
inserted into the output.
.LI
An option is available to convert most non-global names into local symbols,
which cuts down the symbols in the generated assembler file.
.LI
The disassembler copes reasonably with modules merged with the
.B "-r"
option to
.B "ld" ,
generating a warning message as to the number of modules involved.
.LE
.P
At present this is available for certain Motorola 68000 ports of UNIX
System III and System V. Dependencies on
.AL a
.LI
Instruction set.
.LI
Object module format.
.LI
Library module format.
.LI
Assembler output format.
.LE
.P
are hopefully sufficiently localised to make the product useful as a
basis for other disassemblers for other versions of UNIX.
.P
The product is thus distributed in source form at present.
.H 1 "Use"
The disassembler is run by entering:
.DS I
unc mainfile lib1 lib2 ...
.DE
.P
The first named file is the file to be disassembled, which should be
a single file, either an object module, a (possibly stripped) executable
file, or a library member. Library members are designated using a
parenthesis notation, thus:
.DS I
unc '/lib/libc.a(printf.o)'
.DE
.P
It is usually necessary to escape the arguments in this case to prevent
misinterpretation by the shell. Libraries in standard places such as
.I "/lib"
and
.I "/usr/lib"
may be specified in the same way as to
.B "ld" ,
thus
.DS I
unc '-lc(printf.o)'
unc '-lcurses(wmove.o)'
.DE
.P
As an additional facility, the list of directories searched for
libraries may be varied by setting the environment variable
.B "LDPATH" ,
which is interpreted similarly to the shell
.B "PATH"
variable, and of course defaults to
.DS I
LDPATH=/lib:/usr/lib
.DE
.P
As a further facility, the insertion of
.B "lib"
before and
.B ".a"
after the argument may be suppressed by using a capital
.B "-L"
argument, thus to print out the assembler for
.I "/lib/crt0.o" ,
then the command
.DS I
unc -Lcrt0.o
.DE
.P
should have the desired effect.
.P
Second and subsequent file arguments are only referenced for stripped
executable files, and may consist of single object files and library
members, using the same syntax as before, or whole libraries of object
files, thus:
.DS I
unc strippedfile -Lcrt0.o -lcurses -ltermcap '-lm(sqrt.o)' -lc
.DE
.P
It is advisable to make some effort to put the libraries to be searched
in the order in which they were originally loaded. This is because the
search for each module starts where the previously matched module ended.
However, no harm is done if this rule is not adhered to apart from
increased execution time except in the rare cases where the disassembler
is confused by object modules which are very nearly similar.
.H 1 "Additional options"
The following options are available to modify the behaviour of the
disassembler.
.VL 15 2
.LI "-o file"
Causes output to be sent to the specified file instead of the standard
output.
.LI "-t prefix"
Causes temporary files to be created with the given prefix. The default
prefix is
.B "split" ,
thus causing two temporary files to be created with this prefix in the
current directory. If it is desired, for example, to create the files as
.B "/tmp/xx*" ,
then the argument
.B "-t /tmp/xx"
should be given. Note that the temporary files may be very large as a
complete map of the text and data segments is generated.
.LI "-a"
Suppresses the generation of non-global absolute symbols from the
output. This saves output from C compilations without any obvious
problems, but the symbols are by default included in the name of
producing as nearly identical output as possible to the original source.
.LI "-s"
Causes an additional scan to take place where all possible labels are
replaced by local symbols. The local symbols are inserted in strictly
ascending order, starting at 1.
.LI "-v"
Causes a blow-by-blow account of activities to be output on the standard
error.
.LE
.H 1 "Diagnostics etc"
Truncated or garbled object and library files usually cause processing
to stop with an explanatory message.
.P
The only other kinds of message are some passing warnings concerning
obscure constructs not handled, such as the relocation of byte fields,
or the relocation of overlapping fields. Occasionally a message
.DS I
Library clash: message
.DE
.P
may appear and processing cease. This message is found where at a late
stage in processing libraries, the program discovers that due to the
extreme similarity of two or more library members, it has come to the
wrong conclusion about which one to use. The remedy here is to spell out
to the program which members to take in which order.
.H 1 "Future development"
In the future it is hoped to devise ways of making the disassembler
independent of all the above-mentioned version dependencies, by first
reading a file defining these things. This will probably be applied
after the Common Object Format becomes more standard.
.P
In the long term it would be desirable and useful to enhance the product
to produce compilable C in addition to assemblable assembler. Stages in
the process are seen as follows:
.AL
.LI
Better identification of basic blocks in the code. Switch statements are
a major problem here, as are constant data held in the text segment.
.LI
Marrying of data to the corresponding text. It is in various places hard
to divorce static references "on the fly" (e.g. strings, and switch
lists in some implementations) from static at the head of a module. This
is part of the problem of identifying basic blocks.
.LI
Compilation of header files to work out structure references within the
text. At this stage some interaction may be needed.
.LE
.P
Meanwhile the product is one which is a useful tool to the author in its
present form. Comments and suggestions as to the most practical method
of improving the product in the ways suggested or in other ways would be
gratefully considered.
SHAR_EOF
if test 6445 -ne "`wc -c < 'doc'`"
then
	echo shar: error transmitting "'doc'" '(should have been 6445 characters)'
fi
fi
echo shar: extracting "'doc.out'" '(7415 characters)'
if test -f 'doc.out'
then
	echo shar: will not over-write existing file "'doc.out'"
else
cat << \SHAR_EOF > 'doc.out'

                       A Disassembler

                      1.  Introduction

This document describes the first release of a  disassembler
for UNIX executable files.  The key features are:

  1.  For object  files  the  output  can  be  assembled  to
      generate  the  same  object  module, (apart from minor
      variations in symbol table ordering) as the input.

  2.  For  stripped  executable  files  object  modules  and
      libraries  may  be  scanned, modules in the main input
      identified and  the  appropriate  names  automatically
      inserted into the output.

  3.  An option is  available  to  convert  most  non-global
      names  into local symbols, which cuts down the symbols
      in the generated assembler file.

  4.  The disassembler copes reasonably with modules  merged
      with the -r option to ld, generating a warning message
      as to the number of modules involved.

At present this is  available  for  certain  Motorola  68000
ports of UNIX System III and System V. Dependencies on

  a.  Instruction set.

  b.  Object module format.

  c.  Library module format.

  d.  Assembler output format.

are hopefully sufficiently localised  to  make  the  product
useful as a basis for other disassemblers for other versions
of UNIX.

The product is thus distributed in source form at present.

                          2.  Use

The disassembler is run by entering:

     unc mainfile lib1 lib2 ...

The first named file is the file to be  disassembled,  which
should  be  a  single  file,  either  an  object  module,  a
(possibly stripped) executable file, or  a  library  member.
Library members are designated using a parenthesis notation,
thus:

                                                      Page 1

                       A Disassembler

     unc '/lib/libc.a(printf.o)'

It is usually necessary to escape the arguments in this case
to  prevent  misinterpretation  by  the  shell. Libraries in
standard places such as /lib and /usr/lib may  be  specified
in the same way as to ld, thus

     unc '-lc(printf.o)'
     unc '-lcurses(wmove.o)'

As an additional facility, the list of directories  searched
for  libraries  may  be  varied  by  setting the environment
variable LDPATH, which is interpreted similarly to the shell
PATH variable, and of course defaults to

     LDPATH=/lib:/usr/lib

As a further facility, the insertion of lib  before  and  .a
after  the  argument may be suppressed by using a capital -L
argument, thus to print out the assembler  for  /lib/crt0.o,
then the command

     unc -Lcrt0.o

should have the desired effect.

Second and subsequent file arguments are only referenced for
stripped  executable files, and may consist of single object
files and library members, using the same syntax as  before,
or whole libraries of object files, thus:

     unc strippedfile -Lcrt0.o -lcurses -ltermcap '-lm(sqrt.o)' -lc

It is advisable to make some effort to put the libraries  to
be  searched  in  the  order  in  which they were originally
loaded. This is because the search for  each  module  starts
where the previously matched module ended.  However, no harm
is done if this rule is not adhered to apart from  increased
execution   time   except   in  the  rare  cases  where  the
disassembler is confused by object modules  which  are  very
nearly similar.

                   3.  Additional options

The following options are available to modify the  behaviour
of the disassembler.

  -o file      Causes output to be  sent  to  the  specified
               file instead of the standard output.

                                                      Page 2

                       A Disassembler

  -t prefix    Causes temporary files to be created with the
               given  prefix.  The  default prefix is split,
               thus  causing  two  temporary  files  to   be
               created  with  this  prefix  in  the  current
               directory. If it is desired, for example,  to
               create   the  files  as  /tmp/xx*,  then  the
               argument -t /tmp/xx  should  be  given.  Note
               that the temporary files may be very large as
               a complete map of the text and data  segments
               is generated.

  -a           Suppresses  the  generation   of   non-global
               absolute  symbols from the output. This saves
               output  from  C  compilations   without   any
               obvious  problems,  but  the  symbols  are by
               default included in the name of producing  as
               nearly  identical  output  as possible to the
               original source.

  -s           Causes an additional scan to take place where
               all  possible  labels  are  replaced by local
               symbols. The local symbols  are  inserted  in
               strictly ascending order, starting at 1.

  -v           Causes a blow-by-blow account  of  activities
               to be output on the standard error.

                    4.  Diagnostics etc

Truncated or garbled object and library files usually  cause
processing to stop with an explanatory message.

The only other kinds of message are  some  passing  warnings
concerning  obscure  constructs  not  handled,  such  as the
relocation of byte fields, or the relocation of  overlapping
fields. Occasionally a message

     Library clash: message

may appear and processing cease. This message is found where
at  a  late  stage  in  processing  libraries,  the  program
discovers that due to the extreme similarity of two or  more
library  members,  it has come to the wrong conclusion about
which one to use. The remedy here is to  spell  out  to  the
program which members to take in which order.

                                                      Page 3

                       A Disassembler

                   5.  Future development

In the future it is hoped  to  devise  ways  of  making  the
disassembler  independent of all the above-mentioned version
dependencies,  by  first  reading  a  file   defining  these
things.  This  will  probably  be  applied  after the Common
Object Format becomes more standard.

In the long term it would be desirable and useful to enhance
the   product   to  produce  compilable  C  in  addition  to
assemblable assembler. Stages in the  process  are  seen  as
follows:

  1.  Better identification of basic  blocks  in  the  code.
      Switch  statements  are  a  major problem here, as are
      constant data held in the text segment.

  2.  Marrying of data to the corresponding text. It  is  in
      various  places  hard to divorce static references "on
      the fly" (e.g.  strings,  and  switch  lists  in  some
      implementations)  from static at the head of a module.
      This is part  of  the  problem  of  identifying  basic
      blocks.

  3.  Compilation of header  files  to  work  out  structure
      references   within  the  text.  At  this  stage  some
      interaction may be needed.

Meanwhile the product is one which is a useful tool  to  the
author  in  its present form. Comments and suggestions as to
the most practical method of improving the  product  in  the
ways   suggested  or  in  other  ways  would  be  gratefully
considered.

                                                      Page 4

SHAR_EOF
if test 7415 -ne "`wc -c < 'doc.out'`"
then
	echo shar: error transmitting "'doc.out'" '(should have been 7415 characters)'
fi
fi
echo shar: extracting "'makefile'" '(128 characters)'
if test -f 'makefile'
then
	echo shar: will not over-write existing file "'makefile'"
else
cat << \SHAR_EOF > 'makefile'
# Compiler flags as used by the original author.
# NOTE(review): -v and -OB look compiler-specific; confirm against your cc.
CFLAGS=-v -OB
# Object files linked into the disassembler.
OBJS=alloc.o file.o libmtch.o robj.o iset.o prin.o heur.o main.o

# Link the unc executable from all of the objects.
unc:	$(OBJS)
	cc -o unc $(OBJS)

# Every object depends on the shared header.
$(OBJS):	unc.h
SHAR_EOF
if test 128 -ne "`wc -c < 'makefile'`"
then
	echo shar: error transmitting "'makefile'" '(should have been 128 characters)'
fi
fi
echo shar: extracting "'unc.h'" '(4526 characters)'
if test -f 'unc.h'
then
	echo shar: will not over-write existing file "'unc.h'"
else
cat << \SHAR_EOF > 'unc.h'
/*
 *	SCCS:	@(#)unc.h	1.2	11/2/84	14:21:02
 *	Header file for uncompile program.
 *
 ***********************************************************************
 *	This software is copyright of
 *
 *		John M Collins
 *		47 Cedarwood Drive
 *		St Albans
 *		Herts, AL4 0DN
 *		England			+44 727 57267
 *
 *	and is released into the public domain on the following conditions:
 *
 *		1.  No free maintenance will be guaranteed.
 *		2.  Nothing may be based on this software without
 *		    acknowledgement, including incorporation of this
 *		    notice.
 *
 *	Notwithstanding the above, the author welcomes correspondence and bug
 *	fixes.
 ***********************************************************************
 */

#define	MAXCHARS	50
#define	HASHMOD		97

/*
 *	The following structure is used to keep track of symbols.
 *
 *	Entries are chained from symbhash[] through s_next and are
 *	allocated by lookup() with extra space after the structure, so
 *	s_name is really a variable-length, null-terminated name.
 */

struct	symstr	{
	struct	symstr	*s_next;		/*  Next in hash chain  */
	struct	symstr	*s_link;		/*  Next in duplicate labels */
	unsigned	s_type	:  3;		/*  Symbol type (e.g. TEXT, DATA, BSS)  */
	unsigned	s_newsym:  1;		/*  Set by lookup() on a newly created entry  */
	unsigned	s_invent:  1;		/*  Invented symbol (set by inventsymb())  */
	unsigned	s_glob	:  1;		/*  Global symbol  */
	long		s_value;		/*  Value if defined  */
	short		s_defs;			/*  Defined count  */
	short		s_used;			/*  Used count  */
	unsigned short	s_lsymb;		/*  Local symbol (0 when first created)  */
	char		s_name[1];		/*  Chars of name null term; extends past the struct  */
};

/*  A symbol is always handled through a pointer to its entry.  */
typedef	struct	symstr	*symbol;

/*  Heads of the hash chains, indexed by shash() of the name.  */
symbol	symbhash[HASHMOD];

/*
 *	Description of one file being processed.  ef_t and ef_d are the
 *	descriptors of the temporary map files which gette()/putte() and
 *	getde()/putde() seek in: one t_entry per 16-bit word of text and
 *	one d_entry per byte of data.
 */

typedef	struct	{
	int	ef_t;			/*  Text file fd  */
	int	ef_d;			/*  Data file fd  */
	long	ef_entry;		/*  Entry point  */
	long	ef_tsize;		/*  Text size  */
	long	ef_dsize;		/*  Data size  */
	long	ef_bsize;		/*  Bss size  */
	long	ef_end;			/*  End of it all (highest data address accepted)  */
	long	ef_tbase;		/*  Text base  */
	long	ef_dbase;		/*  Data base  */
	long	ef_bbase;		/*  Bss base  */
	int	ef_stcnt;		/*  Number of symbols  */
	int	ef_stmax;		/*  Max number of symbols (capacity of ef_stvec)  */
	symbol	*ef_stvec;		/*  Symbol vector, grown by reallst()  */
}  ef_fids;

/*  Files are passed around by pointer.  */
typedef	ef_fids	*ef_fid;

/*
 *	Description of word in text file.  This entry is held in the place
 *	corresponding to the address in the text file.
 *
 *	gette() and putte() transfer these records with a raw read()/write()
 *	of sizeof(t_entry) bytes at offset (addr - ef_tbase) * sizeof(t_entry) / 2,
 *	i.e. one record per 16-bit word.
 */

typedef	struct	{
	unsigned  short	t_contents;		/*  Actual contents  */
	unsigned  short t_iindex;		/*  Index in table  */
	unsigned	t_type	:  2;		/*  Type (T_UNKNOWN, T_BEGIN, T_CONT)  */
	unsigned	t_vins  :  1;		/*  Valid instruction  */
	unsigned	t_bdest	:  1;		/*  Is branch dest  */
	unsigned	t_gbdest:  1;		/*  Is global dest  */
	unsigned	t_dref	:  1;		/*  Referred to in data  */
	unsigned	t_bchtyp:  2;		/*  Branch type (T_NOBR ... T_JSR)  */
	unsigned	t_lng	:  3;		/*  Length in words  */
	unsigned	t_reloc :  2;		/*  Relocatable (R_NONE ... R_LONG)  */
	unsigned	t_rptr	:  2;		/*  Where relocated  */
	unsigned	t_rdisp :  1;		/*  Relocatable displacement */
	unsigned	t_isrel :  1;		/*  Relocated  */
	unsigned	t_amap	:  1;		/*  Worked out  */
	symbol		t_relsymb;		/*  Relocation symbol  */
	long		t_reldisp;		/*  Offset + or - from symb */
	symbol		t_lab;			/*  Label  */
	unsigned  short	t_lsymb;		/*  Local symbol  */
	long		t_reflo;		/*  Lowest place referred  */
	long		t_refhi;		/*  Highest place referred  */
	unsigned  short	t_match;		/*  Lib match lng  */
}  t_entry;

/*
 *	Types ......
 */

#define	T_UNKNOWN	0
#define	T_BEGIN		1
#define	T_CONT		2

#define	R_NONE		0		/*  No relocation  */
#define	R_BYTE		1		/*  Byte relocation  */
#define	R_WORD		2		/*  Word relocation  */
#define	R_LONG		3		/*  Long relocation  */

/*
 *	Branch types.
 */

#define	T_NOBR		0
#define	T_CONDBR	1
#define	T_UNBR		2
#define	T_JSR		3

/*
 *	Description of one byte of the data (or bss) segment.  getde() and
 *	putde() transfer these records with a raw read()/write() at offset
 *	(addr - ef_dbase) * sizeof(d_entry), i.e. one record per byte.
 */

typedef	struct	{
	unsigned  char	d_contents;		/*  Actual contents  */
	unsigned	d_type  :  4;		/*  Data type (D_ASC ... D_CONT)  */
	unsigned	d_reloc :  2;		/*  Relocatable (R_NONE ... R_LONG)  */
	unsigned	d_rptr	:  2;		/*  Where relocated  */
	short		d_lng;			/*  Length -ve for D_CONT */
	symbol		d_relsymb;		/*  Relocation symbol  */
	long		d_reldisp;		/*  Offset + or - from symb */
	symbol		d_lab;			/*  Label  */
}  d_entry;

/*
 *	Data types.
 */

#define	D_ASC		0		/*  Ascii chars  */
#define	D_ASCZ		1		/*  Null-term ascii  */
#define	D_BYTE		2		/*  Decimal bytes  */
#define	D_WORD		3		/*  Words  */
#define	D_LONG		4		/*  Longs  */
#define	D_ADDR		5		/*  Address pointer  */
#define	D_CONT		6		/*  Continuation of last  */

/*
 *	'Common' items.
 *
 *	A growable list of symbols; addit() appends to it, enlarging
 *	c_symb whenever c_int reaches c_max.
 */

struct	commit	{
	symbol	*c_symb;		/*  List of symbols (NULL until first use)  */
	int	c_int;			/*  Current number  */
	int	c_max;			/*  Maximum (entries allocated)  */
};

/*
 *	Library file description.
 *
 *	Records where the current member sits within an open library and
 *	where the following member starts.
 */

struct	libit	{
	int	lf_fd;			/*  File descriptor  */
	long	lf_offset;		/*  Offset of current file  */
	long	lf_next;		/*  Offset of next file  */
	char	lf_name[14];		/*  Name of item  */
};
SHAR_EOF
if test 4526 -ne "`wc -c < 'unc.h'`"
then
	echo shar: error transmitting "'unc.h'" '(should have been 4526 characters)'
fi
fi
echo shar: extracting "'alloc.c'" '(6396 characters)'
if test -f 'alloc.c'
then
	echo shar: will not over-write existing file "'alloc.c'"
else
cat << \SHAR_EOF > 'alloc.c'
/*
 *	SCCS:	@(#)alloc.c	1.2	11/2/84	14:17:20
 *	Allocate space etc.
 *
 ***********************************************************************
 *	This software is copyright of
 *
 *		John M Collins
 *		47 Cedarwood Drive
 *		St Albans
 *		Herts, AL4 0DN
 *		England			+44 727 57267
 *
 *	and is released into the public domain on the following conditions:
 *
 *		1.  No free maintenance will be guaranteed.
 *		2.  Nothing may be based on this software without
 *		    acknowledgement, including incorporation of this
 *		    notice.
 *
 *	Notwithstanding the above, the author welcomes correspondence and bug
 *	fixes.
 ***********************************************************************
 */

#include <stdio.h>
#include <a.out.h>
#include <setjmp.h>
#include "unc.h"

#define	STINC	10

char	*malloc(), *realloc();
char	*strncpy();
void	gette(), getde(), setde(), putte(), putde();
void	unimpl();
long	gettw();

ef_fids	mainfile;

/*
 *	Fatal out-of-memory handler.  Reports the problem on the standard
 *	error and terminates with exit status 255; it never returns.
 */

void	nomem()
{
	(void) fputs("Sorry - run out of memory\n", stderr);
	exit(255);
}

/*
 *	Compute the hash bucket for a symbol name: the sum of (at most)
 *	the first MAXCHARS characters, reduced modulo HASHMOD.
 */

unsigned  shash(str)
register  char	*str;
{
	register  unsigned  sum = 0;
	register  int	i;

	for  (i = 0;  i < MAXCHARS  &&  str[i] != '\0';  i++)
		sum += str[i];
	return  sum % HASHMOD;
}

/*
 *	Find the symbol table entry for a name, creating it if absent.
 *
 *	str	Name to look up.  Only the first MAXCHARS characters are
 *		significant, both for hashing and for comparison.
 *
 *	Returns the matching entry from the hash chain if there is one.
 *	Otherwise a new entry is allocated, appended to the chain and
 *	returned with s_newsym set so that the caller can tell it was
 *	just created.  nomem() is called (and exits) if allocation fails.
 */

symbol	lookup(str)
char	*str;
{
	register  symbol  res, *pp;
	register  int	len;

	pp = &symbhash[shash(str)];
	for  (res = *pp;  res != NULL;  res = *pp)  {
		if  (strncmp(res->s_name, str, MAXCHARS) == 0)
			return	res;
		pp = &res->s_next;
	}

	/*
	 *	Not found: pp now addresses the link terminating the chain.
	 *	Keep no more of the name than is ever compared, so that the
	 *	stored name and the significant name are the same thing.
	 */

	for  (len = 0;  len < MAXCHARS  &&  str[len] != '\0';  len++)
		;

	res = (symbol) malloc(sizeof(struct symstr) + len + 1);
	if  (res == NULL)
		nomem();
	*pp = res;
	res->s_next = NULL;
	(void) strncpy(res->s_name, str, len);
	res->s_name[len] = '\0';		/*  Null-terminate  */

	/*
	 *	Give every field a defined value; s_type and s_value used to
	 *	be left as whatever malloc happened to return.
	 */

	res->s_type = 0;
	res->s_value = 0;
	res->s_newsym = 1;
	res->s_glob = 0;
	res->s_invent = 0;
	res->s_link = NULL;
	res->s_used = 0;
	res->s_defs = 0;
	res->s_lsymb = 0;
	return  res;
}

/*
 *	Invent a symbol, making sure that we don't know it.
 *
 *	prefix	Leading characters of the invented name (callers in this
 *		file pass two-letter prefixes such as "RS" or "TS").  At
 *		most 30 characters of it are used.
 *
 *	The name is the prefix followed by a counter shared by all
 *	prefixes; the counter is advanced until lookup() reports that the
 *	name was not previously known.  The returned entry has s_newsym
 *	cleared and s_invent set.
 */

symbol	inventsymb(prefix)
char	*prefix;
{
	static	int  nsymb = 0;
	/*
	 *	30 prefix characters + up to 20 characters of signed decimal
	 *	+ terminator.  The precision in the format enforces the prefix
	 *	limit, so the sprintf below cannot overrun the buffer (the
	 *	previous 10-byte buffer could).
	 */
	char	schars[52];
	register  symbol  res;

	do  {
		(void) sprintf(schars, "%.30s%d", prefix, ++nsymb);
		res = lookup(schars);
	}  while  (!res->s_newsym);

	res->s_newsym = 0;
	res->s_invent = 1;
	return	res;
}

/*
 *	Enlarge the symbol vector of a file by STINC entries, allocating
 *	it for the first time if it does not exist yet.  Calls nomem()
 *	if the memory cannot be obtained.
 */

void	reallst(outf)
register  ef_fid  outf;
{
	outf->ef_stmax += STINC;
	outf->ef_stvec = (symbol *) (outf->ef_stvec == NULL ?
			malloc(outf->ef_stmax * sizeof(symbol)) :
			realloc(outf->ef_stvec, outf->ef_stmax * sizeof(symbol)));
	if  (outf->ef_stvec == NULL)
		nomem();
}

/*
 *	Return the symbol of the given file which has the given segment
 *	and value, inventing an "RS" symbol (and recording it as the
 *	label of that text or data location) if none exists.
 *
 *	fid	File whose symbol vector is searched and extended.
 *	seg	Relocation segment type.
 *	pos	Address within that segment.
 */

symbol	getnsymb(fid, seg, pos)
register  ef_fid  fid;
unsigned  seg;
long	pos;
{
	register  int	n;
	register  symbol  sym;
	t_entry	tent;
	d_entry	dent;

	/***********  MACHINE DEPENDENT  ******************************
	 *	Relocation records and symbol table entries number the
	 *	segments differently; convert from the former to the latter.
	 */

	seg += TEXT - RTEXT;

	/*
	 *	An existing symbol at this place is used in preference.
	 */

	n = 0;
	while  (n < fid->ef_stcnt)  {
		sym = fid->ef_stvec[n++];
		if  (sym->s_value == pos  &&  sym->s_type == seg)
			return	sym;
	}

	/*
	 *	Nothing suitable, so make one up and append it to the vector.
	 */

	sym = inventsymb("RS");
	if  (fid->ef_stcnt >= fid->ef_stmax)
		reallst(fid);
	sym->s_type = seg;
	sym->s_value = pos;
	fid->ef_stvec[fid->ef_stcnt] = sym;
	fid->ef_stcnt++;

	/*
	 *	Attach the new symbol as a label to the place it names.
	 */

	if  (seg == TEXT)  {
		gette(fid, pos, &tent);
		tent.t_bdest = 1;
		tent.t_lab = sym;
		putte(fid, pos, &tent);
		return	sym;
	}
	if  (seg == DATA  ||  seg == BSS)  {
		getde(fid, pos, &dent);
		dent.d_lab = sym;
		putde(fid, pos, &dent);
	}
	return	sym;
}

/*
 *	Assuming address given is in text segment, find its label, or invent
 *	one.  Also set where refered from.
 */

symbol	textlab(loc, refpos)
long	loc, refpos;
{
	t_entry	tent;

	gette(&mainfile, loc, &tent);
	if  (tent.t_type == T_CONT)
		return	NULL;
	if  (tent.t_lab == NULL)  {
		tent.t_lab = inventsymb("TS");
		tent.t_lab->s_type = TEXT;
		tent.t_lab->s_value = loc;
		tent.t_bdest = 1;
		putte(&mainfile, loc, &tent);
	}
	else
		tent.t_lab->s_used++;
	if  (tent.t_refhi < refpos)  {
		tent.t_refhi = refpos;
		putte(&mainfile, loc, &tent);
	}
	if  (tent.t_reflo > refpos)  {
		tent.t_reflo = refpos;
		putte(&mainfile, loc, &tent);
	}
	return	tent.t_lab;
}

/*
 *	Note references to data.
 *
 *	tpos	Text address at which a long value is read; that value is
 *		treated as a possible data address.
 *	size	Size of the access: 2 is taken as a word, 4 as a long and
 *		anything else as a byte.
 *
 *	If the value lies within the data/bss range of the main file, the
 *	data location is given a label (invented if need be), its type is
 *	reconciled with the access size, and the label is recorded as the
 *	relocation symbol of the text entry.  Nothing is done if the text
 *	entry already has a relocation symbol.
 */

void	mkdref(tpos, size)
long	tpos;
unsigned  size;
{
	t_entry	tent;
	d_entry	dent;
	register  symbol  ds;
	int	dchng = 0;		/*  Non-zero once dent needs writing back  */
	int	wsize;			/*  Data type implied by size  */
	long	dpos;

	gette(&mainfile, tpos, &tent);
	if  (tent.t_relsymb != NULL)
		return;

	/*  Value outside the range getde() accepts cannot be a data address.  */
	dpos = gettw(&mainfile, tpos, R_LONG);
	if  (dpos < mainfile.ef_dbase  ||  dpos > mainfile.ef_end)
		return;

	switch  (size)  {
	default:
		wsize = D_BYTE;
		break;
	case  2:
		wsize = D_WORD;
		break;
	case  4:
		wsize = D_LONG;
		break;
	}

	/*
	 *	Use the label already there, or invent one named after the
	 *	segment the address falls in.
	 */

	getde(&mainfile, dpos, &dent);
	if  ((ds = dent.d_lab) == NULL)  {
		if  (dpos >= mainfile.ef_bbase)  {
			ds = inventsymb("BS");
			ds->s_type = BSS;
		}
		else  {
			ds = inventsymb("DS");
			ds->s_type = DATA;
		}
		ds->s_value = dpos;
		dent.d_lab = ds;
		dchng++;
	}
	else
		ds->s_used++;

	/*
	 *	Reconcile the recorded type with this access.  Plain bytes
	 *	simply take the new type.  An address pointer is left alone
	 *	(with a warning unless accessed as a long).  Any other type is
	 *	replaced only when its type code is numerically greater than
	 *	the new one.
	 */

	if  (dent.d_type != D_BYTE)  {
		if  (dent.d_type != wsize)  {
			if  (dent.d_type == D_ADDR)  {
				if  (wsize != D_LONG)
					unimpl("Addr word usage");
			}
			else  if  (dent.d_type > wsize)  {
				dchng++;
				dent.d_type = wsize;
				dent.d_lng = size;
			}
		}
	}
	else  {
		dent.d_type = wsize;
		dent.d_lng = size;
		dchng++;
	}

	/*
	 *	Write the entry back and mark the remaining size-1 bytes as
	 *	continuations.  dchng is reused here as the byte offset.
	 */

	if  (dchng)  {
		putde(&mainfile, dpos, &dent);
		for  (dchng = 1;  dchng < size; dchng++)
			setde(&mainfile, dpos+dchng, D_CONT, 1);
	}

	tent.t_relsymb = ds;
	putte(&mainfile, tpos, &tent);
}

/*
 *	Add item to common or abs list.
 *
 *	cp	List to append to; its array is created on first use and
 *		grown by COMINC entries whenever it is full.
 *	symb	Symbol to append.
 *
 *	Calls nomem() (which exits) if the array cannot be allocated.
 */

#define	COMINC	10

void	addit(cp, symb)
register  struct  commit  *cp;
symbol	symb;
{
	if  (cp->c_int >= cp->c_max)  {
		cp->c_max += COMINC;

		/*
		 *	Both branches size the array from c_max, so the space
		 *	obtained always matches the capacity recorded (the first
		 *	allocation used to assume that c_max started at zero).
		 */

		if  (cp->c_symb == NULL)
			cp->c_symb = (symbol *)
					malloc(cp->c_max * sizeof(symbol));
		else
			cp->c_symb = (symbol *)
					realloc(cp->c_symb,
						cp->c_max * sizeof(symbol));
		if  (cp->c_symb == NULL)
			nomem();
	}
	cp->c_symb[cp->c_int++] = symb;
}
SHAR_EOF
if test 6396 -ne "`wc -c < 'alloc.c'`"
then
	echo shar: error transmitting "'alloc.c'" '(should have been 6396 characters)'
fi
fi
echo shar: extracting "'file.c'" '(4184 characters)'
if test -f 'file.c'
then
	echo shar: will not over-write existing file "'file.c'"
else
cat << \SHAR_EOF > 'file.c'
/*
 *	SCCS:	@(#)file.c	1.2	11/2/84	14:17:35
 *	Various operations on files.
 *
 ***********************************************************************
 *	This software is copyright of
 *
 *		John M Collins
 *		47 Cedarwood Drive
 *		St Albans
 *		Herts, AL4 0DN
 *		England			+44 727 57267
 *
 *	and is released into the public domain on the following conditions:
 *
 *		1.  No free maintenance will be guaranteed.
 *		2.  Nothing may be based on this software without
 *		    acknowledgement, including incorporation of this
 *		    notice.
 *
 *	Notwithstanding the above, the author welcomes correspondence and bug
 *	fixes.
 ***********************************************************************
 */

#include <stdio.h>
#include <a.out.h>
#include "unc.h"

long	lseek();
void	unimpl();

/*
 *	Fetch the text entry for an address from the text map of a file.
 *
 *	fid	File to read from.
 *	addr	Text address; must be even and lie between the text base
 *		and text base + text size.
 *	te	Where to put the entry.
 *
 *	Exits with status 200 on a bad address and 201 if the entry
 *	cannot be read.  Messages show the address relative to the text
 *	base.
 */

void	gette(fid, addr, te)
register  ef_fid  fid;
register  long	addr;
t_entry	*te;
{
	int	got;

	addr -= fid->ef_tbase;
	if  ((addr & 1) != 0  ||  addr < 0  ||  addr > fid->ef_tsize)  {
		(void) fprintf(stderr, "Invalid text address %lx\n", addr);
		exit(200);
	}

	/*  One entry per 16-bit word, hence the division by two.  */
	(void) lseek(fid->ef_t, (long)(addr * sizeof(t_entry)/2), 0);
	got = read(fid->ef_t, (char *) te, sizeof(t_entry));
	if  (got == sizeof(t_entry))
		return;

	(void) fprintf(stderr, "Trouble reading text at %lx\n", addr);
	exit(201);
}

/*
 *	Store a text entry.
 *
 *	fid	File to write to.
 *	addr	Text address; must be even and lie between the text base
 *		and text base + text size.
 *	te	Entry to store.
 *
 *	Exits with status 200 on a bad address and 201 if the entry
 *	cannot be written in full.  Messages show the address relative
 *	to the text base.
 */

void	putte(fid, addr, te)
register  ef_fid  fid;
register  long	addr;
t_entry	*te;
{
	addr -= fid->ef_tbase;
	if  (addr < 0  ||  addr > fid->ef_tsize  ||  (addr & 1) != 0)  {
		(void) fprintf(stderr, "Invalid text address %lx\n", addr);
		exit(200);
	}
	(void) lseek(fid->ef_t, (long)(addr * sizeof(t_entry)/2), 0);

	/*
	 *	A failed or short write (the map files can be very large, so
	 *	a full disk is a real possibility) would otherwise go
	 *	unnoticed and be read back later as garbage.
	 */

	if  (write(fid->ef_t, (char *) te, sizeof(t_entry)) != sizeof(t_entry))  {
		(void) fprintf(stderr, "Trouble writing text at %lx\n", addr);
		exit(201);
	}
}

/*
 *	Fetch the data entry for an address from the data map of a file.
 *
 *	fid	File to read from.
 *	addr	Data address; must lie between the data base and the end
 *		address of the file, both inclusive.
 *	de	Where to put the entry.
 *
 *	Exits with status 200 on a bad address (message shows the address
 *	as given) and 201 if the entry cannot be read (message shows the
 *	address relative to the data base).
 */

void	getde(fid, addr, de)
register  ef_fid  fid;
register  long	addr;
d_entry	*de;
{
	register  long	rel;

	if  (addr > fid->ef_end  ||  addr < fid->ef_dbase)  {
		(void) fprintf(stderr, "Invalid data address %lx\n", addr);
		exit(200);
	}

	/*  One entry per byte of data.  */
	rel = addr - fid->ef_dbase;
	(void) lseek(fid->ef_d, (long)(rel * sizeof(d_entry)), 0);
	if  (read(fid->ef_d, (char *) de, sizeof(d_entry)) == sizeof(d_entry))
		return;

	(void) fprintf(stderr, "Trouble reading data at %lx\n", rel);
	exit(201);
}

/*
 *	Store a data entry.
 *
 *	fid	File to write to.
 *	addr	Data address; must lie between the data base and the end
 *		address of the file, both inclusive.
 *	de	Entry to store.
 *
 *	Exits with status 200 on a bad address (message shows the address
 *	as given) and 201 if the entry cannot be written in full (message
 *	shows the address relative to the data base).
 */

void	putde(fid, addr, de)
register  ef_fid  fid;
register  long	addr;
d_entry	*de;
{
	if  (addr < fid->ef_dbase  ||  addr > fid->ef_end)  {
		(void) fprintf(stderr, "Invalid data address %lx\n", addr);
		exit(200);
	}
	addr -= fid->ef_dbase;
	(void) lseek(fid->ef_d, (long)(addr * sizeof(d_entry)), 0);

	/*
	 *	A failed or short write (the map files can be very large, so
	 *	a full disk is a real possibility) would otherwise go
	 *	unnoticed and be read back later as garbage.
	 */

	if  (write(fid->ef_d, (char *) de, sizeof(d_entry)) != sizeof(d_entry))  {
		(void) fprintf(stderr, "Trouble writing data at %lx\n", addr);
		exit(201);
	}
}

/*
 *	Set type and length of given data entry.
 *
 *	fid	File whose data map is updated.
 *	addr	Data address; addresses beyond the end of the file are
 *		silently ignored.
 *	type	New data type (D_...).
 *	lng	New length.
 *
 *	Turning a relocated byte into a continuation (D_CONT) is reported
 *	through unimpl() but carried out regardless.
 */

void	setde(fid, addr, type, lng)
ef_fid	fid;
long	addr;
unsigned  type;
int	lng;
{
	d_entry	dat;

	if  (addr > fid->ef_end)
		return;
	getde(fid, addr, &dat);
	if  (type == D_CONT  &&  dat.d_reloc != R_NONE)  {
		/*
		 *	19 characters of text, up to 16 hex digits for a 64-bit
		 *	long, and the terminator, with room to spare.  addr is a
		 *	long, so it must be formatted with %lx, not %x.
		 */
		char	obuf[48];
		(void) sprintf(obuf, "overlapped reloc 0x%lx", addr);
		unimpl(obuf);
	}
	dat.d_type = type;
	dat.d_lng = lng;
	putde(fid, addr, &dat);
}

/*
 *	Read a value from the data map of a file.
 *
 *	fid	File to read from.
 *	pos	Data address of the first (most significant) byte.
 *	size	R_BYTE, R_WORD or R_LONG for 1, 2 or 4 bytes.
 *
 *	Returns the bytes assembled most significant first.  Any other
 *	size is a fatal error (exit status 20).
 */

long	getdw(fid, pos, size)
register  ef_fid  fid;
long	pos;
int	size;
{
	d_entry	dat;
	register  long	acc;
	register  int	k, nbytes;

	getde(fid, pos, &dat);

	if  (size == R_BYTE)
		return	dat.d_contents;

	if  (size == R_WORD)
		nbytes = 2;
	else  if  (size == R_LONG)
		nbytes = 4;
	else  {
		(void) fprintf(stderr, "Data word size error\n");
		exit(20);
	}

	/*  Shift in the remaining bytes one at a time.  */
	acc = dat.d_contents;
	for  (k = 1;  k < nbytes;  k++)  {
		getde(fid, pos+k, &dat);
		acc = (acc << 8) + dat.d_contents;
	}
	return	acc;
}

/*
 *	Read a value from the text map of a file.
 *
 *	fid	File to read from.
 *	pos	Text address of the (first) word.
 *	size	R_BYTE for the upper byte of the word, R_WORD for the
 *		word itself, R_LONG for that word and the next combined
 *		with the first as the upper half.
 *
 *	Any other size is a fatal error (exit status 20).
 */

long	gettw(fid, pos, size)
register  ef_fid  fid;
long	pos;
int	size;
{
	t_entry	tex;
	long	upper;

	gette(fid, pos, &tex);

	if  (size == R_WORD)
		return	tex.t_contents;
	if  (size == R_BYTE)
		return	tex.t_contents >> 8;
	if  (size == R_LONG)  {
		upper = tex.t_contents;
		gette(fid, pos+2, &tex);
		return	(upper << 16) + tex.t_contents;
	}

	(void) fprintf(stderr, "Text word size error\n");
	exit(20);
	/*NOTREACHED*/
}
SHAR_EOF
if test 4184 -ne "`wc -c < 'file.c'`"
then
	echo shar: error transmitting "'file.c'" '(should have been 4184 characters)'
fi
fi
echo shar: extracting "'heur.c'" '(9885 characters)'
if test -f 'heur.c'
then
	echo shar: will not over-write existing file "'heur.c'"
else
cat << \SHAR_EOF > 'heur.c'
/*
 *	SCCS:	@(#)heur.c	1.2	11/2/84	14:17:46
 *	Attempt to guess things about the file.
 *
 ***********************************************************************
 *	This software is copyright of
 *
 *		John M Collins
 *		47 Cedarwood Drive
 *		St Albans
 *		Herts, AL4 0DN
 *		England			+44 727 57267
 *
 *	and is released into the public domain on the following conditions:
 *
 *		1.  No free maintenance will be guaranteed.
 *		2.  Nothing may be based on this software without
 *		    acknowledgement, including incorporation of this
 *		    notice.
 *
 *	Notwithstanding the above, the author welcomes correspondence and bug
 *	fixes.
 ***********************************************************************
 */

#include <stdio.h>
#include <a.out.h>
#include "unc.h"

/*  Size in bytes of the buffer intudat() first allocates for a run of data  */
#define	INITDAT	256
/*  Number of bytes that buffer grows by each time it fills up  */
#define	INCDAT	128

/*
 *	String heuristics used by intudat():
 *	STRSCNT - a printable byte only starts a string if the bytes after
 *		  it, up to STRSCNT bytes in all, are printable too (or a
 *		  '\0' is met first).
 *	STRECNT - this many non-printable bytes in a row end a string.
 */
#define	STRSCNT	3
#define	STRECNT	3

/*  Old-style declarations; this file includes no header declaring them  */
char	*malloc(), *realloc();

/*  Routines called from this file but not defined in it  */
void	gette(), getde(), setde(), putte(), putde();
void	nomem();
long	getdw();
symbol	inventsymb();

/*
 *	endt is the end of the text being scanned: intutext() and intlsym()
 *	set it to ef_tbase + ef_tsize of mainfile, and follseq() reads it.
 *	mainfile is the file that follseq(), intutext() and intlsym() work on.
 */
long	endt;
ef_fids	mainfile;

/*
 *	Warn about something that is not implemented.
 *
 *	Writes the line
 *		Warning: handling of "<msg>" not implemented
 *	to stderr, where <msg> is the string passed in.  It only reports;
 *	control returns to the caller afterwards.
 */

void	unimpl(msg)
char	*msg;
{
	(void) fprintf(stderr, "Warning: handling of \"%s\" not implemented\n", msg);
}

/*
 *	Decide whether a byte value could be part of a text string.
 *
 *	Returns 1 for newline, tab and every value from ' ' to '~'
 *	inclusive; returns 0 for anything else.
 */

int	possstr(x)
unsigned  x;
{
	/*  The two control characters that are accepted  */
	if  (x == '\t'  ||  x == '\n')
		return	1;

	/*  Otherwise it must lie inside the printable range  */
	return	!(x < ' '  ||  x > '~');
}

/*
 *	Guess things about data files.
 *
 *	Works in two passes over the entries of fid:
 *
 *	1.  Data segment, from ef_dbase up to (not including) ef_bbase.
 *	    Entries whose type is not D_BYTE are stepped over by their
 *	    d_lng.  Each run of D_BYTE entries is copied into a buffer and
 *	    re-marked with setde() as strings (D_ASC / D_ASCZ), single
 *	    bytes, words or longs.  A run stops at the end of the segment,
 *	    at an entry that is not D_BYTE, or at an entry that has a label.
 *
 *	2.  Bss segment, from ef_bbase up to (not including) ef_end.  Each
 *	    run of D_BYTE entries (delimited the same way) becomes a single
 *	    D_BYTE entry whose length is the run length, followed by D_CONT
 *	    entries.
 *
 *	nomem() is called if the buffer cannot be allocated or grown.
 */

void	intudat(fid)
ef_fid  fid;
{
	register  int	i, j;
	int	lt, input, invcnt;	/*  input = number of bytes held in inbuf  */
	long	offs, soffs, endd;	/*  soffs = offset at which the current run starts  */
	d_entry	fdat;
	unsigned  char	*inbuf;		/*  d_contents of the current run  */
	int	ibsize;			/*  bytes currently allocated to inbuf  */

	inbuf = (unsigned  char *)malloc(INITDAT);
	if  (inbuf == NULL)
		nomem();
	ibsize = INITDAT;

	offs = fid->ef_dbase;
	endd = fid->ef_bbase;

	while  (offs < endd)  {
		getde(fid, offs, &fdat);
		if  (fdat.d_type != D_BYTE)  {
			/*  Not plain bytes - leave it alone and step over it  */
			offs += fdat.d_lng;
			continue;
		}

		/*
		 *	Looks like general data.  Read in as much as possible.
		 *	The first entry is taken even if it has a label; a
		 *	later entry with a label ends the run.
		 */

		input = 0;
		soffs = offs;
		do  {
			if  (input >= ibsize)  {
				/*  Buffer full - grow it by INCDAT bytes  */
				ibsize += INCDAT;
				inbuf = (unsigned  char *)
					realloc((char *)inbuf, (unsigned)ibsize);
				if  (inbuf == NULL)
					nomem();
			}
			inbuf[input++] = fdat.d_contents;
			offs++;
			if  (offs >= endd)
				break;
			getde(fid, offs, &fdat);
		}  while  (fdat.d_type == D_BYTE && fdat.d_lab == NULL);

		/*
		 *	Now split up the data.  i indexes inbuf; the matching
		 *	file offset is soffs + i.
		 */

		for  (i = 0;  i < input;  )  {

			/*
			 *	Might be a string.  Require the bytes after
			 *	this one (STRSCNT bytes in all, fewer at the
			 *	end of the run) to be string characters too,
			 *	unless a '\0' turns up first.
			 */

			if  (possstr(inbuf[i]))  {
				lt = input;
				if  (i + STRSCNT < lt)
					lt = i + STRSCNT;
				for  (j = i + 1;  j < lt;  j++)  {
					if  (inbuf[j] == '\0')
						break;
					if  (!possstr(inbuf[j]))
						goto  notstr;
				}

				/*
				 *	Looks like a string then.  Find its end:
				 *	just past a '\0', or in front of a group
				 *	of STRECNT non-string bytes in a row, or
				 *	the end of the run.  j is left one past
				 *	the last byte of the string.
				 */

				invcnt = 0;
				for  (j = i + 1; j < input;  j++)  {
					if  (inbuf[j] == '\0')  {
						j++;
						break;
					}
					if  (possstr(inbuf[j]))
						invcnt = 0;
					else  {
						invcnt++;
						if  (invcnt >= STRECNT)  {
							/*  Back up to the first of the bad bytes  */
							j -= invcnt - 1;
							break;
						}
					}
				}

				/*
				 *	Head entry is D_ASCZ if the string ends
				 *	in '\0', otherwise D_ASC, with the string
				 *	length; the remaining bytes become D_CONT
				 *	with a value of 1.
				 */

				setde(fid,
				      soffs+i,
				      (unsigned)(inbuf[j-1]=='\0'?D_ASCZ:D_ASC),
				      j - i);
				for  (i++;  i < j;  i++)
					setde(fid, soffs+i, D_CONT, 1); 
				continue;
			}

notstr:
			/*
			 *	If on odd boundary, treat as a byte.  The same
			 *	is done for the last byte of the run.
			 */

			if  ((soffs + i) & 1  ||  i + 1 >= input)  {
				setde(fid, soffs + i, D_BYTE, 1);
				i++;
				continue;
			}

			/*
			 *	Treat as longs unless not enough.  With fewer
			 *	than 4 bytes left in the run, make a word.
			 */

			if  (i + 3 >= input)  {
				setde(fid, soffs + i, D_WORD, 2);
				setde(fid, soffs + i + 1, D_CONT, -1);
				i += 2;
				continue;
			}

			/*
			 *	Treat as a long but mark changeable.  The
			 *	D_CONT entries are given -j, the negated
			 *	distance back to the head entry (presumably
			 *	stored as d_lng; inturdat() only splits an
			 *	entry at a D_CONT whose d_lng is negative).
			 */

			setde(fid, soffs + i, D_LONG, 4);
			for  (j = 1;  j < 4;  j++)
				setde(fid, soffs + i + j, D_CONT, -j);
			i += 4;
		}
	}
	free((char *)inbuf);

	/*
	 *	Now zap bss segment.
	 */

	offs = fid->ef_bbase;
	endd = fid->ef_end;

	while  (offs < endd)  {
		getde(fid, offs, &fdat);
		if  (fdat.d_type != D_BYTE)  {
			offs += fdat.d_lng;
			continue;
		}

		/*
		 *	Find the end of this run of bytes; nothing needs to be
		 *	copied this time, only the extent matters.
		 */

		soffs = offs;
		do  {
			offs++;
			if  (offs >= endd)
				break;
			getde(fid, offs, &fdat);
		}  while  (fdat.d_type == D_BYTE && fdat.d_lab == NULL);

		/*
		 *	One D_BYTE entry covering the whole run, then D_CONT
		 *	entries numbered -1, -2, ... counting back to it.
		 */

		setde(fid, soffs, D_BYTE, (int)(offs-soffs));
		for  (i = -1, soffs++;  soffs < offs; i--, soffs++)
			setde(fid, soffs, D_CONT, i); 
	}
}

/*
 *	For non relocatable files, try to identify address pointers in
 *	the data.
 *
 *	Every D_LONG entry between ef_dbase and ef_bbase is examined.  If
 *	the long it holds lies between ef_dbase and ef_end (inclusive) it
 *	is taken to be a pointer to that place:
 *
 *	-  If the place is a D_CONT entry with a negative d_lng, the entry
 *	   it belongs to is split in two there.  A D_LONG split in the
 *	   middle becomes two D_WORDs, any other split gives D_BYTEs.  A
 *	   D_CONT with d_lng >= 0 is left alone and the long is skipped.
 *	-  The place gets a label: the one already there (its s_used is
 *	   bumped) or an invented one, "BS" of type BSS at or beyond
 *	   ef_bbase, otherwise "DS" of type DATA.
 *	-  The long itself is turned into a D_ADDR entry referring to that
 *	   symbol, followed by three D_CONT entries.
 */

void	inturdat(fid)
ef_fid	fid;
{
	register  long	offs;
	register  int	k;
	register  symbol  sym;
	long	endd, target;
	d_entry	dent, refdent;

	endd = fid->ef_bbase;

	/*
	 *	Step from entry to entry by the length of the entry just
	 *	looked at (as it stands at the end of the iteration).
	 */

	for  (offs = fid->ef_dbase;  offs < endd;  offs += dent.d_lng)  {
		getde(fid, offs, &dent);
		if  (dent.d_type != D_LONG)
			continue;

		target = getdw(fid, offs, R_LONG);
		if  (target < fid->ef_dbase  ||  target > fid->ef_end)
			continue;

		getde(fid, target, &refdent);
		if  (refdent.d_type == D_CONT)  {
			d_entry	head;
			int	newtype;

			/*
			 *	Pointing into the middle of something.  Only
			 *	a negative d_lng says how far back the head
			 *	of the entry is.
			 */

			if  (refdent.d_lng >= 0)
				continue;

			getde(fid, target+refdent.d_lng, &head);
			k = -refdent.d_lng;

			/*
			 *	The front part keeps k bytes, the part
			 *	starting at target gets the rest.
			 */

			refdent.d_lng += head.d_lng;
			head.d_lng = k;
			newtype = (head.d_type == D_LONG  &&  k == 2) ? D_WORD : D_BYTE;
			refdent.d_type = newtype;
			head.d_type = newtype;
			putde(fid, target - k, &head);

			/*
			 *	Renumber the continuation entries of the
			 *	new back part relative to target.
			 */

			for  (k = 1;  k < refdent.d_lng;  k++)
				setde(fid, target+k, D_CONT, -k);
		}

		/*
		 *	Find or make the label for the target.
		 */

		sym = refdent.d_lab;
		if  (sym != NULL)
			sym->s_used++;
		else  {
			if  (target < fid->ef_bbase)  {
				sym = inventsymb("DS");
				sym->s_type = DATA;
			}
			else  {
				sym = inventsymb("BS");
				sym->s_type = BSS;
			}
			sym->s_value = target;
			refdent.d_lab = sym;
			putde(fid, target, &refdent);
		}

		/*
		 *	Turn the long into an address reference.
		 */

		dent.d_type = D_ADDR;
		dent.d_relsymb = sym;
		dent.d_rptr = sym->s_type;
		putde(fid, offs, &dent);
		for  (k = 1;  k < 4;  k++)
			setde(fid, offs+k, D_CONT, 1);
	}
}

/*
 *	Recursively follow through the code, stopping at unconditional
 *	branches and invalid instructions.
 *
 *	pos is the text address to start from.  Each entry reached is
 *	flagged with t_amap, decoded with findinst() and written back with
 *	putte().  Following stops when
 *
 *	-  pos reaches endt,
 *	-  an entry already flagged with t_amap is met,
 *	-  findinst() gives a length <= 0, or the instruction would run
 *	   past endt (the entry is then reset to T_UNKNOWN, length 1,
 *	   t_vins cleared), or
 *	-  an unconditional branch has no destination symbol.
 *
 *	An unconditional branch with a destination symbol is followed in
 *	place.  For subroutine calls and conditional branches the
 *	destination is followed by a recursive call, then scanning carries
 *	on with the next instruction.
 */

void	follseq(pos)
long	pos;
{
	t_entry	tent;
	int	lng;
	long	npos;

	while  (pos < endt)  {
		gette(&mainfile, pos, &tent);
		if  (tent.t_amap)	/*  Been here  */
			return;
		tent.t_amap = 1;
		lng = findinst(&tent, pos);
		npos = pos + lng*2;
		if  (npos > endt)  {
			/*  Would run off the end of the text - not code  */
			tent.t_vins = 0;
			tent.t_lng = 1;
			tent.t_type = T_UNKNOWN;
			lng = 0;
			npos = endt;
		}
		putte(&mainfile, pos, &tent);
		pos = npos;

		if  (lng <= 0)
			return;

		switch  (tent.t_bchtyp)  {
		case  T_UNBR:
			if  (tent.t_relsymb == NULL)
				return;
			pos = tent.t_relsymb->s_value;
			continue;

		case  T_JSR:
		case  T_CONDBR:
			/*
			 *	Both come back to the next instruction.  The
			 *	destination is only followed when it is known;
			 *	a conditional branch is now checked for a
			 *	missing symbol just as a call is, rather than
			 *	being dereferenced blindly.
			 */
			if  (tent.t_relsymb != NULL)
				follseq(tent.t_relsymb->s_value);
			continue;

		default:
			continue;
		}
	}
}

/*
 *	Try to work out things about text files.
 *
 *	Sets endt to the end of mainfile's text, then works in two stages.
 *
 *	Stage 1 starts at ef_entry and alternates between two scans:
 *
 *	nextv  - step through entries by their t_lng.  An entry that has
 *		 t_vins set but has not been visited (t_amap clear) is
 *		 handed to follseq().  After that, or after passing an
 *		 unconditional branch, switch to nextiv.
 *	nextiv - step a word at a time until an entry with t_bdest set is
 *		 found, then switch back to nextv.
 *
 *	Stage 2 (dorest) sweeps the whole text from its base.  T_BEGIN
 *	entries are stepped over by their length.  T_UNKNOWN entries with
 *	t_vins set are decoded with findinst() and written back.  Anything
 *	else, or a decode that gives no length, advances one word.
 */

void	intutext()
{
	long	pos;
	t_entry	tent;
	int	lng;

	endt = mainfile.ef_tbase + mainfile.ef_tsize;
	pos = mainfile.ef_entry;
nextv:
	for  (;  pos < endt;)  {
		gette(&mainfile, pos, &tent);
		if  (!tent.t_amap && tent.t_vins)  {
			follseq(pos);
			pos += 2;
			goto  nextiv;
		}
		pos += tent.t_lng * 2;
		if  (tent.t_bchtyp == T_UNBR)
			goto  nextiv;
	}
	goto	dorest;
nextiv:
	for  (;  pos < endt;  pos += 2)  {
		gette(&mainfile, pos, &tent);
		if  (tent.t_bdest)
			goto  nextv;
	}
dorest:
	/*
	 *	Deal with unmapped instructions.
	 *
	 *	Positions here are addresses, like endt and ef_entry above,
	 *	so the sweep has to begin at the base of the text and not at
	 *	0; the two are only the same when ef_tbase is 0.
	 */

	for  (pos = mainfile.ef_tbase;  pos < endt;)  {
		gette(&mainfile, pos, &tent);
		switch  (tent.t_type)  {
		case  T_BEGIN:
			pos += tent.t_lng * 2;
			continue;
		case  T_UNKNOWN:
			if  (tent.t_vins)  {
				lng = findinst(&tent, pos);
				putte(&mainfile, pos, &tent);
				if  (lng > 0)  {
					pos += lng * 2;
					continue;
				}
			}
			/*  Not decodable - drop through and step one word  */
			/*FALLTHROUGH*/
		default:
			pos += 2;
			continue;
		}
	}
}

/*
 *	Invent local symbols.
 *
 *	The text from ef_entry to the end is cut into consecutive regions
 *	and the labels that belong to one region only are numbered 1, 2,
 *	... within it (stored in s_lsymb).
 *
 *	A region starts at bpos and is extended an entry at a time until
 *	an entry is met that has t_gbdest or t_dref set, or whose t_reflo
 *	is below bpos.  If some entry in the region has a t_refhi beyond
 *	the end found, the region is cut back to the position of the entry
 *	with the highest t_refhi.  A label is numbered when its entry has
 *	neither t_gbdest nor t_dref set and its t_reflo / t_refhi both lie
 *	inside the region.
 */

void	intlsym()
{
	long	bpos, epos, hiref, mark;
	unsigned  llnum;
	int	local;
	t_entry	tent;
	register  symbol  lab;

	endt = mainfile.ef_tbase + mainfile.ef_tsize;
	epos = mainfile.ef_entry;

	while  (epos < endt)  {
		bpos = epos;
		hiref = epos;

		/*
		 *	The first entry always belongs to the region.
		 */

		gette(&mainfile, epos, &tent);
		epos += tent.t_lng * 2;

		/*
		 *	Extend the region, remembering the highest reference
		 *	seen and where it was seen.
		 */

		while  (epos < endt)  {
			gette(&mainfile, epos, &tent);
			if  (tent.t_gbdest  ||  tent.t_dref)
				break;
			if  (tent.t_reflo < bpos)
				break;
			if  (tent.t_refhi > hiref)  {
				hiref = tent.t_refhi;
				mark = epos;
			}
			epos += tent.t_lng * 2;
		}

		if  (hiref > epos)
			epos = mark;

		/*
		 *	Number the labels that are private to the region.
		 */

		llnum = 0;
		mark = bpos;
		while  (mark < epos)  {
			gette(&mainfile, mark, &tent);
			local = !tent.t_gbdest && !tent.t_dref &&
				tent.t_reflo >= bpos && tent.t_refhi < epos;
			if  (local  &&  (lab = tent.t_lab) != NULL)
				lab->s_lsymb = ++llnum;
			mark += tent.t_lng * 2;
		}
	}
}

/*
 *	See whether the text of a candidate file lf appears in the main
 *	file mf at address startpos.
 *
 *	Returns 0 at once if lf's text is longer than what is left of mf's
 *	text from startpos.  Otherwise the two are compared a word at a
 *	time; words that lf marks with t_isrel are not compared.  Returns 0
 *	at the first difference, and otherwise 1 if at least one word was
 *	actually compared, 0 if none was.
 */

int	matchup(mf, lf, startpos)
register  ef_fid  mf, lf;
long	startpos;
{
	register  int	off, compared;
	t_entry	libent, mainent;

	/*  Not enough room left in the main file  */
	if  (lf->ef_tsize > mf->ef_tsize - startpos + mf->ef_tbase)
		return	0;

	compared = 0;
	off = 0;
	while  (off < lf->ef_tsize)  {
		gette(lf, lf->ef_tbase + off, &libent);
		if  (!libent.t_isrel)  {
			gette(mf, startpos + off, &mainent);
			if  (mainent.t_contents != libent.t_contents)
				return	0;
			compared++;
		}
		off += 2;
	}

	/*
	 *	An empty file, or one with every word skipped, proves
	 *	nothing, so it does not count as a match.
	 */

	return	compared > 0;
}

/*
 *	Look through the main file mf for the text of lf.
 *
 *	Candidate addresses run from mf's text base, a word at a time, up
 *	to the last address at which lf's text could still fit.  Where an
 *	entry has a non-zero t_match, the search jumps forward by that
 *	amount instead of testing the address.  Returns the first address
 *	at which matchup() succeeds, or -1 if there is none.
 */

long	findstart(mf, lf)
register  ef_fid  mf, lf;
{
	register  long	addr = mf->ef_tbase;
	long	last = mf->ef_tbase + mf->ef_tsize - lf->ef_tsize;
	t_entry	tent;

	while  (addr <= last)  {
		gette(mf, addr, &tent);
		if  (tent.t_match != 0)  {
			/*  Already claimed - hop over it  */
			addr += tent.t_match;
			continue;
		}
		if  (matchup(mf, lf, addr))
			return	addr;
		addr += 2;
	}
	return	-1;
}

/*
 *	Record that the text of lf has been matched in mf at address pos.
 *
 *	The text size of lf is stored in t_match of the entry at pos, which
 *	is what findstart() uses to jump over the matched text.
 */

void	markmatch(mf, lf, pos)
ef_fid	mf, lf;
long	pos;
{
	t_entry	head;

	/*  Read, update and write back the first entry of the match  */
	gette(mf, pos, &head);
	head.t_match = (unsigned) lf->ef_tsize;
	putte(mf, pos, &head);
}
SHAR_EOF
if test 9885 -ne "`wc -c < 'heur.c'`"
then
	echo shar: error transmitting "'heur.c'" '(should have been 9885 characters)'
fi
fi
exit 0
#	End of shell archive