Spelling utilities in C (Part 3 of 3)

Graham Toal gtoal at tharr.UUCP
Thu Jun 28 17:09:57 AEST 1990


#!/bin/sh-----cut here-----cut here-----cut here-----cut here-----
# shar:	Shell Archiver
#	Run the following text with /bin/sh to create:
#	dawg.c #	dwgcheck.c #	pack.c #	pckcheck.c #	pdawg.c #	ppack.c #	proot.c #	sample.c #	tell.c 
cat - << \SHAR_EOF > dawg.c
/* The first three #define's are to repair mangling by BITNET mailers */
#define EOR ^
              /* This should be Caret (hat, up arrow) -- C Ex-or */
#define MOD %
              /* This should be Percent -- C Modulus             */
#define NOT ~
              /* This should be Tilde (twiddle) -- C unary Not   */
#include "copyr.h"
/*****************************************************************************/
/*                                                                           */
/*      FILE : DAWG.C                                                        */
/*                                                                           */
/*      Convert an alphabetically-ordered dictionary file in text format     */
/*      (one word per line) into a Directed Acyclic Word Graph. The command  */
/*      syntax is                                                            */
/*                                                                           */
/*              DAWG <text file (inc .ext)> <output file (no ext)>           */
/*                                                                           */
/*      The first 4 bytes of the output file form a 24-bit number containing */
/*      the number of edges in the dawg. The rest of the file contains the   */
/*      dawg itself (see "The World's Fastest Scrabble Program" by Appel     */
/*      and Jacobson for a description of a dawg).                           */
/*                                                                           */
/*****************************************************************************/

#include "grope.h"
#ifndef grope_h
/* If you don't have grope.h, create an empty one.
   These will do for a basic system: */
#undef KNOWS_VOID
#undef STDLIBS
#undef PROTOTYPES
#define SYS_ANY 1
#define COMPILER_ANY 1
#define SMALL_MEMORY 1
                       /*  To be defined if you have to generate
                           the data structure in bits. This will
                           certainly be true for any non-trivial
                           dictionary on MSDOS, or most home
                           micros with 1Mb Ram or under. */
#endif
/* Portability notes:

   0) KISS! Keep It Simple, Stupid!

   1) No typedef's
   2) No bitfields
   3) No structs
   4) No #if defined()
   5) No complex #if's
   6) No procedure variables
   7) Don't trust tolower() as some libs don't range check
   8) Stay character-set independent (EBCDIC should work?)
   9) Assume sizeof(long) >= 32 bits, but sizeof(int) just >= 16
  10) Note use of ABS in preference to unsigned longs.
  11) Assume 8-bit char set
  12) Don't use k&r reserved words for variables, even if not
      in ANSI.  Such as 'entry'.
  13) Unix people; no sys/ or machine/ files please unless under a
      suitable #ifdef, and generic code supplied for everyone else...
  14) this is byte-sex independant at the moment. KEEP IT THAT WAY!
      Don't assume dawgs are stored in a fixed form, such as so-called
      'net-order'.
  15) Since C doesn't range-check arrays, we'll do it. If possible,
      leave the running systems with range checking on if you can
      afford it!
  16) No nested include files.
  17) Don't pull in any include files twice. (*Kills* the Atari!)
  18) Don't use 'register' -- trust the compiler; the compiler is your friend.
 */
/*#define RCHECK 1*/      /* We want range-checking of array accesses */

#include <stdio.h>

#ifdef LIB_STRINGS
#include <strings.h>    /* Some unixes, Mac? */
#else
#include <string.h>
#endif

#ifdef SYS_MAC
/* To compile with THINK C 4.0, place the files dawg.h grope.h,
   dawg.c and utils.c in a folder.  Create a project that contains
   dawg.c and the libraries unix and ANSI.
*/
#include <unix.h>
#include <stdlib.h>
#include <console.h>
/* Assume a 1Mb mac for the moment. Someday I'll add dynamic RAM sizing */
#define SMALL_MEMORY 1
#endif

#include "dawg.h"   /* main system constants */

#include "utils.c"  /* utils.c pulls in header file for malloc/free & exit */

#define ABS(x) ((x) < 0 ? -(x) : (x))

/**
*  The following constant, HASH_TABLE_SIZE, can be changed according to
*  dictionary size.  The two values shown will be fine for both large
*  and small systems.  It MUST be prime.
**/

#ifdef SMALL_MEMORY
#define HASH_TABLE_SIZE 30011

#ifdef SYS_MAC
/* boy, you guys must be *really* tight on space...
   are you sure you can handle a reasonable size of dictionary with
   such a small table? Bump this back up as soon as you get everything
   else working... 
   (I was given this info by a Mac site while they were first porting
   this stuff; maybe now it works on macs we can put the buffer size
   back up to something reasonable)
 */
#undef HASH_TABLE_SIZE
#define HASH_TABLE_SIZE 17389
#endif

#else
/* pick one about 20% larger than needed */
#define HASH_TABLE_SIZE 240007

/* Suitable prime numbers if you want to tune it:
     30011
    150001  <-- probably a good compromise. OK for dicts to about 1Mb text
    200003
    220009
    240007

   If you try any others, for goodness sake CHECK THAT THEY ARE PRIME!!!

 */
#endif
#define MAX_EDGES (HASH_TABLE_SIZE-1)

static FILE *fpin, *fpout;     /* Input/output files */
static char current_word[(MAX_LINE+1)];  /* The last word read from fpin */
static int first_diff, save_first_diff;
                                /* The position of the first letter at which */
                                /* current_word differs from the previous    */
                                /* word read                                 */
static NODE PCCRAP *hash_table;
static int first_time = TRUE;
static int this_char = '?', lastch;
static NODE PCCRAP *dawg;

static int FILE_ENDED = FALSE;  /* I'm having real problems getting
 merged dawgs to work on the PC; this is yet another filthy hack. Sorry. */

static INDEX nwords, nnodes, total_edges;

#ifdef SMALL_MEMORY
#define MAX_SUBDAWG 30000 /* Big enough for largest dict,
                             even on small systems */
static NODE PCCRAP *merge;

static INDEX dawgsize[256];
static INDEX dawgstart[256];
static INDEX dawgentry[256];

static int nextslot;  /* Dawgs are packed in sequentially - not in their
                         'proper' indexed position */
#endif
/*
                Shorthand macros for array accesses with checking
     If RCHECK isn't defined, these don't contribute any overhead. I suggest
     leaving RCHECK on except for production code.
 */

#define EDGE_LIST(idx) \
  edge_list[RANGECHECK(idx, MAX_CHARS)]
#define CURRENT_WORD(idx) \
  current_word[RANGECHECK(idx, MAX_LINE+1)]
#define DAWG(idx) \
  dawg[RANGECHECK(idx, MAX_EDGES)]
#define HASH_TABLE(idx) \
  hash_table[RANGECHECK(idx, HASH_TABLE_SIZE)]

/*
                Forward references
 */

#ifdef PROTOTYPES
static INDEX build_node(int depth);
static INDEX add_node(NODE  *edge_list, INDEX nedges);
static void read_next_word(VOID);
static INDEX compute_hashcode(NODE *edge_list, INDEX nedges);
static void report_size(VOID);
#else
static INDEX build_node();
static INDEX add_node();
static void read_next_word();
static INDEX compute_hashcode();
static void report_size();
#endif

#ifdef SMALL_MEMORY

#ifdef PROTOTYPES
static void merge_dawg (char *filename);
#else
static void merge_dawg ();
#endif

#endif

/**
*       main
*
*       Program entry point
**/

long words; /* dirty communication variable -- the multi-pass stuff
               was hacked in at the last minute. */

int
#ifdef PROTOTYPES
main(int argc, char **argv)
#else
main(argc, argv)
int argc;
char **argv;
#endif
{
  INDEX i;
  char fname[128];

#ifdef SYS_MAC
  argc = ccommand(&argv);
#endif

  if (argc != 3) {
    fprintf(stderr,
      "usage: dawg dictfile.ext dawgfile\n");
    exit(EXIT_ERROR);
  }

  /**
  *  Allocate the memory for the dawg
  **/

  if ((dawg = MALLOC(MAX_EDGES, sizeof(NODE PCCRAP *))) == NULL) {
    fprintf(stderr, "Can\'t allocate dictionary memory\n");
#ifndef SMALL_MEMORY
    fprintf(stderr, "-- try recompiling with -DSMALL_MEMORY\n");
#endif
    exit(EXIT_ERROR);
  }
  for (i = 0; i < (INDEX)MAX_EDGES; i++) dawg[i] = 0L;
  /**
  *  Allocate the hash table.
  *  Fill it with zeroes later just before use.  Don't trust calloc etc.
  *  - not portable enough.  Anyway, in the multi-pass version we don't
  *  want to continually free/claim...
  **/

  if ((hash_table =
      MALLOC((HASH_TABLE_SIZE+1), sizeof(NODE))) == NULL) {
    fprintf(stderr, "Can\'t allocate memory for hash table\n");
    exit(EXIT_ERROR);
  }
  /**
  *  Open the input/output files
  **/

  fpin = fopen(argv[1], READ_TEXT);
  if (fpin == NULL) {
    fprintf(stderr, "Can\'t open text file \"%s\"\n", argv[1]);
    /* Could print out error string but it's not portable... */
    exit(EXIT_ERROR);
  }

  /**
  *  Read the first word from the dictionary
  **/

  first_time = TRUE;
  nwords = 0;
  current_word[0] = 0;
  read_next_word();
  lastch = *current_word;
  /**
  *  Initialise the counters, taking account of the root node (which is
  *  a special case)
  **/

  nnodes = 1; total_edges = MAX_CHARS;

  /**
  *  Build the dawg and report the outcome
  **/

  /* Now, in the dim & distant past, this code supported the concept
     of a restricted character set - ie a..z & A..Z were packed into 6 bits;
     this caused awful problems in the loop below, where we had to try to
     keep the loop-control variable and the character code in synch; nowadays
     chars are 8 bits or else, so I'm starting to tidy up the places
     where these hacks were necessary... */
     
#ifdef SMALL_MEMORY
  for (this_char = 0; this_char < MAX_CHARS; this_char++) {
  if (FILE_ENDED) break; /* Don't waste time in this loop... */
#endif
  /* Explicitly initialise hash table to all zeros */
  {INDEX a; for (a = 0; a <= HASH_TABLE_SIZE; a++) hash_table[a] = 0;}
  words = 0;
  (void) build_node(0);
#ifdef SMALL_MEMORY
#ifdef DEBUG
  fprintf(stderr,
    "Char %d done. %ld Words, %ld Nodes\n", *current_word, words, total_edges);
#endif
  if (total_edges /* words */ == 0) continue;
#endif

  /**
  *  Save the dawg to file
  **/
#ifdef SMALL_MEMORY
  sprintf(fname, "%s%c%d", argv[2], EXT_CHAR, lastch);
#else
  sprintf(fname, "%s%cdwg", argv[2], EXT_CHAR);
#endif
  fpout = fopen(fname, WRITE_BIN);
  if (fpout == NULL) {
    fprintf(stderr, "Can\'t open output file \"%s\"\n", fname);
    exit(EXIT_ERROR);
  }
#ifdef DEBUG
  fprintf(stderr, "Writing to %s\n", fname);
#endif

  *dawg = total_edges;
  total_edges = sizeof(NODE) * (total_edges + 1); /* Convert to byte count */

  {
    long cnt;
    if ((cnt = putwords(dawg, (long)total_edges, fpout)) != total_edges) {
      fprintf(stderr, "%ld bytes written instead of %ld\n.", cnt, total_edges);
      exit(EXIT_ERROR);
    }
  }
  fclose(fpout);

  /**
  *  Read the first word from the dictionary
  **/

  first_diff = save_first_diff;
  first_time = FALSE;
  nwords = 0;
  /**
  *  Initialise the counters, taking account of the root node (which is
  *  a special case)
  **/

  nnodes = 1; total_edges = MAX_CHARS;

  lastch = *current_word;
  /**
  *  Build the dawg and report the outcome
  **/

#ifdef SMALL_MEMORY
  }
#endif
  fclose(fpin);
  fprintf(stderr, "Dawg generated\n");
#ifdef SMALL_MEMORY
  merge_dawg(argv[2]);
#endif
  exit(EXIT_OK);
}

/**
*       BUILD_NODE
*
*       Recursively build the next node and all its sub-nodes
**/

static INDEX
#ifdef PROTOTYPES
build_node(int depth)
#else
build_node(depth)
int depth;
#endif
{
  INDEX nedges = 0;
  INDEX i;
  NODE *edge_list;

  edge_list = NULL;
  if (CURRENT_WORD(depth) == 0) {
    /**
    *  End of word reached. If the next word isn't a continuation of the
    *  current one, then we've reached the bottom of the recursion tree.
    **/

    read_next_word();
    if (first_diff < depth) return(0);
  }

  edge_list = (NODE *)malloc(MAX_CHARS*sizeof(NODE));
                     /* Note this is a 'near' array */
  if (edge_list == NULL) {
    fprintf(stderr, "Stack full (depth %d)\n", depth);
    exit(EXIT_ERROR);
  }
  for (i = 0; i < MAX_CHARS; i++) EDGE_LIST(i) = 0L;

  /**
  *  Loop through all the sub-nodes until a word is read which can't
  *  be reached via this node
  **/

  do
    {
    /* Construct the edge. Letter.... */
    EDGE_LIST(nedges) = (NODE) (((NODE)CURRENT_WORD(depth)))
                        << (NODE)V_LETTER;
    /* ....end-of-word flag.... */
    if (CURRENT_WORD(depth+1L) == 0) EDGE_LIST(nedges) |= M_END_OF_WORD;
    /* ....and node pointer. */
    EDGE_LIST(nedges) |= build_node(depth+1); nedges++;
                                                   /* (don't ++ in a macro) */
    } while (first_diff == depth);

  if (first_diff > depth) {
    fprintf(stderr, "Internal error -- first_diff = %d, depth = %d\n",
      first_diff, depth);
    exit(EXIT_ERROR);
  }

  EDGE_LIST(nedges-1) |= M_END_OF_NODE;
                                  /* Flag the last edge in the node */

  /**
  *  Add the node to the dawg
  **/

  if (depth) {
    NODE result = add_node(edge_list, nedges);
    free(edge_list);
    return(result);
  }

  /**
  *  depth is zero, so the root node (as a special case) goes at the start
  **/

  edge_list[MAX_CHARS-1] |= M_END_OF_NODE;      /* For backward searches */
  for (i = 0; i < MAX_CHARS; i++)
    {
    dawg[i+1] = edge_list[i];
    }
  free(edge_list);
  return(0);
}

/**
*       ADD_NODE
*
*       Add a node to the dawg if it isn't already there. A hash table is
*       used to speed up the search for an identical node.
**/

static INDEX
#ifdef PROTOTYPES
add_node(NODE *edge_list, INDEX nedges)
#else
add_node(edge_list, nedges)
NODE *edge_list;
INDEX nedges;
#endif
{
  NODE hash_entry;
  INDEX inc;
  INDEX a, first_a;
  INDEX i;

  /**
  *  Look for an identical node. A quadratic probing algorithm is used
  *  to traverse the hash table.
  **/

  first_a = compute_hashcode(edge_list, nedges);
  first_a = ABS(first_a) MOD HASH_TABLE_SIZE;
  a = first_a;
  inc = 9;

  for (;;)
    {
    hash_entry = HASH_TABLE(a) & M_NODE_POINTER;

    if (hash_entry == 0) break;   /* Node not found, so add it to the dawg */

    for (i = 0; i < nedges; i++)
      if (DAWG((hash_entry+i) MOD HASH_TABLE_SIZE) != EDGE_LIST(i)) break;

/* On the 1.6M dictionary, this gave a rangecheck with < 0. Now fixed
   I think - it was a problem with MOD giving unexpected results. */

      if (i == nedges) {
        return(hash_entry);        /* Node found */
      }
      /**
      *  Node not found here. Look in the next spot
      **/

      a += inc;
      inc += 8;
      if (a >= HASH_TABLE_SIZE) a -= HASH_TABLE_SIZE;
      if (inc >= HASH_TABLE_SIZE) inc -= HASH_TABLE_SIZE;
      if (a == first_a) {
        fprintf(stderr, "Hash table full\n");
        exit(EXIT_ERROR);
      }
    }

  /**
  *  Add the node to the dawg
  **/

  if (total_edges + nedges >= MAX_EDGES) {
    fprintf(stderr,
      "Error -- dictionary full - total edges = %ld\n", total_edges);
    exit(EXIT_ERROR);
  }

  HASH_TABLE(a) |= total_edges + 1;
  nnodes++;
  for (i = 0; i < nedges; i++) {
    DAWG((total_edges + 1 + i) MOD HASH_TABLE_SIZE) = EDGE_LIST(i);
  }
  total_edges += nedges;
  return(total_edges - nedges + 1);
}

/**
*       READ_NEXT_WORD
*
*       Read the next word from the input file, setting first_diff accordingly
**/

static void
#ifdef PROTOTYPES
read_next_word(VOID)
#else
read_next_word()
#endif
{
  /* This stuff imposes the limitation of not allowing '\0' in a word;
     not yet a problem but the dawg structure itself could probably cope
     if the feature were wanted. (Maybe with a little teweaking)       */
  char linebuff[(MAX_LINE+1)];
  int length;
  for (;;)
    {
    int next = 0, c;
    for (;;) {
      c = fgetc(fpin);
      if (FILE_ENDED || c == EOF || ferror(fpin) || feof(fpin)) {
        /* for some reason, we always get a blank line at the end of files */
        current_word[0] = 0;
        first_diff = -1;
        linebuff[next] = '\0';
        FILE_ENDED = TRUE;
        return;
      }
      c &= 255;
      if (next == 0 && c == '\n') continue; /* skip blank lines... */
      linebuff[next++] = c;
      if (c == '\n') break;
    }
    linebuff[next] = '\0';

    words++;

    length = strlen(linebuff);

    if (linebuff[length-1] == '\n') linebuff[length-1] = '\0';
    if (linebuff[length] == '\n') linebuff[length] = '\0';

    if (length < 2 || length > MAX_LINE-1)
      {
      fprintf(stderr, "\n%s - invalid length\n", linebuff);
      continue;    /* Previously exit()ed, but now ignore so that
                      test sites without my pddict can use /usr/dict/words */
      }
    break;
    }
  for (length = 0; current_word[length] == linebuff[length]; length++) {
    /* Get common part of string to check order */
  }
  if (current_word[length] > linebuff[length]) {
    fprintf(stderr, "Error -- %s (word out of sequence)\n", linebuff);
    exit(EXIT_ERROR);
  }
  first_diff = length;

  nwords++;
  strcpy(current_word, linebuff);

  if ((nwords > 1) && (!(nwords & 0x3FF))) report_size();
#ifdef SMALL_MEMORY
  if (current_word[0] != lastch) {
    save_first_diff = first_diff;
    first_diff = -1;
    report_size();
  }
#else
  this_char = current_word[0]; /* for diagnostics... */
#endif
}
/**
*       COMPUTE_HASHCODE
*
*       Compute the hash code for a node
**/

static INDEX
#ifdef PROTOTYPES
compute_hashcode(NODE *edge_list, INDEX nedges)
#else
compute_hashcode(edge_list, nedges)
NODE *edge_list;
INDEX nedges;
#endif
{
  INDEX i;
  INDEX res = 0L;

  for (i = 0; i < nedges; i++)
    res = ((res << 1) | (res >> 31)) EOR EDGE_LIST(i);
  return(res);
}

/**
*       REPORT_SIZE
*
*       Report the current size etc
**/

static void
#ifdef PROTOTYPES
report_size(VOID)
#else
report_size()
#endif
{

  if (first_time)
    {
    fprintf(stderr, "      Words    Nodes    Edges    Bytes    BPW\n");
    fprintf(stderr, "      -----    -----    -----    -----    ---\n");
    first_time = FALSE;
    }
  if (*current_word) fprintf(stderr, "%c:", *current_word);
  else fprintf(stderr, "Total:");
  
  /* THE FOLLOWING IS RATHER GRATUITOUS USE OF FLOATING POINT - REMOVE
     IT AND REPLACE WITH INTEGER ARITHMETIC * 100 */

  /* (hey - I already did this in the copy I sent to Richard; so how
      come its missing? Oh no, not again: I've got out of synch and
      used an old copy, haven't I? :-(   ) */ 

  fprintf(stderr, "  %7ld  %7ld  %7ld  %7ld  %5.2f\n",
    nwords, nnodes, total_edges, sizeof(NODE PCCRAP *)*(total_edges+1),
      (float)(sizeof(NODE PCCRAP *)*(total_edges+1))/(float)nwords);
}

#ifdef SMALL_MEMORY
static void
#ifdef PROTOTYPES
merge_dawg (char *filename)
#else
merge_dawg (filename)
char *filename;
#endif
{
  FILE *fp, *outfile;
  NODE data, edge;
  INDEX nedges, nextfree, i, dentry;
  INDEX count, lastnode;
  int dictch, padding;
  char fname[128];

  nedges = (INDEX)MAX_SUBDAWG;
  if ((merge = MALLOC((SIZE_T)nedges, sizeof(INDEX))) == 0) {
     fprintf(stderr, "Memory allocation error -- %ld wanted\n",
       nedges*sizeof(INDEX));
     exit(EXIT_ERROR);
  }

  nextfree = MAX_CHARS; /* 0 is special, 1..26 are root nodes for a..z */
  nextslot = 0;
  for (dictch = 0; dictch < MAX_CHARS; dictch++) {
    /***
    *   Open the file and find out the size of the dawg
    ***/
    sprintf(fname, "%s%c%d", filename, EXT_CHAR, dictch);
    if ((fp = fopen(fname, READ_BIN)) == NULL) {
      continue;
    }
    nedges = getword(fp);
    dawgstart[nextslot] = nextfree;
    dawgsize[nextslot] = nedges-MAX_CHARS;

    /* the first entry is (erroneously) the pointer to the chunk */
    dentry = getword(fp);
    dawgentry[nextslot] = dentry - MAX_CHARS + nextfree;

    nextfree += nedges - MAX_CHARS;
    nextslot++;

    fclose(fp);
  }

/* Now output total edges, and starts[a..z] */
/* Then set nextfree to start of each block  */
  sprintf(fname, "%s%cdwg", filename, EXT_CHAR);
  outfile = fopen(fname, WRITE_BIN);
  if (outfile == NULL) {
    fprintf(stderr, "Cannot open output dawg file %s\n", fname);
    exit(EXIT_ERROR);
  }
  putword(nextfree, outfile);
  nextfree = 1; nextslot = 0; padding = 0;
  lastnode = MAX_CHARS-1;
  for (;;) {
    if (dawgentry[lastnode] != 0) break; /* Leave with 'lastnode' set */
    lastnode -= 1;
  }
  for (dictch = 0; dictch < MAX_CHARS; dictch++) {
    NODE edge = dawgentry[nextslot];
    if (edge == 0) {
      padding++;
      continue;
    }
    if (dictch == lastnode) {
      edge |= M_END_OF_NODE;
    } else {
      edge &= (NOT M_END_OF_NODE);
    }
    putword(edge, outfile);
    nextfree++; nextslot++;
  }
  nextfree += padding;
  while (padding > 0) {
    putword(0L, outfile); padding -= 1;
  }
  /* nextslot = 0; ???? This one not used? */
  for (dictch = 0; dictch < MAX_CHARS; dictch++) {
    sprintf(fname, "%s%c%d", filename, EXT_CHAR, dictch);
    if ((fp = fopen(fname, READ_BIN)) == NULL) {
      continue;
    }

    nedges = getword(fp);

    for (i = 0; i < MAX_CHARS; i++) {
      (void) getword(fp);
      /* Skip root pointers */
    }

    nedges -= MAX_CHARS;
    count = getwords(&merge[1], (long)(nedges*sizeof(NODE)), fp);
    if (count != nedges*sizeof(NODE)) {
      fprintf(stderr, "Dictionary read error - %ld wanted - %ld got\n",
        nedges*sizeof(NODE), count);
      exit(EXIT_ERROR);
    }
    fclose(fp);

    DELETE(fname);    /* On floppy systems, we can almost get away with
                         little more space than the final files would take! */

    /* relocate all nodes */
    for (i = 1; i <= nedges; i++) {
      data = merge[i];
      edge = data & M_NODE_POINTER;
      if (edge > MAX_CHARS) {
        data &= (NOT M_NODE_POINTER);
        data |= edge - MAX_CHARS - 1 + nextfree;
        merge[i] = data;
      }
      putword(merge[i], outfile);
    }
    nextfree += nedges;
    /*  nextslot++;   this one not used??? */
  }
  fclose(outfile);
  FREE(merge);
}
#endif
SHAR_EOF
cat - << \SHAR_EOF > dwgcheck.c
/*

    File:          dwgcheck.c
    Author:        Graham Toal
    Purpose:       check correct spelling using dict.dwg
    Creation date: 22/06/90
    Lastedit:      23/06/90 01:10:12

    Description:

       This can be expanded to be like the unix 'spell' command.
    This demo file simply checks words passed on the command line.
    note how it remembers words so that the second time it sees
    them, it will call them correct.  This is how you should
    implement the 'ignore' feature of an interactive checker.

    This code uses the dawg data structure, which I recommend
    you stick with; however for *extremely* fast checking in
    special circumstances you can use the 'pack' versions of
    check() et al.

*/


/* Manadatory header files */
#include <stdio.h>
#include "dawg.h"
#include "grope.h"

/*#define RCHECK*/     /* Enable for internal range checks... */

#include "utils.c"

/* Headers here as needed on per-program basis */
#include <ctype.h>  /* eg, for isalpha() */

/* Spelling library utilities */
#include "init.c"      /* Loading dicts */
#include "dyntrie.c"   /* Creating dicts at run-time */
#include "print.c"     /* Printing dicts */
#include "check.c"     /* Checking words */
#include "locate.c"    /* Finding words by their stems */

#include "soundex.c"   /* Soundex algorithm for word-likeness comparison */
#include "similcmp.c"  /* Closeness-metric for correction */
#include "correct.c"   /* Hack code to attempt error correction (uses above) */

/* Let's be friendly to these guys... */
#ifdef SYS_MAC
/* To compile with THINK C 4.0, place all the relevant .h and .c
   files in a folder.  Then create a project which contains this main.c
   and the libraries unix and ANSI.
*/
#include <unix.h>
#include <stdlib.h>
#include <console.h>
#endif

/* Your own header files go here, for instance:
               #include "readword.h"
 */



int
#ifdef PROTOTYPES
main(int argc, char **argv)
#else
main(argc, argv)
int argc;
char **argv;
#endif
{
NODE PCCRAP *dawg;      /* precompiled dictionary (dict.dwg) */
INDEX edges;            /* size of above */

NODE PCCRAP *userdict;  /* run-time dictionary, added to dynamically */

char *word;
int each;

#ifdef SYS_MAC
  argc = ccommand(&argv);  /* Mac users have my sympathy */
#endif

  /* Your program goes here... */
  if (argc == 1) {
    fprintf(stderr, "usage: %s possibly mispeled wurdz\n", argv[0]);
    exit(EXIT_ERROR);
  }
  if (!dawg_init("", &dawg, &edges)) exit(EXIT_ERROR);

  if (!trie_new(&userdict)) exit(EXIT_ERROR);

  for (each = 1; each < argc; each++) {
    word = argv[each];
    fprintf(stderr, "* %s:\n", word);
    if (dawg_check(dawg, word) || dawg_check(userdict, word)) {
      fprintf(stderr, "Correct\n");
    } else {
      fprintf(stderr, "Wrong\n");
      (void)trie_add(&userdict, word);
    }
    if (each+1 != argc) fprintf(stderr, "\n");
  }

  exit(EXIT_OK);
}
SHAR_EOF
cat - << \SHAR_EOF > pack.c
/* The line below is to protect against Bitnet mailer damage */
#define MOD %
             /* This should be a Percent (for C Modulus) */ 

/* Blech. This file is a mess. Make it the first rewrite... */
#include "copyr.h"
/*****************************************************************************/
/*                                                                           */
/*      FILE : PACK.C                                                        */
/*                                                                           */
/*      Re-pack a dawg/trie data structure using Franklin Liang's            */
/*      algorithm for sparse matrix storage.  Final structure allows         */
/*      direct indexing into trie nodes, so is exceedingly fast at checking. */
/*      Unfortunately the trade off is that any algorithms which walk        */
/*      the data structure and generate words will take much longer          */
/*                                                                           */
/*              PACK <dawg file (inc .ext)> <pack file (inc .ext)>           */
/*                                                                           */
/*****************************************************************************/

/* Pending:

   see what closest gap between dawgptr + freenode is, and see whether we
   can save space by overlapping input & output arrays with a window between
   them.  Should get almost 50% of memory back.  Also, because of hacking
   around a bug some time back, I'm using an extra array (new_loc) for
   relocation of pointers, when in fact I could (and have in the past)
   managed to relocate them in situ with not much extra overhead.
      As I said, it needs a rewrite...

 */

/* Note: I tried one implementation of this which used bitsets to test
   whether two nodes were compatible.  In fact, it wasn't sufficiently
   faster to justify the extra space it used for the arrays of flags.
   Now I check for compatibility on the fly with lots of comparisons.
   I do however have a seperate byte array to flag whether a trie
   is based at any address.  There's probably a way of removing this.
*/

#include "grope.h"
#ifndef grope_h
/* If you don't have grope.h, create an empty one.
   These will do for a basic system: */
#undef KNOWS_VOID
#undef STDLIBS
#undef PROTOTYPES
#define SYS_ANY 1
#define COMPILER_ANY 1
#define SMALL_MEMORY 1 /*  To be defined if you have to generate
                           the data structure in bits. This will
                           certainly be true for any non-trivial
                           dictionary on MSDOS, or most home
                           micros with 1Mb Ram or under. */
#endif

#include <stdio.h>

/*#define RCHECK*/  /* Turn this back on if you have any problems. */

#include "dawg.h"
#include "utils.c"
#include "init.c"
#include "print.c"

#ifdef SYS_MAC
/* To compile with THINK C 4.0, place the files dawg.h grope.h,
   pack.c and utils.c in a folder.  Create a project that contains
   pack.c and the libraries unix and ANSI.
*/
#include <unix.h>
#include <stdlib.h>
#include <console.h>
/* Assume a 1Mb mac for the moment. Someday I'll add dynamic RAM sizing */
#define SMALL_MEMORY
#endif

#define TRUE (0==0)
#define FALSE (0!=0)
#define MAX_WORD_LENGTH 32

/* These two magic numbers alter a very hacky heuristic employed in
   the packing algorithm.  Tweaking them judiciously ought to speed
   it up significantly (at the expense of a slightly sparser packing */
#define DENSE_LOWER 100
#define DENSE_UPPER 200

/*###########################################################################*/
INDEX ptrie_size = 0;
static NODE PCCRAP *ptrie;
#ifdef RCHECK
/* can't use the standard range_check macro because these are slightly
   non-standard. */
#define PTRIE(x) ptrie[RANGECHECK((x), ptrie_size)]
#define DATA(x) (((x) >> 12) == 0xf2f1 ? toosmall((x), 0, __LINE__) : (x))
#else
/* so supply an explicit base case */
#define PTRIE(x) ptrie[x]
#define DATA(x) (x)
#endif
static char PCCRAP *trie_at;  /* save some time by caching this info --
                          previously it was a function called on each test */
static INDEX freenode, lowest_base = 1, highest_base = -1;
static int debug = FALSE;

int
#ifdef PROTOTYPES
compatible(NODE *node, INDEX i) /* Can a node be overlaid here? */
#else
compatible(node, i)
NODE *node;
INDEX i;
/* Can a node be overlaid here? */
#endif
{
int c;
 for (c = 0; c < MAX_CHARS; c++) {
   if ((node[c]&M_FREE) == 0) { /* Something to slot in... */
    if ((PTRIE(i+c) & M_FREE) == 0) return(FALSE); /* but no slot for it */
   }
 }
 return(TRUE);
}

INDEX
#ifdef PROTOTYPES
find_slot(NODE *node)
#else
find_slot(node)
NODE *node;
#endif
{               /* Try each position until we can overlay this node */
INDEX i;
  for (i = lowest_base; i < freenode; i++) {
    if ((!trie_at[i]) && (compatible(node, i))) {
       /* nothing here already */
                         /* and this one will fit */
      return(i);
    }
  }
  fprintf(stderr, "Should never fail to find a slot!\n");
                     /* because the output array is larger than the input... */
  exit(5);
  /* NOT REACHED */
  return(0);
}

static int changes;

INDEX
#ifdef PROTOTYPES
pack(NODE *node)
#else
pack(node)
NODE *node;
#endif
{
int c;
INDEX slot;
NODE value;

  slot = find_slot(node);  /* slot is also returned as the result,
                              to be used in relocation */
  /* Place node at slot */
  changes = 0;
  for (c = 0; c < MAX_CHARS; c++) {
    value = node[c];
    if ((value&M_FREE) == 0) { /* Something to fit? */
      if (((value>>V_LETTER)&M_LETTER) == (INDEX)c+BASE_CHAR) {
                  /* Just a consistency check - could safely be removed */
        PTRIE(slot+c) = DATA(value);
        changes++;
      }
    }
  }
  /* The hack heuristics below keep a N^2 operation down to linear time! */
  if ((slot == lowest_base) || (slot >= lowest_base+DENSE_LOWER)) {
    /* Heuristic is: we increase the initial search position if the
       array is packed solid up to this point or we're finding it *very*
       hard to find suitable slots */
    int found = FALSE;
    for (;;) {
      INDEX c;
      while (trie_at[lowest_base]) lowest_base++;
      for (c = lowest_base; c < lowest_base+MAX_CHARS; c++) {
        if ((PTRIE(c)&M_FREE) != 0) {found = TRUE; break;}
      }
      if (found && (slot < lowest_base+DENSE_UPPER)) break;
                  /* ^ Skip hard slots to fill */
      lowest_base++; /* although nothing is based here, the next N slots
                      are filled with data from the last N tries. */

      /* Actually I'm not sure if 256 sequential trees each with the
         same element used would actually block the next 256 slots
         without a trie_at[] flag being set for them.  However it
         does no harm to believe this... I must formally check this
         someday once all the other stuff is in order. */
    }
  }

  if (slot > highest_base) highest_base = slot;
     /* This is info for when we try to overlap input & output
        arrays, -- with the output sitting some large number of
        bytes lower down than the input. */
  trie_at[slot] = TRUE;
  return(slot);
}
/*###########################################################################*/

static NODE PCCRAP *dawg;
static INDEX PCCRAP *new_loc;
static INDEX nedges;

NODE this_node[MAX_CHARS];

INDEX
#ifdef PROTOTYPES
take_node(INDEX ptr)
#else
take_node(ptr)
INDEX ptr;
#endif
{
NODE data;
INDEX edge;
int let;
int endsword;
int endsnode;
char ch;
int changes = 0;
  for (let = 0; let < MAX_CHARS; let++) this_node[let] = M_FREE;
  for (;;) {
    data = dawg[ptr++];
    if (data == 0) {
      return(-1);
    } else {
      endsword = ((data & M_END_OF_WORD) != 0);
      endsnode = ((data & M_END_OF_NODE) != 0);
      edge = data & M_NODE_POINTER;
      let = (int) ((data >> V_LETTER) & M_LETTER);
      ch = let + BASE_CHAR;
  
      this_node[let] = edge & M_NODE_POINTER;
      if (endsword) this_node[let] |= M_END_OF_WORD;
      this_node[let] |= (NODE)ch<<V_LETTER;
  
      changes++;
      if (endsnode) break;
    }
  }
  if (changes != 0) {
    return(ptr);
  } else {
    fprintf(stderr, "Fatal error\n");
    return(0);
  }
}

NODE
#ifdef PROTOTYPES
redirect_node(NODE ptr)
#else
redirect_node(ptr)
NODE ptr;
#endif
{
NODE data;
INDEX edge;
int endsword;
char ch;
  data = DATA(PTRIE(ptr));

  endsword = ((data & M_END_OF_WORD) != 0);
  edge = data & M_NODE_POINTER;
  ch = (int) ((data >> V_LETTER) & M_LETTER);

  /*edge = dawg[edge] & M_NODE_POINTER;*/
  edge = new_loc[edge];

  ptr = edge;
  ptr |= (NODE)ch<<V_LETTER;
  if (endsword) ptr |= M_END_OF_WORD;

  return(ptr);
}

int
#ifdef PROTOTYPES
main(int argc, char **argv)
#else
main(argc, argv)
int argc;
char **argv;
#endif
{
INDEX dawgptr = 1, trieptr, newdawgptr, i, nodes = 0;
FILE *triefile;

#ifdef SYS_MAC
  argc = ccommand(&argv);
#endif

  if (argc != 3) {
    fprintf(stderr, "pack: wrong parameters - should be infile outfile\n");
    exit(EXIT_ERROR);
  }

  if (!dawg_init((argc >= 2 ? argv[1]: ""), &dawg, &nedges)) exit(EXIT_ERROR);
  if (debug) dawg_print(dawg, (INDEX)ROOT_NODE); /* assume small test file! */

  freenode = ((nedges*16)/15)+(4*MAX_CHARS);
                               /* Minimal slop for pathological packing? */
  ptrie_size = freenode;
  ptrie = MALLOC((SIZE_T)freenode, sizeof(NODE));
  if (ptrie == NULL) {
    fprintf(stderr, "Cannot claim %ld bytes for ptrie[]\n",
            sizeof(NODE)*freenode);
    exit(EXIT_ERROR);
  }
  new_loc = MALLOC((SIZE_T)freenode, sizeof(NODE));
  if (new_loc == NULL) {
    fprintf(stderr, "Cannot claim %ld bytes for new_loc[]\n",
            sizeof(NODE)*freenode);
    exit(EXIT_ERROR);
  }
  trie_at = (char PCCRAP *)MALLOC((SIZE_T)freenode, sizeof(char));
  if (trie_at == NULL) {
    fprintf(stderr, "Cannot claim %ld bytes for trie_at[]\n", freenode);
    exit(EXIT_ERROR);
  }
  for (i = 0; i < freenode; i++) {
    ptrie[i] = M_FREE; new_loc[i] = 0; trie_at[i] = FALSE;
  }

  dawg[0] = 0; /* 1st entry is never looked at, and maps to itself */

  dawgptr = 1;

/* Relocate initial node at 1 seperately */

    newdawgptr = take_node(dawgptr /* 1 */);
    trieptr = pack(this_node);
    /*dawg[dawgptr] = trieptr;*/
      /* What the hell does this do??? I've forgotten!!! - oh yes, this
         was relocating in situ, without new_loc... */
    new_loc[dawgptr] = trieptr;
    dawgptr = MAX_CHARS+1;

{INDEX maxdiff = 0, diff;
  for (;;) {
    if (highest_base > dawgptr) {
      diff = highest_base - dawgptr;
      if (diff > maxdiff) maxdiff = diff;
    }
    newdawgptr = take_node(dawgptr);
    if (newdawgptr == -1) {
      dawgptr++;
      continue;
    }
    trieptr = pack(this_node);
    /*dawg[dawgptr] = trieptr;*/
    new_loc[dawgptr] = trieptr;
    dawgptr = newdawgptr;
    if (dawgptr > nedges) {
      break;  /* AHA!!! I was doing this in the
                 wrong order & lost last entry! *AND* had '>=' for '>' */
    }
    nodes++;
    if ((nodes MOD 1000) == 0) fprintf(stderr, "%ld nodes\n", nodes);
  }
  if (debug) fprintf(stderr, "wavefront gets up to %ld\n", maxdiff);
}
  if (debug) fprintf(stderr, "All packed - used %ld nodes\n", highest_base);
  for (trieptr = 1; trieptr < freenode; trieptr++) {
    /*
      extract edge from ptrie[trieptr],
      look it up via dawg[edge], slot it back in
      (while preserving other bits)
     */
    PTRIE(trieptr) = redirect_node(trieptr);
  }
  /* Finally, remember to bump up highest_base in case last node is only
     one edge and 25 others are missing! */
  if (debug) fprintf(stderr, "Redirected...\n");

  triefile = fopen((argc >= 3 ? argv[2] : DEFAULT_PACK), WRITE_BIN);
  if (triefile == NULL) {
    fprintf(stderr, "Cannot write to packed trie file\n");
    exit(1);
  }
  if (debug) fprintf(stderr, "File opened...\n");

  ptrie[0] = highest_base+MAX_CHARS-1;/* File size in words
                                     (-1 because doesn't include itself)  */
  if (debug) fprintf(stderr, "Dumping... (0..%ld)\n", highest_base+MAX_CHARS-1);
  for (i = 0; i < highest_base+MAX_CHARS; i++) {/* dump packed DAG */
  NODE n;
    n = DATA(PTRIE(i));
    putword(n, triefile);
    if (ferror(triefile)) {
      fprintf(stderr, "*** TROUBLE: Writing DAG -- disk full?\n");
      fclose(triefile);
      exit(1);
    }
  }
  if (debug) fprintf(stderr, "Dumped...\n");
  fclose(triefile);
  if (debug) fprintf(stderr, "Done.\n");
}
SHAR_EOF
cat - << \SHAR_EOF > pckcheck.c
/*

    File:          pckcheck.c
    Author:        Graham Toal
    Purpose:       check correct spelling using dict.pck
    Creation date: 22/06/90
    Lastedit:      23/06/90 01:15:39

    Description:

       This can be expanded to be like the unix 'spell' command.
    This demo file simply checks words passed on the command line.
    note how it remembers words so that the second time it sees
    them, it will call them correct.  This is how you should
    implement the 'ignore' feature of an interactive checker.

    This version used the fast 'packed' data structure.  This is
    approximately 3 times faster, but not all utilities support
    the packed versions.  Also utilities which walk the trie are
    considerably slower (say by a factor of 100) -- so chose when
    to used 'dawg' and when to use 'pack'ed versions carefully!

*/


/* Manadatory header files */
#include <stdio.h>
#include "dawg.h"
#include "grope.h"

/*#define RCHECK*/     /* Enable for internal range checks... */

#include "utils.c"

/* Headers here as needed on per-program basis */
#include <ctype.h>  /* eg, for isalpha() */

/* Spelling library utilities */
#include "init.c"      /* Loading dicts */
#include "dyntrie.c"   /* Creating dicts at run-time */
#include "print.c"     /* Printing dicts */
#include "check.c"     /* Checking words */
#include "locate.c"    /* Finding words by their stems */

#include "soundex.c"   /* Soundex algorithm for word-likeness comparison */
#include "similcmp.c"  /* Closeness-metric for correction */
#include "correct.c"   /* Hack code to attempt error correction (uses above) */

/* Let's be friendly to these guys... */
#ifdef SYS_MAC
/* To compile with THINK C 4.0, place all the relevant .h and .c
   files in a folder.  Then create a project which contains this main.c
   and the libraries unix and ANSI.
*/
#include <unix.h>
#include <stdlib.h>
#include <console.h>
#endif

/* Your own header files go here, for instance:
               #include "readword.h"
 */



int
#ifdef PROTOTYPES
main(int argc, char **argv)
#else
main(argc, argv)
int argc;
char **argv;
#endif
{
NODE PCCRAP *ptrie;     /* precompiled dictionary (dict.pck) */
INDEX edges;            /* size of above */

NODE PCCRAP *userdict;  /* run-time dictionary, added to dynamically */

char *word;
int each;

#ifdef SYS_MAC
  argc = ccommand(&argv);  /* Mac users have my sympathy */
#endif

  /* Your program goes here... */
  if (argc == 1) {
    fprintf(stderr, "usage: %s possibly mispeled wurdz\n", argv[0]);
    exit(EXIT_ERROR);
  }

  if (!pack_init("", &ptrie, &edges)) exit(EXIT_ERROR);

  if (!trie_new(&userdict)) exit(EXIT_ERROR);

  for (each = 1; each < argc; each++) {
    word = argv[each];
    fprintf(stderr, "* %s:\n", word);
    if (pack_check(ptrie, word) || dawg_check(userdict, word)) {
      fprintf(stderr, "Correct\n");
    } else {
      fprintf(stderr, "Wrong\n");
      (void)trie_add(&userdict, word);
    }
    if (each+1 != argc) fprintf(stderr, "\n");
  }

  exit(EXIT_OK);
}
SHAR_EOF
cat - << \SHAR_EOF > pdawg.c
/* Manadatory headers */
/*#define RCHECK*/
/*#define DEBUG*/
#include <stdio.h>
#include "dawg.h"
#include "grope.h"
#include "utils.c"

/* Headers here as needed on per-program basis */
#include <ctype.h>  /* for isalpha() */

/* Spelling library utilities */
#include "init.c"
#include "print.c"
/*
#include "check.c"
#include "locate.c"
#include "similcmp.c"
#include "soundex.c"
#include "correct.c"
*/
/*#include "dyntrie.c"*/

int
#ifdef PROTOTYPES
main(int argc, char **argv)
#else
main(argc, argv)
int argc;
char **argv;
#endif
{
NODE PCCRAP *dawg;
INDEX nedges;
  if (!dawg_init((argc == 1 ? "" : argv[1]), &dawg, &nedges)) exit(EXIT_ERROR);
  dawg_print(dawg, (INDEX)ROOT_NODE);
  fprintf(stderr, "Finished printing dawg\n");
  exit(0);
}
SHAR_EOF
cat - << \SHAR_EOF > ppack.c
/* Manadatory headers */
#include <stdio.h>
#include "dawg.h"
#include "grope.h"
#include "utils.c"

/* Headers here as needed on per-program basis */
#include <ctype.h>  /* for isalpha() */

/* Spelling library utilities */
#include "init.c"
#include "print.c"
/*
#include "check.c"
#include "locate.c"
#include "similcmp.c"
#include "soundex.c"
#include "correct.c"
*/
/*#include "dyntrie.c"*/



int
#ifdef PROTOTYPES
main(int argc, char **argv)
#else
main(argc, argv)
int argc;
char **argv;
#endif
{
NODE PCCRAP *ptrie;
INDEX nedges;
  if (!pack_init((argc == 1 ? "": argv[1]), &ptrie, &nedges)) exit(EXIT_ERROR);
  pack_print(ptrie, (INDEX)ROOT_NODE);
  exit(0);
}
SHAR_EOF
cat - << \SHAR_EOF > proot.c
/*

    File:          proot.c
    Author:        Graham Toal
    Purpose:       find all words starting with 'root'
    Creation date: 22/06/90
    Lastedit:      22:24:32

    Description:
      some spelling programs remove characters from the end of
    wrongly spelt words one by one until the resulting root is
    found to be a prefix of other words in the dictionary.  This
    works because the assumption is made that the word was in
    fact correct, but was an inflected form of a word in the
    dictionary which had not been stored.
*/


/* Manadatory header files */
#include <stdio.h>
#include "dawg.h"
#include "grope.h"
#include "utils.c"

/* Headers here as needed on per-program basis */
#include <ctype.h>  /* eg, for isalpha() */

/* Spelling library utilities */
#include "init.c"        /* Loading dicts */
/*#include "dyntrie.c"*/ /* Creating dicts at run-time */
#include "print.c"       /* Printing dicts */
/*#include "check.c"*/   /* Checking words */
#include "locate.c"      /* Finding words by their stems */

/*#include "soundex.c"*/ /* Soundex algorithm for word-likeness comparison */
/*#include "similcmp.c"*//* Closeness-metric for correction */
/*#include "correct.c"*/ /* Code to attempt error correction (uses above) */

/* Let's be friendly to these guys... */
#ifdef SYS_MAC
/* To compile with THINK C 4.0, place all the relevant .h and .c
   files in a folder.  Then create a project which contains this main.c
   and the libraries unix and ANSI.
*/
#include <unix.h>
#include <stdlib.h>
#include <console.h>
#endif

/* Your own header files go here, for instance:
               #include "readword.h"
 */



int
#ifdef PROTOTYPES
main(int argc, char **argv)
#else
main(argc, argv)
int argc;
char **argv;
#endif
{
NODE PCCRAP *dawg;
INDEX root;
INDEX edges;
int each;

#ifdef SYS_MAC
  argc = ccommand(&argv);  /* Mac users have my sympathy */
#endif

  /* Your program goes here... */
  if (argc == 1) {
    fprintf(stderr, "usage: %s part\n", argv[0]);
    exit(EXIT_ERROR);
  }
  if (!dawg_init("", &dawg, &edges)) exit(EXIT_ERROR);
  for (each = 1; each < argc; each++) {
    fprintf(stderr, "* %s:\n", argv[each]);
    root = dawg_locate_prefix(dawg, argv[each], ROOT_NODE);
    if (root != ROOT_NODE) {
      dawg_print_prefix(dawg, argv[each], root);
    } else {
      fprintf(stderr, "(none found)\n");
    }
    if (each+1 != argc) fprintf(stderr, "\n");
  }

  exit(EXIT_OK);
}
SHAR_EOF
cat - << \SHAR_EOF > sample.c
/*
   Dummy program for writing word utilities.
   If you stick to the style shown here, your programs will
   remain portable across a vast range of machines.
   Skeleton file by Graham Toal.  Remove this header when you've
   added your own...

   (If you want to see some worked examples, try
    tell.c  dwgcheck.c  pckcheck.c)

*/

/*

    File:          
    Author:        
    Purpose:       
    Creation date: 
    Lastedit:      

    Description:

*/


/* Manadatory header files */
#include <stdio.h>
#include "dawg.h"
#include "grope.h"
#include "utils.c"

/* Headers here as needed on per-program basis */
#include <ctype.h>  /* eg, for isalpha() */

/* Spelling library utilities */
#include "init.c"      /* Loading dicts */
#include "dyntrie.c"   /* Creating dicts at run-time */
#include "print.c"     /* Printing dicts */
#include "check.c"     /* Checking words */
#include "locate.c"    /* Finding words by their stems */

#include "soundex.c"   /* Soundex algorithm for word-likeness comparison */
#include "similcmp.c"  /* Closeness-metric for correction */
#include "correct.c"   /* Hack code to attempt error correction (uses above) */

/* Let's be friendly to these guys... */
#ifdef SYS_MAC
/* To compile with THINK C 4.0, place all the relevant .h and .c
   files in a folder.  Then create a project which contains this main.c
   and the libraries unix and ANSI.
*/
#include <unix.h>
#include <stdlib.h>
#include <console.h>
#endif

/* Your own header files go here, for instance:
               #include "readword.h"
 */



int
#ifdef PROTOTYPES
main(int argc, char **argv)
#else
main(argc, argv)
int argc;
char **argv;
#endif
{
#ifdef SYS_MAC
  argc = ccommand(&argv);  /* Mac users have my sympathy */
#endif

  /* Your program goes here... */

  exit(EXIT_OK);
}
SHAR_EOF
cat - << \SHAR_EOF > tell.c
/*

    File:          tell.c
    Author:        Graham Toal
    Purpose:       offer correct spelling
    Creation date: 22/06/90
    Lastedit:      22:24:32

    Description:

       Like the unix 'spelltell' command.

*/


/* Manadatory header files */
#include <stdio.h>
#include "dawg.h"
#include "grope.h"
#include "utils.c"

/* Headers here as needed on per-program basis */
#include <ctype.h>  /* eg, for isalpha() */

/* Spelling library utilities */
#include "init.c"      /* Loading dicts */
#include "dyntrie.c"   /* Creating dicts at run-time */
#include "print.c"     /* Printing dicts */
#include "check.c"     /* Checking words */
#include "locate.c"    /* Finding words by their stems */

#include "soundex.c"   /* Soundex algorithm for word-likeness comparison */
#include "similcmp.c"  /* Closeness-metric for correction */
#include "correct.c"   /* Hack code to attempt error correction (uses above) */

/* Let's be friendly to these guys... */
#ifdef SYS_MAC
/* To compile with THINK C 4.0, place all the relevant .h and .c
   files in a folder.  Then create a project which contains this main.c
   and the libraries unix and ANSI.
*/
#include <unix.h>
#include <stdlib.h>
#include <console.h>
#endif

/* Your own header files go here, for instance:
               #include "readword.h"
 */



int
#ifdef PROTOTYPES
main(int argc, char **argv)
#else
main(argc, argv)
int argc;
char **argv;
#endif
{
NODE PCCRAP *dawg;
INDEX edges;
int each;

#ifdef SYS_MAC
  argc = ccommand(&argv);  /* Mac users have my sympathy */
#endif

  /* Your program goes here... */
  if (argc == 1) {
    fprintf(stderr, "usage: %s mispeled wurdz\n", argv[0]);
    exit(EXIT_ERROR);
  }
  if (!dawg_init("", &dawg, &edges)) exit(EXIT_ERROR);
  for (each = 1; each < argc; each++) {
    fprintf(stderr, "* %s:\n", argv[each]);
    if (!dawg_correct(dawg, argv[each])) {
      fprintf(stderr, "(none found)\n");
    }
    if (each+1 != argc) fprintf(stderr, "\n");
  }

  exit(EXIT_OK);
}
SHAR_EOF



More information about the Alt.sources mailing list