src - OpenBSD base system

diff options


context:
space:
mode:

author	Marc Espie <espie@cvs.openbsd.org>	2001-03-26 22:53:34 +0000
committer	Marc Espie <espie@cvs.openbsd.org>	2001-03-26 22:53:34 +0000
commit	60ea328912452bb904fc34be01f64367de8fe8b3 (patch)
tree	3a4743094e1fef5ac26efb7c9c1b9af4d6a1c1a5 /usr.bin
parent	0ed499a8c4ed0e7ef2f74d6e681f98b82cce5028 (diff)

Replacement for original tsort.

The old code suffers from a few defects: - it does not even implement the standard optimal topological sort algorithm. It's much slower. - its longest cycle computation is completely bogus. This is clean-slate code, that does implement the actual standard optimal topological sort, together with a correct graph traversal to find longest cycles. It does also feature a `stable tsort' mode, where it uses a heap to yield the least disturbed permutation of input nodes that satisfies the ordering constraints (in particular, try tsort -f). Thanks to the nature of the problem, the actual output won't exactly match the old one, but it does pass the regression suite (and it is a topological sorter). Ok millert@

Diffstat (limited to 'usr.bin')

-rw-r--r--

usr.bin/tsort/tsort.1

-rw-r--r--

usr.bin/tsort/tsort.c

1215

2 files changed, 904 insertions, 359 deletions

diff --git a/usr.bin/tsort/tsort.1 b/usr.bin/tsort/tsort.1
index 48132883ff7..971db804316 100644
--- a/usr.bin/tsort/tsort.1
+++ b/usr.bin/tsort/tsort.1

@@ -1,4 +1,4 @@

-.\" $OpenBSD: tsort.1,v 1.6 2000/03/11 21:40:06 aaron Exp $

+.\" $OpenBSD: tsort.1,v 1.7 2001/03/26 22:53:33 espie Exp $

.\" $NetBSD: tsort.1,v 1.6 1996/01/17 20:37:49 mycroft Exp $

.\"

@@ -37,7 +37,7 @@

.\"

.\" @(#)tsort.1 8.3 (Berkeley) 4/1/94

.\"

-.Dd April 1, 1994

+.Dd November 1, 1999

.Dt TSORT 1

.Os

.Sh NAME

@@ -45,11 +45,15 @@

.Nd topological sort of a directed graph

.Sh SYNOPSIS

.Nm tsort

+.Op Fl f

+.Op Fl h Ar file

.Op Fl l

.Op Fl q

+.Op Fl r

+.Op Fl w

.Op Ar file

.Sh DESCRIPTION

-.Nm

+.Nm tsort

takes a list of pairs of node names representing directed arcs in

a graph and prints the nodes in topological order on standard output.

Input is taken from the named

@@ -57,7 +61,7 @@ Input is taken from the named

or from standard input if no file

is given.

.Pp

-Node names in the input are separated by whitespace and there must

+Node names in the input are separated by white space and there must

be an even number of node pairs.

.Pp

Presence of a node in a graph can be represented by an arc from the node

@@ -70,23 +74,45 @@ Cycles are reported on standard error.

.Pp

The options are as follows:

.Bl -tag -width Ds

+.It Fl f

+Resolve ambiguities by selecting nodes based on the order of apparition

+of the first component of the pairs.

+.It Fl h Ar file

+Use

+.Ar file ,

+which holds an ordered list of nodes, to resolve ambiguities.

.It Fl l

Search for and display the longest cycle.

-Can take a very long time.

.It Fl q

-Do not display informational messages about cycles.

-This is primarily

+Do not display informational messages about cycles. This is primarily

intended for building libraries, where optimal ordering is not critical,

and cycles occur often.

+.It Fl r

+Reverse the ordering relation.

+.It Fl v

+Inform on the exact number of edges broken while breaking cycles.

+.It Fl w

+Exit with exit code the number of cycles

+.Nm

+had to break.

.El

.Sh SEE ALSO

-.Xr ar 1

+.Xr ar 1 ,

+.Xr lorder 1 ,

+.Rs

+.%A Donald E. Knuth

+.%B The Art of Computer Programming

+.%V Vol. 1

+.%P pp 258-268

+.%D 1973

+.Re

.Sh HISTORY

.Nm

command appeared in

.At v7 .

This

-.Nm

-command and manual page are derived from sources contributed to Berkeley by

-Michael Rendell of Memorial University of Newfoundland.

+.Nm tsort

+command was completely rewritten by Marc Espie for

+.Ox ,

+to finally use the well-known optimal algorithms for topological sorting.

diff --git a/usr.bin/tsort/tsort.c b/usr.bin/tsort/tsort.c
index 8fc1acf0379..c43ace0cf96 100644
--- a/usr.bin/tsort/tsort.c
+++ b/usr.bin/tsort/tsort.c

@@ -1,12 +1,9 @@

-/* $OpenBSD: tsort.c,v 1.4 1997/01/15 23:43:26 millert Exp $ */

-/* $NetBSD: tsort.c,v 1.11 1996/01/17 20:37:53 mycroft Exp $ */

+/* $OpenBSD: tsort.c,v 1.5 2001/03/26 22:53:33 espie Exp $ */

+/* ex:ts=8 sw=4:

+ */

- *

- * This code is derived from software contributed to Berkeley by

- * Michael Rendell of Memorial University of Newfoundland.

* Redistribution and use in source and binary forms, with or without

* modification, are permitted provided that the following conditions

@@ -18,423 +15,945 @@

* documentation and/or other materials provided with the distribution.

* 3. All advertising materials mentioning features or use of this software

* must display the following acknowledgement:

- * This product includes software developed by the University of

- * California, Berkeley and its contributors.

- * 4. Neither the name of the University nor the names of its contributors

- * may be used to endorse or promote products derived from this software

- * without specific prior written permission.

+ * This product includes software developed by Marc Espie for the OpenBSD

+ * Project.

- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND

- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE

- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS

- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

- * SUCH DAMAGE.

+ * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS

+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBSD

+ * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-#ifndef lint

-static char copyright[] =

-#endif /* not lint */

-#ifndef lint

-#if 0

-static char sccsid[] = "@(#)tsort.c 8.3 (Berkeley) 5/4/95";

-#endif

-static char rcsid[] = "$OpenBSD: tsort.c,v 1.4 1997/01/15 23:43:26 millert Exp $";

-#endif /* not lint */

#include <sys/types.h>

+#include <assert.h>

#include <ctype.h>

-#include <db.h>

#include <err.h>

-#include <errno.h>

-#include <fcntl.h>

+#include <limits.h>

+#include <stddef.h>

+#include <ohash.h>

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

+#include <sysexits.h>

#include <unistd.h>

-/*

- * Topological sort. Input is a list of pairs of strings separated by

- * white space (spaces, tabs, and/or newlines); strings are written to

- * standard output in sorted order, one per line.

+/* The complexity of topological sorting is O(e), where e is the

+ * size of input. While reading input, vertices have to be identified,

+ * thus add the complexity of e keys retrieval among v keys using

+ * an appropriate data structure. This program uses open double hashing

+ * for that purpose. See Knuth for the expected complexity of double

+ * hashing (Brent variation should probably be used if v << e, as a user

+ * option).

+ *

+ * The algorithm used for longest cycle reporting is accurate, but somewhat

+ * expensive. It may need to build all free paths of the graph (a free

+ * path is a path that never goes twice through the same node), whose

+ * number can be as high as O(2^e). Usually, the number of free paths is

+ * much smaller though. This program's author does not believe that a

+ * significantly better worst-case complexity algorithm exists.

+ *

+ * In case of a hints file, the set of minimal nodes is maintained as a

+ * heap. The resulting complexity is O(e+v log v) for the worst case.

+ * The average should actually be near O(e).

+ *

+ * The simple topological sort algorithm detects cycles. This program

+ * goes further, breaking cycles through the use of simple heuristics.

+ * Each cycle break checks the whole set of nodes, hence if c cycles break

+ * are needed, this is an extra cost of O(c v).

- * usage:

- * tsort [-l] [inputfile]

- * If no input file is specified, standard input is read.

+ * Possible heuristics are as follows:

+ * - break cycle at node with lowest number of predecessors (default case),

+ * - break longest cycle at node with lowest number of predecessors,

+ * - break cycle at next node from the hints file.

- * Should be compatable with AT&T tsort HOWEVER the output is not identical

- * (i.e. for most graphs there is more than one sorted order, and this tsort

- * usually generates a different one then the AT&T tsort). Also, cycle

- * reporting seems to be more accurate in this version (the AT&T tsort

- * sometimes says a node is in a cycle when it isn't).

+ * Except for the hints file case, which sets an explicit constraint on

+ * which cycle to break, those heuristics locally result in the smallest

+ * number of broken edges.

- * Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90

+ * Those are admittedly greedy strategies, as is the selection of the next

+ * node from the hints file amongst equivalent candidates that is used for

+ * `stable' topological sorting.

-#define HASHSIZE 53 /* doesn't need to be big */

-#define NF_MARK 0x1 /* marker for cycle detection */

-#define NF_ACYCLIC 0x2 /* this node is cycle free */

-#define NF_NODEST 0x4 /* Unreachable */

-typedef struct node_str NODE;

-struct node_str {

- NODE **n_prevp; /* pointer to previous node's n_next */

- NODE *n_next; /* next node in graph */

- NODE **n_arcs; /* array of arcs to other nodes */

- int n_narcs; /* number of arcs in n_arcs[] */

- int n_arcsize; /* size of n_arcs[] array */

- int n_refcnt; /* # of arcs pointing to this node */

- int n_flags; /* NF_* */

- char n_name[1]; /* name of this node */

+#ifdef __GNUC__

+#define UNUSED __attribute__((unused))

+#else

+#define UNUSED

+#endif

+struct node;

+/* The set of arcs from a given node is stored as a linked list. */

+struct link {

+ struct link *next;

+ struct node *node;

};

-typedef struct _buf {

- char *b_buf;

- int b_bsize;

-} BUF;

+struct node {

+ unsigned int refs; /* Number of arcs left, coming into this node .

+ * Note that nodes with a null count can't

+ * be part of cycles. */

+ struct link *arcs; /* List of forward arcs. */

-DB *db;

-NODE *graph, **cycle_buf, **longest_cycle;

-int debug, longest, quiet;

+ unsigned int order; /* Order of nodes according to a hint file. */

-void add_arc __P((char *, char *));

-int find_cycle __P((NODE *, NODE *, int, int));

-NODE *get_node __P((char *));

-void *grow_buf __P((void *, int));

-void remove_node __P((NODE *));

-void tsort __P((void));

-void usage __P((void));

+ /* Cycle detection algorithms build a free path of nodes. */

+ struct node *from; /* Previous node in the current path. */

-int

-main(argc, argv)

- int argc;

- char *argv[];

+ unsigned int mark; /* Mark processed nodes in cycle discovery. */

+ struct link *traverse; /* Next link to traverse when backtracking. */

+ char k[1]; /* Name of this node. */

+};

+#define HASH_START 9

+struct array {

+ unsigned int entries;

+ struct node **t;

+};

+static void nodes_init __P((struct ohash *));

+static struct node *node_lookup __P((struct ohash *, const char *, const char *));

+static void usage __P((void));

+static struct node *new_node __P((const char *, const char *));

+static void read_pairs __P((FILE *, struct ohash *, int, const char *));

+static void split_nodes __P((struct ohash *, struct array *, struct array *));

+static void insert_arc __P((struct node *, struct node *));

+#ifdef DEBUG

+static void dump_node __P((struct node *));

+static void dump_array __P((struct array *));

+static void dump_hash __P((struct ohash *));

+#endif

+static void read_hints __P((FILE *, struct ohash *, const char *));

+static struct node *find_smallest_node __P((struct array *));

+static struct node *find_good_cycle_break __P((struct array *));

+static void print_cycle __P((struct array *));

+static int find_cycle_with __P((struct node *, struct array *));

+static struct node *find_predecessor __P((struct array *, struct node *));

+static unsigned int traverse_node __P((struct node *, unsigned int, struct array *));

+static struct node *find_longest_cycle __P((struct array *, struct array *));

+static void heap_down __P((struct array *, unsigned int));

+static void heapify __P((struct array *));

+static struct node *dequeue __P((struct array *));

+static void enqueue __P((struct array *, struct node *));

+#define erealloc(n, s) emem(realloc(n, s))

+static void *hash_alloc __P((size_t, void *));

+static void hash_free __P((void *, size_t, void *));

+static void* entry_alloc __P((size_t, void *));

+static void *emalloc __P((size_t));

+static void *emem __P((void *));

+#define DEBUG_TRAVERSE 0

+static struct ohash_info node_info = {

+ offsetof(struct node, k), NULL, hash_alloc, hash_free, entry_alloc };

+int main __P((int, char *[]));

+/***

+ *** Memory handling.

+ ***/

+static void *

+emem(p)

+ void *p;

{

- register BUF *b;

- register int c, n;

- FILE *fp;

- int bsize, ch, nused;

- BUF bufs[2];

- while ((ch = getopt(argc, argv, "dlq")) != -1)

- switch (ch) {

- case 'd':

- debug = 1;

- break;

- case 'l':

- longest = 1;

- break;

- case 'q':

- quiet = 1;

- break;

- case '?':

- default:

- usage();

- }

- argc -= optind;

- argv += optind;

+ if (p)

+ return p;

+ else

+ errx(EX_SOFTWARE, "Memory exhausted");

- switch (argc) {

- case 0:

- fp = stdin;

- break;

- case 1:

- if ((fp = fopen(*argv, "r")) == NULL)

- err(1, "%s", *argv);

- break;

- default:

- usage();

- }

+static void *

+hash_alloc(s, u)

+ size_t s;

+ void *u UNUSED;

+ return emem(calloc(s, 1));

- for (b = bufs, n = 2; --n >= 0; b++)

- b->b_buf = grow_buf(NULL, b->b_bsize = 1024);

+static void

+hash_free(p, s, u)

+ void *p;

+ size_t s UNUSED;

+ void *u UNUSED;

+ free(p);

- /* parse input and build the graph */

- for (n = 0, c = getc(fp);;) {

- while (c != EOF && isspace(c))

- c = getc(fp);

- if (c == EOF)

- break;

+static void *

+entry_alloc(s, u)

+ size_t s;

+ void *u UNUSED;

+ return emalloc(s);

- nused = 0;

- b = &bufs[n];

- bsize = b->b_bsize;

- do {

- b->b_buf[nused++] = c;

- if (nused == bsize)

- b->b_buf = grow_buf(b->b_buf, bsize *= 2);

- c = getc(fp);

- } while (c != EOF && !isspace(c));

- b->b_buf[nused] = '\0';

- b->b_bsize = bsize;

- if (n)

- add_arc(bufs[0].b_buf, bufs[1].b_buf);

- n = !n;

- }

- (void)fclose(fp);

- if (n)

- errx(1, "odd data count");

+static void *

+emalloc(s)

+ size_t s;

+ return emem(malloc(s));

- /* do the sort */

- tsort();

- exit(0);

+/***

+ *** Hash table.

+ ***/

+/* Inserting and finding nodes in the hash structure.

+ * We handle interval strings for efficiency wrt fgetln. */

+static struct node *

+new_node(start, end)

+ const char *start;

+ const char *end;

+ struct node *n;

+ n = ohash_create_entry(&node_info, start, &end);

+ n->from = NULL;

+ n->arcs = NULL;

+ n->refs = 0;

+ n->mark = 0;

+ n->order = 0;

+ n->traverse = NULL;

+ return n;

}

-/* double the size of oldbuf and return a pointer to the new buffer. */

-void *

-grow_buf(bp, size)

- void *bp;

- int size;

+static void

+nodes_init(h)

+ struct ohash *h;

{

- if ((bp = realloc(bp, (u_int)size)) == NULL)

- err(1, NULL);

- return (bp);

+ ohash_init(h, HASH_START, &node_info);

}

-/*

- * add an arc from node s1 to node s2 in the graph. If s1 or s2 are not in

- * the graph, then add them.

- */

-void

-add_arc(s1, s2)

- char *s1, *s2;

+static struct node *

+node_lookup(h, start, end)

+ struct ohash *h;

+ const char *start;

+ const char *end;

{

- register NODE *n1;

- NODE *n2;

- int bsize, i;

+ unsigned int i;

+ struct node * n;

- n1 = get_node(s1);

+ i = ohash_qlookupi(h, start, &end);

- if (!strcmp(s1, s2))

+ n = ohash_find(h, i);

+ if (n == NULL)

+ n = ohash_insert(h, i, new_node(start, end));

+ return n;

+#ifdef DEBUG

+static void

+dump_node(n)

+ struct node *n;

+ struct link *l;

+ if (n->refs == 0)

return;

+ printf("%s (%u): ", n->k, n->refs);

+ for (l = n->arcs; l != NULL; l = l->next)

+ if (n->refs != 0)

+ printf("%s(%u) ", l->node->k, l->node->refs);

+ putchar('\n');

+static void

+dump_array(a)

+ struct array *a;

+ unsigned int i;

- n2 = get_node(s2);

+ for (i = 0; i < a->entries; i++)

+ dump_node(a->t[i]);

+static void

+dump_hash(h)

+ struct hash *h;

+ unsigned int i;

+ struct node *n;

+ for (n = ohash_first(h, &i); n != NULL; n = ohash_next(h, &i))

+ dump_node(n);

+#endif

+/***

+ *** Reading data.

+ ***/

+static void

+insert_arc(a, b)

+ struct node *a, *b;

+ struct link *l;

- /*

- * Check if this arc is already here.

- */

- for (i = 0; i < n1->n_narcs; i++)

- if (n1->n_arcs[i] == n2)

+ /* Check that this arc is not already present. */

+ for (l = a->arcs; l != NULL; l = l->next) {

+ if (l->node == b)

return;

- /*

- * Add it.

- */

- if (n1->n_narcs == n1->n_arcsize) {

- if (!n1->n_arcsize)

- n1->n_arcsize = 10;

- bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2;

- n1->n_arcs = grow_buf(n1->n_arcs, bsize);

- n1->n_arcsize = bsize / sizeof(*n1->n_arcs);

}

- n1->n_arcs[n1->n_narcs++] = n2;

- ++n2->n_refcnt;

+ b->refs++;

+ l = emalloc(sizeof(struct link));

+ l->node = b;

+ l->next = a->arcs;

+ a->arcs = l;

}

-/* Find a node in the graph (insert if not found) and return a pointer to it. */

-NODE *

-get_node(name)

- char *name;

+static void

+read_pairs(f, h, reverse, name)

+ FILE *f;

+ struct ohash *h;

+ int reverse;

+ const char *name;

{

- DBT data, key;

- NODE *n;

+ int toggle;

+ struct node *a;

+ size_t size;

+ char *str;

+ unsigned int o;

+ o = 1;

+ toggle = 1;

+ a = NULL;

+ while ((str = fgetln(f, &size)) != NULL) {

+ char *sentinel;

+ sentinel = str + size;

+ for (;;) {

+ char *e;

+ while (isspace(*str) && str < sentinel)

+ str++;

+ if (str == sentinel)

+ break;

+ for (e = str; !isspace(*e) && e < sentinel; e++)

+ continue;

+ if (toggle) {

+ a = node_lookup(h, str, e);

+ if (a->order == 0)

+ a->order = o++;

+ } else {

+ struct node *b;

+ b = node_lookup(h, str, e);

+ assert(a != NULL);

+ if (b != a) {

+ if (reverse)

+ insert_arc(b, a);

+ else

+ insert_arc(a, b);

+ }

+ toggle = !toggle;

+ str = e;

+ }

+ if (toggle == 0)

+ errx(EX_DATAERR, "odd number of pairs in %s", name);

+ if (!feof(f))

+ err(EX_IOERR, "error reading %s", name);

- if (db == NULL &&

- (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL)

- err(1, "db: %s", name);

+static void

+read_hints(f, h, name)

+ FILE *f;

+ struct ohash *h;

+ const char *name;

+ char *str;

+ size_t size;

+ unsigned int i;

- key.data = name;

- key.size = strlen(name) + 1;

+ i = 1;

- switch ((*db->get)(db, &key, &data, 0)) {

- case 0:

- bcopy(data.data, &n, sizeof(n));

- return (n);

- case 1:

- break;

- default:

- case -1:

- err(1, "db: %s", name);

- }

+ while ((str = fgetln(f, &size)) != NULL) {

+ char *sentinel;

+ sentinel = str + size;

+ for (;;) {

+ char *e;

+ struct node *a;

- if ((n = malloc(sizeof(NODE) + key.size)) == NULL)

- err(1, NULL);

- n->n_narcs = 0;

- n->n_arcsize = 0;

- n->n_arcs = NULL;

- n->n_refcnt = 0;

- n->n_flags = 0;

- bcopy(name, n->n_name, key.size);

- /* Add to linked list. */

- if ((n->n_next = graph) != NULL)

- graph->n_prevp = &n->n_next;

- n->n_prevp = &graph;

- graph = n;

- /* Add to hash table. */

- data.data = &n;

- data.size = sizeof(n);

- if ((*db->put)(db, &key, &data, 0))

- err(1, "db: %s", name);

- return (n);

+ while (isspace(*str) && str < sentinel)

+ str++;

+ if (str == sentinel)

+ break;

+ for (e = str; !isspace(*e) && e < sentinel; e++)

+ continue;

+ a = node_lookup(h, str, e);

+ if (a->order != 0)

+ errx(EX_DATAERR,

+ "duplicate node %s in hints file %s",

+ a->k, name);

+ else

+ a->order = i++;

+ str = e;

+ }

}

+/***

+ *** Standard heap handling routines.

+ ***/

-/*

- * Clear the NODEST flag from all nodes.

- */

-void

-clear_cycle()

+static void

+heap_down(h, i)

+ struct array *h;

+ unsigned int i;

{

- NODE *n;

+ unsigned int j;

+ struct node *swap;

- for (n = graph; n != NULL; n = n->n_next)

- n->n_flags &= ~NF_NODEST;

+ for (; (j=2*i+1) < h->entries; i = j) {

+ if (j+1 < h->entries && h->t[j+1]->order < h->t[j]->order)

+ j++;

+ if (h->t[i]->order <= h->t[j]->order)

+ break;

+ swap = h->t[i];

+ h->t[i] = h->t[j];

+ h->t[j] = swap;

+ }

}

-/* do topological sort on graph */

-void

-tsort()

+static void

+heapify(h)

+ struct array *h;

{

- register NODE *n, *next;

- register int cnt, i;

- while (graph != NULL) {

- /*

- * Keep getting rid of simple cases until there are none left,

- * if there are any nodes still in the graph, then there is

- * a cycle in it.

- */

- do {

- for (cnt = 0, n = graph; n != NULL; n = next) {

- next = n->n_next;

- if (n->n_refcnt == 0) {

- remove_node(n);

- ++cnt;

- }

- } while (graph != NULL && cnt);

+ unsigned int i;

- if (graph == NULL)

- break;

+ for (i = h->entries; i != 0;)

+ heap_down(h, --i);

- if (!cycle_buf) {

- /*

- * Allocate space for two cycle logs - one to be used

- * as scratch space, the other to save the longest

- * cycle.

- */

- for (cnt = 0, n = graph; n != NULL; n = n->n_next)

- ++cnt;

- cycle_buf = malloc((u_int)sizeof(NODE *) * cnt);

- longest_cycle = malloc((u_int)sizeof(NODE *) * cnt);

- if (cycle_buf == NULL || longest_cycle == NULL)

- err(1, NULL);

+#define DEQUEUE(h) ( hints_flag ? dequeue(h) : (h)->t[--(h)->entries] )

+static struct node *

+dequeue(h)

+ struct array *h;

+ struct node *n;

+ if (h->entries == 0)

+ n = NULL;

+ else {

+ n = h->t[0];

+ if (--h->entries != 0) {

+ h->t[0] = h->t[h->entries];

+ heap_down(h, 0);

}

- for (n = graph; n != NULL; n = n->n_next)

- if (!(n->n_flags & NF_ACYCLIC))

- if (cnt = find_cycle(n, n, 0, 0)) {

- if (!quiet) {

- warnx("cycle in data");

- for (i = 0; i < cnt; i++)

- warnx("%s",

- longest_cycle[i]->n_name);

- }

- remove_node(n);

- clear_cycle();

- break;

- } else {

- /* to avoid further checks */

- n->n_flags |= NF_ACYCLIC;

- clear_cycle();

- }

+ }

+ return n;

+#define ENQUEUE(h, n) do { \

+ if (hints_flag) \

+ enqueue((h), (n)); \

+ else \

+ (h)->t[(h)->entries++] = (n); \

+ } while(0);

+static void

+enqueue(h, n)

+ struct array *h;

+ struct node *n;

+ unsigned int i, j;

+ struct node *swap;

- if (n == NULL)

- errx(1, "internal error -- could not find cycle");

+ h->t[h->entries++] = n;

+ for (i = h->entries-1; i > 0; i = j) {

+ j = (i-1)/2;

+ if (h->t[j]->order < h->t[i]->order)

+ break;

+ swap = h->t[j];

+ h->t[j] = h->t[i];

+ h->t[i] = swap;

}

-/* print node and remove from graph (does not actually free node) */

-void

-remove_node(n)

- register NODE *n;

+/***

+ *** Search through hash array for nodes.

+ ***/

+/* Split nodes into unrefed nodes/live nodes. */

+static void

+split_nodes(hash, heap, remaining)

+ struct ohash *hash;

+ struct array *heap;

+ struct array *remaining;

{

- register NODE **np;

- register int i;

- (void)printf("%s\n", n->n_name);

- for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++)

- --(*np)->n_refcnt;

- n->n_narcs = 0;

- *n->n_prevp = n->n_next;

- if (n->n_next)

- n->n_next->n_prevp = n->n_prevp;

+ struct node *n;

+ unsigned int i;

+ heap->t = emalloc(sizeof(struct node *) * ohash_entries(hash));

+ remaining->t = emalloc(sizeof(struct node *) * ohash_entries(hash));

+ heap->entries = 0;

+ remaining->entries = 0;

+ for (n = ohash_first(hash, &i); n != NULL; n = ohash_next(hash, &i)) {

+ if (n->refs == 0)

+ heap->t[heap->entries++] = n;

+ else

+ remaining->t[remaining->entries++] = n;

+ }

}

+/* Good point to break a cycle: live node with as few refs as possible. */

+static struct node *

+find_good_cycle_break(h)

+ struct array *h;

+ unsigned int i;

+ unsigned int best;

+ struct node *u;

+ best = UINT_MAX;

+ u = NULL;

+ assert(h->entries != 0);

+ for (i = 0; i < h->entries; i++) {

+ struct node *n = h->t[i];

+ /* No need to look further. */

+ if (n->refs == 1)

+ return n;

+ if (n->refs != 0 && n->refs < best) {

+ best = n->refs;

+ u = n;

+ }

+ assert(u != NULL);

+ return u;

+/* Retrieve the node with the smallest order. */

+static struct node *

+find_smallest_node(h)

+ struct array *h;

+ unsigned int i;

+ unsigned int best;

+ struct node *u;

+ best = UINT_MAX;

+ u = NULL;

+ assert(h->entries != 0);

+ for (i = 0; i < h->entries; i++) {

+ struct node *n = h->t[i];

+ if (n->refs != 0 && n->order < best) {

+ best = n->order;

+ u = n;

+ }

+ assert(u != NULL);

+ return u;

-/* look for the longest? cycle from node from to node to. */

-int

-find_cycle(from, to, longest_len, depth)

- NODE *from, *to;

- int depth, longest_len;

+/***

+ *** Graph algorithms.

+ ***/

+/* Explore the nodes reachable from i to find a cycle containing it, store

+ * it in c. This may fail. */

+static int

+find_cycle_with(i, c)

+ struct node *i;

+ struct array *c;

{

- register NODE **np;

- register int i, len;

- /*

- * avoid infinite loops and ignore portions of the graph known

- * to be acyclic

- */

- if (from->n_flags & (NF_NODEST|NF_MARK|NF_ACYCLIC))

- return (0);

- from->n_flags |= NF_MARK;

- for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) {

- cycle_buf[depth] = *np;

- if (*np == to) {

- if (depth + 1 > longest_len) {

- longest_len = depth + 1;

- (void)memcpy((char *)longest_cycle,

- (char *)cycle_buf,

- longest_len * sizeof(NODE *));

+ struct node *n;

+ n = i;

+ /* XXX Previous cycle findings may have left this pointer non-null. */

+ i->from = NULL;

+ for (;;) {

+ /* Note that all marks are reversed before this code exits. */

+ n->mark = 1;

+ if (n->traverse)

+ n->traverse = n->traverse->next;

+ else

+ n->traverse = n->arcs;

+ /* Skip over dead nodes. */

+ while (n->traverse && n->traverse->node->refs == 0)

+ n->traverse = n->traverse->next;

+ if (n->traverse) {

+ struct node *go = n->traverse->node;

+ if (go->mark) {

+ if (go == i) {

+ c->entries = 0;

+ for (; n != NULL; n = n->from)

+ c->t[c->entries++] = n;

+ return 1;

+ }

+ } else {

+ go->from = n;

+ n = go;

}

} else {

- if ((*np)->n_flags & (NF_MARK|NF_ACYCLIC|NF_NODEST))

- continue;

- len = find_cycle(*np, to, longest_len, depth + 1);

+ n->mark = 0;

+ n = n->from;

+ if (n == NULL)

+ return 0;

+ }

- if (debug)

- (void)printf("%*s %s->%s %d\n", depth, "",

- from->n_name, to->n_name, len);

+/* Find a live predecessor of node n. This is a slow routine, as it needs

+ * to go through the whole array, but it is not needed often.

+ */

+static struct node *

+find_predecessor(a, n)

+ struct array *a;

+ struct node *n;

+ unsigned int i;

- if (len == 0)

- (*np)->n_flags |= NF_NODEST;

+ for (i = 0; i < a->entries; i++) {

+ struct node *m;

- if (len > longest_len)

- longest_len = len;

+ m = a->t[i];

+ if (m->refs != 0) {

+ struct link *l;

- if (len > 0 && !longest)

- break;

+ for (l = m->arcs; l != NULL; l = l->next)

+ if (l->node == n)

+ return m;

+ }

+ assert(1 == 0);

+ return NULL;

+/* Traverse all strongly connected components reachable from node n.

+ Start numbering them at o. Return the maximum order reached.

+ Update the largest cycle found so far.

+ */

+static unsigned int

+traverse_node(n, o, c)

+ struct node *n;

+ unsigned int o;

+ struct array *c;

+ unsigned int min, max;

+ n->from = NULL;

+ min = o;

+ max = ++o;

+ for (;;) {

+ n->mark = o;

+ if (DEBUG_TRAVERSE)

+ printf("%s(%d) ", n->k, n->mark);

+ /* Find next arc to explore. */

+ if (n->traverse)

+ n->traverse = n->traverse->next;

+ else

+ n->traverse = n->arcs;

+ /* Skip over dead nodes. */

+ while (n->traverse && n->traverse->node->refs == 0)

+ n->traverse = n->traverse->next;

+ /* If arc left. */

+ if (n->traverse) {

+ struct node *go;

+ go = n->traverse->node;

+ /* Optimisation: if go->mark < min, we already

+ * visited this strongly-connected component in

+ * a previous pass. Hence, this can yield no new

+ * cycle. */

+ /* Not part of the current path: go for it. */

+ if (go->mark == 0 || go->mark == min) {

+ go->from = n;

+ n = go;

+ o++;

+ if (o > max)

+ max = o;

+ /* Part of the current path: check cycle length. */

+ } else if (go->mark > min) {

+ if (DEBUG_TRAVERSE)

+ printf("%d\n", o - go->mark + 1);

+ if (o - go->mark + 1 > c->entries) {

+ struct node *t;

+ unsigned int i;

+ c->entries = o - go->mark + 1;

+ i = 0;

+ c->t[i++] = go;

+ for (t = n; t != go; t = t->from)

+ c->t[i++] = t;

+ }

+ /* No arc left: backtrack. */

+ } else {

+ n->mark = min;

+ n = n->from;

+ if (!n)

+ return max;

+ o--;

+ }

+static void

+print_cycle(c)

+ struct array *c;

+ unsigned int i;

+ /* Printing in reverse order, since cycle discoveries finds reverse

+ * edges. */

+ for (i = c->entries; i != 0;) {

+ i--;

+ warnx("%s", c->t[i]->k);

+ }

+static struct node *

+find_longest_cycle(h, c)

+ struct array *h;

+ struct array *c;

+ unsigned int i;

+ unsigned int o;

+ unsigned int best;

+ struct node *n;

+ static int notfirst = 0;

+ assert(h->entries != 0);

+ /* No cycle found yet. */

+ c->entries = 0;

+ /* Reset the set of marks, except the first time around. */

+ if (notfirst) {

+ for (i = 0; i < h->entries; i++)

+ h->t[i]->mark = 0;

+ } else

+ notfirst = 1;

+ o = 0;

+ /* Traverse the array. Each unmarked, live node heralds a

+ * new set of strongly connected components. */

+ for (i = 0; i < h->entries; i++) {

+ n = h->t[i];

+ if (n->refs != 0 && n->mark == 0) {

+ /* Each call to traverse_node uses a separate

+ * interval of numbers to mark nodes. */

+ o++;

+ o = traverse_node(n, o, c);

+ }

+ assert(c->entries != 0);

+ n = c->t[0];

+ best = n->refs;

+ for (i = 0; i < c->entries; i++) {

+ if (c->t[i]->refs < best) {

+ n = c->t[i];

+ best = n->refs;

}

- from->n_flags &= ~NF_MARK;

- return (longest_len);

+ return n;

}

-void

+#define plural(n) ((n) > 1 ? "s" : "")

+int

+main(argc, argv)

+ int argc;

+ char *argv[];

+ struct ohash pairs;

+ int reverse_flag, quiet_flag, long_flag,

+ warn_flag, hints_flag, verbose_flag;

+ reverse_flag = quiet_flag = long_flag =

+ warn_flag = hints_flag = verbose_flag = 0;

+ nodes_init(&pairs);

+ {

+ int c;

+ while ((c = getopt(argc, argv, "h:flqrvw")) != -1) {

+ switch(c) {

+ case 'h': {

+ FILE *f;

+ f = fopen(optarg, "r");

+ if (f == NULL)

+ err(EX_NOINPUT, "Can't open hint file %s",

+ optarg);

+ read_hints(f, &pairs, optarg);

+ fclose(f);

+ }

+ /*FALLTHRU*/

+ case 'f':

+ if (hints_flag == 1)

+ usage();

+ hints_flag = 1;

+ break;

+ case 'l':

+ long_flag = 1;

+ break;

+ case 'q':

+ quiet_flag = 1;

+ break;

+ case 'r':

+ reverse_flag = 1;

+ break;

+ case 'v':

+ verbose_flag = 1;

+ break;

+ case 'w':

+ warn_flag = 1;

+ break;

+ default:

+ usage();

+ }

+ argc -= optind;

+ argv += optind;

+ }

+ switch(argc) {

+ case 1: {

+ FILE *f;

+ f = fopen(argv[0], "r");

+ if (f == NULL)

+ err(EX_NOINPUT, "Can't open file %s", argv[1]);

+ read_pairs(f, &pairs, reverse_flag, argv[1]);

+ fclose(f);

+ break;

+ }

+ case 0:

+ read_pairs(stdin, &pairs, reverse_flag, "stdin");

+ break;

+ default:

+ usage();

+ }

+ {

+ struct array aux; /* Unrefed nodes/cycle reporting. */

+ struct array remaining;

+ unsigned int broken_arcs, broken_cycles;

+ unsigned int left;

+ broken_arcs = 0;

+ broken_cycles = 0;

+ split_nodes(&pairs, &aux, &remaining);

+ ohash_delete(&pairs);

+ if (hints_flag)

+ heapify(&aux);

+ left = remaining.entries + aux.entries;

+ while (left != 0) {

+ /* Standard topological sort. */

+ while (aux.entries) {

+ struct link *l;

+ struct node *n;

+ n = DEQUEUE(&aux);

+ printf("%s\n", n->k);

+ left--;

+ /* We can't free nodes, as we don't know which

+ * entry we can remove in the hash table. We

+ * rely on refs == 0 to recognize live nodes.

+ * Decrease ref count of live nodes, enter new

+ * candidates into the unrefed list. */

+ for (l = n->arcs; l != NULL; l = l->next)

+ if (l->node->refs != 0 &&

+ --l->node->refs == 0) {

+ ENQUEUE(&aux, l->node);

+ }

+ /* There are still cycles to break. */

+ if (left != 0) {

+ struct node *n;

+ broken_cycles++;

+ /* XXX Simple cycle detection and long cycle

+ * detection are mutually exclusive. */

+ if (long_flag) {

+ n = find_longest_cycle(&remaining, &aux);

+ } else {

+ if (hints_flag)

+ n = find_smallest_node(&remaining);

+ else

+ n = find_good_cycle_break(&remaining);

+ if (!quiet_flag) {

+ while (!find_cycle_with(n, &aux))

+ n = find_predecessor(&remaining, n);

+ }

+ if (!quiet_flag) {

+ warnx("cycle in data");

+ print_cycle(&aux);

+ }

+ if (verbose_flag)

+ warnx("%u edge%s broken", n->refs,

+ plural(n->refs));

+ broken_arcs += n->refs;

+ n->refs = 0;

+ /* Reinitialization, cycle reporting uses aux. */

+ aux.t[0] = n;

+ aux.entries = 1;

+ }

+ if (verbose_flag && broken_cycles != 0)

+ warnx("%u cycle%s broken, for a total of %u edge%s",

+ broken_cycles, plural(broken_cycles),

+ broken_arcs, plural(broken_arcs));

+ if (warn_flag)

+ exit(broken_cycles < 256 ? broken_cycles : 255);

+ else

+ exit(EX_OK);

+ }

+extern char *__progname;

+static void

usage()

{

- (void)fprintf(stderr, "usage: tsort [-lq] [file]\n");

- exit(1);

+ fprintf(stderr, "Usage: %s [-h file] [-flqrvw] [file]\n", __progname);

+ exit(EX_USAGE);

}