module stree { // interfaces defined here interface SearchTree; // the top-level construct interface Word; // a binary search tree node -- represents one word interface Document; // a document stored in the repository interface Cite; // a reference to a line in a document
// A binary search tree of Word objects interface SearchTree { private: attribute ref<Word> root; // the root of the tree // Update the entry matching WORD to add a citation. // Add a new Word if necessary. void insert(in lref<char> word, in ref<Cite> cite); public: // Constructor: make an empty tree void initialize(); // Insert a new Document into the repository. The argument is a // pathname to be interpreted in the Unix name space as the name of a // Unix file containing the raw data. A new Document object with // the same base name is created in the current Shore directory, // filled with a copy of the file's context, and indexed by all of // its words. void insert_file(in lref<char> src); // Retrieve the Word object matching the argument. // Return NULL if not found. ref<Word> find(in lref<char> word) const; };
// There is one Word object for each distinct word appearing in any // document in the repository. interface Word { private: attribute string value; attribute ref<Word> left, right; public: relationship set<Cite> cited_by inverse cites; // Constructor: empty occurrences list void initialize(in lref<char> word); // How many occurrences? long count() const; // Get ith occurrence (returns NULL if not that many) ref<Cite> occurrence(in long i) const; // The following methods are meant to be used only by SearchTree. // Find decendant matching WORD creating one if necessary ref<Word> find_or_add(in lref<char> word); // Find only, return NULL on not found ref<Word> find(in lref<char> word) const; // Debugging dump void print(in long verbose) const; // Add an occurence void occurs_on(in ref<Cite> cite); };
// A Cite object represents a citation. There is one Cite object for each // line of each document in the repository. There is thus a many-many // relationship from Cite to Word and a many-one relationship from Cite // to Document. interface Cite { private: attribute long offset; public: relationship ref<Document> doc inverse cited_by; relationship set<Word> cites inverse cited_by; // Constructor void initialize(in ref<Document> d, in long o); // Print the referenced line void print(in long vflag) const; // Destructor void finalize(); };
// A Document is a chunk of text that looks like a Unix file. // We also record the file name under which it was created. // (The need to record the name may go away when Shore adds a way to find // the pathname of a registered object given a Ref to it.) interface Document { private: attribute text body; attribute string name; attribute long cur_len; public: relationship set<Cite> cited_by inverse doc; // Constructor: The body is empty. void initialize(in lref<char> base_name, in long len); // Add some text to the end of the body. void append(in lref<char> str); // Read-only access to the file name. lref<char> get_name() const; // Current length of text long size() const; // Print a line starting at OFFSET void print_line(in long offset) const; // Destructor void finalize(); }; }
/* * ShoreConfig.h is needed only by applications * that distinguish platforms. (Stree does not, * but we include this for documentation purposes.) */ #include <ShoreConfig.h> #include <iostream.h> #include <fstream.h> #include <std.h> #include "stree.h" Ref<SearchTree> repository; Ref<Pool> nodes; // Place to create new anonymous objects const char *DEMO_DIR = "stree"; char *argv0; int verbose; extern "C" int optind; enum OPERATION { OP_NONE, OP_ADD, OP_LIST, OP_DEL, OP_POOL_LIST, OP_CLEAR } operation = OP_NONE; static void add_files(int argc, char *const*argc); static void list_files(char *str); static void delete_files(int argc, char *const*argc); static void pool_list(); static void clear_all(); void usage() { cerr << "usage:" << endl; cerr << "\t" << argv0 << " -a[V] fname [fname ...]" << endl; cerr << "\t" << argv0 << " -l[V] word" << endl; cerr << "\t" << argv0 << " -d fname [fname ...]" << endl; cerr << "\t" << argv0 << " -p" << endl; cerr << "\t" << argv0 << " -c" << endl; cerr << "\t" << "the -V option turns on verbose mode" << endl; exit(1); } int main(int argc, char *argv[]) { argv0 = argv[0]; shrc rc;
// initialize connection to server SH_DO(Shore::init(argc, argv, 0, getenv("STREE_RC"))); // get command-line options int c; while ((c = getopt(argc,argv,"aldpcV")) != EOF) switch(c) { case 'a': operation = OP_ADD; break; case 'l': operation = OP_LIST; break; case 'd': operation = OP_DEL; break; case 'p': operation = OP_POOL_LIST; break; case 'c': operation = OP_CLEAR; break; case 'V': verbose++; break; default: usage(); } if (operation == OP_NONE) usage();
// Start a transaction for initialization SH_BEGIN_TRANSACTION(rc); if (rc) rc.fatal(); // this terminates the program with extreme prejudice // Check that our demo directory exists rc = Shore::chdir(DEMO_DIR); if (rc != RCOK) { if (rc != RC(SH_NotFound)) SH_ABORT_TRANSACTION(rc); // Not found. Must be the first time through. // Create the directory SH_DO(Shore::mkdir(DEMO_DIR, 0755)); SH_DO(Shore::chdir(DEMO_DIR));
// Make a new SearchTree object ... repository = new("repository", 0644) SearchTree; repository.update()->initialize(); // ... and a pool for allocating Nodes. SH_DO(nodes.create_pool("pool", 0644, nodes)); } else { // not first time // Get the repository root from the database ... SH_DO(Ref<SearchTree>::lookup("repository",repository)); // ... and the pool for creating nodes SH_DO(nodes.lookup("pool", nodes)); } SH_DO(SH_COMMIT_TRANSACTION); switch (operation) { case OP_ADD: add_files(argc-optind, argv+optind); break; case OP_LIST: if (optind != argc-1) usage(); list_files(argv[optind]); break; case OP_DEL: delete_files(argc-optind, argv+optind); break; case OP_POOL_LIST: pool_list(); break; case OP_CLEAR: clear_all(); break; default: break; } return 0; } // main // Add all the named files to the repository static void add_files(int argc, char *const*argv) { shrc rc; SH_BEGIN_TRANSACTION(rc); if (rc) rc.fatal(); for (int i=0; i<argc; i++) repository.update()->insert_file(argv[i]); if (verbose) cout << "about to commit" << endl; SH_DO(SH_COMMIT_TRANSACTION); if (verbose) cout << "committed" << endl; } // add_files // List all uses of a word static void list_files(char *str) { shrc rc; int occurrences=0; SH_BEGIN_TRANSACTION(rc); if (rc) rc.fatal(); Ref<Word> w = repository->find(str); if (verbose) cout << "========== " << str << endl; if (w && w->count() > 0) { Ref<Cite> c; for (int i=0; c = w->occurrence(i); i++) { occurrences++; c->print(verbose); } } else if (verbose) { cout << "**** Not found" << endl; occurrences = -1; } if(occurrences >= 0 && verbose) { cout << "**** " << occurrences << " citation" << (char *)(occurrences==1?"":"s") << endl; } SH_DO(SH_COMMIT_TRANSACTION); } // list_files
// Removed the named files from the repository static void delete_files(int argc, char *const*argv) { shrc rc; SH_BEGIN_TRANSACTION(rc); if (rc) rc.fatal(); for (int i=0; i<argc; i++) { Ref<Document> d; SH_DO(d.lookup(argv[i],d)); d.update()->finalize(); SH_DO(Shore::unlink(argv[i])); } if (verbose) cout << "about to commit" << endl; SH_DO(SH_COMMIT_TRANSACTION); if (verbose) cout << "committed" << endl; } // delete_files
static void pool_list() { shrc rc; SH_BEGIN_TRANSACTION(rc); if (rc) rc.fatal(); Ref<any> ref; Ref<Word> w; Ref<Cite> c; { PoolScan scan("pool"); if (scan != RCOK) SH_ABORT_TRANSACTION(scan.rc()); while (scan.next(ref, true) == RCOK) { if (w = TYPE_OBJECT(Word).isa(ref)) { w->print(1); } else if (c = TYPE_OBJECT(Cite).isa(ref)) { c->print(2); } else cout << " Unknown type of object" << endl; } } SH_DO(SH_COMMIT_TRANSACTION); } // pool_list
static void clear_all() { shrc rc; SH_BEGIN_TRANSACTION(rc); if (rc) rc.fatal(); rc = Shore::unlink("repository"); if (rc) cout << rc << endl; SH_DO(nodes.lookup("pool", nodes)); SH_DO(nodes.destroy_contents()); rc = Shore::unlink("pool"); if (rc) cout << rc << endl; rc = Shore::chdir(".."); if (rc) cout << rc << endl; SH_DO(Shore::rmdir(DEMO_DIR)); SH_DO(SH_COMMIT_TRANSACTION); } // clear_all
// Member functions of the SearchTree class #include <iostream.h> #include <string.h> #include <ctype.h> #include "stree.h" #include <sys/types.h> #include <sys/stat.h> extern Ref<Pool> nodes; static int getword(const char *&p, char *res, int size); extern int verbose; // defined in main.C void SearchTree::initialize() { root = NULL; }
void SearchTree::insert(char *s, Ref<Cite> c) { Ref<Word> w; if (root) { w = root.update()->find_or_add(s); } else { root = new(nodes) Word; root.update()->initialize(s); w = root; } w.update()->occurs_on(c); } void SearchTree::insert_file(char *fname) { shrc rc; if (verbose) cout << "Indexing file " << fname << endl; // Open input file ifstream in(fname); if (!in) { perror(fname); SH_ABORT_TRANSACTION(rc); } // do a unix stat to get the total size of the file. struct stat in_stat; if (stat(fname,&in_stat)) { perror(fname); SH_ABORT_TRANSACTION(rc); } // Create target document // Strip leading path from file name; char *base_name = strrchr(fname, '/'); if (base_name) base_name++; else base_name = fname; Ref<Document> doc; rc = doc.new_persistent(base_name, 0644, doc); if (rc) { perror(base_name); SH_ABORT_TRANSACTION(rc); } doc.update()->initialize(base_name,in_stat.st_size); // for each line of the document ... char linebuf[1024]; while (in.getline(linebuf, sizeof linebuf -1)) { long off = doc->size(); // copy the line to the body of the document doc.update()->append(linebuf); doc.update()->append("\n"); // allocate a new Cite object for this line Ref<Cite> cite = new (nodes) Cite; cite.update()->initialize(doc, off); // for each word on the line ... char word[100]; const char *p = linebuf; while (getword(p, word, sizeof word)) { // link the citation to the word insert(word, cite); } } }
Ref<Word> SearchTree::find(char *str) const { if (root) return root->find(str); return NULL; } // Copy a word of at most SIZE characters (including terminating null) // in to the buffer starting at RES. Start searching at location P. // Words are delimited by white space. The result is translated to lower // case, with all non-letters eliminated. // P is updated to point to the first character not copied. // The result is 1 if a word is found, 0 if '\0' is encountered first. static int getword(const char *&p, char *res, int size) { for (;; ) { // skip leading white space while (isspace(*p)) p++; // check for eoln if (*p == 0) return 0; // gather non-space characters, translating to lower case and // ignoring non-alpha characters int len; for (len = 0; len < size-1 && *p && !isspace(*p); p++) { if (isupper(*p)) res[len++] = tolower(*p); else if (islower(*p)) res[len++] = *p; } if (len > 0) { res[len] = 0; return 1; } // otherwise, word was all digits and punctuation, so try again. } }
// Member functions of the Word class #include <iostream.h> #include <string.h> #include "stree.h" extern Ref<Pool> nodes;
void Word::initialize(char *word) { value = word; left = NULL; right = NULL; } long Word::count() const { return cited_by.get_size(); }
Ref<Cite> Word::occurrence(long i) const { return cited_by.get_elt(i); } Ref<Word> Word::find_or_add(char *s) { int i = strcmp(s,value); if (i == 0) return this; if (i < 0) { if (left) return left.update()->find_or_add(s); else { left = new(nodes) Word; left.update()->initialize(s); return left; } } else { if (right) return right.update()->find_or_add(s); else { right = new(nodes) Word; right.update()->initialize(s); return right; } } } Ref<Word> Word::find(char *s) const { int i = strcmp(s,value); if (i == 0) return this; if (i < 0) return left ? left->find(s) : (Ref<Word>)NULL; return right ? right->find(s) : (Ref<Word>)NULL; }
void Word::occurs_on(Ref<Cite> cite) { cited_by.add(cite); } void Word::print(long verbose) const { if (verbose) { int s = cited_by.get_size(); cout << "Word '" << (char *)value << "' occurs on " << s << " line" << (s==1 ? "" : "s") << endl; } else cout << (char *)value; }
// Member functions of the Cite class #include <iostream.h> #include "stree.h" void Cite::initialize(Ref<Document> d, long o) { doc = d; offset = o; } void Cite::print(long v) const { switch (v) { default: case 0: // just the file name cout << doc->get_name() << endl; break; case 1: // the file name and the corresponding line cout << doc->get_name() << ": "; doc->print_line(offset); break; case 2: // debugging version cout << "Cite, offset " << offset << " in file " << doc->get_name() << " cites"; { int count = cites.get_size(); for (int i = 0; i < count; i++) { Ref<Word> w = cites.get_elt(i); cout << " "; w->print(0); } cout << endl; } break; } } void Cite::finalize() { while (cites.delete_one()) {} }
// Member functions of the Document class #include <iostream.h> #include <string.h> #include "stree.h" void Document::initialize(char *base_name, long ilen) { body = 0; name = base_name; // set a char at the end of the body to initialze the // string space. body.set(ilen-1,0); // initialize cur_len. cur_len = 0; }
void Document::append(char *str) { // body.set(str, body.blen(), ::strlen(str)); int str_size = ::strlen(str); body.set(str, cur_len, str_size); cur_len += str_size; } char *Document::get_name() const { return name; } long Document::size() const { // return body.strlen(); return cur_len; } void Document::print_line(long offset) const { char buf[100]; body.get(buf, offset, sizeof buf); buf[sizeof buf - 1] = 0; char *p = strchr(buf, '\n'); if (p) *++p = 0; cout << buf; }
void Document::finalize() { Ref<Cite> p; while (p = cited_by.delete_one()) { p.update()->finalize(); SH_DO(p.destroy()); } }
#define MODULE_CODE #include "stree.h"
#!/bin/sh # --------------------------------------------------------------- # # -- Copyright (c) 1994, 1995 Computer Sciences Department, -- # # -- University of Wisconsin-Madison, subject to the terms -- # # -- and conditions given in the file COPYRIGHT. All Rights -- # # -- Reserved. -- # # --------------------------------------------------------------- # # $Header: /p/shore/shore_cvs/src/examples/stree/swc,v 1.3 1995/04/26 11:03:06 solomon Exp $ mountpoint=/shoremnt program=stree if test $1x = -ix then program=doc_index shift fi if test $# -ne 1 then echo "usage: $0 [-i] keyword" exit 1 fi prog_path=`pwd`/$program cd $mountpoint/$program wc `$prog_path -l $1 | sort -u`
#!/bin/sh # --------------------------------------------------------------- # # -- Copyright (c) 1994, 1995 Computer Sciences Department, -- # # -- University of Wisconsin-Madison, subject to the terms -- # # -- and conditions given in the file COPYRIGHT. All Rights -- # # -- Reserved. -- # # --------------------------------------------------------------- # # $Header: /p/shore/shore_cvs/src/examples/stree/sedit,v 1.3 1995/04/26 11:03:03 solomon Exp $ mountpoint=/shoremnt program=stree if test $1x = -ix then program=doc_index shift fi if test -t 0 then edit=${EDITOR:-emacs} else edit="echo EDIT" fi if test $# -ne 1 then echo "usage: $0 [-i] keyword" exit 1 fi prog_path=`pwd`/$program files="`$prog_path -l $1`" || exit 1 if [ -n "$files" ] then cd $mountpoint/$program $edit $files else echo $1 not found fi
// Member functions of the DocIndex class #include <iostream.h> #include <string.h> #include <ctype.h> #include <sys/types.h> #include <sys/stat.h> #include "doc_index.h" extern Ref<Pool> nodes; static int getword(const char *&p, char *res, int size); extern int verbose; // defined in main.C void DocIndex::initialize() { SH_DO(ind.init(UniqueBTree)); }
void DocIndex::insert(char *s, Ref<Cite> c) { Ref<Word> w; bool found; SH_DO(ind.find(s,w,found)); if (!found) { w = new(nodes) Word; w.update()->initialize(s); SH_DO(ind.insert(s,w)); } w.update()->occurs_on(c); } void DocIndex::insert_file(char *fname) { shrc rc; if (verbose) cout << "Indexing file " << fname << endl; // Open input file ifstream in(fname); if (!in) { perror(fname); SH_ABORT_TRANSACTION(rc); } // do a unix stat to get the total size of the file. struct stat in_stat; if (stat(fname,&in_stat)) { perror(fname); SH_ABORT_TRANSACTION(rc); } // Create target document // Strip leading path from file name; char *base_name = strrchr(fname, '/'); if (base_name) base_name++; else base_name = fname; Ref<Document> doc; rc = doc.new_persistent(base_name, 0644, doc); if (rc) { perror(base_name); SH_ABORT_TRANSACTION(rc); } doc.update()->initialize(base_name,in_stat.st_size); // for each line of the document ... char linebuf[1024]; while (in.getline(linebuf, sizeof linebuf -1)) { long off = doc->size(); // copy the line to the body of the document doc.update()->append(linebuf); doc.update()->append("\n"); // allocate a new Cite object for this line Ref<Cite> cite = new (nodes) Cite; cite.update()->initialize(doc, off); // for each word on the line ... char word[100]; const char *p = linebuf; while (getword(p, word, sizeof word)) { // link the citation to the word insert(word, cite); } } }
Ref<Word> DocIndex::find(char *str) const { Ref<Word> w; bool found; SH_DO(ind.find(str,w,found)); if (found) return w; return NULL; } // Copy a word of at most SIZE characters (including terminating null) // in to the buffer starting at RES. Start searching at location P. // Words are delimited by white space. The result is translated to lower // case, with all non-letters eliminated. // P is updated to point to the first character not copied. // The result is 1 if a word is found, 0 if '\0' is encountered first. static int getword(const char *&p, char *res, int size) { for (;; ) { // skip leading white space while (isspace(*p)) p++; // check for eoln if (*p == 0) return 0; // gather non-space characters, translating to lower case and // ignoring non-alpha characters int len; for (len = 0; len < size-1 && *p && !isspace(*p); p++) { if (isupper(*p)) res[len++] = tolower(*p); else if (islower(*p)) res[len++] = *p; } if (len > 0) { res[len] = 0; return 1; } // otherwise, word was all digits and punctuation, so try again. } } void DocIndex::delete_word(sdl_string w) { int count; SH_DO(ind.remove(w,count)); if (verbose) cout << "deleted " << count << (count==1 ? " copy" : " copies") << " of word '" << (char *)w << "' from the index" << endl; } void DocIndex::print() const { // index_iter<typeof(ind)> iterator(ind); IndexScanIter<sdl_string,Ref<Word> > iterator(this->ind); SH_DO(iterator.next()); while (!iterator.eof) { cout << "key: '" << iterator.cur_key << "' value: "; iterator.cur_val->print(1); SH_DO(iterator.next()); } }