The following papers are available via ftp from the Machine
Learning Research Group of the University of Wisconsin, Madison.
These papers can be retrieved via a WWW browser, such as Mosaic,
or by anonymous ftp. The URL for our publications page is:
http://www.cs.wisc.edu/~shavlik/publications.html
The URL for our group's "home" page is:
http://www.cs.wisc.edu/~shavlik/uwml.html
The URL for retrieving these papers via anonymous ftp is:
ftp://ftp.cs.wisc.edu/machine-learning/shavlik-group/
If your computer cannot uncompress ".Z" files, simply leave off
the ".Z" when you request the ftp transfer; our ftp server will
then uncompress the file before transmitting it.
Email shavlik@cs.wisc.edu if you have any major problems.
------
THESES
------
@phdthesis{allex.thesis
,author = "C. F. Allex"
,title = "Computational Methods for Fast and Accurate {DNA} Fragment Assembly"
,school = "Department of Computer Sciences, University of Wisconsin-Madison"
,year = "1999"
,filename = "allex.thesis99.ps"
,note = "(Also appears as UW Technical Report CS-TR-99-1406)"
,abstract= "As advances in technology result in the production of
increasing amounts of DNA sequencing data in decreasing amounts of time,
developing computational methods that allow data analysis to keep pace is
imperative. In this dissertation, I present methods that improve the
speed and accuracy of DNA fragment assembly.
One critical characteristic of automatic methods for fragment assembly is
that they must be accurate. Currently, to ensure accurate sequences, the
data that underlies questionable base calls must be examined by human
editors so that the correct base call can be determined. This manual
process is both error-prone and time-consuming. Automatic methods that
yield high accuracy and few questionable calls can reduce errors and
lessen the need for manual inspections. In my work, I developed a method,
Trace-Evidence, that automatically produces highly accurate consensus
sequences, even with few aligned sequences.
Most assembly programs analyze only base calls when determining a
consensus sequence. The key to the high accuracy is that I incorporate
morphological information about the underlying ABI trace data. This is
accomplished through a new representation of traces, Trace-Class, that
characterizes the height and shape of traces. The new representation not
only yields high accuracy when used in consensus-calling methods, but
also produces improved results when used in removing poor-quality data,
and when used as inputs for neural networks for consensus determination.
The need for fast processing is becoming more important as the size of
sequencing projects increases. Almost all existing fragment assembly
programs perform pairwise comparisons of reads, resulting in execution
times proportional to n2, where n is the number of reads. I describe a
new algorithm for fragment layout, SLIC, that runs in time proportional
to n. SLIC relies on subsequences of bases that occur in overlapping
regions of fragment reads. Subsequences that are common to two or more
fragment reads are aligned to determine the overall layout of reads.
The work I present provides improvements to currently available
computational methods for DNA sequencing that can serve as a foundation
for further study in developing better solutions to problems in fragment
assembly."
}
@phdthesis{craven.thesis,
  author   = {M. W. Craven},
  title    = {Extracting Comprehensible Models from Trained Neural Networks},
  school   = {Department of Computer Sciences, University of Wisconsin-Madison},
  year     = {1996},
  note     = {(Also appears as UW Technical Report CS-TR-96-1326)},
  filename = {craven.thesis.ps},
  abstract = {Although neural networks have been used to develop highly
accurate classifiers in numerous real-world problem domains, the models
they learn are notoriously difficult to understand. This thesis
investigates the task of extracting comprehensible models from trained
neural networks, thereby alleviating this limitation.
The primary contribution of the thesis is an algorithm that overcomes the
significant limitations of previous methods by taking a novel approach to
the task of extracting comprehensible models from trained networks.
This algorithm, called TREPAN, views the task as an inductive learning
problem. Given a trained network, or any other learned model, TREPAN uses
queries to induce a decision tree that approximates the function represented
by the model. Unlike previous work in this area, TREPAN is broadly
applicable as well as scalable to large networks and problems with
high-dimensional input spaces. The thesis presents experiments that
evaluate TREPAN by applying it to individual networks and to ensembles of
neural networks trained in classification, regression, and
reinforcement-learning domains. These experiments demonstrate that TREPAN
is able to extract decision trees that are comprehensible, yet maintain high
levels of fidelity to their respective networks. In problem domains in
which neural networks provide superior predictive accuracy to conventional
decision tree algorithms, the trees extracted by TREPAN also exhibit
superior accuracy, but are comparable in terms of complexity, to the trees
learned directly from the training data.
A secondary contribution of this thesis is an algorithm, called BBP, that
constructively induces simple neural networks. The motivation underlying
this algorithm is similar to that for TREPAN: to learn comprehensible models
in problem domains in which neural networks have an especially appropriate
inductive bias. The BBP algorithm, which is based on a hypothesis-boosting
method, learns perceptrons that have relatively few connections. This
algorithm provides an appealing combination of strengths: it provides
learnability guarantees for a fairly natural class of target functions;
it provides good predictive accuracy in a variety of problem domains; and
it constructs syntactically simple models, thereby facilitating human
comprehension of what it has learned.
These algorithms provide mechanisms for improving the understanding of
what a trained neural network has learned.},
}
@phdthesis{opitz.thesis,
  author   = {D. W. Opitz},
  title    = {An Anytime Approach to Connectionist Theory Refinement:
              Refining the Topologies of Knowledge-Based Neural Networks},
  school   = {Department of Computer Sciences, University of Wisconsin-Madison},
  year     = {1995},
  note     = {(Also appears as UW Technical Report CS-TR-95-1281)},
  filename = {opitz.thesis.ps},
  abstract = {Many scientific and industrial problems can be better understood by
learning from samples of the task at hand. For this reason, the machine
learning and statistics communities devote considerable research effort
on generating inductive-learning algorithms that try to learn the true
``concept'' of a task from a set of its examples. Often times, however, one
has additional resources readily available, but largely unused, that can
improve the concept that these learning algorithms generate. These
resources include available computer cycles, as well as prior knowledge
describing what is currently known about the domain. Effective utilization
of available computer time is important since for most domains an expert
is willing to wait for weeks, or even months, if a learning system can
produce an improved concept. Using prior knowledge is important since it
can contain information not present in the current set of training examples.
In this thesis, I present three ``anytime'' approaches to connectionist theory
refinement. Briefly, these approaches start by translating a set of rules
describing what is currently known about the domain into a neural network,
thus generating a knowledge-based neural network (KNN). My approaches then
utilize available computer time to improve this KNN by continually refining
its weights and topology. My first method, TopGen, searches for good
``local'' refinements to the KNN topology. It does this by adding nodes to
the KNN in a manner analogous to symbolically adding rules and conjuncts
to an incorrect rule base. My next approach, REGENT, uses genetic
algorithms to find better ``global'' changes to this topology. REGENT
proceeds by using (a) the domain-specific rules to help create the initial
population of KNNs and (b) crossover and mutation operators specifically
designed for KNNs. My final algorithm, ADDEMUP, searches for an ``ensemble''
of KNNs that work together to produce an effective composite prediction.
ADDEMUP works by using genetic algorithms to continually create new
networks, keeping the set of networks that are as accurate as possible while
disagreeing with each other as much as possible. Empirical results show
that these algorithms successfully achieve each of their respective goals.},
}
@phdthesis{maclin.thesis,
  author   = {R. Maclin},
  title    = {Learning from Instruction and Experience: Methods for Incorporating Procedural Domain Theories into Knowledge-Based Neural Networks},
  school   = {Department of Computer Sciences, University of Wisconsin-Madison},
  year     = {1995},
  note     = {(Also appears as UW Technical Report CS-TR-95-1285)},
  filename = {maclin.thesis.firsthalf.ps maclin.thesis.secondhalf.ps},
  abstract = {This thesis defines and evaluates two systems that allow a teacher to
provide instructions to a machine learner. My systems, FSKBANN and RATLE,
expand the language that a teacher may use to provide advice to the
learner. In particular, my techniques allow a teacher to give partially
correct instructions about procedural tasks - tasks that are solved as
sequences of steps. FSKBANN and RATLE allow a computer to learn both from
instruction and from experience.
Experiments with these systems on several testbeds demonstrate that they
produce learners that successfully use and refine the instructions they are
given.
In my initial approach, FSKBANN, the teacher provides instructions as a
set of propositional rules organized around one or more finite-state
automata (FSAs). FSKBANN maps the knowledge in the rules and FSAs into a
recurrent neural network. I used FSKBANN to refine the Chou-Fasman
algorithm, a method for solving the secondary-structure prediction problem,
a difficult task in molecular biology. FSKBANN produces a refined
algorithm that outperforms the original (non-learning) Chou-Fasman
algorithm, as well as a standard neural-network approach.
My second system, RATLE, allows a teacher to communicate advice, using
statements in a simple programming language, to a connectionist,
reinforcement-learning agent. The teacher indicates conditions of the
environment and actions the agent should take under those conditions.
RATLE allows the teacher to give advice continuously by translating the
teacher's statements into additions to the agent's neural network. The
RATLE language also includes novel (to the theory-refinement literature)
features such as multi-step plans and looping constructs. In experiments
with RATLE on two simulated testbeds involving multiple agents, I
demonstrate that a RATLE agent receiving advice outperforms both an agent
that does not receive advice and an agent that receives instruction, but
does not refine it.
My methods provide an appealing approach for learning from both instruction
and experience in procedural tasks. This work widens the ``information
pipeline'' between humans and machine learners, without requiring that the
human provide absolutely correct information to the learner.},
}
@phdthesis{towell.thesis,
  author   = {G. G. Towell},
  title    = {Symbolic Knowledge and Neural Networks:
              Insertion, Refinement and Extraction},
  school   = {Department of Computer Sciences, University of Wisconsin-Madison},
  year     = {1991},
  note     = {(Also appears as UW Technical Report 1072 [out of print].)},
  filename = {towell.thesis.1-2.ps towell.thesis.3.ps towell.thesis.4-7.ps towell.thesis.app.ps},
  abstract = {Explanation-based and empirical learning are two largely complementary methods
of machine learning. These approaches to machine learning both have serious
problems which preclude their being a general purpose learning method.
However, a ``hybrid'' learning method that combines explanation-based with
empirical learning may be able to use the strengths of one learning method to
address the weaknesses of the other method. Hence, a system that effectively
combines the two approaches to learning can be expected to be superior to
either approach in isolation. This thesis describes a hybrid system called
KBANN which is shown to be an effective combination of these two learning
methods.
KBANN (Knowledge-Based Artificial Neural Networks) is a three-part hybrid
learning system built on top of ``neural'' learning techniques. The first part
uses a set of approximately-correct rules to determine the structure and
initial link weights of an artificial neural network, thereby making the rules
accessible for modification by neural learning. The second part of KBANN
modifies the resulting network using essentially standard neural learning
techniques. The third part of KBANN extracts refined rules from trained
networks.
KBANN is evaluated by empirical tests in the domain of molecular biology.
Networks created by KBANN are shown to be superior, in terms of their ability
to correctly classify unseen examples, to a wide variety of learning systems
as well as techniques proposed by experts in the problems investigated. In
addition, empirical tests show that KBANN is robust to errors in the initial
rules and insensitive to problems resulting from the presence of extraneous
input features.
The third part of KBANN, which extracts rules from trained networks, addresses
a significant problem in the use of neural networks -- understanding what a
neural network learns. Empirical tests of the proposed rule-extraction method
show that it simplifies understanding of trained networks by reducing the
number of: consequents (hidden units), antecedents (weighted links), and
possible antecedent weights. Surprisingly, the extracted rules are often more
accurate at classifying examples not seen during training than the trained
network from which they came.},
}
@phdthesis{gutstein.thesis
,author = "Eric Gutstein"
,title = "{SIFT}: A Self-Improving Fractions Tutor"
,school = "Department of Computer Sciences, University of Wisconsin-Madison"
,year = "1993"
}
-----------------
CONFERENCE PAPERS
-----------------
@inproceedings{eliassi-rad.icml01,
  author    = {T. Eliassi-Rad and J. Shavlik},
  title     = {A Theory-Refinement Approach to Information Extraction},
  booktitle = {Proceedings of the Eighteenth International Conference on
               Machine Learning},
  year      = {2001},
  address   = {Williamstown, MA},
  filename  = {eliassi-rad.icml01.pdf eliassi-rad.icml01.ps},
  abstract  = {We investigate applying theory refinement to the task of extracting
information from text. In theory refinement, partial domain knowledge
(which may be incorrect) is given to a supervised learner. The provided
knowledge guides the learner in its task, but the learner can refine or
even discard this knowledge during training. Our supervised learner
is a knowledge-based neural network that initially contains compiled
prior knowledge about a particular information extraction (IE) task.
The prior knowledge needs to specify the extraction slots for the specific
IE task. Our approach uses generate-and-test to address the IE task.
In the generation step, we produce candidate extractions by intelligently
searching the space of possible extractions. In the test step, we
use the trained network to judge each candidate and output those that exceed
a system-selected threshold. Experiments on the CMU seminar-announcements
and the Yeast subcellular-localization domains demonstrate our approach's value.},
}
@inproceedings{craven.ismb2000
,filename = "craven-ismb00.pdf craven-ismb00.ps"
,author = "M. Craven and D. Page and J. Shavlik and J. Bockhorst and J. Glasner"
,title = "Probabilistic Learning Approach to Whole-Genome Operon Prediction"
,booktitle = "Proceedings of the Eighth International Conference on Intelligent
Systems for Molecular Biology"
,year = 2000
,address = "La Jolla, CA"
,abstract = "We present a computational approach to predicting operons
in the genomes of prokaryotic organisms. Our approach uses machine learning
methods to induce predictive models for this task from a rich variety of
data types including sequence data, gene expression data, and functional
annotations associated with genes. We use multiple learned models that
individually predict promoters, terminators and operons themselves. A key
part of our approach is a dynamic programming method that uses our predictions
to map every known and putative gene in a given genome into its most
probable operon. We evaluate our approach using data from the E. coli K-12
genome."
}
@inproceedings{craven.icml2000
,filename = "craven-icml00.pdf craven-icml00.ps"
,author = "M. Craven and D. Page and J. Shavlik and J. Bockhorst and J. Glasner"
,title = "Using Multiple Levels of Learning and Diverse Evidence Sources
to Uncover Coordinately Controlled Genes"
,booktitle = "Proceedings of the Seventeenth International Conference on Machine
Learning"
,year = 2000
,address = "Palo Alto, CA"
,abstract = "Now that the complete genomes of numerous organisms have been
determined, a key problem in computational molecular biology is uncovering the
relationships that exist among the genes in each organism and the regulatory
mechanisms that control their operation. We are developing computational methods
for discovering such regulatory mechanisms and relationships. Toward this end,
we have developed a machine learning approach to identifying sets of genes that
are coordinately controlled in the E. coli genome. A number of factors make this
an interesting application for machine learning: (i) there is a rich variety of
data types that provide useful evidence for this task, (ii) the overall problem
of uncovering regulatory mechanisms can be decomposed in multiple machine learning
subtasks operating at different levels of detail, (iii) there are not any known
negative training examples, and (iv) some of the features are misleading in their
predictiveness."
}
@inproceedings{goecks.iui2000
,filename = "goecks-iui2000.doc goecks-iui2000.html"
,author = "J. Goecks and J. Shavlik"
,title = "Learning Users' Interests by Unobtrusively Observing
Their Normal Behavior"
,booktitle = "Proceedings of the 2000 International Conference on
Intelligent User Interfaces"
,year = 2000
,address = "New Orleans, LA"
,pages = "129--132"
,abstract = "For intelligent interfaces attempting to learn a user's interests,
the cost of obtaining labeled training instances is prohibitive because the user
must directly label each training instance, and few users are willing to do so.
We present an approach that circumvents the need for human-labeled pages. Instead,
we learn ``surrogate'' tasks where the desired output is easily measured, such as the
number of hyperlinks clicked on a page or the amount of scrolling performed. Our
assumption is that these outputs will highly correlate with the user's interests.
In other words, by unobtrusively ``observing'' the user's behavior we are able to
learn functions of value. For example, an intelligent browser could silently observe
the user's browsing behavior during the day, then use these training examples to
learn such functions and gather, during the middle of the night, pages that are
likely to be of interest to the user. Previous work has focused on learning a user
profile by passively observing the hyperlinks clicked on and those passed over. We
extend this approach by measuring user mouse and scrolling activity in addition to
user browsing activity. We present empirical results that demonstrate our agent can
accurately predict some easily measured aspects of one's use of his or her browser."
}
@inproceedings{shavlik.iui99
,filename = "shavlik.iui99.pdf shavlik.iui99.ps"
,author = "J. Shavlik and S. Calcari and T. Eliassi-Rad and J. Solock"
,title = "An Instructable, Adaptive Interface for Discovering and
Monitoring Information on the {World-Wide Web}"
,booktitle = "Proceedings of the 1999 International Conference on Intelligent
User Interfaces"
,year = 1999
,address = "Redondo Beach, CA"
,abstract =
"We are creating a customizable, intelligent interface to the World-Wide Web
that assists a user in locating specific, current, and relevant information.
The Wisconsin Adaptive Web Assistant (WAWA) is capable of accepting instructions
regarding what type of information the users are seeking and how to go about
looking for it. WAWA compiles these instructions into neural networks, which
means that the system's behavior can be modified via training examples. Users
can create these training examples by rating pages retrieved by WAWA, but more
importantly the system uses techniques from reinforcement learning to internally
create its own examples (users can also later provide additional instructions).
WAWA uses these neural networks to guide its autonomous navigation of the Web,
thereby producing an interface to the Web that users periodically instruct and
which in the background searches the Web for relevant information, including
periodically revisiting pages that change regularly."
}
@inproceedings{allex.ismb97
,filename = "allex.ismb97.ps"
,author = "C. F. Allex and S. F. Baldwin and J. W. Shavlik and F. R. Blattner"
,title = "Increasing Consensus Accuracy in {DNA} Fragment Assemblies
by Incorporating Fluorescent Trace Representations"
,booktitle = "Proceedings, Fifth International Conference on Intelligent
Systems for Molecular Biology"
,year = 1997
,publisher = "AAAI Press"
,pub_addr = "Menlo Park, CA"
,address = "Halkidiki, Greece"
,abstract =
"We present a new method for determining the consensus sequence in DNA fragment
assemblies. The new method, Trace-Evidence, directly incorporates aligned ABI
trace information into consensus calculations via our previously described
representation, Trace-Data Classifications. The new method extracts and sums
evidence indicated by the representation to determine consensus calls. Using
the Trace-Evidence method results in automatically produced consensus sequences
that are more accurate and less ambiguous than those produced with standard
majority-voting methods. Additionally, these improvements are achieved with
less coverage than required by the standard methods -- using Trace-Evidence and
a coverage of only three, error rates are as low as those with a coverage of
over ten sequences."
}
@inproceedings{allex.ismb96
,filename = "allex.ismb96.ps"
,author = "C. F. Allex and S. F. Baldwin and J. W. Shavlik and F. R. Blattner"
,title = "Improving the Quality of Automatic {DNA} Sequence Assembly using Fluorescent Trace-Data Classifications"
,booktitle = "Proceedings, Fourth International Conference on Intelligent Systems for Molecular Biology"
,year = 1996
,pages = "3--14"
,publisher = "AAAI Press"
,pub_addr = "Menlo Park, CA"
,address = "St. Louis, MO"
,abstract =
"Virtually all large-scale sequencing projects use automatic sequence-assembly programs to aid in the determination of DNA sequences. The computer-generated assemblies require substantial hand-editing to transform them into submissions for GenBank. As the size of sequencing projects increases, it becomes essential to improve the quality of the automated assemblies so that this time-consuming hand-editing may be reduced. Current ABI sequencing technology uses base calls made from fluorescently-labeled DNA fragments run on gels. We present a new representation for the fluorescent trace data associated with individual base calls. This representation can be used before, during, and after fragment assembly to improve the quality of assemblies. We demonstrate one such use -- end-trimming of sub-optimal data -- that results in a significant improvement in the quality of subsequent assemblies."
}
@inproceedings{cherkauer.kdd96
,filename = "cherkauer.kdd96.ps"
,author = "K. J. Cherkauer and J. W. Shavlik"
,title = "Growing Simpler Decision Trees to Facilitate Knowledge
Discovery"
,booktitle = "Proceedings, Second International Conference on Knowledge
Discovery and Data Mining"
,year = 1996
,pages = "315--318"
,publisher = "AAAI Press"
,pub_addr = "Menlo Park, CA"
,address = "Portland, OR"
,abstract =
"When using machine learning techniques for knowledge discovery, output
that is comprehensible to a human is as important as predictive accuracy.
We introduce a new algorithm, SET-Gen, that improves the comprehensibility
of decision trees grown by standard C4.5 without reducing accuracy. It does
this by using genetic search to select the set of input features C4.5 is
allowed to use to build its tree. We test SET-Gen on a wide variety of
real-world datasets and show that SET-Gen trees are significantly smaller
and reference significantly fewer features than trees grown by C4.5 without
using SET-Gen. Statistical significance tests show that the accuracies of
SET-Gen's trees are either not distinguishable from or are more accurate
than those of the original C4.5 trees on all ten datasets tested."
}
@inproceedings{opitz.nips96
,author = "D. W. Opitz and J. W. Shavlik"
,title = "Generating Accurate and Diverse Members of a Neural-Network Ensemble"
,booktitle = "Advances in Neural Information Processing Systems"
,volume = 8
,year = 1996
,publisher = "MIT Press"
,address = "Denver, CO"
,pages = "535--543"
,filename = "opitz.nips96.ps"
,abstract= "Neural-network ensembles have been shown to be very accurate classification
techniques. Previous work has shown that an effective ensemble should
consist of networks that are not only highly correct, but ones that make
their errors on different parts of the input space as well. Most existing
techniques, however, only indirectly address the problem of creating such
a set of networks. In this paper we present a technique called ADDEMUP
that uses genetic algorithms to directly search for an accurate and diverse
set of trained networks. ADDEMUP works by first creating an initial
population, then uses genetic operators to continually create new networks,
keeping the set of networks that are as accurate as possible while
disagreeing with each other as much as possible. Experiments on three DNA
problems show that ADDEMUP is able to generate a set of trained networks
that is more accurate than several existing approaches. Experiments also
show that ADDEMUP is able to effectively incorporate prior knowledge, if
available, to improve the quality of its ensemble."
}
Note: see also opitz.mlrgwp95.ps
@inproceedings{cherkauer.nips96
,author = "K. J. Cherkauer and J. W. Shavlik"
,title = "Rapid Quality Estimation of Neural Network Input
Representations"
,booktitle = "Advances in Neural Information Processing Systems"
,volume = 8
,year = 1996
,pages = "45--51"
,publisher = "MIT Press"
,address = "Denver, CO"
,filename = "cherkauer.nips96.ps"
,abstract= "The choice of an input representation for a neural network can have a
profound impact on its accuracy in classifying novel instances. However,
neural networks are typically computationally expensive to train, making it
difficult to test large numbers of alternative representations. This paper
introduces fast quality measures for neural network representations, allowing
one to quickly and accurately estimate which of a collection of possible
representations for a problem is the best. We show that our measures for
ranking representations are more accurate than a previously published
measure, based on experiments with three difficult, real-world pattern
recognition problems."
}
Note: see also cherkauer.ijcai-wkshp95.ps
@inproceedings{craven.nips96
,author = "M. W. Craven and J. W. Shavlik"
,title = "Extracting Tree-Structured Representations of Trained Networks"
,booktitle = "Advances in Neural Information Processing Systems"
,volume = 8
,year = 1996
,pages = "24--30"
,address = "Denver, CO"
,publisher = "MIT Press"
,filename = "craven.nips96.ps"
,abstract= "A significant limitation of neural networks is that the representations
they learn are usually incomprehensible to humans. We present a novel
algorithm, TREPAN, for extracting comprehensible, symbolic representations
from trained neural networks. Our algorithm uses queries to induce a
decision tree that approximates the concept represented by a given network.
Our experiments demonstrate that TREPAN is able to produce decision
trees that maintain a high level of fidelity to their respective networks,
while being comprehensible and accurate. Unlike previous work in this
area, our algorithm is both general in its applicability and scales well
to large networks and problems with high-dimensional input spaces."
}
@inproceedings{jackson.nips96
,author = "J. C. Jackson and M. W. Craven"
,title = "Learning Sparse Perceptrons"
,booktitle = "Advances in Neural Information Processing Systems"
,volume = 8
,year = 1996
,pages = "654--662"
,address = "Denver, CO"
,publisher = "MIT Press"
,filename = "jackson.nips96.ps"
,abstract= "We introduce a new algorithm designed to learn sparse perceptrons
over input representations which include high-order features.
Our algorithm, which is based on a hypothesis-boosting method,
is able to PAC-learn a relatively natural class of target concepts.
Moreover, the algorithm appears to work well in practice: on a set of
three problem domains, the algorithm produces classifiers that utilize
small numbers of features yet exhibit good generalization performance.
Perhaps most importantly, our algorithm generates concept descriptions
that are easy for humans to understand."
}
@inproceedings{shavlik.icnn96
,author = "J. W. Shavlik"
,title = "An Overview of Research at {Wisconsin} on Knowledge-Based Neural Networks"
,booktitle = "Proceedings of the International Conference on Neural Networks"
,year = 1996
,address = "Washington, DC"
,filename = "shavlik.icnn96.ps"
,pages = "65--69"
,abstract= "Recent research at the University of Wisconsin on knowledge-based neural
networks is surveyed. This work has focused on (a) using symbolically
represented background knowledge to improve neural-network learning and
(b) extracting comprehensible symbolic representations from trained networks.
Important open issues are discussed."
}
@inproceedings{maclin.ijcai95
,author = "R. Maclin and J. W. Shavlik"
,title = "Combining the Predictions of Multiple Classifiers:
Using Competitive Learning to Initialize Neural Networks"
,booktitle = "Proceedings of the Fourteenth International Joint
Conference on Artificial Intelligence"
,year = 1995
,address = "Montreal, Canada"
,filename = "maclin.ijcai95.ps"
,pages = "524--530"
,abstract= "The primary goal of inductive learning is to generalize well - that is,
induce a function that accurately produces the correct output for future
inputs. Hansen and Salamon showed that, under certain assumptions,
combining the predictions of several separately trained neural networks
will improve generalization. One of their key assumptions is that the
individual networks should be independent in the errors they produce. In
the standard way of performing backpropagation this assumption may be
violated, because the standard procedure is to initialize network weights
in the region of weight space near the origin. This means that
backpropagation's gradient-descent search may only reach a small subset
of the possible local minima. In this paper we present an approach to
initializing neural networks that uses competitive learning to
intelligently create networks that are originally located far from the
origin of weight space, thereby potentially increasing the set of
reachable local minima. We report experiments on two real-world
datasets where combinations of networks initialized with our method
generalize better than combinations of networks initialized the
traditional way."
}
@inproceedings{craven.ismb95
,author = "M. W. Craven and R. J. Mural and L. J. Hauser and
E. C. Uberbacher"
,title = "Predicting Protein Folding Classes without Overly Relying
on Homology"
,booktitle = "Proceedings of the Third International Conference on
Intelligent Systems for Molecular Biology"
,year = 1995
,address = "Cambridge, England"
,publisher = "AAAI Press"
,pages = "98--106"
,filename = "craven.ismb95.ps"
}
@inproceedings{opitz.mlc94
,author = "D. W. Opitz and J. W. Shavlik"
,title = "Using Genetic Search to Refine Knowledge-Based
Neural Networks"
,booktitle = "Machine Learning: Proceedings of the Eleventh International
Conference"
,year = 1994
,address = "New Brunswick, NJ"
,publisher = "Morgan Kaufmann"
,pages = "208--216"
,filename = "opitz.mlc94.ps"
,abstract= "An ideal inductive-learning algorithm should exploit all available
resources, such as computing power and domain-specific knowledge,
to improve its ability to generalize. Connectionist theory-refinement
systems have proven to be effective at utilizing domain-specific
knowledge; however, most are unable to exploit available computing
power. This weakness occurs because they lack the ability to refine
the topology of the networks they produce, thereby limiting generalization,
especially when given impoverished domain theories. We present
the REGENT algorithm, which uses genetic algorithms to broaden the type
of networks seen during its search. It does this by using (a) the
domain theory to help create an initial population and (b) crossover
and mutation operators specifically designed for knowledge-based
networks. Experiments on three real-world domains indicate that
our new algorithm is able to significantly increase generalization
compared to a standard connectionist theory-refinement system,
as well as our previous algorithm for growing knowledge-based networks."
}
@inproceedings{craven.mlc94
,author = "M. W. Craven and J. W. Shavlik"
,title = "Using Sampling and Queries to Extract Rules from
Trained Neural Networks"
,booktitle = "Proceedings of the Eleventh International Conference on
Machine Learning"
,year = 1994
,pages = "37--45"
,address = "New Brunswick, NJ"
,filename = "craven.mlc94.ps"
,abstract= "Concepts learned by neural networks are difficult to understand because
they are represented using large assemblages of real-valued parameters.
One approach to understanding trained neural networks is to extract
symbolic rules that describe their classification behavior. There are
several existing rule-extraction approaches that operate by searching
for such rules. We present a novel method that casts rule extraction
not as a search problem, but instead as a learning problem.
In addition to learning from training examples, our method exploits
the property that networks can be efficiently queried. We describe
algorithms for extracting both conjunctive and M-of-N rules, and
present experiments that show that our method is more efficient than
conventional search-based approaches."
}
@inproceedings{maclin.aaai94
,author = "R. Maclin and J. W. Shavlik"
,title = "Incorporating Advice into Agents that Learn from
Reinforcements"
,booktitle = "Proceedings of the Twelfth National Conference on
Artificial Intelligence"
,year = 1994
,address = "Seattle, WA"
,pages = "694--699"
,filename = "maclin.aaai94.ps"
,abstract= "Learning from reinforcements is a promising approach for creating intelligent
agents. However, reinforcement learning usually requires a large number of
training episodes. We present an approach that addresses this shortcoming by
allowing a connectionist Q-learner to accept advice given, at any time and in
a natural manner, by an external observer. In our approach, the advice-giver
watches the learner and occasionally makes suggestions, expressed as
instructions in a simple programming language. Based on techniques from
knowledge-based neural networks, these programs are inserted directly into
the agent's utility function. Subsequent reinforcement learning further
integrates and refines the advice. We present empirical evidence that shows
our approach leads to statistically-significant gains in expected reward.
Importantly, the advice improves the expected reward regardless of the stage
of training at which it is given."
}
@inproceedings{opitz.ijcai93
,author = "D. W. Opitz and J. W. Shavlik"
,title = "Heuristically Expanding Knowledge-Based Neural Networks"
,booktitle = "Proceedings of the Thirteenth International Joint
Conference on Artificial Intelligence"
,year = 1993
,month = "September"
,address = "Chamb{\'e}ry, France"
,pages = "1360--1365"
,filename = "opitz.ijcai93.ps"
,abstract= "Knowledge-based neural networks are networks whose topology is determined
by mapping the dependencies of a domain-specific rulebase into a neural
network. However, existing network training methods lack the ability to
add new rules to the (reformulated) rulebases. Thus, on domain theories
that are lacking rules, generalization is poor, and training can
corrupt the original rules, even those that were initially correct.
We present TOPGEN, an extension to the KBANN algorithm, that
heuristically searches for possible expansions of a knowledge-based
neural network, guided by the domain theory, the network, and the training
data. It does this by dynamically adding hidden nodes to the neural
representation of the domain theory, in a manner analogous to adding rules
and conjuncts to the symbolic rulebase. Experiments indicate that our
method is able to heuristically find effective places to add nodes to
the knowledge-base network and verify that new nodes must be added
in an intelligent manner."
}
@inproceedings{craven.ijcai93,
author = "M. W. Craven and J. W. Shavlik",
title = "Learning to Represent Codons: A Challenge Problem for
Constructive Induction",
booktitle = "Proceedings of the Thirteenth International Joint Conference
on Artificial Intelligence",
year = 1993,
month = "September",
pages = "1319--1324",
address = "Chamb{\'e}ry, France",
filename = "craven.ijcai93.ps"
,abstract= "The ability of an inductive learning system to find a good solution to
a given problem is dependent upon the representation used for the
features of the problem. Systems that perform constructive induction
are able to change their representation by constructing new features.
We describe an important, real-world problem - finding genes in DNA -
that we believe offers an interesting challenge to constructive-induction
researchers. We report experiments that demonstrate that: (1) two
different input representations for this task result in significantly
different generalization performance for both neural networks and
decision trees; and (2) both neural and symbolic methods for constructive
induction fail to bridge the gap between these two representations.
We believe that this real-world domain provides an interesting
challenge problem for constructive induction because the relationship
between the two representations is well known, and because the
representational shift involved in constructing the better representation
is not imposing."
}
@inproceedings{cherkauer.ismb93,
author = "K. J. Cherkauer and J. W. Shavlik",
title = "Protein Structure Prediction: Selecting Salient Features
From Large Candidate Pools",
booktitle = "Proceedings of the First International Conference on
Intelligent Systems for Molecular Biology",
year = 1993,
address = "Bethesda, MD",
pages = "74--82",
publisher = "AAAI Press",
filename = "cherkauer.ismb93.ps"
,abstract= "We introduce a parallel approach, ``DT-Select,'' for selecting features
used by inductive learning algorithms to predict protein secondary structure.
DT-Select is able to rapidly choose small, nonredundant feature sets from
pools containing hundreds of thousands of potentially useful features. It
does this by building a decision tree, using features from the pool, that
classifies a set of training examples. The features included in the tree
provide a compact description of the training data and are thus suitable for
use as inputs to other inductive learning algorithms. Empirical experiments
in the protein secondary-structure task, in which sets of complex features
chosen by DT-Select are used to augment a standard artificial neural network
representation, yield surprisingly little performance gain, even though
features are selected from very large feature pools. We discuss some possible
reasons for this result."
}
@inproceedings{craven.mlc93,
author = "M. W. Craven and J. W. Shavlik",
title = "Learning Symbolic Rules Using Artificial Neural Networks",
booktitle = "Proceedings of the Tenth International Conference on
Machine Learning",
year = 1993,
address = "Amherst, MA",
pages = "73--80",
publisher = "Morgan Kaufmann",
filename = "craven.mlc93.ps"
,abstract= "A distinct advantage of symbolic learning algorithms over
artificial neural networks is that typically the concept representations
they form are more easily understood by humans.
One approach to understanding the representations formed by neural
networks is to extract symbolic rules from trained networks.
In this paper we describe and investigate an approach for extracting
rules from networks that uses (1) the NofM extraction algorithm,
and (2) the network training method of soft weight-sharing.
Previously, the NofM algorithm had been successfully applied only
to knowledge-based neural networks. Our experiments demonstrate that our
extracted rules generalize better than rules learned using
the C4.5 system. In addition to being accurate, our extracted rules are
also reasonably comprehensible."
}
@inproceedings{craven.hicss93,
author = "M. W. Craven and J. W. Shavlik",
title = "Learning to Predict Reading Frames in {{E}. coli}
{DNA} Sequences",
booktitle = "Proceedings of the 26th Hawaii International Conference
on System Sciences",
year = 1993,
address = "Wailea, HI",
publisher = "IEEE Computer Society Press",
pages = "773--782",
filename = "craven.hicss93.ps"
,abstract= "Two fundamental problems in analyzing DNA sequences are (1) locating the
regions of a DNA sequence that encode proteins, and (2) determining the
reading frame for each region. We investigate using artificial neural
networks (ANNs) to find coding regions, determine reading frames, and
detect frameshift errors in E. coli DNA sequences. We describe our
adaptation of the approach used by Uberbacher and Mural to identify
coding regions in human DNA, and we compare the performance of ANNs
to several conventional methods for predicting reading frames.
Our experiments demonstrate that ANNs can outperform these conventional
approaches."
}
@inproceedings{towell.aaai92,
author = "G. G. Towell and J. W. Shavlik",
title = "Using Symbolic Learning to Improve Knowledge-Based
Neural Networks",
booktitle = "Proceedings of the Tenth National Conference on
Artificial Intelligence",
pages = "177--182",
year = "1992",
address = "San Jose, CA",
filename = "towell.aaai92.ps"
,abstract= "The previously-reported KBANN system integrates existing knowledge into neural
networks by defining the network topology and setting initial link weights.
Standard neural learning techniques can then be used to train such networks,
thereby refining the information upon which the network is based. However,
standard neural learning techniques are reputed to have difficulty training
networks with multiple layers of hidden units; KBANN commonly creates such
networks. In addition, standard neural learning techniques ignore some of the
information contained in the networks created by KBANN. This paper describes a
symbolic inductive learning algorithm for training such networks that uses
this previously-ignored information and which helps to address the problems of
training ``deep'' networks. Empirical evidence shows that this method improves
not only learning speed, but also the ability of networks to generalize
correctly to testing examples."
}
@inproceedings{maclin.aaai92,
author = "R. Maclin and J. W. Shavlik",
title = "Using Knowledge-Based Neural Networks to Improve Algorithms:
Refining the Chou-Fasman Algorithm for Protein Folding",
booktitle = "Proceedings of the Tenth National Conference on
Artificial Intelligence",
pages = "165--170",
year = "1992",
address = "San Jose, CA",
filename = "maclin.aaai92.ps"
,abstract= "We describe a method for using machine learning to refine algorithms
represented as generalized finite-state automata. The knowledge in an
automaton is translated into an artificial neural network, and then
refined with backpropagation on a set of examples. Our technique for
translating an automaton into a network extends KBANN, a system that
translates a set of propositional rules into a corresponding neural
network. The extended system, FSKBANN, allows one to refine the large
class of algorithms that can be represented as state-based processes.
As a test, we use FSKBANN to refine the Chou-Fasman algorithm, a method
for predicting how globular proteins fold. Empirical evidence shows the
refined algorithm FSKBANN produces is statistically significantly more
accurate than both the original Chou-Fasman algorithm and a neural
network trained using the standard approach."
}
@inproceedings{scott.nips4,
author = "G. M. Scott and J. W. Shavlik and W. H. Ray",
title = "Refining {PID} Controllers using Neural Networks",
booktitle = "Advances in Neural Information Processing Systems",
year = "1992",
volume = "4",
pages = "555--562",
editor = "J. Moody and S. Hanson and R. Lippmann",
address = "Denver, CO",
publisher = "Morgan Kaufmann",
filename = "scott.nips4.ps"
,abstract= "The KBANN (Knowledge-Based Artificial Neural Networks) approach uses neural
networks to refine knowledge that can be written in the form of simple
propositional rules. We extend this idea further by presenting the MANNCON
(Multivariable Artificial Neural Network Control) algorithm by which the
mathematical equations governing a PID (Proportional-Integral-Derivative)
controller determine the topology and initial weights of a network, which is
further trained using backpropagation. We apply this method to the task of
controlling the outflow and temperature of a water tank, producing
statistically-significant gains in accuracy over both a standard neural
network approach and a non-learning PID controller. Furthermore, using the
PID knowledge to initialize the weights of the network produces statistically
less variation in testset accuracy when compared to networks initialized with
small random numbers."
}
Note: see also paper in Neural Computation 4:5
@inproceedings{towell.nips4,
author = "G. G. Towell and J. W. Shavlik",
title = "Interpretation of Artificial Neural Networks:
Mapping Knowledge-Based Neural Networks into Rules",
booktitle = "Advances in Neural Information Processing Systems",
year = 1992,
volume = 4,
pages = "977--984",
editor = "J. Moody and S. Hanson and R. Lippmann",
address = "Denver, CO",
publisher = "Morgan Kaufmann",
filename = "towell.nips4.ps"
,abstract= "We propose and empirically evaluate a method for the extraction of
expert-comprehensible rules from trained neural networks. Our method operates
in the context of a three-step process for learning that uses rule-based
domain knowledge in combination with neural networks. Empirical tests using
real-world problems from molecular biology show that the rules our method
extracts from trained neural networks: closely reproduce the accuracy of the
network from which they came, are superior to the rules derived by a learning
system that directly refines symbolic rules, and are expert-comprehensible."
}
Note: see also towell.mlj93
@inproceedings{noordewier.nips3,
author = "M. O. Noordewier and G. G. Towell and J. W. Shavlik",
title = "Training Knowledge-Based Neural Networks to Recognize Genes
in {DNA} Sequences",
booktitle = "Advances in Neural Information Processing Systems",
year = 1991,
pages = "530--536",
volume = 3,
editor = "R. Lippmann and J. Moody and D. Touretzky",
publisher = "Morgan Kaufmann",
address = "Denver, CO",
filename = "noordewier.nips3.ps"
,abstract= "We describe the application of a hybrid symbolic/connectionist machine
learning algorithm to the task of recognizing important genetic sequences.
The symbolic portion of the KBANN system utilizes inference rules that provide
a roughly-correct method for recognizing a class of DNA sequences known as
eukaryotic splice-junctions. We then map this ``domain theory'' into a neural
network and provide training examples. Using the samples, the neural
network's learning algorithm adjusts the domain theory so that it properly
classifies these DNA sequences. Our procedure constitutes a general method
for incorporating preexisting knowledge into artificial neural networks. We
present an experiment in molecular genetics that demonstrates the value of
doing so."
}
Note: see also towell.aij94
@inproceedings{towell.aaai90,
author = "G. G. Towell and J. W. Shavlik and M. O. Noordewier",
title = "Refinement of Approximate Domain Theories by Knowledge-Based
Neural Networks",
booktitle = "Proceedings of the Eighth National Conference on
Artificial Intelligence",
year = "1990",
pages = "861--866",
address = "Boston, MA",
filename = "towell.aaai90.ps"
,abstract= "Standard algorithms for explanation-based learning require complete and
correct knowledge bases. The KBANN system relaxes this constraint through the
use of empirical learning methods to refine approximately correct knowledge.
This knowledge is used to determine the structure of an artificial neural
network and the weights on its links, thereby making the knowledge accessible
for modification by neural learning. KBANN is evaluated by empirical tests in
the domain of molecular biology. Networks created by KBANN are shown to be
superior, in terms of their ability to correctly classify unseen examples, to
randomly initialized neural networks, decision trees, ``nearest neighbor''
matching, and standard techniques reported in the biological literature. In
addition, KBANN's networks improve the initial knowledge in biologically
interesting ways."
}
Note: see also towell.aij94.ps
---------------
WORKSHOP PAPERS
---------------
@inproceedings{goecks.ijcai-wkshp99
,filename = "goecks_IJCAI_ML_IF_workshop99.pdf"
,author = "J. Goecks and J. W. Shavlik"
,title = "Automatically Labeling Web Pages Based on Normal User Actions"
,booktitle = "Proceedings of the IJCAI-99 Workshop on Machine Learning for Information Filtering"
,year = 1999
,publisher = ""
,pub_addr = ""
,wkshp_date = "Jul 31--Aug 6, 1999"
,conf_date = "Jul 31--Aug 6, 1999"
,address = "Stockholm, Sweden"
,workshop = ""
,abstract =
"For agents attempting to learn a user's interests, the cost of obtaining labeled
training instances is prohibitive because the user must directly label each training
instance, and few users are willing to do so. We present an approach that
circumvents the need for human-labeled pages. Instead, we learn 'surrogate' tasks
where the desired output is easily measured, such as the number of hyperlinks clicked
on a page or the amount of scrolling performed. Our assumption is that these outputs
will highly correlate with the user's interests. In other words, by unobtrusively
'observing' the user's behavior we are able to learn functions of value. For
example, an agent could silently observe the user's browser behavior during the day,
then use these training examples to learn such functions and gather, during the
middle of the night, pages that are likely to be of interest to the user. Previous
work has focused on learning a user profile by passively observing the hyperlinks
clicked on and those passed over. We extend this approach by measuring user mouse
and scrolling activity in addition to user browsing activity. We present empirical
results that demonstrate our agent can accurately predict some easily measured
aspects of one's use of his or her browser."
}
@inproceedings{shavlik.aaai-wkshp98
,filename = "shavlik.aaai-wkshp98.pdf shavlik.aaai-wkshp98.ps"
,author = "J. W. Shavlik and T. Eliassi-Rad"
,title = "Intelligent Agents for Web-Based Tasks: An Advice-Taking Approach"
,booktitle = "Working Notes of the AAAI/ICML-98 Workshop on Learning for Text Categorization"
,year = 1998
,publisher = "AAAI Press"
,pub_addr = "Menlo Park, CA"
,wkshp_date = "Jul 26--30, 1998"
,conf_date = "Jul 26--30, 1998"
,address = "Madison, WI"
,workshop = ""
,abstract =
"We present and evaluate an implemented system with which to rapidly and easily build intelligent
software agents for Web-based tasks. Our design is centered around two basic functions:
ScoreThisLink and ScoreThisPage. If given highly accurate such functions, standard heuristic
search would lead to efficient retrieval of useful information. Our approach allows users to tailor
our system's behavior by providing approximate advice about the above functions. This advice is
mapped into neural network implementations of the two functions. Subsequent reinforcements
from the Web (e.g., dead links) and any ratings of retrieved pages that the user wishes to provide
are, respectively, used to refine the link- and page-scoring functions. Hence, our architecture
provides an appealing middle ground between non-adaptive agent programming languages and
systems that solely learn user preferences from the user's ratings of pages. We describe our
internal representation of Web pages, the major predicates in our advice language, how advice is
mapped into neural networks, and the mechanisms for refining advice based on subsequent
feedback. We also present a case study where we provide some simple advice and specialize our
general-purpose system into a ``home-page finder''. An empirical study demonstrates that our
approach leads to a more effective home-page finder than that of a leading commercial Web
search site."
}
@inproceedings{shavlik.conald-wkshp98
,filename = "shavlik.conald-wkshp98.pdf shavlik.conald-wkshp98.ps"
,author = "J. W. Shavlik and T. Eliassi-Rad"
,title = "Building Intelligent Agents for Web-Based Tasks: A Theory-Refinement Approach"
,booktitle = "Presented at the Conf on Automated Learning and Discovery Workshop on Learning
from Text and the Web"
,year = 1998
,publisher = ""
,pub_addr = ""
,wkshp_date = "Jun 11--13, 1998"
,conf_date = "Jun 11--13, 1998"
,address = "Pittsburgh, PA"
,workshop = ""
,abstract =
"We present and evaluate an infrastructure with which to rapidly and easily build intelligent
software agents for Web-based tasks. Our design is centered around two basic functions:
ScoreThisLink and ScoreThisPage. If given highly accurate such functions, standard heuristic
search would lead to efficient retrieval of useful information. Our approach allows users to tailor
our system's behavior by providing approximate advice about the above functions. This advice is
mapped into neural network implementations of the two functions. Subsequent reinforcements
from the Web (e.g., dead links) and any ratings of retrieved pages that the user wishes to provide
are, respectively, used to refine the link- and page-scoring functions. Hence, our agent
architecture provides an appealing middle ground between non-adaptive ``agent'' programming
languages and systems that solely learn user preferences from the user's ratings of pages. We
present a case study where we provide some simple advice and specialize our general-purpose
system into a ``home-page finder''. An empirical study demonstrates that our approach leads to a
more effective home-page finder than that of a leading commercial Web search engine."
}
@inproceedings{cherkauer.aaai-wkshp96
,filename = "cherkauer.aaai-wkshp96.ps"
,author = "K. J. Cherkauer"
,title = "Human Expert-Level Performance on a Scientific Image
Analysis Task by a System Using Combined Artificial Neural
Networks"
,booktitle = "Working Notes, Integrating Multiple Learned Models for
Improving and Scaling Machine Learning Algorithms Wkshp,
13th Nat Conf on Artificial Intelligence"
,year = 1996
,pages = "15--21"
,publisher = "AAAI Press"
,pub_addr = "Menlo Park, CA"
,wkshp_date = "Aug 4--5, 1996"
,conf_date = "Aug 4--8, 1996"
,address = "Portland, OR"
,workshop = ""
,abstract =
"This paper presents the Plannett system, which combines artificial
neural networks to achieve expert-level accuracy on the difficult
scientific task of recognizing volcanos in radar images of the surface
of the planet Venus. Plannett uses ANNs that vary along two dimensions:
the set of input features used to train and the number of hidden units.
The ANNs are combined simply by averaging their output activations. When
Plannett is used as the classification module of a three-stage image
analysis system called JARtool, the end-to-end accuracy (sensitivity and
specificity) is as good as that of a human planetary geologist on a
four-image test suite. JARtool-Plannett also achieves the best
algorithmic accuracy on these images to date."
}
@inproceedings{craven.ijcai-wkshp95
,author = "M. W. Craven and J. W. Shavlik"
,title = "Extracting Comprehensible Concept Representations from
Trained Neural Networks"
,booktitle = "Presented at the IJCAI Workshop on Comprehensibility in
Machine Learning"
,year = 1995
,address = "Montreal, Quebec, Canada"
,filename = "craven.ijcai-wkshp95.ps"
,workshop = ""
,abstract= "Although they are applicable to a wide array of problems, and have
demonstrated good performance on a number of difficult, real-world
tasks, neural networks are not usually applied to problems in which
comprehensibility of the acquired concepts is important. The concept
representations formed by neural networks are hard to understand
because they typically involve distributed, nonlinear relationships
encoded by a large number of real-valued parameters. To address this
limitation, we have been developing algorithms for extracting ``symbolic''
concept representations from trained neural networks. We first discuss
why it is important to be able to understand the concept representations
formed by neural networks. We then briefly describe our approach and
discuss a number of issues pertaining to comprehensibility that have
arisen in our work. Finally, we discuss choices that we have made in
our research to date, and open research issues that we have not yet
addressed."
}
@inproceedings{cherkauer.ijcai-wkshp95
,author = "K. J. Cherkauer and J. W. Shavlik"
,title = "Rapidly Estimating the Quality of Input Representations for
Neural Networks"
,booktitle = "Working Notes of the IJCAI-95 Workshop on Data Engineering
for Inductive Learning, Fourteenth International Joint
Conference on Artificial Intelligence"
,year = 1995
,address = "Montreal, Quebec, Canada"
,filename = "cherkauer.ijcai-wkshp95.ps"
,workshop = ""
,abstract= "The choice of an input representation for machine learning can have a
profound impact on the accuracy of the learned model in classifying novel
instances. A reliable method of rapidly estimating the value of a
representation, independent of the learner, would be a powerful tool in the
search for better representations. We introduce a fast representation-
quality measure that is more accurate than Rendell and Ragavan's blurring
metric in rank ordering input representations for neural networks on two
difficult, real-world datasets. This work constitutes a step forward both
in representation quality measures and in our understanding of the
characteristics that engender good representations."
}
Note: see also cherkauer.nips96.ps
@inproceedings{opitz.isiknh94
,author = "D. W. Opitz and J. W. Shavlik"
,title = "Genetically Refining Topologies of Knowledge-Based
Neural Networks"
,booktitle = "International Symposium on Integrating Knowledge and
Neural Heuristics"
,year = 1994
,address = "Pensacola, FL"
,pages = "57--66"
,filename = "opitz.isiknh94.ps"
,workshop = ""
,abstract= "Traditional approaches to connectionist theory refinement map the
dependencies of a domain-specific rulebase into a neural network,
then refine these reformulated rules using neural learning. These
approaches have proven to be effective at classifying previously unseen
examples; however, most of these approaches suffer in that they are
unable to refine the topology of the networks they produce. Thus, when
given an ``impoverished'' domain theory, they generalize poorly. A recently
published improvement to these approaches, the TopGen algorithm, addressed
this limitation by heuristically searching expansions to the knowledge-based
networks produced by these algorithms. We show, however, that TopGen's
search is too restricted. In response, we present the REGENT algorithm,
which uses genetic algorithms to broaden the type of networks seen during
its search. It does this by using (a) the domain theory to help create
an initial population and (b) crossover and mutation operators specifically
designed for knowledge-based networks. Experiments on three real-world
domains indicate that our new algorithm is able to significantly increase
generalization when compared to both TopGen and a standard approach that
does not alter its knowledge-based network's topology."
}
@inproceedings{maclin.mlc91,
author = "R. Maclin and J. W. Shavlik",
title = "Refining Domain Theories Expressed as Finite-State Automata",
booktitle = "Proceedings of the Eighth International Machine Learning
Workshop",
year = "1991",
pages = "524--528",
address = "Evanston, IL",
publisher = "Morgan Kaufmann",
filename = "maclin.mlc91.ps"
,workshop = ""
,abstract= "The KBANN system uses neural networks to refine domain theories.
Currently, domain knowledge in KBANN is expressed as non-recursive,
propositional rules. We extend KBANN to domain theories expressed as
finite-state automata. We apply finite-state KBANN to the task of
predicting how proteins fold, producing a small but statistically significant
gain in accuracy over both a standard neural network approach and a
non-learning algorithm from the biological literature. Our method shows
promise at solving this and other real-world problems that can be described in
terms of state-dependent decisions."
}
Note: this work has been superseded by maclin.aaai92
@inproceedings{towell.mlc91,
author = "G. G. Towell and M. W. Craven and J. W. Shavlik",
title = "Constructive Induction in Knowledge-Based Neural Networks",
booktitle = "Proceedings of the Eighth International Machine
Learning Workshop",
year = 1991,
pages = "213--217",
address = "Evanston, IL",
publisher = "Morgan Kaufmann",
filename = "towell.mlc91.ps",
workshop = ""
,abstract= "Artificial neural networks have proven to be a successful, general method for
inductive learning from examples. However, they have not often been viewed in
terms of constructive induction. We describe a method for using a
knowledge-based neural network of the kind created by the KBANN algorithm as
the basis of a system for constructive induction. After training, we extract
two types of rules from a network: modified versions of the rules initially
provided to the knowledge-based neural network, and rules which describe newly
constructed features. Our experiments show that the extracted rules are more
accurate, at classifying novel examples, than the trained network from
which the rules are extracted."
}
Note: this work has been superseded by towell.nips4 and towell.mlj93.
@inproceedings{shavlik.cbr91,
author = "J. W. Shavlik",
title = "Finding Genes by Case-Based Reasoning in the Presence of
Noisy Case Boundaries",
booktitle = "Proceedings of the DARPA Case-Based Reasoning Workshop",
year = "1991",
pages = "327--338",
publisher = "Morgan Kaufmann",
filename = "shavlik.cbr91.ps"
,workshop = ""
,abstract= "Effectively using previous cases requires that a reasoner first match, in some
fashion, the current problem against a large library of stored cases. One
largely unaddressed task in case-based reasoning is the process of parsing
continuous input into discrete cases. If this parsing is not done accurately,
the relevant previous cases may not be found and the advantages of case-based
problem solving will be lost. Parsing the data into cases is further
complicated when the input data is noisy. This paper presents an approach to
applying the case-based paradigm in the presence of noisy case boundaries.
The approach has been fully implemented and applied in the domain of molecular
biology; specifically, a successful case-based approach to gene finding is
described. An empirical study demonstrates that the method is robust even
with high error rates. This system is being used in conjunction with a Human
Genome project in the Wisconsin Department of Genetics that is sequencing the
DNA of the bacterium E. coli."
}
--------------
JOURNAL PAPERS
--------------
@article{eliassi-rad.umuai01
,author = "T. Eliassi-Rad and J. Shavlik"
,title = "A System for Building Intelligent Agents that Learn to
Retrieve and Extract Information"
,journal = "International Journal on User Modeling and User-Adapted
Interaction, Special Issue on User Modeling and Intelligent Agents"
,note = "(To appear)"
,year = "2001"
,filename = "eliassi-rad.umuai01.pdf eliassi-rad.umuai01.ps"
,abstract = "We present a system for rapidly and easily building instructable and
self-adaptive software agents that retrieve and extract information. Our Wisconsin
Adaptive Web Assistant (WAWA) constructs intelligent agents by accepting user preferences
in the form of instructions. These user-provided instructions are compiled into neural
networks that are responsible for the adaptive capabilities of an intelligent agent. The
agent's neural networks are modified via user-provided and system-constructed training
examples. Users can create training examples by rating Web pages (or documents), but
more importantly WAWA's agents uses techniques from reinforcement learning to
internally create their own examples. Users can also provide additional instruction
throughout the life of an agent. Our experimental evaluations on a home-page finder agent
and a seminar-announcement extractor agent illustrate the value of using instructable
and adaptive agents for retrieving and extracting information."
}
@article{allex.bioinformatics
,author = "C. F. Allex and J. W. Shavlik and F. R. Blattner"
,title = "Neural Network Input Representations that Produce Accurate
Consensus Sequences from DNA Fragment Assemblies"
,journal = "Bioinformatics"
,year = 1999
,filename = "allex.bioinformatics.ps"
,volume = 15
,pages = "723--728"
,abstract = " Motivation: Given inputs extracted from an aligned column
of DNA bases and the underlying Perkin Elmer Applied Biosystems (ABI)
fluorescent traces, our goal is to train a neural network to correctly
determine the consensus base for the column. Choosing an appropriate
network input representation is critical to success in this task. We
empirically compare five representations; one uses only base calls and
the others include trace information.
Results: We attained the most accurate results from networks that
incorporate trace information into their input representations. Based on
estimates derived from using 10-fold cross-validation, the best network
topology produces consensus accuracies ranging from 99.26% to over 99.98%
for coverages from two to six aligned sequences. With a coverage of six,
it makes only three errors in 20,000 consensus calls. In contrast, the
network that only uses base calls in its input representation has over
double that error rate - eight errors in 20,000 consensus calls."
}
@article{opitz.jair97
,author = "D. W. Opitz and J. W. Shavlik"
,title = "Connectionist Theory Refinement: Genetically Searching the Space of Network Topologies"
,journal = "Journal of Artificial Intelligence Research"
,year = 1997
,filename = "opitz.jair97.ps"
,volume = 6
,pages = "177--209"
,abstract= "An algorithm that learns from a set of examples should
ideally be able to exploit the available resources of (a) abundant
computing power and (b) domain-specific knowledge to improve its
ability to generalize. Connectionist theory-refinement systems, which
use background knowledge to select a neural network's topology and
initial weights, have proven to be effective at exploiting domain-
specific knowledge; however, most do not exploit available computing
power. This weakness occurs because they lack the ability to refine
the topology of the neural networks they produce, thereby limiting
generalization, especially when given impoverished domain theories.
We present the REGENT algorithm which uses (a) domain-specific
knowledge to help create an initial population of knowledge-based
neural networks and (b) genetic operators of crossover and mutation
(specifically designed for knowledge-based networks) to continually
search for better network topologies. Experiments on three real-world
domains indicate that our new algorithm is able to significantly
increase generalization compared to a standard connectionist theory-
refinement system, as well as our previous algorithm for growing
knowledge-based networks. "
}
@article{craven.fgcs97,
  author   = {M. W. Craven and J. W. Shavlik},
  title    = {Using Neural Networks for Data Mining},
  journal  = {Future Generation Computer Systems},
  year     = 1997,
  filename = {craven.fgcs97.ps},
  note     = {(in press)},
  abstract = {Neural networks have been successfully applied in a wide range
of supervised and unsupervised learning applications. Neural-network
methods are not commonly used for data-mining tasks, however, because
they often produce incomprehensible models and require long training
times. In this article, we describe neural-network learning algorithms
that are able to produce comprehensible models, and that do not require
excessive training times. Specifically, we discuss two classes of
approaches for data mining with neural networks. The first type of
approach, often called rule extraction, involves extracting symbolic
models from trained neural networks. The second approach is to directly
learn simple, easy-to-understand networks. We argue that, given the
current state of the art, neural-network methods deserve a place in the
tool boxes of data-mining specialists.}
}
@article{craven.ijns97,
  author   = {M. W. Craven and J. W. Shavlik},
  title    = {Understanding Time-Series Networks: A Case Study in
Rule Extraction},
  journal  = {International Journal of Neural Systems},
  year     = 1997,
  filename = {craven.ijns97.ps},
  note     = {(in press)},
  abstract = {A significant limitation of neural networks is that the
representations they learn are usually incomprehensible to humans.
We have developed an algorithm, called TREPAN, for extracting
comprehensible, symbolic representations from trained neural networks.
Given a trained network, TREPAN produces a decision tree that
approximates the concept represented by the network. In this article,
we discuss the application of TREPAN to a neural network trained on a
noisy time series task: predicting the Dollar-Mark exchange rate.
We present experiments that show that TREPAN is able to extract a
decision tree from this network that equals the network in terms of
predictive accuracy, yet provides a comprehensible concept representation.
Moreover, our experiments indicate that decision trees induced directly
from the training data using conventional algorithms do not match the
accuracy nor the comprehensibility of the tree extracted by TREPAN.}
}
@article{opitz.consci96
,author = "D. W. Opitz and J. W. Shavlik"
,title = "Actively Searching for an Effective Neural-Network Ensemble"
,journal = "Connection Science"
,year = 1996
,volume = 8
,number = "3/4"
,pages = "337--353"
,filename = "opitz.consci96.ps"
,abstract= " A neural-network ensemble is a very successful technique
where the outputs of a set of separately trained neural network are
combined to form one unified prediction. An effective ensemble should
consist of a set of networks that are not only highly correct, but ones
that make their errors on different parts of the input space as well;
however, most existing techniques only indirectly address the problem of
creating such a set. We present an algorithm called ADDEMUP that uses genetic
algorithms to explicitly search for a highly diverse set of accurate trained
networks. ADDEMUP works by first creating an initial population, then uses
genetic operators to continually create new networks, keeping the set of
networks that are highly accurate while disagreeing with each other as much
as possible. Experiments on four real-world domains show that ADDEMUP is
able to generate a set of trained networks that is more accurate than several
existing ensemble approaches. Experiments also show that ADDEMUP is able
to effectively incorporate prior knowledge, if available, to improve the
quality of its ensemble."
}
@article{cherkauer.informatica95
,author = "K. J. Cherkauer"
,title = "Stuffing Mind into Computer: Knowledge and Learning for
Intelligent Systems"
,journal = "Informatica"
,year = 1995
,volume = 19
,number = 4
,pages = "501--511"
,month = nov
,filename = "cherkauer.informatica95.ps"
,abstract= "The task of somehow putting mind into a computer is one that has been
pursued by artificial intelligence researchers for decades, and though
we are getting closer, we have not caught it yet. Mind is an incredibly
complex and poorly understood thing, but we should not let this stop us
from continuing to strive toward the goal of intelligent computers. Two
issues that are essential to this endeavor are knowledge and learning.
These form the basis of human intelligence, and most people believe they
are fundamental to achieving similar intelligence in computers. This paper
explores issues surrounding knowledge acquisition and learning in
intelligent artificial systems in light of both current philosophies of
mind and the present state of artificial intelligence research. Its scope
ranges from the mundane to the (almost) outlandish, with the goal of
stimulating serious thought about where we are, where we would like to go,
and how to get there in our attempts to render an intelligence in silicon."
}
@article{maclin.mlj96
,author = "R. Maclin and J. W. Shavlik"
,title = "Creating Advice-Taking Reinforcement Learners"
,journal = "Machine Learning"
,volume = 22
,pages = "251--281"
,year = 1996
,filename = "maclin.mlj96.ps"
,abstract= "Learning from reinforcements is a promising approach for creating
intelligent agents. However, reinforcement learning usually requires a
large number of training episodes. We present and evaluate a design that
addresses this shortcoming by allowing a connectionist Q-learner to accept
advice given, at any time and in a natural manner, by an external observer.
In our approach, the advice-giver watches the learner and occasionally makes
suggestions, expressed as instructions in a simple imperative programming
language. Based on techniques from knowledge-based neural networks, we
insert these programs directly into the agent's utility function.
Subsequent reinforcement learning further integrates and refines the advice.
We present empirical evidence that investigates several aspects of our
approach and show that, given good advice, a learner can achieve
statistically significant gains in expected reward. A second experiment
shows that advice improves the expected reward regardless of the stage of
training at which it is given, while another study demonstrates that
subsequent advice can result in further gains in reward. Finally, we
present experimental results that indicate our method is more powerful than
a naive technique for making use of advice."
}
@article{opitz.kbs95
,author = "D. W. Opitz and J. W. Shavlik"
,title = "Dynamically Adding Symbolically Meaningful Nodes to
Knowledge-Based Neural Networks"
,journal = "Knowledge-Based Systems"
,volume = 8
,number = 6
,pages = "301--311"
,year = 1995
,filename = "opitz.kbs95.ps"
,abstract= "Traditional connectionist theory-refinement systems map the dependencies
of a domain-specific rule base into a neural network, and then refine
this network using neural learning techniques. Most of these systems,
however, lack the ability to refine their network's topology and are thus
unable to add new rules to the (reformulated) rule base. Therefore, on
domain theories that are lacking rules, generalization is poor, and
training can corrupt the original rules, even those that were initially
correct. We present TopGen, an extension to the KBANN algorithm, that
heuristically searches for possible expansions to KBANN's network.
TopGen does this by dynamically adding hidden nodes to the neural
representation of the domain theory, in a manner analogous to adding
rules and conjuncts to the symbolic rule base. Experiments indicate
that our method is able to heuristically find effective places to add
nodes to the knowledge bases of four real-world problems, as well as an
artificial chess domain. The experiments also verify that new nodes must
be added in an intelligent manner. Our algorithm showed statistically
significant improvements over KBANN in all five domains."
}
@article{towell.aij94,
author = "G. G. Towell and J. W. Shavlik",
title = "Knowledge-Based Artificial Neural Networks",
journal = "Artificial Intelligence",
year = 1994,
volume = 70,
number = "1/2",
pages = "119--165",
filename = "towell.aij94.ps"
,abstract= "Explanation-based and empirical learning are two largely complementary methods
of machine learning. These approaches both have serious problems which
preclude their being a general-purpose learning method. However, a ``hybrid''
learning method that combines explanation-based with empirical learning may be
able to use the strengths of one learning method to address the weaknesses of
the other, thereby resulting in a system superior to either approach in
isolation. KBANN (Knowledge-Based Artificial Neural Networks) is a hybrid
learning system built on top of connectionist learning techniques. It maps
problem-specific ``domain theories'', represented in propositional logic, into
neural networks and then refines this reformulated knowledge using
backpropagation. KBANN is evaluated by extensive empirical tests in the
domain of molecular biology. Among other results, these tests show that the
networks created by KBANN are superior, in terms of their ability to correctly
classify unseen examples, to a wide variety of learning systems, as well as
techniques proposed by biologists."
}
@article{towell.mlj93,
author = "G. G. Towell and J. W. Shavlik",
title = "The Extraction of Refined Rules from Knowledge-Based
Neural Networks",
journal = "Machine Learning",
year = 1993,
volume = 13,
number = 1,
pages = "71--101",
filename = "towell.mlj93.ps"
,abstract= "Neural networks, despite their empirically-proven abilities, have been little
used for the refinement of existing knowledge because this task requires a
three-step process. First, knowledge must be inserted into a neural network.
Second, the network must be refined. Third, the refined knowledge must be
extracted from the network. We have previously described a method for the
first step of this process. Standard neural learning techniques can accomplish
the second step. In this paper, we propose and empirically evaluate a method
for the final, and possibly most difficult, step. Our method efficiently
extracts symbolic rules from trained neural networks. The four major results
of empirical tests of this method are that the extracted rules:
(1) closely reproduce the accuracy
of the network from which they are extracted;
(2) are superior to the rules produced by
methods that directly refine symbolic rules;
(3) are superior to those produced by
previous techniques for extracting rules from trained neural networks;
(4) are ``human comprehensible.''
Thus, this method demonstrates that neural networks can be used to
effectively refine symbolic knowledge. Moreover, the rule-extraction
technique developed herein contributes to the understanding of how symbolic
and connectionist approaches to artificial intelligence can be
profitably integrated."
}
@article{craven.aitools92,
author = "M. W. Craven and J. W. Shavlik",
title = "Visualizing Learning and Computation in Artificial
Neural Networks",
year = 1992,
pages = "399--425",
journal = "International Journal on Artificial Intelligence Tools",
volume = "1",
number = "3",
filename = "craven.ijait93.ps"
,abstract= "Scientific visualization is the process of using graphical images to form
succinct and lucid representations of numerical data. Visualization has
proven to be a useful method for understanding both learning and
computation in artificial neural networks. While providing a
powerful and general technique for inductive learning, artificial
neural networks are difficult to comprehend because they form
representations that are encoded by a large number of real-valued
parameters. By viewing these parameters pictorially, a better
understanding can be gained of how a network maps inputs into outputs.
In this article, we survey a number of visualization techniques for
understanding the learning and decision-making processes of neural
networks. We also describe our work in {\em knowledge-based neural
networks} and the visualization techniques we have used to understand
these networks. In a knowledge-based neural network, the topology and
initial weight values of the network are determined by an
approximately-correct set of inference rules. Knowledge-based
networks are easier to interpret than conventional networks because
of the synergy between visualization methods and the relation of the
networks to symbolic rules."
}
@article{maclin.mlj93
,author = "R. Maclin and J. W. Shavlik"
,title = "Using Knowledge-based Neural Networks To Improve Algorithms:
Refining the {Chou-Fasman} Algorithm for Protein Folding"
,journal = "Machine Learning"
,volume = 11
,pages = "195--215"
,year = 1993
,filename = "maclin.mlj93.ps"
,abstract= "This paper describes a connectionist method for refining algorithms
represented as generalized finite-state automata. The method translates the
rule-like knowledge in an automaton into a corresponding artificial neural
network, and then refines the reformulated automaton by applying
backpropagation to a set of examples. This technique for translating an
automaton into a network extends the KBANN algorithm, a system that
translates a set of propositional rules into a corresponding neural network.
The extended system, FSKBANN, allows one to refine the large class of
algorithms that can be represented as state-based processes. As a test,
FSKBANN is used to improve the Chou-Fasman algorithm, a method for
predicting how globular proteins fold. Empirical evidence shows that the
multistrategy approach of FSKBANN leads to a statistically significantly
more accurate solution than both the original Chou-Fasman algorithm and a
neural network trained using the standard approach. Extensive statistics
report the types of errors made by the Chou-Fasman algorithm, the standard
neural network, and by the FSKBANN network."
}
@article{craven.mlrgwp93,
  author   = {M. W. Craven and J. W. Shavlik},
  title    = {Machine Learning Approaches to Gene Recognition},
  journal  = {IEEE Expert},
  volume   = 9,
  number   = 2,
  year     = {1993},
  note     = {(The on-line file is a variant of the journal article.)},
  filename = {craven.mlrgwp93.ps},
  abstract = {Currently, a major computational problem in molecular biology is to
identify genes in uncharacterized DNA sequences. The variation,
complexity, and incompletely-understood nature of genes make it
impractical to hand-code algorithms to recognize them. Machine learning
methods - which are able to form their own descriptions of genetic
concepts - offer a promising approach to this problem. This article
surveys machine-learning approaches to identifying genes in DNA.
We discuss two broad classes of gene-recognition approaches: search by
signal and search by content. For both classes, we define the specific
tasks that they address, describe how these tasks have been framed as
machine-learning problems, and survey some of the machine-learning
algorithms that have been applied to them.}
}
@article{shavlik.tr92,
author = "J. W. Shavlik",
title = "A Framework for Combining Symbolic and Neural Learning",
journal = "Machine Learning",
volume = 14,
number = 3,
year = "1994",
pages = "321--331",
note = "(The on-line file is an extended version of the journal article.)",
filename = "shavlik.tr92.ps"
,abstract= "This article describes an approach to combining symbolic and connectionist
approaches to machine learning. A three-stage framework is presented and the
research of several groups is reviewed with respect to this framework. The
first stage involves the insertion of symbolic knowledge into neural networks,
the second addresses the refinement of this prior knowledge in its neural
representation, while the third concerns the extraction of the refined
symbolic knowledge. Experimental results and open research issues are
discussed."
}
-------------
BOOK CHAPTERS
-------------
@incollection{craven.mlrgwp92,
  author    = {M. W. Craven and J. W. Shavlik},
  title     = {Investigating the Value of a Good Input Representation},
  booktitle = {Computational Learning Theory and Natural Learning Systems, Volume III},
  editor    = {T. Petsche and S. Hanson and J. Shavlik},
  year      = 1995,
  filename  = {craven.mlrgwp92.ps},
  publisher = {{MIT} Press}
}
@incollection{opitz.clnl95
,author = "D. W. Opitz and J. W. Shavlik"
,title = "Using Heuristic Search to Expand Knowledge-Based
Neural Networks"
,booktitle = "Computational Learning Theory and Natural Learning Systems,
Volume III"
,editor = "T. Petsche and S. Hanson and J. Shavlik"
,year = 1995
,pages = "3--19"
,filename = "opitz.clnl95.ps"
,publisher = "{MIT} Press"
}
Note: this work has been superseded by opitz.tr94.ps
@incollection{maclin.mlrgwp91
,author = "R. Maclin and J. W. Shavlik"
,title = "Refining Algorithms with Knowledge-Based Neural Networks:
Improving the {Chou-Fasman} Algorithm for Protein Folding"
,booktitle = "Computational Learning Theory and Natural Learning Systems,
Volume I"
,editor = "S. Hanson and G. Drastal and R. Rivest"
,year = 1994
,filename = "maclin.mlrgwp91.ps"
,publisher = "{MIT} Press"
}
Note: see also maclin.aaai92
@incollection{cherkauer.inbook94,
author = "K. J. Cherkauer and J. W. Shavlik",
title = "Selecting Salient Features for Machine Learning from
Large Candidate Pools through Parallel Decision-Tree
Construction",
booktitle = "Massively Parallel Artificial Intelligence",
editor = "H. Kitano",
year = "1994",
pages = "102--136",
address = "Menlo Park, CA",
publisher = "AAAI Press/The MIT Press",
filename = "cherkauer.mpai.ps"
,abstract= "The particular representation used to describe training and testing examples
can have profound effects on an inductive algorithm's ability to learn.
However, the space of possible representations is virtually infinite, so
choosing a good representation is not a simple task. This chapter describes
a method whereby the selection of a good input representation for
classification tasks is automated. This technique, which we call DT-Select
(``Decision Tree feature Selection''), builds decision trees, via a fast
parallel implementation of ID3 (Quinlan, 1986), which attempt to correctly
classify the training data. The internal nodes of the trees are features
drawn from very large pools of complex general-purpose and domain-specific
constructed features. Thus, the features included in the trees constitute
compact and informative sets which can then be used as input representations
for other learning algorithms attacking the same problem. We have implemented
DT-Select on a parallel message-passing MIMD architecture, the Thinking
Machines CM-5, enabling us to select from pools containing several hundred
thousand features in reasonable time. We present here some work using this
approach to produce augmentations of artificial neural network input
representations for the molecular biology problem of predicting protein
secondary structures."
}
@incollection{towell.ml493,
  author    = {G. G. Towell and J. W. Shavlik},
  title     = {Refining Symbolic Knowledge Using Neural Networks},
  booktitle = {Machine Learning: An Integrated Approach},
  volume    = 4,
  editor    = {R. S. Michalski and G. Tecuci},
  year      = {1993},
  address   = {San Mateo, CA},
  publisher = {Morgan Kaufmann},
  filename  = {towell.ml4.ps}
}
-----------------
TECHNICAL REPORTS
-----------------
@techreport{maclin.tr94,
  author      = {R. Maclin and J. W. Shavlik},
  title       = {Incorporating Advice into Agents that Learn from
Reinforcements},
  institution = {Department of Computer Sciences, University of Wisconsin},
  number      = {UW TR 1227},
  year        = {1994},
  note        = {(A shorter version appears in AAAI-94.)},
  filename    = {maclin.tr94.ps},
  abstract    = {Learning from reinforcements is a promising approach for creating intelligent
agents. However, reinforcement learning usually requires a large number of
training episodes. We present a system called RATLE that addresses this
shortcoming by allowing a connectionist Q-learner to accept advice given, at
any time and in a natural manner, by an external observer. In RATLE, the
advice-giver watches the learner and occasionally makes suggestions, expressed
as instructions in a simple programming language. Based on techniques from
knowledge-based neural networks, RATLE inserts these programs directly into
the agent's utility function. Subsequent reinforcement learning further
integrates and refines the advice. We present empirical evidence that shows
our approach leads to statistically-significant gains in expected reward.
Importantly, the advice improves the expected reward regardless of the stage
of training at which it is given.}
}
--------------
WORKING PAPERS
--------------
@techreport{craven.mlrgwp99,
  author      = {M. W. Craven and J. W. Shavlik},
  title       = {Rule Extraction: Where Do We Go from Here?},
  institution = {Department of Computer Sciences, University of Wisconsin},
  number      = {Machine Learning Research Group Working Paper 99-1},
  year        = {1999},
  filename    = {craven.mlrgwp99.ps},
  abstract    = {We argue that despite being an actively researched area
for nearly a decade, rule-extraction technology has not made as significant
of an impact as it should have. A confluence of trends, however, has made
the ability to extract comprehensible descriptions from complex learned models
more important now than ever. We argue that rule-extraction methods can have
a significant impact in the overlapping data-mining, machine-learning and
neural-network communities if research is focused on several commonly overlooked
issues. We then briefly describe how we have tried to address these issues in
our own work.}
}
@techreport{craven.mlrgwp95,
  author      = {M. W. Craven and J. W. Shavlik},
  title       = {Extracting Tree-Structured Representations of Trained Networks},
  institution = {Department of Computer Sciences, University of Wisconsin},
  number      = {Machine Learning Research Group Working Paper 95-1},
  year        = {1995}
}
Note: this work has been superseded by craven.nips96.ps
@techreport{opitz.mlrgwp95,
author = "D. W. Opitz and J. W. Shavlik",
title = "Generating Accurate and Diverse Members of a Neural-Network
Ensemble",
institution = "Department of Computer Sciences, University of Wisconsin",
number = "Machine Learning Research Group Working Paper 95-2",
year = "1995",
note = "(A version of this paper appears in {\em Advances in Neural
Information Processing, vol. 8,} 1996)",
filename = "opitz.mlrgwp95.ps"
,abstract= "Neural-network ensembles have been shown to be very accurate classification
techniques. Previous work has shown that an effective ensemble should
consist of networks that are not only highly correct, but ones that make
their errors on different parts of the input space as well. Most existing
techniques, however, only indirectly address the problem of creating such
a set of networks. In this paper we present a technique called ADDEMUP
that uses genetic algorithms to directly search for an accurate and diverse
set of trained networks. ADDEMUP works by first creating an initial
population, then uses genetic operators to continually create new networks,
keeping the set of networks that are as accurate as possible while
disagreeing with each other as much as possible. Experiments on three DNA
problems show that ADDEMUP is able to generate a set of trained networks
that is more accurate than several existing approaches. Experiments also
show that ADDEMUP is able to effectively incorporate prior knowledge, if
available, to improve the quality of its ensemble."
}
Note: this work has been superseded by opitz.nips96.ps
@techreport{cherkauer.mlrgwp95,
  author      = {K. J. Cherkauer and J. W. Shavlik},
  title       = {Rapid Quality Estimation of Neural Network Input
Representations},
  institution = {Department of Computer Sciences, University of Wisconsin},
  number      = {Machine Learning Research Group Working Paper 95-3},
  year        = {1995},
  note        = {(A version of this paper appears in {\em Advances in Neural
Information Processing, vol. 8,} 1996)}
}
Note: this work has been superseded by cherkauer.nips96.ps
@techreport{gutstein.mlrgwp92,
  author      = {E. Gutstein},
  title       = {Learning from Students to Improve an Intelligent
Tutoring System},
  institution = {Department of Computer Sciences, University of Wisconsin},
  number      = {Machine Learning Research Group Working Paper 92-3},
  year        = {1992},
  filename    = {gutstein.mlrgwp92.ps}
}