% Moll and Kavraki, CSB 2008 conference paper introducing the LabelHash method.
@InProceedings{moll2008matching-of-structural-motifs,
 abstract  = {There is an increasing number of proteins with known structure but unknown
	     function. Determining their function would have a significant impact on
	     understanding diseases and designing new therapeutics. However, experimental
	     protein function determination is expensive and very time-consuming. Computational
	     methods can facilitate function determination by identifying proteins that have
	     high structural and chemical similarity. Our focus is on methods that determine
	     binding site similarity. Although several such methods exist, it still remains a
	     challenging problem to quickly find all functionally-related matches for structural
	     motifs in large data sets with high specificity. In this context, a structural
	     motif is a set of 3D points annotated with physicochemical information that
	     characterize a molecular function. We propose a new method called LabelHash that
	     creates hash tables of $n$-tuples of residues for a set of targets. Using these
	     hash tables, we can quickly look up partial matches to a motif and expand those
	     matches to complete matches. We show that by applying only very mild geometric
	     constraints we can find statistically significant matches with extremely high
	     specificity in very large data sets and for very general structural motifs. We
	     demonstrate that our method requires a reasonable amount of storage when employing
	     a simple geometric filter and further improves on the specificity of our previous
	     work while maintaining very high sensitivity. Our algorithm is evaluated on 20
	     homolog classes and a non-redundant version of the Protein Data Bank as our
	     background data set. We use cluster analysis to analyze why certain classes of
	     homologs are more difficult to classify than others. The LabelHash algorithm is
	     implemented on a web server at http://kavrakilab.org/labelhash/.},
 author	   = {Moll, Mark and Kavraki, Lydia E.},
 booktitle = {The Seventh Annual International Conference on Computational Systems
	     Bioinformatics (CSB2008)},
 doi	   = {10.1142/9781848162648_0014},
 pages	   = {157--168},
 pmid	   = {19642277},
 title	   = {Matching of Structural Motifs Using Hashing on Residue Labels and Geometric
	     Filtering for Protein Function Prediction},
 url	   = {http://csb2008.org/csb2008papers/077Moll.pdf},
 year	   = {2008}
}
