  author = 	 {David Lewis},
  title = 	 {Representation and Learning in Information Retrieval},
  school = 	 {University of Massachusetts},
  year = 	 1992

    author = "David D. Lewis and Robert E. Schapire and James P. Callan and Ron Papka",
    title = "Training Algorithms for Linear Text Classifiers",
    booktitle = "Proceedings of {SIGIR}-96, 19th {ACM} International Conference on Research and Development in Information Retrieval",
    publisher = "ACM Press, New York, US",
    address = "Z{\"{u}}rich, CH",
    editor = "Hans-Peter Frei and Donna Harman and Peter Sch{\"{a}}uble and Ross Wilkinson",
    pages = "298--306",
    year = 1996,
    url = "citeseer.nj.nec.com/lewis96training.html"

  author = 	 {Robert E. Schapire and Yoram Singer and Amit Singhal},
  title = 	 {Boosting and Rocchio Applied to Text Filtering},
  booktitle =	 {Proceedings of the Twenty First Annual International ACM SIGIR Conference on Research and Development in Information Retrieval},
  year =	 1998

  author = 	 {Raj D. Iyer and David D. Lewis and Robert E. Schapire and Yoram Singer and Amit Singhal},
  title = 	 {Boosting for Document Routing},
  booktitle =	 {Ninth International Conference on Information and Knowledge Management (CIKM) 2000},
  year =	 2000

  author = 	 {Robert E. Schapire and Yoram Singer},
  title = 	 {BoosTexter: A Boosting-based System for Text Categorization},
  journal = 	 {Machine Learning},
  year = 	 2000,
  volume =	 39,
  number =	 {2/3},
  pages =	 {135--168}

  author = 	 {Abraham Bookstein and Don Kraft},
  title = 	 {Operations Research Applied to Document Indexing and Retrieval Decisions},
  journal = 	 {Journal of the Association for Computing Machinery},
  year = 	 1977,
  volume =	 24,
  number =	 3,
  pages =	 {418--427},
  month =	 {July}

    author = "David A. Grossman and Ophir Frieder and David O. Holmes and David C. Roberts",
    title = "Integrating Structured Data and Text: A Relational Approach",
    journal = "Journal of the American Society for Information Science",
    volume = 48,
    number = 2,
    pages = "122--132",
    year = 1997,
    url = "citeseer.nj.nec.com/article/grossman95integrating.html"

  author = 	 {Sholom M. Weiss and Chidanand Apte and Fred J. Damerau and David E. Johnson and Frank J. Oles and Thilo Goetz and Thomas Hampp},
  title = 	 {Maximizing Text-Mining Performance},
  journal = 	 {IEEE Intelligent Systems},
  year = 	 1999

  author = "A. McCallum and K. Nigam",
  title = "A comparison of event models for Naive Bayes text classification",
  text = "A. McCallum and K. Nigam. A comparison of event models for Naive Bayes text classification. In AAAI-98 Workshop on Learning for Text Categorization, 1998.",
  year = "1998",
  url = "citeseer.nj.nec.com/mccallum98comparison.html" }

% Journal article by Nigam, McCallum, Thrun and Mitchell, Machine Learning 39(2/3), 2000.
% (These two lines sit outside any entry, so BibTeX ignores them.)
@article{nigam00text,
  author  = {Kamal Nigam and Andrew K. McCallum and Sebastian Thrun and Tom M. Mitchell},
  title   = {Text Classification from Labeled and Unlabeled Documents using {EM}},
  journal = {Machine Learning},
  volume  = 39,
  number  = {2/3},
  pages   = {103--134},
  year    = 2000,
  url     = {citeseer.nj.nec.com/nigam99text.html}
}

  author =	 {C.J. van Rijsbergen},
  title = 	 {Information Retrieval},
  publisher = 	 {Butterworths},
  year = 	 1979

  author =	 {David D. Lewis and Yoram Singer},
  title =	 {Introduction to Machine Learning for Information Retrieval},
  howpublished = {Tutorial presented at 23rd {ACM SIGIR} Conference},
  month =	 {August},
  year =	 2002

  author =	 {David D. Lewis and Yoram Singer},
  title =	 {Reference List to Accompany {SIGIR}-97 Tutorial on Machine Learning for Information Retrieval},
  howpublished = {Tutorial presented at 23rd {ACM SIGIR} Conference},
  month =	 {August},
  year =	 2002

  author = "D. Lewis",
  title = "Active by accident: Relevance feedback in information retrieval",
  text = "DD Lewis. Active by accident: Relevance feedback in information retrieval.
    In AAAI Fall Symposium on Active Learning, 1995.",
  year = 1995,
  url = "citeseer.nj.nec.com/lewis95active.html" }

  author = 	 {Michael W. Berry and Zlatko Drmac and Elizabeth R. Jessup},
  title = 	 {Matrices, Vector Spaces and Information Retrieval},
  journal = 	 {SIAM Review},
  year = 	 1999,
  volume =	 41,
  number =	 2,
  pages =	 {335--362}

  author =	 {G. Holmes and C.G. Nevill-Manning},
  title =	 {Feature Selection Via the Discovery of Simple Classification Rules}

    author = "D. D. Lewis",
    title = "{Evaluating and Optimizing Autonomous Text Classification Systems}",
    booktitle = "Proceedings of the 18th Annual International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval",
    publisher = "ACM Press",
    address = "Seattle, Washington",
    editor = "E. A. Fox and P. Ingwersen and R. Fidel",
    pages = "246--254",
    year = 1995,
    url = "citeseer.nj.nec.com/lewis95evaluating.html" }

    author = "D. D. Lewis",
    title = "{Evaluating and Optimizing Autonomous Text Classification Systems}",
    booktitle = "Proceedings of the 18th Annual International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval",
    publisher = "ACM Press",
    address = "Seattle, Washington",
    editor = "E. A. Fox and P. Ingwersen and R. Fidel",
    pages = "246--254",
    year = 1995,
    url = "citeseer.nj.nec.com/lewis95evaluating.html" }

    author = "Norbert Fuhr",
    title = "Probabilistic Models in Information Retrieval",
    journal = "The Computer Journal",
    volume = 35,
    number = 3,
    pages = "243--255",
    year = 1992,
    url = "citeseer.nj.nec.com/fuhr92probabilistic.html"}

  author =	 {Ramon Ferrer Cancho and Ricard V. Sole},
  title =	 {Two regimes in the frequency of words and the origins of complex lexicons: {Z}ipf's law revisited}

    author = "Eibe Frank and Gordon W. Paynter and Ian H. Witten and Carl Gutwin and Craig G. Nevill-Manning",
    title = "Domain-Specific Keyphrase Extraction",
    booktitle = "{IJCAI}",
    pages = "668--673",
    year = "1999",
    url = "citeseer.nj.nec.com/frank99domainspecific.html" }

  author =	 {S. Muthu Muthukrishnan},
  title =	 {Efficient Algorithms for Document Retrieval Problems}

author = "Amit Singhal and Chris Buckley and Mandar Mitra",
title = "Pivoted Document Length Normalization",
booktitle = "Research and Development in Information Retrieval",
pages = "21--29",
year = 1996,
url = "citeseer.nj.nec.com/singhal96pivoted.html"

  author = 	 {S.E. Robertson and K. Sparck Jones},
  title = 	 {Relevance Weighting of Search Terms},
  journal = 	 {Journal of the American Society for Information Science},
  year = 	 1976,
  volume =	 27,
  number =	 3,
  pages =	 {129--146}

    author = "Daphne Koller and Mehran Sahami",
    title = "Hierarchically classifying documents using very few words",
    booktitle = "Proceedings of {ICML}-97, 14th International Conference on Machine Learning",
    publisher = "Morgan Kaufmann Publishers, San Francisco, US",
    address = "Nashville, US",
    editor = "Douglas H. Fisher",
    pages = "170--178",
    year = "1997",
    url = "citeseer.nj.nec.com/koller97hierarchically.html" }

    author = "Scott C. Deerwester and Susan T. Dumais and Thomas K. Landauer and George W. Furnas and Richard A. Harshman",
    title = "Indexing by Latent Semantic Analysis",
    journal = "Journal of the American Society for Information Science",
    volume = 41,
    number = 6,
    pages = "391--407",
    year = 1990,
    url = "citeseer.nj.nec.com/deerwester90indexing.html"

    author = "Michael W. Berry and Susan T. Dumais and Gavin W. O'Brien",
    title = "Using Linear Algebra for Intelligent Information Retrieval",
    number = "UT-CS-94-270",
    year = 1994,
    url = "citeseer.nj.nec.com/berry95using.html" }

    author = "D. D. Lewis",
    title = "{Feature Selection and Feature Extraction for Text Categorization}",
    booktitle = "Proceedings of Speech and Natural Language Workshop",
    publisher = "Morgan Kaufmann",
    address = "San Mateo, California",
    pages = "212--217",
    year = 1992,
    url = "citeseer.nj.nec.com/lewis92feature.html" }

    author = "Soumen Chakrabarti and Byron Dom and Rakesh Agrawal and Prabhakar Raghavan",
    title = "Scalable Feature Selection, Classification and Signature Generation for Organizing Large Text Databases into Hierarchical Topic Taxonomies",
    journal = "{VLDB} Journal: Very Large Data Bases",
    volume = 7,
    number = 3,
    pages = "163--178",
    year = 1998,
    url = "citeseer.nj.nec.com/chakrabarti98scalable.html"

  author = 	 {William P. Jones and George W. Furnas},
  title = 	 {Pictures of Relevance: A Geometric Analysis of Similarity Measures},
  journal = 	 {Journal of the American Society for Information Science},
  year = 	 1987,
  volume =	 38,
  number =	 6,
  pages =	 {420--442},
  month =	 {November}

 author = {Nicholas J. Belkin and W. Bruce Croft},
 title = {Information Filtering and Information Retrieval: Two Sides of the Same Coin?},
 journal = {Communications of the {ACM}},
 volume = 35,
 number = 12,
 year = 1992,
 issn = {0001-0782},
 pages = {29--38},
 doi = {http://doi.acm.org/10.1145/138859.138861},
 publisher = {ACM Press},

  author = 	 {K. Sparck Jones and S. Walker and S. E. Robertson},
  title = 	 {A Probabilistic Model of Information Retrieval: Development and Comparative Experiments; Part 1},
  journal = 	 {Information Processing and Management},
  year = 	 2000,
  volume =	 36,
  number =	 6,
  pages =	 {779--808},
  month =	 {November}

  author = 	 {K. Sparck Jones and S. Walker and S. E. Robertson},
  title = 	 {A Probabilistic Model of Information Retrieval: Development and Comparative Experiments; Part 1},
  journal = 	 {Information Processing and Management},
  year = 	 2000,
  volume =	 36,
  number =	 6,
  pages =	 {779--808},
  month =	 {November}

  author = 	 {S. E. Robertson and S. Walker and M. Beaulieu},
  title = 	 {Experimentation as a Way of Life: Okapi at {TREC}},
  journal = 	 {Information Processing and Management},
  year = 	 2000,
  volume =	 36,
  number =	 1,
  pages =	 {95--108}

  author = 	 {Ellen M. Voorhees},
  title = 	 {Variations in Relevance Judgments and the Measurement of Retrieval Effectiveness},
  journal = 	 {Information Processing and Management},
  year = 	 2000,
  volume =	 36,
  number =	 5,
  pages =	 {697--716},
  month =	 {September}

  author = 	 {Norbert Fuhr},
  title = 	 {Models for Retrieval with Probabilistic Indexing},
  journal = 	 {Information Processing and Management},
  year = 	 1989,
  volume =	 25,
  number =	 1,
  pages =	 {55--72}

  author = 	 {Pauline V. Angione},
  title = 	 {On the Equivalence of Boolean and Weighted Searching Based on the Convertibility of Query Forms},
  journal = 	 {Journal of the American Society for Information Science},
  year = 	 1975,
  pages =	 {112--124},
  month =	 {March--April}

  author = 	 {Paul Travis Nicholls},
  title = 	 {Estimation of {Z}ipf Parameters},
  journal = 	 {Journal of the American Society for Information Science},
  year = 	 1987,
  volume =	 38,
  number =	 6,
  pages =	 {443--445}

 author = {Jay M. Ponte and W. Bruce Croft},
 title = {A Language Modeling Approach to Information Retrieval},
 booktitle = {Proceedings of the 21st Annual International ACM SIGIR Conference on Research and Development in Information Retrieval},
 year = 1998,
 isbn = {1-58113-015-5},
 pages = {275--281},
 location = {Melbourne, Australia},
 doi = {http://doi.acm.org/10.1145/290941.291008},
 publisher = {{ACM} Press},

 author = {John Lafferty and Chengxiang Zhai},
 title = {Document language models, query models, and risk minimization for information retrieval},
 booktitle = {Proceedings of the 24th annual international ACM SIGIR conference on Research and development in information retrieval},
 year = 2001,
 isbn = {1-58113-331-6},
 pages = {111--119},
 location = {New Orleans, Louisiana, United States},
 doi = {http://doi.acm.org/10.1145/383952.383970},
 publisher = {ACM Press},

 author = {ChengXiang Zhai and John Lafferty},
 title = {Two-stage Language Models for Information Retrieval},
 booktitle = {Proceedings of the 25th Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
 year = 2002,
 isbn = {1-58113-561-0},
 pages = {49--56},
 location = {Tampere, Finland},
 doi = {http://doi.acm.org/10.1145/564376.564387},
 publisher = {ACM Press},

 author = {Chengxiang Zhai and John Lafferty},
 title = {Model-Based Feedback in the Language Modeling Approach to Information Retrieval},
 booktitle = {Proceedings of the Tenth International Conference on Information and Knowledge Management},
 year = {2001},
 isbn = {1-58113-436-3},
 pages = {403--410},
 location = {Atlanta, Georgia, {USA}},
 doi = {http://doi.acm.org/10.1145/502585.502654},
 publisher = {{ACM} Press},

 author = {Fei Song and W. Bruce Croft},
 title = {A General Language Model for Information Retrieval},
 booktitle = {Proceedings of the Eighth International Conference on Information and Knowledge Management},
 year = {1999},
 isbn = {1-58113-146-1},
 pages = {316--321},
 location = {Kansas City, Missouri, United States},
 doi = {http://doi.acm.org/10.1145/319950.320022},
 publisher = {{ACM} Press},

 author = {Rong Jin and Alex G. Hauptmann and Cheng Xiang Zhai},
 title = {Title Language Model for Information Retrieval},
 booktitle = {Proceedings of the 25th Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
 year = 2002,
 isbn = {1-58113-561-0},
 pages = {42--48},
 location = {Tampere, Finland},
 doi = {http://doi.acm.org/10.1145/564376.564386},
 publisher = {ACM Press},

 author = {Adam Berger and John Lafferty},
 title = {Information Retrieval as Statistical Translation},
 booktitle = {Proceedings of the 22nd Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
 year = 1999,
 isbn = {1-58113-096-1},
 pages = {222--229},
 location = {Berkeley, California, United States},
 doi = {http://doi.acm.org/10.1145/312624.312681},
 publisher = {{ACM} Press},

  author = 	 {Abraham Bookstein and Vladimir A. Kulyukin and Timo Raita},
  title = 	 {Generalized Hamming Distance},
  journal = 	 {Information Retrieval},
  year = 	 2002,
  volume =	 5,
  number =	 4,
  pages =	 {353--375},
  month =	 {October}

 author = {Abraham Bookstein},
 title = {Implications of Boolean Structure for Probabilistic Retrieval},
 booktitle = {Proceedings of the 8th Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
 year = 1985,
 isbn = {0-89791-159-8},
 pages = {11--17},
 location = {Montreal, Quebec, Canada},
 doi = {http://doi.acm.org/10.1145/253495.253505},
 publisher = {{ACM} Press},

 author = {Joon Ho Lee},
 title = {Properties of Extended Boolean Models in Information Retrieval},
 booktitle = {Proceedings of the 17th Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
 year = 1994,
 isbn = {0-387-19889-X},
 pages = {182--190},
 location = {Dublin, Ireland},
 publisher = {Springer-Verlag New York, Inc.},

 author = {Joon Ho Lee and Won Yong Kim and Myoung Ho Kim and Yoon Joon Lee},
 title = {On the Evaluation of Boolean Operators in the Extended Boolean Retrieval Framework},
 booktitle = {Proceedings of the 16th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval},
 year = 1993,
 isbn = {0-89791-605-0},
 pages = {291--297},
 location = {Pittsburgh, Pennsylvania, United States},
 doi = {http://doi.acm.org/10.1145/160688.160741},
 publisher = {{ACM} Press},

  author = 	 {W. G. Waller and Donald H. Kraft},
  title = 	 {A Mathematical Model of a Weighted Boolean Retrieval System},
  journal = 	 {Information Processing and Management},
  year = 	 1979,
  volume =	 15,
  number =	 5,
  pages =	 {235--245}

    author = "Abraham Bookstein and Shmuel T. Klein and Timo Raita",
    title = "Clumping Properties of Content-Bearing Words",
    journal = "Journal of the American Society for Information Science",
    volume = 49,
    number = 2,
    pages = "102--114",
    year = 1998,
    url = "citeseer.nj.nec.com/bookstein98clumping.html" 

  author =	 {William E. Winkler},
  title =	 {Machine Learning, Information Retrieval, and Record Linkage}

 author = {A. Bookstein},
 title = {Set Oriented Retrieval},
 booktitle = {Proceedings of the 11th Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
 year = 1988,
 isbn = {2-7061-0309-4},
 pages = {583--596},
 location = {Grenoble, France},
 doi = {http://doi.acm.org/10.1145/62437.62499},
 publisher = {{ACM} Press}

 author = {R. Manmatha and T. Rath and F. Feng},
 title = {Modeling Score Distributions for Combining the Outputs of Search Engines},
 booktitle = {Proceedings of the 24th Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
 year = 2001,
 isbn = {1-58113-331-6},
 pages = {267--275},
 location = {New Orleans, Louisiana, United States},
 doi = {http://doi.acm.org/10.1145/383952.384005},
 publisher = {ACM Press},

  author = 	 {John A. Swets},
  title = 	 {Information Retrieval Systems},
  journal = 	 {Science},
  year = 	 1963,
  volume =	 141,
  number =	 3577,
  pages =	 {245--250},
  month =	 {July}

  author = 	 {Ivan P. Fellegi and Alan B. Sunter},
  title = 	 {A Theory for Record Linkage},
  journal = 	 {Journal of the American Statistical Association},
  year = 	 1969,
  volume =	 64,
  number =	 328,
  pages =	 {1183--1210},
  month =	 {December}

  author = 	 {Gerard Salton and Edward A. Fox and Harry Wu},
  title = 	 {Extended Boolean Information Retrieval},
  journal = 	 {Communications of the {ACM}},
  year = 	 1983,
  volume =	 26,
  number =	 12,
  month =	 {December}

  author = 	 {Robert M. Losee},
  title = 	 {When Information Retrieval Measures Agree about the Relative Quality of Document Rankings},
  journal = 	 {Journal of the American Society for Information Science},
  year = 	 2000,
  volume =	 51,
  number =	 9,
  pages =	 {834--840}

    author = "Daphne Koller and Mehran Sahami",
    title = "Toward Optimal Feature Selection",
    booktitle = "International Conference on Machine Learning",
    pages = "284--292",
    year = 1996,
    url = "citeseer.nj.nec.com/koller96toward.html"

  author = "M. Keim and D. Lewis and D. Madigan",
  title = "Bayesian Information Retrieval: Preliminary Evaluation",
  text = "KEIM, M., LEWIS, D. D., AND MADIGAN, D. Bayesian information retrieval:
    Preliminary evaluation. In Preliminary Papers of the Sixth International
    Workshop on Artificial Intelligence and Statistics (Ft. Lauderdale, Florida,
    Jan. 1997), D. Madigan and P. Smyth, Eds., pp. 303--310.",
  year = 1997,
  url = "citeseer.nj.nec.com/keim97bayesian.html"

    author = "David D. Lewis",
    title = "Naive ({B}ayes) at Forty: The Independence Assumption in Information Retrieval",
    booktitle = "Proceedings of {ECML}-98, 10th European Conference on Machine Learning",
    number = 1398,
    publisher = "Springer Verlag, Heidelberg, DE",
    address = "Chemnitz, DE",
    editor = "Claire N{\'{e}}dellec and C{\'{e}}line Rouveirol",
    pages = "4--15",
    year = 1998,
    url = "citeseer.nj.nec.com/lewis98naive.html"

    author = "Yiming Yang and Jan O. Pedersen",
    title = "A Comparative Study on Feature Selection in Text Categorization",
    booktitle = "Proceedings of {ICML}-97, 14th International Conference on Machine Learning",
    publisher = "Morgan Kaufmann Publishers, San Francisco, US",
    address = "Nashville, US",
    editor = "Douglas H. Fisher",
    pages = "412--420",
    year = 1997,
    url = "citeseer.nj.nec.com/yang97comparative.html"

  author = 	 {Duncan A. Buell and Donald H. Kraft},
  title = 	 {Threshold Values and Boolean Retrieval Systems},
  journal = 	 {Information Processing and Management},
  year = 	 1981,
  volume =	 17,
  pages =	 {127--136}

  author = 	 {Abraham Bookstein and William Cooper},
  title = 	 {A General Mathematical Model for Information Retrieval Systems},
  journal = 	 {Library Quarterly},
  year = 	 1976,
  volume =	 46,
  number =	 2,
  pages =	 {153--167}

  author = 	 {W.G. Waller and Donald H. Kraft},
  title = 	 {A Mathematical Model of a Weighted Boolean Retrieval System},
  journal = 	 {Information Processing and Management},
  year = 	 1979,
  volume =	 15,
  number =	 5,
  pages =	 {235--245}

  author =	 {Warren R. Greiff and W. Bruce Croft},
  title =	 {Computationally Tractable Probabilistic Modeling of Boolean Operators}

    author = "Yiming Yang",
    title = "A Study on Thresholding Strategies for Text Categorization",
    booktitle = "Proceedings of {SIGIR}-01, 24th {ACM} International Conference on Research and Development in Information Retrieval",
    publisher = "ACM Press, New York, US",
    address = "New Orleans, US",
    editor = "W. Bruce Croft and David J. Harper and Donald H. Kraft and Justin Zobel",
    pages = "137--145",
    year = 2001,
    url = "citeseer.nj.nec.com/yang01study.html"

  editor =	 {Luis Gravano},
  title = 	 {Special Issue on Text and Databases},
  publisher = 	 {{IEEE} Computer Society},
  year = 	 2001,
  volume =	 24,
  number =	 4,
  series =	 {Bulletin of the Technical Committee on Data Engineering},
  month =	 {December}

  author = 	 {Gerard Salton and Christopher Buckley},
  title = 	 {Term-Weighting Approaches in Automatic Text Retrieval},
  journal = 	 {Information Processing and Management},
  year = 	 1988,
  volume =	 24,
  number =	 5,
  pages =	 {513--523}

    author = "David D. Lewis and William A. Gale",
    title = "A Sequential Algorithm for Training Text Classifiers",
    booktitle = "Proceedings of {SIGIR}-94, 17th {ACM} International Conference on Research and Development in Information Retrieval",
    publisher = "Springer Verlag, Heidelberg, DE",
    address = "Dublin, IE",
    editor = "W. Bruce Croft and Cornelis J. van Rijsbergen",
    pages = "3--12",
    year = 1994,
    url = "citeseer.nj.nec.com/lewis94sequential.html"

  author = 	 {David D. Lewis},
  title = 	 {A Sequential Algorithm for Training Text Classifiers: Corrigendum and Additional Data},
  booktitle =	 {{SIGIR} Forum},
  pages =	 {13--19},
  year =	 1995,
  volume =	 29,
  number =	 2

    author = "David D. Lewis and W. Bruce Croft",
    title = "Term Clustering of Syntactic Phrases",
    booktitle = "Proc. of the Thirteenth Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval",
    pages = "385--404",
    year = 1990,
    url = "citeseer.nj.nec.com/lewis90term.html"

  author = 	 {C.J. van Rijsbergen},
  title = 	 {A Theoretical Basis for Use of Co-occurrence Data in Information Retrieval},
  journal = 	 {Journal of Documentation},
  year = 	 1977,
  volume =	 33,
  number =	 2,
  pages =	 {106--119}

  author = 	 {Robert M. Losee and Abraham Bookstein},
  title = 	 {Integrating Boolean Queries in Conjunctive Normal Form with Probabilistic Retrieval Models},
  journal = 	 {Information Processing and Management},
  year = 	 1988,
  volume =	 24,
  number =	 3,
  pages =	 {315--321}

  author = 	 {William S. Cooper},
  title = 	 {Getting Beyond Boole},
  journal = 	 {Information Processing and Management},
  year = 	 1988,
  volume =	 24,
  number =	 3,
  pages =	 {243--248}

  author = 	 {Gerard Salton and Chris Buckley},
  title = 	 {Improving Retrieval Performance by Relevance Feedback},
  journal = 	 {Journal of the American Society for Information Science},
  year = 	 1990,
  volume =	 41,
  number =	 4,
  pages =	 {288--297}

  author = 	 {David D. Lewis},
  title = 	 {Learning in Intelligent Information Retrieval},
  booktitle =	 {Machine Learning: Proceedings of the Eighth International Workshop ({ML}91)},
  pages =	 {235--239},
  year =	 1991

  author = 	 {Tadeusz Radecki},
  title = 	 {Trends in Research on Information Retrieval -- The Potential for Improvements in Conventional Boolean Retrieval Systems},
  journal = 	 {Information Processing and Management},
  year = 	 1988,
  volume =	 24,
  number =	 3,
  pages =	 {219--227}

    author = "Jon M. Kleinberg",
    title = "Authoritative Sources in a Hyperlinked Environment",
    journal = "Journal of the {ACM}",
    volume = 46,
    number = 5,
    pages = "604--632",
    year = 1999,
    url = "citeseer.nj.nec.com/kleinberg97authoritative.html"

  author =	 {Dunja Mladeni{\'c}},
  title =	 {Feature Subset Selection in Text-Learning}

    author = "Brian T. Bartell and Garrison W. Cottrell and Richard K. Belew",
    title = "Latent Semantic Indexing is an Optimal Special Case of Multidimensional Scaling",
    booktitle = "Research and Development in Information Retrieval",
    pages = "161--167",
    year = 1992,
    url = "citeseer.nj.nec.com/bartell92latent.html" }

  author = 	 {Michael Berry and Jack Dongarra},
  title = 	 {Atlanta Organizers Put Mathematics to Work For the Math Sciences Community},
  booktitle =	 {{SIAM} News},
  year =	 1999,
  volume =	 32,
  number =	 6,
  month =	 {July/August}

  author = 	 {I.A.R. Moghrabi and R.A. Makholian},
  title = 	 {A New Approach to Clustering Records in Information Retrieval Systems},
  journal = 	 {Information Retrieval},
  year = 	 2000,
  volume =	 3,
  number =	 2,
  pages =	 {105--126}

    author = "Marti A. Hearst and Jan O. Pedersen",
    title = "Reexamining the Cluster Hypothesis: Scatter/Gather on Retrieval Results",
    booktitle = "Proceedings of {SIGIR}-96, 19th {ACM} International Conference on Research and Development in Information Retrieval",
    address = "Z{\"{u}}rich, CH",
    pages = "76--84",
    year = 1996,
    url = "citeseer.nj.nec.com/hearst96reexamining.html"

  author = "S. M. R{\"u}ger and S. E. Gauch",
  title = "Feature Reduction for Document Clustering and Classification",
  institution = "Computing Department, Imperial College",
  address = "London, UK",
  year = 2000,
  url = "citeseer.nj.nec.com/uger00feature.html"

 author = {Ellen M. Voorhees},
 title = {The Cluster Hypothesis Revisited},
 booktitle = {Proceedings of the 8th Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
 year = 1985,
 isbn = {0-89791-159-8},
 pages = {188--196},
 location = {Montreal, Quebec, Canada},
 doi = {http://doi.acm.org/10.1145/253495.253524},
 publisher = {{ACM} Press},

  editor =	 {Gerard Salton},
  title = 	 {The {SMART} Retrieval System: Experiments in Automatic Document Processing},
  chapter = 	 {Cluster Search Strategies and the Optimization of Retrieval Effectiveness},
  publisher = 	 {Prentice Hall},
  year = 	 1971

  author = 	 {Peter Willett},
  title = 	 {Recent Trends in Hierarchic Document Clustering: A Critical Review},
  journal = 	 {Information Processing and Management},
  year = 	 1988,
  volume =	 24,
  number =	 5,
  pages =	 {577--597}

  author = 	 {N. Jardine and C.J. van Rijsbergen},
  title = 	 {The Use of Hierarchic Clustering in Information Retrieval},
  journal = 	 {Information Storage and Retrieval},
  year = 	 1971,
  volume =	 7,
  pages =	 {217--240}

  editor =	 {Gerard Salton},
  title = 	 {The SMART Retrieval System},
  chapter = 	 {Relevance Assessments and Retrieval System Evaluation},
  publisher = 	 {Prentice-Hall},
  year = 	 1971,
  pages =	 {506--527}

  author = 	 {Marc Damashek},
  title = 	 {Gauging Similarity with n-Grams: Language-Independent Categorization of Text},
  journal = 	 {Science},
  year = 	 1995,
  volume =	 267,
  pages =	 {843--848},
  month =	 {February}

  author = 	 {Elizabeth D. Liddy and Woojin Paik and Edmund S. Yu},
  title = 	 {Text Categorization for Multiple Users Based on Semantic Features from a Machine-Readable Dictionary},
  journal = 	 {{ACM} Transactions on Information Systems},
  year = 	 1994,
  volume =	 12,
  number =	 3,
  pages =	 {278--295},
  month =	 {July}

  author = 	 {Jung Jin Lee and Paul B. Kantor},
  title = 	 {A Study of Probabilistic Information Retrieval Systems in the Case of Inconsistent Expert Judgments},
  journal = 	 {Journal of the American Society for Information Science},
  year = 	 1991,
  volume =	 42,
  number =	 3,
  pages =	 {166--172}

  author = 	 {N.J. Belkin and P. Kantor and E.A. Fox and J.A. Shaw},
  title = 	 {Combining the Evidence of Multiple Query Representations for Information Retrieval},
  journal = 	 {Information Processing and Management},
  year = 	 1995,
  volume =	 31,
  number =	 3,
  pages =	 {431--448}

  author = 	 {Paul B. Kantor},
  title = 	 {Information Retrieval Techniques},
  booktitle = 	 {Annual Review of Information Science and Technology},
  pages =	 {53--90},
  publisher =	 {American Society for Information Science},
  year =	 1994,
  editor =	 {Martha E. Williams},
  volume =	 29

  author = 	 {Elke Mittendorf and Bojidar Mateev and Peter Sch{\"{a}}uble},
  title = 	 {Using the Co-occurrence of Words for Retrieval Weighting},
  journal = 	 {Information Retrieval},
  year = 	 2000,
  volume =	 3,
  number =	 3,
  pages =	 {243--251},
  month =	 {October}

  author = 	 {W.B. Croft and D.J. Harper},
  title = 	 {Using Probabilistic Models of Document Retrieval Without Relevance Information},
  journal = 	 {Journal of Documentation},
  year = 	 1979,
  volume =	 35,
  number =	 4,
  pages =	 {285--295}

 author = {S. E. Robertson and S. Walker},
 title = {Some Simple Effective Approximations to the 2-Poisson Model for Probabilistic Weighted Retrieval},
 booktitle = {Proceedings of the 17th Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
 year = 1994,
 isbn = {0-387-19889-X},
 pages = {232--241},
 location = {Dublin, Ireland},
 publisher = {Springer-Verlag New York, Inc.},

  author = 	 {Abraham Bookstein},
  title = 	 {When the Most ``Pertinent'' Document Should Not Be Retrieved -- An Analysis of the {S}wets Model},
  journal = 	 {Information Processing and Management},
  year = 	 1977,
  volume =	 13,
  pages =	 {377--383}

 author = {G. Salton and A. Wong and C. S. Yang},
 title = {A Vector Space Model for Automatic Indexing},
 journal = {Communications of the ACM},
 volume = 18,
 number = 11,
 year = 1975,
 issn = {0001-0782},
 pages = {613--620},
 doi = {http://doi.acm.org/10.1145/361219.361220},
 publisher = {ACM Press},

  author = 	 {G. Salton and E.A. Fox and E. Voorhees},
  title = 	 {Advanced Feedback Methods in Information Retrieval},
  journal = 	 {Journal of the American Society for Information Science},
  year = 	 1985,
  volume =	 36,
  number =	 3,
  pages =	 {200--210}

  author = 	 {C.J. van Rijsbergen and D.J. Harper and M.F. Porter},
  title = 	 {The Selection of Good Search Terms},
  journal = 	 {Information Processing and Management},
  year = 	 1981,
  volume =	 17,
  pages =	 {77--91}

  author = 	 {B.C. Brookes},
  title = 	 {The Measures of Information Retrieval Effectiveness Proposed by Swets},
  journal = 	 {Journal of Documentation},
  year = 	 1968,
  volume =	 24,
  number =	 1,
  pages =	 {41--54}

  author = 	 {G. Salton and E. Voorhees and E.A. Fox},
  title = 	 {A Comparison of Two Methods for Boolean Query Relevancy Feedback},
  journal = 	 {Information Processing and Management},
  year = 	 1984,
  volume =	 20,
  number =	 {5/6},
  pages =	 {637--651}

  author = 	 {Tadeusz Radecki},
  title = 	 {Probabilistic Methods for Ranking Output Documents in Conventional Boolean Retrieval Systems},
  journal = 	 {Information Processing and Management},
  year = 	 1988,
  volume =	 24,
  number =	 3,
  pages =	 {281--302}

  author = 	 {Paul B. Kantor},
  title = 	 {The Logic of Weighted Queries},
  journal = 	 {IEEE Transactions on Systems, Man and Cybernetics},
  year = 	 1981,
  volume =	 11,
  number =	 12,
  pages =	 {816--821}

  author =	 {Rocchio, Jr., J.J.},
  editor =	 {Gerard Salton},
  title = 	 {The SMART Retrieval System: Experiments in Automatic Document Processing},
  chapter = 	 {Relevance Feedback in Information Retrieval},
  publisher = 	 {Prentice-Hall},
  year = 	 1971,
  pages =	 {313--323}

  author = 	 {Zhixiang Chen and Binhai Zhu},
  title = 	 {Some Formal Analysis of Rocchio's Similarity-Based Relevance Feedback Algorithm},
  journal = 	 {Information Retrieval},
  year = 	 2002,
  volume =	 5,
  number =	 1,
  pages =	 {61--86}

  author = 	 {S.E. Robertson and K. Sparck Jones},
  title = 	 {Simple Proven Approaches to Text Retrieval},
  institution =  {Cambridge University Computer Laboratory},
  year = 	 1997,
  number =	 {TR356},
  annote =	 {Tech. Rep. TR356}

    author = "Warren R. Greiff",
    title = "A Theory of Term Weighting Based on Exploratory Data Analysis",
    booktitle = "Research and Development in Information Retrieval",
    pages = "11--19",
    year = 1998

 author = {Harry Wu and Gerard Salton},
 title = {A Comparison of Search Term Weighting: Term Relevance vs. Inverse Document Frequency},
 booktitle = {Proceedings of the 4th annual international ACM SIGIR conference on Information storage and retrieval},
 year = 1981,
 isbn = {0-89791-052-4},
 pages = {30--39},
 location = {Oakland, California},
 doi = {http://doi.acm.org/10.1145/511754.511759},
 publisher = {ACM Press},

 author = {S. E. Robertson and S. Walker},
 title = {On Relevance Weights with Little Relevance Information},
 booktitle = {Proceedings of the 20th annual international ACM SIGIR conference on Research and development in information retrieval},
 year = 1997,
 isbn = {0-89791-836-3},
 pages = {16--24},
 location = {Philadelphia, Pennsylvania, United States},
 doi = {http://doi.acm.org/10.1145/258525.258529},
 publisher = {ACM Press},

  author = 	 {W.J. Thompson},
  title = 	 {Poisson Distributions},
  journal = 	 {Computing in Science and Engineering},
  year = 	 2001,
  volume =	 3,
  number =	 3,
  pages =	 {78--82},
  month =	 {May-June},
  note =	 {see also IEEE Computational Science and Engineering}

  author = 	 {W.J. Thompson},
  title = 	 {Don't Subtract the Background},
  journal = 	 {Computing in Science and Engineering},
  year = 	 1999,
  volume =	 1,
  number =	 5,
  pages =	 {84--88},
  month =	 {September-October},
  note =	 {see also IEEE Computational Science and Engineering, signal count analysis method}

  author = 	 {D. E. Johnson and F. J. Oles and T. Zhang and T. Goetz},
  title = 	 {A Decision-Tree Based Symbolic Rule Induction System for Text Categorization},
  journal = 	 {IBM Systems Journal},
  year = 	 2002,
  volume =	 41,
  number =	 3,
  pages =	 {428--436}