% Information-retrieval / text-classification bibliography (classic BibTeX).
%
% Conventions used below:
%   - one field per line, brace-delimited values, bare integers for numbers;
%   - months use the predefined three-letter macros (jan ... dec);
%   - page ranges use "--";
%   - doi fields hold the bare DOI (no resolver prefix);
%   - proper nouns and acronyms in titles are brace-protected as whole words;
%   - compound surnames use the "Last, First" form so they are parsed correctly.
%
% Review notes:
%   - Key 479 was defined twice with identical content; one copy is kept.
%   - Key 732 was used for two different works.  The Jardine and van Rijsbergen
%     article (located between 722 and 724) is now keyed 723 -- presumably a
%     digit transposition; confirm against citing documents.
%   - 677 and 694 describe the same article; both keys are kept because either
%     may be cited, and the page range of 677 now agrees with 694.
%   - 139 and 140 carry inconsistent venue data (SIGIR-97 vs. 23rd conference
%     vs. 2002); left unchanged -- TODO confirm against the original tutorials.

@String{jasis = {Journal of the American Society for Information Science}}
@String{ipm   = {Information Processing and Management}}

@PhdThesis{74,
  author = {David D. Lewis},
  title  = {Representation and Learning in Information Retrieval},
  school = {University of Massachusetts},
  year   = 1992
}

@InProceedings{75,
  author    = {David D. Lewis and Robert E. Schapire and James P. Callan and Ron Papka},
  title     = {Training Algorithms for Linear Text Classifiers},
  booktitle = {Proceedings of {SIGIR}-96, 19th {ACM} International Conference on Research and Development in Information Retrieval},
  publisher = {ACM Press, New York, US},
  address   = {Z{\"{u}}rich, CH},
  editor    = {Hans-Peter Frei and Donna Harman and Peter Sch{\"{a}}uble and Ross Wilkinson},
  pages     = {298--306},
  year      = 1996,
  url       = {citeseer.nj.nec.com/lewis96training.html}
}

@InProceedings{76,
  author    = {Robert E. Schapire and Yoram Singer and Amit Singhal},
  title     = {Boosting and {Rocchio} Applied to Text Filtering},
  booktitle = {Proceedings of the Twenty First Annual International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages     = {215--223},
  year      = 1998
}

@InProceedings{77,
  author    = {Raj D. Iyer and David D. Lewis and Robert E. Schapire and Yoram Singer and Amit Singhal},
  title     = {Boosting for Document Routing},
  booktitle = {Ninth International Conference on Information and Knowledge Management (CIKM) 2000},
  year      = 2000
}

@Article{78,
  author  = {Robert E. Schapire and Yoram Singer},
  title   = {{BoosTexter}: A Boosting-based System for Text Categorization},
  journal = {Machine Learning},
  year    = 2000,
  volume  = 39,
  number  = {2/3},
  pages   = {135--168}
}

@Article{80,
  author  = {Abraham Bookstein and Don Kraft},
  title   = {Operations Research Applied to Document Indexing and Retrieval Decisions},
  journal = {Journal of the Association for Computing Machinery},
  year    = 1977,
  volume  = 24,
  number  = 3,
  pages   = {418--427},
  month   = jul
}

@Article{92,
  author  = {David A. Grossman and Ophir Frieder and David O. Holmes and David C. Roberts},
  title   = {Integrating Structured Data and Text: A Relational Approach},
  journal = jasis,
  volume  = 48,
  number  = 2,
  pages   = {122--132},
  year    = 1997,
  url     = {citeseer.nj.nec.com/article/grossman95integrating.html}
}

@Article{102,
  author  = {Sholom M. Weiss and Chidanand Apte and Fred J. Damerau and David E. Johnson and Frank J. Oles and Thilo Goetz and Thomas Hampp},
  title   = {Maximizing Text-Mining Performance},
  journal = {IEEE Intelligent Systems},
  year    = 1999,
  volume  = 14,
  number  = 4,
  pages   = {63--69}
}

@Misc{120,
  author = {A. McCallum and K. Nigam},
  title  = {A comparison of event models for {Naive Bayes} text classification},
  text   = {A. McCallum and K. Nigam. A comparison of event models for Naive Bayes text classification. In AAAI-98 Workshop on Learning for Text Categorization, 1998.},
  year   = 1998,
  url    = {citeseer.nj.nec.com/mccallum98comparison.html}
}

@Article{nigam00text,
  author  = {Kamal Nigam and Andrew K. McCallum and Sebastian Thrun and Tom M. Mitchell},
  title   = {Text Classification from Labeled and Unlabeled Documents using {EM}},
  journal = {Machine Learning},
  volume  = 39,
  number  = {2/3},
  pages   = {103--134},
  year    = 2000,
  url     = {citeseer.nj.nec.com/nigam99text.html}
}

@Book{138,
  author    = {C. J. van Rijsbergen},
  title     = {Information Retrieval},
  publisher = {Butterworths},
  year      = 1979
}

@Misc{139,
  author       = {David D. Lewis and Yoram Singer},
  title        = {Introduction to Machine Learning for Information Retrieval},
  howpublished = {Tutorial presented at 23rd {ACM SIGIR} Conference},
  month        = aug,
  year         = 2002
}

@Misc{140,
  author       = {David D. Lewis and Yoram Singer},
  title        = {Reference List to Accompany {SIGIR}-97 Tutorial on Machine Learning for Information Retrieval},
  howpublished = {Tutorial presented at 23rd {ACM SIGIR} Conference},
  month        = aug,
  year         = 2002
}

@Misc{141,
  author = {D. Lewis},
  title  = {Active by accident: Relevance feedback in information retrieval},
  text   = {DD Lewis. Active by accident: Relevance feedback in information retrieval. In AAAI Fall Symposium on Active Learning, 1995.},
  year   = 1995,
  url    = {citeseer.nj.nec.com/lewis95active.html}
}

@Article{394,
  author  = {Michael W. Berry and Zlatko Drmac and Elizabeth R. Jessup},
  title   = {Matrices, Vector Spaces and Information Retrieval},
  journal = {SIAM Review},
  year    = 1999,
  volume  = 41,
  number  = 2,
  pages   = {335--362}
}

@Misc{466,
  author = {G. Holmes and C. G. Nevill-Manning},
  title  = {Feature Selection Via the Discovery of Simple Classification Rules}
}

% Key 479 appeared twice with identical fields; the redundant copy was dropped.
@InProceedings{479,
  author    = {D. D. Lewis},
  title     = {{Evaluating and Optimizing Autonomous Text Classification Systems}},
  booktitle = {Proceedings of the 18th Annual International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval},
  publisher = {ACM Press},
  address   = {Seattle, Washington},
  editor    = {E. A. Fox and P. Ingwersen and R. Fidel},
  pages     = {246--254},
  year      = 1995,
  url       = {citeseer.nj.nec.com/lewis95evaluating.html}
}

@Article{485,
  author  = {Norbert Fuhr},
  title   = {Probabilistic Models in Information Retrieval},
  journal = {The Computer Journal},
  volume  = 35,
  number  = 3,
  pages   = {243--255},
  year    = 1992,
  url     = {citeseer.nj.nec.com/fuhr92probabilistic.html}
}

% Title previously read "in the frequency the origins"; the missing words were restored.
@Misc{501,
  author = {{Ferrer i Cancho}, Ramon and Sol{\'e}, Ricard V.},
  title  = {Two regimes in the frequency of words and the origins of complex lexicons: {Zipf}'s law revisited}
}

@InProceedings{547,
  author    = {Eibe Frank and Gordon W. Paynter and Ian H. Witten and Carl Gutwin and Craig G. Nevill-Manning},
  title     = {Domain-Specific Keyphrase Extraction},
  booktitle = {{IJCAI}},
  pages     = {668--673},
  year      = 1999,
  url       = {citeseer.nj.nec.com/frank99domainspecific.html}
}

@Misc{553,
  author = {S. Muthu Muthukrishnan},
  title  = {Efficient Algorithms for Document Retrieval Problems}
}

@InProceedings{605,
  author    = {Amit Singhal and Chris Buckley and Mandar Mitra},
  title     = {Pivoted Document Length Normalization},
  booktitle = {Research and Development in Information Retrieval},
  pages     = {21--29},
  year      = 1996,
  url       = {citeseer.nj.nec.com/singhal96pivoted.html}
}

@Article{606,
  author  = {S. E. Robertson and Sparck Jones, K.},
  title   = {Relevance Weighting of Search Terms},
  journal = jasis,
  year    = 1976,
  volume  = 27,
  number  = 3,
  pages   = {129--146}
}

@InProceedings{618,
  author    = {Daphne Koller and Mehran Sahami},
  title     = {Hierarchically classifying documents using very few words},
  booktitle = {Proceedings of {ICML}-97, 14th International Conference on Machine Learning},
  publisher = {Morgan Kaufmann Publishers, San Francisco, US},
  address   = {Nashville, US},
  editor    = {Douglas H. Fisher},
  pages     = {170--178},
  year      = 1997,
  url       = {citeseer.nj.nec.com/koller97hierarchically.html}
}

@Article{645,
  author  = {Scott C. Deerwester and Susan T. Dumais and Thomas K. Landauer and George W. Furnas and Richard A. Harshman},
  title   = {Indexing by Latent Semantic Analysis},
  journal = jasis,
  volume  = 41,
  number  = 6,
  pages   = {391--407},
  year    = 1990,
  url     = {citeseer.nj.nec.com/deerwester90indexing.html}
}

@TechReport{646,
  author      = {Michael W. Berry and Susan T. Dumais and Gavin W. O'Brien},
  title       = {Using Linear Algebra for Intelligent Information Retrieval},
  institution = {Department of Computer Science, University of Tennessee},
  number      = {UT-CS-94-270},
  year        = 1994,
  url         = {citeseer.nj.nec.com/berry95using.html}
}

@InProceedings{649,
  author    = {D. D. Lewis},
  title     = {{Feature Selection and Feature Extraction for Text Categorization}},
  booktitle = {Proceedings of Speech and Natural Language Workshop},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, California},
  pages     = {212--217},
  year      = 1992,
  url       = {citeseer.nj.nec.com/lewis92feature.html}
}

@Article{650,
  author  = {Soumen Chakrabarti and Byron Dom and Rakesh Agrawal and Prabhakar Raghavan},
  title   = {Scalable Feature Selection, Classification and Signature Generation for Organizing Large Text Databases into Hierarchical Topic Taxonomies},
  journal = {{VLDB} Journal: Very Large Data Bases},
  volume  = 7,
  number  = 3,
  pages   = {163--178},
  year    = 1998,
  url     = {citeseer.nj.nec.com/chakrabarti98scalable.html}
}

@Article{652,
  author  = {William P. Jones and George W. Furnas},
  title   = {Pictures of Relevance: A Geometric Analysis of Similarity Measures},
  journal = jasis,
  year    = 1987,
  volume  = 38,
  number  = 6,
  pages   = {420--442},
  month   = nov
}

@Article{653,
  author    = {Nicholas J. Belkin and W. Bruce Croft},
  title     = {Information Filtering and Information Retrieval: Two Sides of the Same Coin?},
  journal   = {Communications of the {ACM}},
  volume    = 35,
  number    = 12,
  year      = 1992,
  issn      = {0001-0782},
  pages     = {29--38},
  doi       = {10.1145/138859.138861},
  publisher = {ACM Press}
}

@Article{654,
  author  = {Sparck Jones, K. and S. Walker and S. E. Robertson},
  title   = {A Probabilistic Model of Information Retrieval: Development and Comparative Experiments; Part 1},
  journal = ipm,
  year    = 2000,
  volume  = 36,
  number  = 6,
  pages   = {779--808},
  month   = nov
}

% 655 was a verbatim copy of 654; it now describes the companion Part 2 paper.
@Article{655,
  author  = {Sparck Jones, K. and S. Walker and S. E. Robertson},
  title   = {A Probabilistic Model of Information Retrieval: Development and Comparative Experiments; Part 2},
  journal = ipm,
  year    = 2000,
  volume  = 36,
  number  = 6,
  pages   = {809--840},
  month   = nov
}

@Article{656,
  author  = {S. E. Robertson and S. Walker and M. Beaulieu},
  title   = {Experimentation as a Way of Life: {Okapi} at {TREC}},
  journal = ipm,
  year    = 2000,
  volume  = 36,
  number  = 1,
  pages   = {95--108}
}

@Article{657,
  author  = {Ellen M. Voorhees},
  title   = {Variations in Relevance Judgments and the Measurement of Retrieval Effectiveness},
  journal = ipm,
  year    = 2000,
  volume  = 36,
  number  = 5,
  pages   = {697--716},
  month   = sep
}

@Article{658,
  author  = {Norbert Fuhr},
  title   = {Models for Retrieval with Probabilistic Indexing},
  journal = ipm,
  year    = 1989,
  volume  = 25,
  number  = 1,
  pages   = {55--72}
}

@Article{660,
  author  = {Pauline V. Angione},
  title   = {On the Equivalence of {Boolean} and Weighted Searching Based on the Convertibility of Query Forms},
  journal = jasis,
  year    = 1975,
  volume  = 26,
  number  = 2,
  pages   = {112--124},
  month   = mar # "--" # apr
}

@Article{661,
  author  = {Paul Travis Nicholls},
  title   = {Estimation of {Zipf} Parameters},
  journal = jasis,
  year    = 1987,
  volume  = 38,
  number  = 6,
  pages   = {443--445}
}

@InProceedings{665,
  author    = {Jay M. Ponte and W. Bruce Croft},
  title     = {A Language Modeling Approach to Information Retrieval},
  booktitle = {Proceedings of the 21st Annual International ACM SIGIR Conference on Research and Development in Information Retrieval},
  year      = 1998,
  isbn      = {1-58113-015-5},
  pages     = {275--281},
  location  = {Melbourne, Australia},
  doi       = {10.1145/290941.291008},
  publisher = {{ACM} Press}
}

@InProceedings{666,
  author    = {John Lafferty and Chengxiang Zhai},
  title     = {Document language models, query models, and risk minimization for information retrieval},
  booktitle = {Proceedings of the 24th annual international ACM SIGIR conference on Research and development in information retrieval},
  year      = 2001,
  isbn      = {1-58113-331-6},
  pages     = {111--119},
  location  = {New Orleans, Louisiana, United States},
  doi       = {10.1145/383952.383970},
  publisher = {ACM Press}
}

@InProceedings{667,
  author    = {ChengXiang Zhai and John Lafferty},
  title     = {Two-stage Language Models for Information Retrieval},
  booktitle = {Proceedings of the 25th Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
  year      = 2002,
  isbn      = {1-58113-561-0},
  pages     = {49--56},
  location  = {Tampere, Finland},
  doi       = {10.1145/564376.564387},
  publisher = {ACM Press}
}

@InProceedings{668,
  author    = {Chengxiang Zhai and John Lafferty},
  title     = {Model-Based Feedback in the Language Modeling Approach to Information Retrieval},
  booktitle = {Proceedings of the Tenth International Conference on Information and Knowledge Management},
  year      = 2001,
  isbn      = {1-58113-436-3},
  pages     = {403--410},
  location  = {Atlanta, Georgia, {USA}},
  doi       = {10.1145/502585.502654},
  publisher = {{ACM} Press}
}

@InProceedings{669,
  author    = {Fei Song and W. Bruce Croft},
  title     = {A General Language Model for Information Retrieval},
  booktitle = {Proceedings of the Eighth International Conference on Information and Knowledge Management},
  year      = 1999,
  isbn      = {1-58113-146-1},
  pages     = {316--321},
  location  = {Kansas City, Missouri, United States},
  doi       = {10.1145/319950.320022},
  publisher = {{ACM} Press}
}

@InProceedings{670,
  author    = {Rong Jin and Alex G. Hauptmann and Cheng Xiang Zhai},
  title     = {Title Language Model for Information Retrieval},
  booktitle = {Proceedings of the 25th Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
  year      = 2002,
  isbn      = {1-58113-561-0},
  pages     = {42--48},
  location  = {Tampere, Finland},
  doi       = {10.1145/564376.564386},
  publisher = {ACM Press}
}

@InProceedings{671,
  author    = {Adam Berger and John Lafferty},
  title     = {Information Retrieval as Statistical Translation},
  booktitle = {Proceedings of the 22nd Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
  year      = 1999,
  isbn      = {1-58113-096-1},
  pages     = {222--229},
  location  = {Berkeley, California, United States},
  doi       = {10.1145/312624.312681},
  publisher = {{ACM} Press}
}

@Article{673,
  author  = {Abraham Bookstein and Vladimir A. Kulyukin and Timo Raita},
  title   = {Generalized {Hamming} Distance},
  journal = {Information Retrieval},
  year    = 2002,
  volume  = 5,
  number  = 4,
  pages   = {353--375},
  month   = oct
}

@InProceedings{674,
  author    = {Abraham Bookstein},
  title     = {Implications of {Boolean} Structure for Probabilistic Retrieval},
  booktitle = {Proceedings of the 8th Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
  year      = 1985,
  isbn      = {0-89791-159-8},
  pages     = {11--17},
  location  = {Montreal, Quebec, Canada},
  doi       = {10.1145/253495.253505},
  publisher = {{ACM} Press}
}

@InProceedings{675,
  author    = {Joon Ho Lee},
  title     = {Properties of Extended {Boolean} Models in Information Retrieval},
  booktitle = {Proceedings of the 17th Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
  year      = 1994,
  isbn      = {0-387-19889-X},
  pages     = {182--190},
  location  = {Dublin, Ireland},
  publisher = {Springer-Verlag New York, Inc.}
}

@InProceedings{676,
  author    = {Joon Ho Lee and Won Yong Kim and Myoung Ho Kim and Yoon Joon Lee},
  title     = {On the Evaluation of {Boolean} Operators in the Extended {Boolean} Retrieval Framework},
  booktitle = {Proceedings of the 16th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval},
  year      = 1993,
  isbn      = {0-89791-605-0},
  pages     = {291--297},
  location  = {Pittsburgh, Pennsylvania, United States},
  doi       = {10.1145/160688.160741},
  publisher = {{ACM} Press}
}

% Same article as 694; the page range previously disagreed (219--267).
@Article{677,
  author  = {W. G. Waller and Donald H. Kraft},
  title   = {A Mathematical Model of a Weighted {Boolean} Retrieval System},
  journal = ipm,
  year    = 1979,
  volume  = 15,
  number  = 5,
  pages   = {235--245}
}

@Article{678,
  author  = {Abraham Bookstein and Shmuel T. Klein and Timo Raita},
  title   = {Clumping Properties of Content-Bearing Words},
  journal = jasis,
  volume  = 49,
  number  = 2,
  pages   = {102--114},
  year    = 1998,
  url     = {citeseer.nj.nec.com/bookstein98clumping.html}
}

@Misc{679,
  author = {William E. Winkler},
  title  = {Machine Learning, Information Retrieval, and Record Linkage}
}

@InProceedings{680,
  author    = {A. Bookstein},
  title     = {Set Oriented Retrieval},
  booktitle = {Proceedings of the 11th Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
  year      = 1988,
  isbn      = {2-7061-0309-4},
  pages     = {583--596},
  location  = {Grenoble, France},
  doi       = {10.1145/62437.62499},
  publisher = {{ACM} Press}
}

@InProceedings{681,
  author    = {R. Manmatha and T. Rath and F. Feng},
  title     = {Modeling Score Distributions for Combining the Outputs of Search Engines},
  booktitle = {Proceedings of the 24th Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
  year      = 2001,
  isbn      = {1-58113-331-6},
  pages     = {267--275},
  location  = {New Orleans, Louisiana, United States},
  doi       = {10.1145/383952.384005},
  publisher = {ACM Press}
}

@Article{682,
  author  = {John A. Swets},
  title   = {Information Retrieval Systems},
  journal = {Science},
  year    = 1963,
  volume  = 141,
  number  = 3577,
  pages   = {245--250},
  month   = jul
}

@Article{683,
  author  = {Ivan P. Fellegi and Alan B. Sunter},
  title   = {A Theory for Record Linkage},
  journal = {Journal of the American Statistical Association},
  year    = 1969,
  volume  = 64,
  number  = 328,
  pages   = {1183--1210},
  month   = dec
}

@Article{684,
  author  = {Gerard Salton and Edward A. Fox and Harry Wu},
  title   = {Extended {Boolean} Information Retrieval},
  journal = {Communications of the {ACM}},
  year    = 1983,
  volume  = 26,
  number  = 12,
  pages   = {1022--1036},
  month   = dec
}

@Article{685,
  author  = {Robert M. Losee},
  title   = {When Information Retrieval Measures Agree about the Relative Quality of Document Rankings},
  journal = jasis,
  year    = 2000,
  volume  = 51,
  number  = 9,
  pages   = {834--840}
}

@InProceedings{686,
  author    = {Daphne Koller and Mehran Sahami},
  title     = {Toward Optimal Feature Selection},
  booktitle = {International Conference on Machine Learning},
  pages     = {284--292},
  year      = 1996,
  url       = {citeseer.nj.nec.com/koller96toward.html}
}

@Misc{689,
  author = {M. Keim and D. Lewis and D. Madigan},
  title  = {{Bayesian} Information Retrieval: Preliminary Evaluation},
  text   = {KEIM, M., LEWIS, D. D., AND MADIGAN, D. Bayesian information retrieval: Preliminary evaluation. In Preliminary Papers of the Sixth International Workshop on Artificial Intelligence and Statistics (Ft. Lauderdale, Florida, Jan. 1997), D. Madigan and P. Smyth, Eds., pp. 303--310.},
  year   = 1997,
  url    = {citeseer.nj.nec.com/keim97bayesian.html}
}

@InProceedings{690,
  author    = {David D. Lewis},
  title     = {Naive ({Bayes}) at Forty: The Independence Assumption in Information Retrieval.},
  booktitle = {Proceedings of {ECML}-98, 10th European Conference on Machine Learning},
  series    = {Lecture Notes in Computer Science},
  number    = 1398,
  publisher = {Springer Verlag, Heidelberg, DE},
  address   = {Chemnitz, DE},
  editor    = {Claire N{\'{e}}dellec and C{\'{e}}line Rouveirol},
  pages     = {4--15},
  year      = 1998,
  url       = {citeseer.nj.nec.com/lewis98naive.html}
}

@InProceedings{691,
  author    = {Yiming Yang and Jan O. Pedersen},
  title     = {A Comparative Study on Feature Selection in Text Categorization},
  booktitle = {Proceedings of {ICML}-97, 14th International Conference on Machine Learning},
  publisher = {Morgan Kaufmann Publishers, San Francisco, US},
  address   = {Nashville, US},
  editor    = {Douglas H. Fisher},
  pages     = {412--420},
  year      = 1997,
  url       = {citeseer.nj.nec.com/yang97comparative.html}
}

@Article{692,
  author  = {Duncan A. Buell and Donald H. Kraft},
  title   = {Threshold Values and {Boolean} Retrieval Systems},
  journal = ipm,
  year    = 1981,
  volume  = 17,
  pages   = {127--136}
}

@Article{693,
  author  = {Abraham Bookstein and William Cooper},
  title   = {A General Mathematical Model for Information Retrieval Systems},
  journal = {Library Quarterly},
  year    = 1976,
  volume  = 46,
  number  = 2,
  pages   = {153--167}
}

@Article{694,
  author  = {W. G. Waller and Donald H. Kraft},
  title   = {A Mathematical Model of a Weighted {Boolean} Retrieval System},
  journal = ipm,
  year    = 1979,
  volume  = 15,
  number  = 5,
  pages   = {235--245}
}

@Misc{696,
  author = {Warren R. Greiff and W. Bruce Croft},
  title  = {Computationally Tractable Probabilistic Modeling of {Boolean} Operators}
}

@InProceedings{697,
  author    = {Yiming Yang},
  title     = {A Study on Thresholding Strategies for Text Categorization},
  booktitle = {Proceedings of {SIGIR}-01, 24th {ACM} International Conference on Research and Development in Information Retrieval},
  publisher = {ACM Press, New York, US},
  address   = {New Orleans, US},
  editor    = {W. Bruce Croft and David J. Harper and Donald H. Kraft and Justin Zobel},
  pages     = {137--145},
  year      = 2001,
  url       = {citeseer.nj.nec.com/yang01study.html}
}

@Book{698,
  editor    = {Luis Gravano},
  title     = {Special Issue on Text and Databases},
  publisher = {{IEEE} Computer Society},
  year      = 2001,
  volume    = 24,
  number    = 4,
  series    = {Bulletin of the Technical Committee on Data Engineering},
  month     = dec
}

@Article{699,
  author  = {Gerard Salton and Christopher Buckley},
  title   = {Term-Weighting Approaches in Automatic Text Retrieval},
  journal = ipm,
  year    = 1988,
  volume  = 24,
  number  = 5,
  pages   = {513--523}
}

@InProceedings{701,
  author    = {David D. Lewis and William A. Gale},
  title     = {A Sequential Algorithm for Training Text Classifiers},
  booktitle = {Proceedings of {SIGIR}-94, 17th {ACM} International Conference on Research and Development in Information Retrieval},
  publisher = {Springer Verlag, Heidelberg, DE},
  address   = {Dublin, IE},
  editor    = {W. Bruce Croft and Cornelis J. van Rijsbergen},
  pages     = {3--12},
  year      = 1994,
  url       = {citeseer.nj.nec.com/lewis94sequential.html}
}

% SIGIR Forum is a periodical (volume/number), so this is an article rather
% than a proceedings paper.
@Article{702,
  author  = {David D. Lewis},
  title   = {A Sequential Algorithm for Training Text Classifiers: Corrigendum and Additional Data},
  journal = {{SIGIR} Forum},
  pages   = {13--19},
  year    = 1995,
  volume  = 29,
  number  = 2
}

@InProceedings{703,
  author    = {David D. Lewis and W. Bruce Croft},
  title     = {Term Clustering of Syntactic Phrases},
  booktitle = {Proc. of the Thirteenth Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
  pages     = {385--404},
  year      = 1990,
  url       = {citeseer.nj.nec.com/lewis90term.html}
}

@Article{704,
  author  = {C. J. van Rijsbergen},
  title   = {A Theoretical Basis for Use of Co-occurrence Data in Information Retrieval},
  journal = {Journal of Documentation},
  year    = 1977,
  volume  = 33,
  number  = 2,
  pages   = {106--119}
}

@Article{705,
  author  = {Robert M. Losee and Abraham Bookstein},
  title   = {Integrating {Boolean} Queries in Conjunctive Normal Form with Probabilistic Retrieval Models},
  journal = ipm,
  year    = 1988,
  volume  = 24,
  number  = 3,
  pages   = {315--321}
}

@Article{706,
  author  = {William S. Cooper},
  title   = {Getting Beyond {Boole}},
  journal = ipm,
  year    = 1988,
  volume  = 24,
  number  = 3,
  pages   = {243--248}
}

@Article{707,
  author  = {Gerard Salton and Chris Buckley},
  title   = {Improving Retrieval Performance by Relevance Feedback},
  journal = jasis,
  year    = 1990,
  volume  = 41,
  number  = 4,
  pages   = {288--297}
}

@InProceedings{708,
  author    = {David D. Lewis},
  title     = {Learning in Intelligent Information Retrieval},
  booktitle = {Machine Learning: Proceedings of the Eighth International Workshop ({ML}91)},
  pages     = {235--239},
  year      = 1991
}

@Article{710,
  author  = {Tadeusz Radecki},
  title   = {Trends in Research on Information Retrieval -- The Potential for Improvements in Conventional {Boolean} Retrieval Systems},
  journal = ipm,
  year    = 1988,
  volume  = 24,
  number  = 3,
  pages   = {219--227}
}

@Article{711,
  author  = {Jon M. Kleinberg},
  title   = {Authoritative Sources in a Hyperlinked Environment},
  journal = {Journal of the {ACM}},
  volume  = 46,
  number  = 5,
  pages   = {604--632},
  year    = 1999,
  url     = {citeseer.nj.nec.com/kleinberg97authoritative.html}
}

@Misc{713,
  author = {Dunja Mladeni{\'c}},
  title  = {Feature Subset Selection in Text-Learning}
}

@InProceedings{714,
  author    = {Brian T. Bartell and Garrison W. Cottrell and Richard K. Belew},
  title     = {Latent Semantic Indexing is an Optimal Special Case of Multidimensional Scaling},
  booktitle = {Research and Development in Information Retrieval},
  pages     = {161--167},
  year      = 1992,
  url       = {citeseer.nj.nec.com/bartell92latent.html}
}

% SIAM News is a periodical (volume/number), so this is an article rather
% than a proceedings paper.
@Article{716,
  author  = {Michael Berry and Jack Dongarra},
  title   = {Atlanta Organizers Put Mathematics to Work For the Math Sciences Community},
  journal = {{SIAM} News},
  year    = 1999,
  volume  = 32,
  number  = 6,
  month   = jul # "/" # aug
}

@Article{717,
  author  = {I. A. R. Moghrabi and R. A. Makholian},
  title   = {A New Approach to Clustering Records in Information Retrieval Systems},
  journal = {Information Retrieval},
  year    = 2000,
  volume  = 3,
  number  = 2,
  pages   = {105--126}
}

@InProceedings{718,
  author    = {Marti A. Hearst and Jan O. Pedersen},
  title     = {Reexamining the Cluster Hypothesis: Scatter/Gather on Retrieval Results},
  booktitle = {Proceedings of {SIGIR}-96, 19th {ACM} International Conference on Research and Development in Information Retrieval},
  address   = {Z{\"{u}}rich, CH},
  pages     = {76--84},
  year      = 1996,
  url       = {citeseer.nj.nec.com/hearst96reexamining.html}
}

@TechReport{719,
  author      = {S. M. R{\"u}ger and S. E. Gauch},
  title       = {Feature Reduction for Document Clustering and Classification},
  institution = {Computing Department, Imperial College},
  address     = {London, UK},
  year        = 2000,
  url         = {citeseer.nj.nec.com/uger00feature.html}
}

@InProceedings{720,
  author    = {Ellen M. Voorhees},
  title     = {The Cluster Hypothesis Revisited},
  booktitle = {Proceedings of the 8th Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
  year      = 1985,
  isbn      = {0-89791-159-8},
  pages     = {188--196},
  location  = {Montreal, Quebec, Canada},
  doi       = {10.1145/253495.253524},
  publisher = {{ACM} Press}
}

@InBook{721,
  editor    = {Gerard Salton},
  title     = {The {SMART} Retrieval System: Experiments in Automatic Document Processing},
  chapter   = {Cluster Search Strategies and the Optimization of Retrieval Effectiveness},
  publisher = {Prentice-Hall},
  year      = 1971
}

@Article{722,
  author  = {Peter Willett},
  title   = {Recent Trends in Hierarchic Document Clustering: A Critical Review},
  journal = ipm,
  year    = 1988,
  volume  = 24,
  number  = 5,
  pages   = {577--597}
}

% Previously keyed 732, colliding with the Robertson and Walker paper below.
% Renamed to 723 to fit the surrounding 722/724 sequence -- TODO confirm that
% no document relies on the old key resolving to this entry.
@Article{723,
  author  = {N. Jardine and C. J. van Rijsbergen},
  title   = {The Use of Hierarchic Clustering in Information Retrieval},
  journal = {Information Storage and Retrieval},
  year    = 1971,
  volume  = 7,
  pages   = {217--240}
}

@InBook{724,
  editor    = {Gerard Salton},
  title     = {The {SMART} Retrieval System},
  chapter   = {Relevance Assessments and Retrieval System Evaluation},
  publisher = {Prentice-Hall},
  year      = 1971,
  pages     = {506--527}
}

@Article{725,
  author  = {Marc Damashek},
  title   = {Gauging Similarity with n-Grams: Language-Independent Categorization of Text},
  journal = {Science},
  year    = 1995,
  volume  = 267,
  number  = 5199,
  pages   = {843--848},
  month   = feb
}

@Article{726,
  author  = {Elizabeth D. Liddy and Woojin Paik and Edmund S. Yu},
  title   = {Text Categorization for Multiple Users Based on Semantic Features from a Machine-Readable Dictionary},
  journal = {{ACM} Transactions on Information Systems},
  year    = 1994,
  volume  = 12,
  number  = 3,
  pages   = {278--295},
  month   = jul
}

@Article{727,
  author  = {Jung Jin Lee and Paul B. Kantor},
  title   = {A Study of Probabilistic Information Retrieval Systems in the Case of Inconsistent Expert Judgments},
  journal = jasis,
  year    = 1991,
  volume  = 42,
  number  = 3,
  pages   = {166--172}
}

@Article{728,
  author  = {N. J. Belkin and P. Kantor and E. A. Fox and J. A. Shaw},
  title   = {Combining the Evidence of Multiple Query Representations for Information Retrieval},
  journal = ipm,
  year    = 1995,
  volume  = 31,
  number  = 3,
  pages   = {431--448}
}

@InCollection{729,
  author    = {Paul B. Kantor},
  title     = {Information Retrieval Techniques},
  booktitle = {Annual Review of Information Science and Technology},
  pages     = {53--90},
  publisher = {American Society for Information Science},
  year      = 1994,
  editor    = {Martha E. Williams},
  volume    = 29
}

@Article{730,
  author  = {Elke Mittendorf and Bojidar Mateev and Peter Sch{\"a}uble},
  title   = {Using the Co-occurrence of Words for Retrieval Weighting},
  journal = {Information Retrieval},
  year    = 2000,
  volume  = 3,
  number  = 3,
  pages   = {243--251},
  month   = oct
}

@Article{731,
  author  = {W. B. Croft and D. J. Harper},
  title   = {Using Probabilistic Models of Document Retrieval Without Relevance Information},
  journal = {Journal of Documentation},
  year    = 1979,
  volume  = 35,
  number  = 4,
  pages   = {285--295}
}

@InProceedings{732,
  author    = {S. E. Robertson and S. Walker},
  title     = {Some Simple Effective Approximations to the 2-{Poisson} Model for Probabilistic Weighted Retrieval},
  booktitle = {Proceedings of the 17th Annual International {ACM SIGIR} Conference on Research and Development in Information Retrieval},
  year      = 1994,
  isbn      = {0-387-19889-X},
  pages     = {232--241},
  location  = {Dublin, Ireland},
  publisher = {Springer-Verlag New York, Inc.}
}

@Article{733,
  author  = {Abraham Bookstein},
  title   = {When the Most {``Pertinent''} Document Should Not Be Retrieved - An Analysis of the {Swets} Model},
  journal = ipm,
  year    = 1977,
  volume  = 13,
  pages   = {377--383}
}

@Article{741,
  author    = {G. Salton and A. Wong and C. S. Yang},
  title     = {A Vector Space Model for Automatic Indexing},
  journal   = {Communications of the ACM},
  volume    = 18,
  number    = 11,
  year      = 1975,
  issn      = {0001-0782},
  pages     = {613--620},
  doi       = {10.1145/361219.361220},
  publisher = {ACM Press}
}

@Article{743,
  author  = {G. Salton and E. A. Fox and E. Voorhees},
  title   = {Advanced Feedback Methods in Information Retrieval},
  journal = jasis,
  year    = 1985,
  volume  = 36,
  number  = 3,
  pages   = {200--210}
}

@Article{744,
  author  = {C. J. van Rijsbergen and D. J. Harper and M. F. Porter},
  title   = {The Selection of Good Search Terms},
  journal = ipm,
  year    = 1981,
  volume  = 17,
  pages   = {77--91}
}

% Journal corrected: this 1968 paper is listed with volume 24(1), pp. 41--54,
% which matches the Journal of Documentation -- TODO confirm against the source.
@Article{745,
  author  = {B. C. Brookes},
  title   = {The Measures of Information Retrieval Effectiveness Proposed by {Swets}},
  journal = {Journal of Documentation},
  year    = 1968,
  volume  = 24,
  number  = 1,
  pages   = {41--54}
}

@Article{746,
  author  = {G. Salton and E. Voorhees and E. A. Fox},
  title   = {A Comparison of Two Methods for {Boolean} Query Relevancy Feedback},
  journal = ipm,
  year    = 1984,
  volume  = 20,
  number  = {5/6},
  pages   = {637--651}
}

@Article{747,
  author  = {Tadeusz Radecki},
  title   = {Probabilistic Methods for Ranking Output Documents in Conventional {Boolean} Retrieval Systems},
  journal = ipm,
  year    = 1988,
  volume  = 24,
  number  = 3,
  pages   = {281--302}
}

@Article{748,
  author  = {Paul B. Kantor},
  title   = {The Logic of Weighted Queries},
  journal = {IEEE Transactions on Systems, Man and Cybernetics},
  year    = 1981,
  volume  = 11,
  number  = 12,
  pages   = {816--821}
}

% A chapter by one author inside an edited book: the chapter title is the
% entry title and the book title is the booktitle.
@InCollection{749,
  author    = {Rocchio, Jr., J. J.},
  title     = {Relevance Feedback in Information Retrieval},
  booktitle = {The {SMART} Retrieval System: Experiments in Automatic Document Processing},
  editor    = {Gerard Salton},
  publisher = {Prentice-Hall},
  year      = 1971,
  pages     = {313--323}
}

@Article{750,
  author  = {Zhixiang Chen and Binhai Zhu},
  title   = {Some Formal Analysis of {Rocchio}'s Similarity-Based Relevance Feedback Algorithm},
  journal = {Information Retrieval},
  year    = 2002,
  volume  = 5,
  number  = 1,
  pages   = {61--86}
}

@TechReport{756,
  author      = {S. E. Robertson and Sparck Jones, K.},
  title       = {Simple Proven Approaches to Text Retrieval},
  institution = {Cambridge University Computer Laboratory},
  year        = 1997,
  number      = {TR356},
  annote      = {Tech. Rep. TR356}
}

@InProceedings{757,
  author    = {Warren R. Greiff},
  title     = {A Theory of Term Weighting Based on Exploratory Data Analysis},
  booktitle = {Research and Development in Information Retrieval},
  pages     = {11--19},
  year      = 1998
}

@InProceedings{758,
  author    = {Harry Wu and Gerard Salton},
  title     = {A Comparison of Search Term Weighting: Term Relevance vs. Inverse Document Frequency},
  booktitle = {Proceedings of the 4th annual international ACM SIGIR conference on Information storage and retrieval},
  year      = 1981,
  isbn      = {0-89791-052-4},
  pages     = {30--39},
  location  = {Oakland, California},
  doi       = {10.1145/511754.511759},
  publisher = {ACM Press}
}

@InProceedings{759,
  author    = {S. E. Robertson and S. Walker},
  title     = {On Relevance Weights with Little Relevance Information},
  booktitle = {Proceedings of the 20th annual international ACM SIGIR conference on Research and development in information retrieval},
  year      = 1997,
  isbn      = {0-89791-836-3},
  pages     = {16--24},
  location  = {Philadelphia, Pennsylvania, United States},
  doi       = {10.1145/258525.258529},
  publisher = {ACM Press}
}

@Article{788,
  author  = {W. J. Thompson},
  title   = {{Poisson} Distributions},
  journal = {Computing in Science and Engineering},
  year    = 2001,
  volume  = 3,
  number  = 3,
  pages   = {78--82},
  month   = may # "--" # jun,
  note    = {see also IEEE Computational Science and Engineering}
}

@Article{789,
  author  = {W. J. Thompson},
  title   = {Don't Subtract the Background},
  journal = {Computing in Science and Engineering},
  year    = 1999,
  volume  = 1,
  number  = 5,
  pages   = {84--88},
  month   = sep # "--" # oct,
  note    = {see also IEEE Computational Science and Engineering, signal count analysis method}
}

@Article{793,
  author  = {D. E. Johnson and F. J. Oles and T. Zhang and T. Goetz},
  title   = {A Decision-Tree Based Symbolic Rule Induction System for Text Categorization},
  journal = {IBM Systems Journal},
  year    = 2002,
  volume  = 41,
  number  = 3,
  pages   = {428--436}
}