############################################################################### ## ## ## ALEXANDRIA DIGITAL LIBRARY ## ## University of California at Santa Barbara ## ## ## ## ------------------------------------------------------------------------- ## ## ## ## Copyright (c) 2002 by the Regents of the University of California ## ## All rights reserved ## ## ## ## Redistribution and use in source and binary forms, with or without ## ## modification, are permitted provided that the following conditions are ## ## met: ## ## ## ## 1. Redistributions of source code must retain the above copyright ## ## notice, this list of conditions, and the following disclaimer. ## ## ## ## 2. Redistributions in binary form must reproduce the above copyright ## ## notice, this list of conditions, and the following disclaimer in ## ## the documentation and/or other materials provided with the ## ## distribution. ## ## ## ## 3. All advertising materials mentioning features or use of this ## ## software must display the following acknowledgement: This product ## ## includes software developed by the Alexandria Digital Library, ## ## University of California at Santa Barbara, and its contributors. ## ## ## ## 4. Neither the name of the University nor the names of its ## ## contributors may be used to endorse or promote products derived ## ## from this software without specific prior written permission. ## ## ## ## THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND ANY ## ## EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ## ## WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE ## ## DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ## ## ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ## ## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ## ## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ## ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ## ## STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ## ## ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ## ## POSSIBILITY OF SUCH DAMAGE. ## ## ## ############################################################################### # $Header: /export/home/gjanee/bucket99/paradigms/RCS/Textual_Constant.py,v 1.1 2002/11/04 22:46:14 gjanee Exp $ # SYNOPSIS # # Textual_Constant (table, idColumn, cardinality, phraseList, # mapping=TextUtils.mappings.uppercaseAlphanumericOthersToWhitespace, # deleteList=TextUtils.deleteLists.keepAll) # # table # A table to query, e.g., "holding". # # idColumn # The table's object identifier column (i.e., the column # to be selected), e.g., "holding_id". # # cardinality # A Cardinality object representing the cardinality of # 'table'. Should be Cardinality("1") or # Cardinality("1?"). # # phraseList # A list of zero or more phrases, e.g., # ["I am Sam", "Sam am I", "Green Eggs and Ham"]. # # mapping # A Python character mapping table (i.e., a string of # length 256, indexed by ASCII character code) to process # 'phraseList' and constraint text with. Defaults to # 'uppercaseAlphanumericOthersToWhitespace', which maps # alphanumeric characters to their uppercase equivalents # and all other characters to whitespace (i.e., to word # separators). # # deleteList # A string of zero or more characters to delete from # 'phraseList' and constraint text. The default is the # empty string, which keeps all characters. # # DESCRIPTION # # Translates a textual constraint to a constant TRUE or FALSE # depending on whether the constraint text matches a body of # constant text. # # This paradigm parses both constraint text and the phrases in # 'phraseList' into word sequences by: 1) deleting any characters # that appear in 'deleteList'; 2) mapping the remaining characters # using 'mapping'; and 3) treating sequences of whitespace # characters as word separators. Comparison of words is then # based on straight string equality. # # A constraint that matches 'phraseList' results in the query # # SELECT idColumn FROM table # WHERE 1 = 1 # # being returned. Otherwise, the query # # SELECT idColumn FROM table # WHERE 1 = 0 # # is returned. # # Exceptions thrown: # # no query words specified # # AUTHOR # # Greg Janee # gjanee@alexandria.ucsb.edu # # HISTORY # # $Log: Textual_Constant.py,v $ # Revision 1.1 2002/11/04 22:46:14 gjanee # Initial revision # import string import types import edu.ucsb.adl.middleware M = edu.ucsb.adl.middleware import UniversalTranslator UT = UniversalTranslator import paradigms P = paradigms class Textual_Constant (UT.Paradigm): def __init__ (self, table, idColumn, cardinality, phraseList, mapping=P.TextUtils.mappings.uppercaseAlphanumericOthersToWhitespace, deleteList=P.TextUtils.deleteLists.keepAll): UT.assertType(table, types.StringType) UT.assertType(idColumn, types.StringType) UT.assertType(cardinality, UT.Cardinality) UT.assertType(phraseList, types.ListType) UT.assertListElementType(phraseList, types.StringType) UT.assertType(mapping, types.StringType) assert len(mapping) == 256, "character mapping table has length " +\ str(len(mapping)) + ", should be 256" UT.assertType(deleteList, types.StringType) self.table = table self.idColumn = idColumn self.cardinality = cardinality self.mapping = mapping self.deleteList = deleteList self.phraseList = [] for phrase in phraseList: wordList = string.split(string.translate(phrase, mapping, deleteList)) assert len(wordList) > 0, "no words in phrase after processing" self.phraseList += [wordList] def translateBucketAtomic (self, constraint, vocabularies): UT.assertType(constraint, M.Query.TextualConstraint) assert constraint.getOperator() in UT.standardTextualOperators,\ "unsupported operator: " + constraint.getOperator() wordList = string.split(string.translate(constraint.getText(), self.mapping, self.deleteList)) if len(wordList) == 0: raise UT.QueryError, "no query words specified in constraint " +\ "on bucket '" + constraint.getBucket() + "'" if constraint.getOperator() == "contains-all-words": for word in wordList: foundWord = 0 for phrase in self.phraseList: if word in phrase: foundWord = 1 break if not foundWord: return self._returnFalse() return self._returnTrue() elif constraint.getOperator() == "contains-any-words": for word in wordList: for phrase in self.phraseList: if word in phrase: return self._returnTrue() return self._returnFalse() elif constraint.getOperator() == "contains-phrase": for phrase in self.phraseList: for i in range(0, len(phrase)-len(wordList)+1): match = 1 for j in range(len(wordList)): if wordList[j] != phrase[i+j]: match = 0 break if match: return self._returnTrue() return self._returnFalse() else: UT.unhandledCase() def _returnTrue (self): return UT.Select( [UT.MainFrom(UT.TableRef(self.table), self.idColumn, self.cardinality)], UT.Expression(["1 = 1"])) def _returnFalse (self): return UT.Select( [UT.MainFrom(UT.TableRef(self.table), self.idColumn, self.cardinality)], UT.Expression(["1 = 0"]))