############################################################################### ## ## ## ALEXANDRIA DIGITAL LIBRARY ## ## University of California at Santa Barbara ## ## ## ## ------------------------------------------------------------------------- ## ## ## ## Copyright (c) 2003 by the Regents of the University of California ## ## All rights reserved ## ## ## ## Redistribution and use in source and binary forms, with or without ## ## modification, are permitted provided that the following conditions are ## ## met: ## ## ## ## 1. Redistributions of source code must retain the above copyright ## ## notice, this list of conditions, and the following disclaimer. ## ## ## ## 2. Redistributions in binary form must reproduce the above copyright ## ## notice, this list of conditions, and the following disclaimer in ## ## the documentation and/or other materials provided with the ## ## distribution. ## ## ## ## 3. All advertising materials mentioning features or use of this ## ## software must display the following acknowledgement: This product ## ## includes software developed by the Alexandria Digital Library, ## ## University of California at Santa Barbara, and its contributors. ## ## ## ## 4. Neither the name of the University nor the names of its ## ## contributors may be used to endorse or promote products derived ## ## from this software without specific prior written permission. ## ## ## ## THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND ANY ## ## EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ## ## WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE ## ## DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ## ## ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ## ## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ## ## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ## ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ## ## STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ## ## ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ## ## POSSIBILITY OF SUCH DAMAGE. ## ## ## ############################################################################### # $Header: /export/home/gjanee/bucket99/paradigms/RCS/Textual_InformixVerity.py,v 1.2 2003/10/21 20:34:37 gjanee Exp $ # SYNOPSIS # # Textual_InformixVerity (table, idColumn, textColumn, cardinality, # intraColumnField=None, # mapping=TextUtils.mappings.nonAlphanumericToWhitespace, # deleteList=TextUtils.deleteLists.keepAll) # # table # A table to query, e.g., "holding". # # idColumn # The table's identifier column (i.e., the column to be # selected), e.g., "holding_id". # # textColumn # The table column containing the text to search over # (i.e., the column against which the constraint is to be # placed), e.g., "subject_text". # # cardinality # A Cardinality object representing the cardinality of # 'table' with respect to 'textColumn'. # # intraColumnField # The intra-column field to restrict the search to, e.g., # "title". The default value of None means the search is # to be performed over the entire column. # # mapping # A Python character mapping table (i.e., a string of # length 256, indexed by ASCII character code) to process # constraint text with. Defaults to # 'nonAlphanumericToWhitespace', which maps # non-alphanumeric characters to whitespace (i.e., to word # separators). # # deleteList # A string of zero or more characters to delete from # constraint text. The default is the empty string, which # keeps all characters. 
#
# DESCRIPTION
#
#     Translates a textual constraint to an Informix Verity DataBlade
#     function call of the form
#
#         SELECT idColumn FROM table
#             WHERE vts_contains(textColumn, '<op>("W1", "W2", ...)')
#
#     or
#
#         SELECT idColumn FROM table
#             WHERE vts_contains(textColumn,
#                 '<op>("W1", "W2", ...) intraColumnField')
#
#     where 'op' is the Verity operator equivalent of the constraint
#     operator (AND, OR, or PHRASE), written between angle brackets.
#     ("W1", "W2", ...) is a sequence of one or more words
#     formed from the constraint text T by 1) deleting from T any
#     characters that appear in 'deleteList'; 2) mapping the remaining
#     characters using 'mapping'; and 3) treating sequences of
#     whitespace characters as word separators.
#
#     The semantics of the "contains-all-words" operator will
#     generally be correct only if the cardinality is "1" or "1?".  If
#     the cardinality is "0+" or "1+", wrap this paradigm in an
#     Adaptor_IndivisibleConcatenation paradigm.
#
#     Exceptions thrown:
#
#         no query words specified
#
# AUTHOR
#
#     Greg Janee
#     gjanee@alexandria.ucsb.edu
#
# HISTORY
#
#     $Log: Textual_InformixVerity.py,v $
#     Revision 1.2 2003/10/21 20:34:37 gjanee
#     Minor (but critical) documentation change.
# # Revision 1.1 2003/02/05 00:10:34 gjanee # Initial revision # import string import types import edu.ucsb.adl.middleware M = edu.ucsb.adl.middleware import UniversalTranslator UT = UniversalTranslator import paradigms P = paradigms _operatorMapping = { "contains-all-words" : "AND", "contains-any-words" : "OR", "contains-phrase" : "PHRASE" } class Textual_InformixVerity (UT.Paradigm): def __init__ (self, table, idColumn, textColumn, cardinality, intraColumnField=None, mapping=P.TextUtils.mappings.nonAlphanumericToWhitespace, deleteList=P.TextUtils.deleteLists.keepAll): UT.assertType(table, types.StringType) UT.assertType(idColumn, types.StringType) UT.assertType(textColumn, types.StringType) UT.assertType(cardinality, UT.Cardinality) UT.assertPolytype(intraColumnField, [types.StringType, types.NoneType]) UT.assertType(mapping, types.StringType) assert len(mapping) == 256, "character mapping table has length " +\ str(len(mapping)) + ", should be 256" UT.assertType(deleteList, types.StringType) self.table = table self.idColumn = idColumn self.textColumn = textColumn self.cardinality = cardinality self.intraColumnField = intraColumnField self.mapping = mapping self.deleteList = deleteList def translateBucketAtomic (self, constraint, vocabularies): UT.assertType(constraint, M.Query.TextualConstraint) assert constraint.getOperator() in UT.standardTextualOperators,\ "unsupported operator: " + constraint.getOperator() wordList = string.split(string.translate(constraint.getText(), self.mapping, self.deleteList)) if len(wordList) == 0: raise UT.QueryError, "no query words specified in constraint " +\ "on bucket '" + constraint.getBucket() + "'" table = UT.TableRef(self.table) wordClause = "" for word in wordList: if wordClause != "": wordClause = wordClause + ", " wordClause = wordClause + '"' +\ string.replace(string.replace(word, "'", "''"), '"', '\\"') +\ '"' if self.intraColumnField != None: fieldClause = " " + self.intraColumnField else: fieldClause = "" expression = 
UT.Expression(["vts_contains(", table, "." +\ self.textColumn + ", '<" +\ _operatorMapping[constraint.getOperator()] + ">(" + wordClause +\ ")" + fieldClause + "')"]) return UT.Select( [UT.MainFrom(table, self.idColumn, self.cardinality)], expression) def translateBucketBoolean (self, operator, constraints, vocabularies): UT.assertBooleanOperator(operator) UT.assertType(constraints, types.ListType) UT.assertListElementType(constraints, M.Query.TextualConstraint) UT.assertBooleanOperatorOperandConsistency(operator, constraints) UT.assertListElementCommonValue(constraints, lambda c: c.getBucket()) UT.assertListElementPredicateAll(constraints, lambda c: c.getOperator() in UT.standardTextualOperators) if operator == "AND": text = "" for constraint in constraints: if constraint.getOperator() == "contains-all-words": text += " " + constraint.getText() else: return None return self.translateBucketAtomic( M.Query.TextualConstraint(constraints[0].getBucket(), constraints[0].getField(), constraints[0].getOperator(), text), vocabularies) elif operator == "OR": text = "" for constraint in constraints: if constraint.getOperator() == "contains-any-words": text += " " + constraint.getText() else: return None return self.translateBucketAtomic( M.Query.TextualConstraint(constraints[0].getBucket(), constraints[0].getField(), constraints[0].getOperator(), text), vocabularies) else: return None