############################################################################### ## ## ## ALEXANDRIA DIGITAL LIBRARY ## ## University of California at Santa Barbara ## ## ## ## ------------------------------------------------------------------------- ## ## ## ## Copyright (c) 2003 by the Regents of the University of California ## ## All rights reserved ## ## ## ## Redistribution and use in source and binary forms, with or without ## ## modification, are permitted provided that the following conditions are ## ## met: ## ## ## ## 1. Redistributions of source code must retain the above copyright ## ## notice, this list of conditions, and the following disclaimer. ## ## ## ## 2. Redistributions in binary form must reproduce the above copyright ## ## notice, this list of conditions, and the following disclaimer in ## ## the documentation and/or other materials provided with the ## ## distribution. ## ## ## ## 3. All advertising materials mentioning features or use of this ## ## software must display the following acknowledgement: This product ## ## includes software developed by the Alexandria Digital Library, ## ## University of California at Santa Barbara, and its contributors. ## ## ## ## 4. Neither the name of the University nor the names of its ## ## contributors may be used to endorse or promote products derived ## ## from this software without specific prior written permission. ## ## ## ## THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND ANY ## ## EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ## ## WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE ## ## DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ## ## ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ## ## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ## ## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ## ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ## ## STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ## ## ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ## ## POSSIBILITY OF SUCH DAMAGE. ## ## ## ############################################################################### # $Header: /export/home/gjanee/bucket99/paradigms/RCS/Adaptor_Concatenation.py,v 1.2 2003/10/29 21:48:26 gjanee Exp $ # SYNOPSIS # # Adaptor_Concatenation (paradigms, # mapping=TextUtils.mappings.nonAlphanumericToWhitespace, # deleteList=TextUtils.deleteLists.keepAll) # # paradigms # A dictionary that maps one or more field URIs # (e.g., "http://purl.org/dc/elements/1.1/creator") to # underlying paradigms. # # mapping # A Python character mapping table (i.e., a string of # length 256, indexed by ASCII character code) to process # constraint text with. Defaults to # 'nonAlphanumericToWhitespace', which preserves # alphanumeric characters and maps all other characters to # whitespace (i.e., to word separators). Used only when # the constraint operator is "contains-all-words". # # deleteList # A string of zero or more characters to delete from # constraint text. The default is the empty string, which # keeps all characters. Used only when the constraint # operator is "contains-all-words". # # DESCRIPTION # # An adaptor that adds support for bucket-level textual searching # to a set of paradigms (the "underlying" paradigms), each of # which supports a specific field-level textual search, by # treating a bucket-level search as a virtual search over the # logical concatenation of the field-level textual content. # # Specifically, a field-level constraint matching one of the URIs # listed in 'paradigms' is passed through to the corresponding # underlying paradigm; if the paradigm does not support # field-level searching, it should treat the constraint as being # bucket-level. A field-level constraint not matching any listed # URI results in a query of the form # # SELECT id FROM table # WHERE 1 = 0 # # being returned. # # A bucket-level constraint (O, T), where O is a textual operator # and T is constraint text, is handled as follows. If O is # "contains-any-words" or "contains-phrase", the constraint is # passed to all underlying paradigms and the resulting queries are # UNIONed together. Otherwise, if O is "contains-all-words", this # paradigm parses T into one or more words (W1, W2, W3, ...) by: # 1) deleting from T any characters that appear in 'deleteList'; # 2) mapping the remaining characters using 'mapping'; and 3) # treating sequences of whitespace characters as word separators. # For each word W this paradigm then passes a new constraint # (O, W) to each underlying paradigm and UNIONs the resulting # queries, and those UNIONs are then INTERSECTed. If underlying # paradigm i returns query Qi(W) on word W, then the overall # returned query has the form: # # (Q1(W1) UNION Q2(W1) UNION Q3(W1) ...) INTERSECT # (Q1(W2) UNION Q2(W2) UNION Q3(W2) ...) INTERSECT ... # # Exceptions thrown: # # no query words specified # # AUTHOR # # Greg Janee # gjanee@alexandria.ucsb.edu # # HISTORY # # $Log: Adaptor_Concatenation.py,v $ # Revision 1.2 2003/10/29 21:48:26 gjanee # Per revision 1.8 of UniversalTranslator.py, this paradigm now # invokes field-level methods of the underlying paradigms if the # latter support field-level searching. Unresolved issue: given a # *bucket-level* constraint, this paradigm still calls, for each # underlying paradigm, the underlying paradigm's bucket-level # methods, even if the underlying paradigm supports field-level # searching. It's not clear what the correct behavior is in this # case; perhaps it should be configurable. At any rate, nesting a # field adaptor inside this paradigm may lead to unexpected # behavior. # # Revision 1.1 2002/11/04 22:46:40 gjanee # Initial revision # import string import types import edu.ucsb.adl.middleware M = edu.ucsb.adl.middleware import UniversalTranslator UT = UniversalTranslator import paradigms P = paradigms class Adaptor_Concatenation (UT.FieldSearchableParadigm): def __init__ (self, paradigms, mapping=P.TextUtils.mappings.nonAlphanumericToWhitespace, deleteList=P.TextUtils.deleteLists.keepAll): UT.assertType(paradigms, types.DictionaryType) UT.assertDictionaryElementType(paradigms, types.StringType, UT.Paradigm) UT.assertDictionaryNonempty(paradigms) UT.assertType(mapping, types.StringType) assert len(mapping) == 256, "character mapping table has length " +\ str(len(mapping)) + ", should be 256" UT.assertType(deleteList, types.StringType) self.paradigms = paradigms self.mapping = mapping self.deleteList = deleteList def translateBucketAtomic (self, constraint, vocabularies): UT.assertType(constraint, M.Query.TextualConstraint) assert constraint.getOperator() in UT.standardTextualOperators,\ "unsupported operator: " + constraint.getOperator() if constraint.getOperator() == "contains-all-words": wordList = string.split(string.translate(constraint.getText(), self.mapping, self.deleteList)) if len(wordList) == 0: raise UT.QueryError, "no query words specified in " +\ "constraint on bucket '" + constraint.getBucket() + "'" queries = [] for word in wordList: c = M.Query.TextualConstraint(constraint.getBucket(), constraint.getField(), constraint.getOperator(), word) items = [] for paradigm in self.paradigms.values(): item = paradigm.translateBucketAtomic(c, vocabularies) UT.assertPolytype(item, [UT.Select, UT.Query]) items += [item] queries += [UT._unify("OR", items)] return UT._unify("AND", queries) elif constraint.getOperator() == "contains-any-words" or\ constraint.getOperator() == "contains-phrase": items = [] for paradigm in self.paradigms.values(): item = paradigm.translateBucketAtomic(constraint, vocabularies) UT.assertPolytype(item, [UT.Select, UT.Query]) items += [item] return UT._unify("OR", items) else: UT.unhandledCase() def translateFieldAtomic (self, constraint, vocabularies): UT.assertType(constraint, M.Query.TextualConstraint) assert constraint.getField() != None, "no field constraint" if constraint.getField().uri in self.paradigms.keys(): paradigm = self.paradigms[constraint.getField().uri] if isinstance(paradigm, UT.FieldSearchableParadigm): return paradigm.translateFieldAtomic(constraint, vocabularies) else: return paradigm.translateBucketAtomic(constraint, vocabularies) else: paradigm = self.paradigms.values()[0] return UT.falsify(paradigm.translateBucketAtomic(constraint, vocabularies)) def translateFieldBoolean (self, operator, constraints, vocabularies): UT.assertBooleanOperator(operator) UT.assertType(constraints, types.ListType) UT.assertListElementType(constraints, M.Query.TextualConstraint) UT.assertBooleanOperatorOperandConsistency(operator, constraints) UT.assertListElementCommonValue(constraints, lambda c: c.getBucket()) UT.assertListElementPredicateAll(constraints, lambda c: c.getField() != None) UT.assertListElementCommonValue(constraints, lambda c: c.getField().uri) if constraints[0].getField().uri in self.paradigms.keys(): paradigm = self.paradigms[constraints[0].getField().uri] if isinstance(paradigm, UT.FieldSearchableParadigm): return paradigm.translateFieldBoolean(operator, constraints, vocabularies) else: return paradigm.translateBucketBoolean(operator, constraints, vocabularies) else: return None