###############################################################################
## ##
## ALEXANDRIA DIGITAL LIBRARY ##
## University of California at Santa Barbara ##
## ##
## ------------------------------------------------------------------------- ##
## ##
## Copyright (c) 2008 by the Regents of the University of California ##
## All rights reserved ##
## ##
## Redistribution and use in source and binary forms, with or without ##
## modification, are permitted provided that the following conditions are ##
## met: ##
## ##
## 1. Redistributions of source code must retain the above copyright ##
## notice, this list of conditions, and the following disclaimer. ##
## ##
## 2. Redistributions in binary form must reproduce the above copyright ##
## notice, this list of conditions, and the following disclaimer in ##
## the documentation and/or other materials provided with the ##
## distribution. ##
## ##
## 3. All advertising materials mentioning features or use of this ##
## software must display the following acknowledgement: This product ##
## includes software developed by the Alexandria Digital Library, ##
## University of California at Santa Barbara, and its contributors. ##
## ##
## THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND ANY ##
## EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ##
## WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE ##
## DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ##
## ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ##
## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ##
## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ##
## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ##
## STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ##
## ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ##
## POSSIBILITY OF SUCH DAMAGE. ##
## ##
###############################################################################
# $Header: /export/home/gjanee/bucket99/paradigms/RCS/Textual_OracleText.py,v 1.1 2008/05/19 20:11:57 gjanee Exp $
# SYNOPSIS
#
# Textual_OracleText (table, idColumn, textColumn, indexType,
# cardinality,
# mapping=TextUtils.mappings.nonAlphanumericToWhitespace,
# deleteList=TextUtils.deleteLists.keepAll)
#
# table
# A table to query, e.g., "holding".
#
# idColumn
# The table's identifier column (i.e., the column to be
# selected), e.g., "holding_id".
#
# textColumn
# The table column to query, e.g., "subject_text".
#
# indexType
# The type of index on 'textColumn'. Must be either
# "CONTEXT" or "CTXCAT".
#
# cardinality
# A Cardinality object representing the cardinality of
# 'table' with respect to 'textColumn'.
#
# mapping
# A Python character mapping table (i.e., a string of
# length 256, indexed by ASCII character code) to process
# constraint text with. Defaults to
# 'nonAlphanumericToWhitespace', which maps
# non-alphanumeric characters to whitespace (i.e., to word
# separators).
#
# deleteList
# A string of zero or more characters to delete from
# constraint text. The default is the empty string, which
# keeps all characters.
#
# DESCRIPTION
#
# Translates a textual constraint to an Oracle Text constraint of
# the form (for a CONTEXT index)
#
# SELECT idColumn FROM table
# WHERE CONTAINS(textColumn, 'expression', 1) > 0
#
# or (for a CTXCAT index)
#
# SELECT idColumn FROM table
# WHERE CATSEARCH(textColumn, 'expression', NULL) > 0
#
# where 'expression' is the constraint text processed as follows:
# 1) any characters that appear in 'deleteList' are deleted; 2)
# the remaining characters are mapped using 'mapping'; 3) words
# within the resulting constraint text are extracted using maximal
# sequences of whitespace characters as word separators; 4) words
# are escaped appropriately and recombined according to the index
# type and constraint operator. For example, for index type
# CONTEXT, a constraint with
#
#     bucket:   ...
#     operator: contains-all-words
#     text:     Elmer J. Fudd
#
# results in expression
#
# '{Elmer} & {J} & {Fudd}'
#
# For more information, see the Oracle Text Application Developer's
# Guide, version 10g Release 2 (June 2005), and the Oracle Text
# Reference.
#
# This paradigm assumes that the text processing specified by
# 'mapping' and 'deleteList' is compatible with Oracle's notion of
# words (specifically, with the lexer used by the Oracle text
# index), which it is, by default.
#
# The semantics of the "contains-all-words" operator will
# generally be correct only if the cardinality is "1" or "1?". If
# the cardinality is "0+" or "1+", wrap this paradigm in an
# Adaptor_IndivisibleConcatenation paradigm.
#
# Exceptions thrown:
#
# no query words specified
#
# AUTHOR
#
# Greg Janee
# gjanee@alexandria.ucsb.edu
#
# HISTORY
#
# $Log: Textual_OracleText.py,v $
# Revision 1.1 2008/05/19 20:11:57 gjanee
# Initial revision
#
import string
import types
import edu.ucsb.adl.middleware
M = edu.ucsb.adl.middleware
import UniversalTranslator
UT = UniversalTranslator
import paradigms
P = paradigms
_separator = {
"CONTEXT" : {
"contains-any-words" : " | ",
"contains-all-words" : " & ",
"contains-phrase" : " " },
"CTXCAT" : {
"contains-any-words" : " | ",
"contains-all-words" : " ",
"contains-phrase" : " " }}
def _escapeWord (word):
return "{" + string.replace(word, "}", "}}") + "}"
class Textual_OracleText (UT.Paradigm):
  """Paradigm translating a textual constraint to an Oracle Text query.

  The generated query selects 'idColumn' from 'table' where either
  CONTAINS(table.textColumn, 'expression', 1) > 0 (index type
  "CONTEXT") or CATSEARCH(table.textColumn, 'expression', NULL) > 0
  (index type "CTXCAT") holds.
  """

  def __init__ (self, table, idColumn, textColumn, indexType, cardinality,
    mapping=P.TextUtils.mappings.nonAlphanumericToWhitespace,
    deleteList=P.TextUtils.deleteLists.keepAll):
    """Validate and store the paradigm's configuration.

    table       -- name of the table to query (string).
    idColumn    -- name of the column to select (string).
    textColumn  -- name of the text column to query (string).
    indexType   -- "CONTEXT" or "CTXCAT"; anything else fails an
                   assertion.
    cardinality -- a UT.Cardinality object.
    mapping     -- character mapping table; must be a string of
                   length 256 or an assertion fails.
    deleteList  -- string of characters to delete from the
                   constraint text.

    Every argument is also type-checked with UT.assertType.
    """
    UT.assertType(table, types.StringType)
    self.table = table
    UT.assertType(idColumn, types.StringType)
    self.idColumn = idColumn
    UT.assertType(textColumn, types.StringType)
    self.textColumn = textColumn
    UT.assertType(indexType, types.StringType)
    assert indexType in ["CONTEXT", "CTXCAT"],\
      "unrecognized Oracle Text index type: " + indexType
    self.indexType = indexType
    UT.assertType(cardinality, UT.Cardinality)
    self.cardinality = cardinality
    UT.assertType(mapping, types.StringType)
    assert len(mapping) == 256, "character mapping table has length " +\
      str(len(mapping)) + ", should be 256"
    self.mapping = mapping
    UT.assertType(deleteList, types.StringType)
    self.deleteList = deleteList

  def translateBucketAtomic (self, constraint, vocabularies):
    """Translate one textual constraint into a UT.Select.

    constraint   -- an M.Query.TextualConstraint whose operator must
                    be one of UT.standardTextualOperators.
    vocabularies -- not used by this paradigm.

    Raises UT.QueryError if, after character deletion, mapping and
    whitespace splitting, the constraint text contains no words.
    """
    UT.assertType(constraint, M.Query.TextualConstraint)
    operator = constraint.getOperator()
    assert operator in UT.standardTextualOperators,\
      "unsupported operator: " + operator
    # string.translate (rather than the str method) is retained on
    # purpose so that the text is processed exactly as before.
    wordList = string.translate(constraint.getText(), self.mapping,
      self.deleteList).split()
    if not wordList:
      raise UT.QueryError("no query words specified in constraint " +
        "on bucket '" + constraint.getBucket() + "'")
    expression = _separator[self.indexType][operator].join(
      [_escapeWord(w) for w in wordList])
    if self.indexType == "CTXCAT" and operator == "contains-phrase":
      # For a CTXCAT index a phrase is additionally wrapped in double
      # quotes.
      expression = "\"" + expression + "\""
    # The two index types differ only in the SQL function name and in
    # the trailing argument; everything in between is shared.
    if self.indexType == "CONTEXT":
      function, tail = "CONTAINS(", "', 1) > 0"
    else:
      function, tail = "CATSEARCH(", "', NULL) > 0"
    table = UT.TableRef(self.table)
    # Single quotes are doubled because the expression is embedded in
    # a single-quoted SQL string literal.
    e = UT.Expression([function, table, "." + self.textColumn + ", '" +
      expression.replace("'", "''") + tail])
    return UT.Select([UT.MainFrom(table, self.idColumn, self.cardinality)], e)