# SwishHelper.py
import os, tempfile, glob, re from Ft.Server.Common import Schema #from Ft.Server.Common import ResourceTypes from Ft.Lib.Set import Unique from Ft.Server import SWISH_SCHEMA from Ft.Server.Server import FtServerServerException, Error
class SwishParameters:
    """
    Holds the settings 4SS uses for swish: temporary file locations,
    the names of the search/index executables and the repository path
    under which the index is stored.
    """
    def __init__(self):
        # Use the TEMP environment variable when it is set and non-empty,
        # otherwise fall back to /tmp.
        tmpRoot = os.environ.get('TEMP') or '/tmp'
        workDir = os.path.join(tmpRoot, 'xmlserver')

        # Temporary locations on the local file system
        self.TMP_MAIN_TMP_DIRECTORY = tmpRoot
        self.TMP_DIRECTORY = workDir
        self.INDEX_PATH = os.path.join(tmpRoot, 'swish.index')
        self.INCREMENTED_INDEX_PATH = os.path.join(tmpRoot, 'swish.index.new')
        self.DUMMY_PATH = os.path.join(workDir, 'dummy.xml')
        self.SEARCH_OUTPUT = os.path.join(tmpRoot, 'output')

        # Names of the external executables
        self.SEARCH_EXEC = 'search'
        self.INDEX_EXEC = 'index'

        # Repository path of the stored index resource
        self.FTSS_SWISH_INDEX = "/ftss/_4ss_swishIndex"
def SearchDocuments(searchString,repo):
    """
    Run a full-text search against the index stored in the repository.

    searchString - the query text; it is split on whitespace and handed to
                   the search executable as separate arguments (it is never
                   interpreted by a shell)
    repo - repository used to fetch the stored index and to map the hashed
           file names reported by the search back to document paths

    Returns a list of unique document paths.  The list is empty when the
    repository holds no index or when the search executable cannot be
    started.
    """
    # Imported locally so the module's import block stays untouched.
    import subprocess

    swishParams = GenerateSwishParams()

    # if there is no index, then the search should automatically return nothing
    if not repo.hasResource(swishParams.FTSS_SWISH_INDEX):
        return []

    indexSrc = repo.fetchResource(swishParams.FTSS_SWISH_INDEX).getContent()

    # write index file; the index is opaque data produced by the indexer, so
    # it is copied byte-for-byte (text mode would mangle it on Windows)
    f = open(swishParams.INDEX_PATH, 'wb')
    try:
        f.write(indexSrc)
    finally:
        f.close()

    # run search; the query is user input, so pass an argument list instead
    # of building a shell command line
    command = [swishParams.SEARCH_EXEC, '-i', swishParams.INDEX_PATH]
    command.extend(searchString.split())
    out = open(swishParams.SEARCH_OUTPUT, 'w')
    try:
        try:
            subprocess.call(command, stdout=out)
        except OSError:
            # The executable could not be started.  The output file stays
            # empty, so the search yields no hits.
            pass
    finally:
        out.close()

    out = open(swishParams.SEARCH_OUTPUT)
    try:
        result = out.read()
    finally:
        out.close()

    hits = re.compile(r'[\S]+\.xml').findall(result)

    # NOTE(review): the original predicate was the truthiness of
    # x.find(tmpDir), i.e. a hit is kept unless the temp directory occurs at
    # position 0.  That behaviour is preserved here; confirm whether a
    # containment test (find(...) != -1) was intended.
    tmpDir = swishParams.TMP_DIRECTORY
    hits = [hit for hit in hits if hit.find(tmpDir) != 0]

    # map hashed names back to repository paths, removing duplicates
    uniquePaths = {}
    for hit in hits:
        sha_path = hit.split('.xml')[0]
        statements = repo.getModel().complete('', SWISH_SCHEMA, sha_path)
        if not statements:
            # no document is registered under this hash; skip the hit
            continue
        uniquePaths[statements[0].subject] = ''

    return list(uniquePaths.keys())
def ReIndex(repo):
    """
    Rebuild the full-text index from scratch and store it in the repository.

    Every document listed by a document definition that requests full-text
    indexing is written to a temporary directory, the index executable is
    run over that directory, and the resulting index file is stored at
    FTSS_SWISH_INDEX (updating the resource if it exists, creating it
    otherwise).  When no document is to be indexed, an existing index
    resource is deleted instead.

    repo - repository to read the documents from and store the index in

    Raises OSError if the index executable cannot be started.
    """
    # Imported locally so the module's import block stays untouched.
    import subprocess

    swishParams = GenerateSwishParams()

    # retrieve uris of all documents whose doc defs specify full text indexing
    indexUris = []
    for docDef in repo.getModel().complete(None, Schema.FULL_TEXT_INDEX, '1'):
        dd = repo.fetchResource(docDef.subject)
        indexUris.extend(dd.getMetaDocumentUris())

    if not indexUris:
        # nothing to index: drop any index left over from earlier runs
        if repo.hasResource(swishParams.FTSS_SWISH_INDEX):
            repo.deleteResource(swishParams.FTSS_SWISH_INDEX)
        return

    # create temporary directory (only needed when there is work to do)
    if not os.path.exists(swishParams.TMP_DIRECTORY):
        os.mkdir(swishParams.TMP_DIRECTORY)

    try:
        # extract all marked documents
        for uri in indexUris:
            sha_path = ShaPath(uri, None, 0)
            f = open(os.path.join(swishParams.TMP_DIRECTORY, sha_path + '.xml'), 'w')
            try:
                f.write(repo.fetchResource(uri).getContent())
            finally:
                f.close()

        # write dummy document if there is only one metadocument
        if len(indexUris) == 1:
            f = open(swishParams.DUMMY_PATH, 'w')
            try:
                f.write('<empty></empty>')
            finally:
                f.close()

        # index metadocuments; an argument list keeps paths containing
        # spaces intact, and the verbose output is simply discarded
        command = [swishParams.INDEX_EXEC, '-v', '4', '-p', '101',
                   '-i', swishParams.INDEX_PATH,
                   '--html-pattern', '*.xml',
                   swishParams.TMP_DIRECTORY]
        devnull = open(os.devnull, 'w')
        try:
            subprocess.call(command, stdout=devnull)
        finally:
            devnull.close()

        # store new index file; read as bytes so the index is not altered
        # by newline translation
        f = open(swishParams.INDEX_PATH, 'rb')
        try:
            indexData = f.read()
        finally:
            f.close()

        if repo.hasResource(swishParams.FTSS_SWISH_INDEX):
            indexResource = repo.fetchResource(swishParams.FTSS_SWISH_INDEX)
            indexResource.setContent(indexData)
        else:
            repo.createRawFile(swishParams.FTSS_SWISH_INDEX, 'application/x-swish', indexData)
    finally:
        # clean up temp directory, even when indexing failed part-way
        for tempXmlFile in glob.glob(os.path.join(swishParams.TMP_DIRECTORY, '*')):
            os.remove(tempXmlFile)
        os.rmdir(swishParams.TMP_DIRECTORY)

        # clean up index file(s)
        for indexFile in glob.glob(os.path.join(swishParams.TMP_MAIN_TMP_DIRECTORY, 'swish*')):
            os.remove(indexFile)
def ShaPath(path,repo,addToModel=1):
    """
    Return the SHA-1 hex digest of a document path.

    path - the document path (uri) to hash; text that cannot be hashed
           directly is encoded as UTF-8 first
    repo - repository whose model receives the path -> hash statement;
           only used when addToModel is true, so it may be None otherwise
    addToModel - when true, add a Statement(path, SWISH_SCHEMA, digest) to
                 the repository model

    Returns the 40-character hexadecimal digest.
    """
    try:
        from hashlib import sha1
    except ImportError:
        # interpreters that predate hashlib only ship the sha module
        from sha import new as sha1

    try:
        digest = sha1(path)
    except (TypeError, UnicodeError):
        # Text that the hash constructor rejects (non-ASCII unicode, or any
        # str on Python 3) is hashed via its UTF-8 encoding.  Digests of
        # byte strings and ASCII text are unaffected.
        if not hasattr(path, 'encode'):
            raise
        digest = sha1(path.encode('utf-8'))
    sha_path = digest.hexdigest()

    if addToModel:
        from Ft.Rdf.Statement import Statement
        st = Statement(path, SWISH_SCHEMA, sha_path)
        repo.getModel().add(st)
    return sha_path
def Index(uri, xmlSrc,repo):
    """
    Add a single document to the full-text index stored in the repository.

    The document source is written to a temporary directory under a file
    name derived from the hash of its uri, and the index executable is run
    over that directory.  If the repository already holds an index, it is
    written to INDEX_PATH and the executable is run with -I; the result is
    then read from INCREMENTED_INDEX_PATH and stored back into the existing
    resource.  Otherwise a new index resource is created from INDEX_PATH.

    uri - repository path of the document; also recorded in the model
          through ShaPath
    xmlSrc - the document's XML source
    repo - repository holding the index resource and the model

    Raises OSError if the index executable cannot be started.
    """
    # Imported locally so the module's import block stays untouched.
    import subprocess

    swishParams = GenerateSwishParams()

    indexSrc = ''
    if repo.hasResource(swishParams.FTSS_SWISH_INDEX):
        indexSrc = repo.fetchResource(swishParams.FTSS_SWISH_INDEX).getContent()

    # create temp directory
    if not os.path.exists(swishParams.TMP_DIRECTORY):
        os.mkdir(swishParams.TMP_DIRECTORY)

    try:
        # Generate a hashed representation of the path to use for the
        # filename of the indexed document
        sha_path = ShaPath(uri, repo)

        # write xml source file and dummy file (need at least 2 documents
        # for an index)
        xmlFilePath = os.path.join(swishParams.TMP_DIRECTORY, sha_path + '.xml')
        f = open(xmlFilePath, 'w')
        try:
            f.write(xmlSrc)
        finally:
            f.close()

        f = open(swishParams.DUMMY_PATH, 'w')
        try:
            f.write('<b></b>')
        finally:
            f.close()

        command = [swishParams.INDEX_EXEC, '-v', '4', '-p', '101']

        # write index file (if exists) and switch to incremental indexing;
        # the index is copied byte-for-byte
        if indexSrc:
            command.append('-I')
            f = open(swishParams.INDEX_PATH, 'wb')
            try:
                f.write(indexSrc)
            finally:
                f.close()

        command.extend(['-i', swishParams.INDEX_PATH,
                        '--html-pattern', '*.xml',
                        swishParams.TMP_DIRECTORY])

        # run the indexer without a shell; its verbose output is discarded
        devnull = open(os.devnull, 'w')
        try:
            subprocess.call(command, stdout=devnull)
        finally:
            devnull.close()

        # update/store updated index file
        if indexSrc:
            resultPath = swishParams.INCREMENTED_INDEX_PATH
        else:
            resultPath = swishParams.INDEX_PATH
        f = open(resultPath, 'rb')
        try:
            indexData = f.read()
        finally:
            f.close()

        if indexSrc:
            indexResource = repo.fetchResource(swishParams.FTSS_SWISH_INDEX)
            indexResource.setContent(indexData)
        else:
            repo.createRawFile(swishParams.FTSS_SWISH_INDEX, 'application/x-swish', indexData)
    finally:
        # clean up temp directory, even when indexing failed part-way
        for tempXmlFile in glob.glob(os.path.join(swishParams.TMP_DIRECTORY, '*')):
            os.remove(tempXmlFile)
        os.rmdir(swishParams.TMP_DIRECTORY)
def GenerateSwishParams():
    """
    Create and return a new SwishParameters instance, printing a
    one-line notice to standard output.
    """
    params = SwishParameters()
    print("Check for Swish support")
    # The check for the index executable is currently disabled:
    #if not os.path.exists(params.INDEX_EXEC):
    #    raise FtServerServerException(Error.SWISH_NOT_SUPPORTED,reason="The index executable %s could not be found" % params.INDEX_EXEC)
    return params
|