update solr index

This commit is contained in:
Arno Kaimbacher 2019-02-14 15:09:11 +01:00
parent 7c6654398d
commit 535a9363cc
18 changed files with 773 additions and 90 deletions

View file

@ -3,9 +3,12 @@
namespace App\Library\Search;
//use App\Library\Util\SolrSearchQuery;
use App\Library\Util\SearchParameter;
use App\Library\Search\SearchResult;
use App\Library\Util\SearchParameter;
use Illuminate\Support\Facades\Log;
use App\Library\Search\SolariumDocument;
use App\Models\Dataset;
use \Solarium\QueryType\Select\Query\Query;
class SolariumAdapter
{
@ -50,7 +53,55 @@ class SolariumAdapter
return 'solr';
}
public function createQuery() : SearchParameter
/**
 * Adds the given dataset(s) to the Solr index.
 *
 * The input is validated through normalizeDocuments() and then split into
 * slices of 16 datasets. Each slice is sent in its own update request,
 * and every request carries its own commit command.
 *
 * @param Dataset|Dataset[] $datasets a single dataset or a set of datasets
 * @return $this fluent interface
 */
public function addDatasetsToIndex($datasets)
{
    $validated = $this->normalizeDocuments($datasets);
    $converter = new SolariumDocument($this->options);

    foreach (array_chunk($validated, 16) as $batch) {
        $request = $this->client->createUpdate();

        // turn every dataset of this slice into a Solarium update document
        $solrDocuments = [];
        foreach ($batch as $dataset) {
            $solrDocuments[] = $converter->toSolrUpdateDocument($dataset, $request->createDocument());
        }

        // queue the documents followed by a commit, then send the request
        $request->addDocuments($solrDocuments);
        $request->addCommit();
        $this->client->update($request);
    }

    return $this;
}
/**
 * Normalizes the provided dataset(s) into a plain array of Dataset instances.
 *
 * A single dataset is wrapped into a one-element array; a traversable
 * collection of datasets is converted into a list; an array is used as is.
 *
 * @param Dataset|Dataset[]|\Traversable $documents dataset(s) to normalize
 * @return Dataset[] the validated datasets
 * @throws \InvalidArgumentException if any element is not a Dataset
 */
protected function normalizeDocuments($documents)
{
    if ($documents instanceof \Traversable && !($documents instanceof Dataset)) {
        // collections/iterators: keys are irrelevant, keep the values only
        $documents = iterator_to_array($documents, false);
    } elseif (!is_array($documents)) {
        $documents = [$documents];
    }

    foreach ($documents as $document) {
        if (!($document instanceof Dataset)) {
            // Must be fully qualified: class names do not fall back to the
            // global namespace, so an unqualified name would resolve to a
            // non-existent class inside this file's namespace.
            throw new \InvalidArgumentException("invalid dataset in provided set");
        }
    }

    return $documents;
}
/**
 * Creates a fresh, empty set of search parameters.
 *
 * @return SearchParameter
 */
public function createQuery(): SearchParameter
{
    $parameters = new SearchParameter();

    return $parameters;
}
@ -63,13 +114,14 @@ class SolariumAdapter
return $searchResult;
}
protected function applyParametersToSolariumQuery(\Solarium\QueryType\Select\Query\Query $query, SearchParameter $parameters = null, $preferOriginalQuery = false)
protected function applyParametersToSolariumQuery(Query $query, SearchParameter $parameters, $preferOriginalQuery)
{
if ($parameters) {
//$subfilters = $parameters->getSubFilters();
//if ( $subfilters !== null ) {
// foreach ( $subfilters as $name => $subfilter ) {
// if ( $subfilter instanceof Opus_Search_Solr_Filter_Raw || $subfilter instanceof Opus_Search_Solr_Solarium_Filter_Complex ) {
// if ( $subfilter instanceof Opus_Search_Solr_Filter_Raw
//|| $subfilter instanceof Opus_Search_Solr_Solarium_Filter_Complex ) {
// $query->createFilterQuery( $name )
// ->setQuery( $subfilter->compile( $query ) );
// }
@ -87,14 +139,13 @@ class SolariumAdapter
// }
// }
$filter = $parameters->getFilter();//"aa" all: '*:*'
$filter = $parameters->getFilter(); //"aa" all: '*:*'
if ($filter !== null) {
//$query->setStart( intval( $start ) );
//$query->setQuery('%P1%', array($filter));
$query->setQuery($filter);
}
$start = $parameters->getStart();
if ($start !== null) {
$query->setStart(intval($start));
@ -154,7 +205,7 @@ class SolariumAdapter
// }
}
protected function processQuery(\Solarium\QueryType\Select\Query\Query $query) : SearchResult
protected function processQuery(\Solarium\QueryType\Select\Query\Query $query): SearchResult
{
// send search query to service
$request = $this->execute($query, 'failed querying search engine');

View file

@ -0,0 +1,34 @@
<?php
namespace App\Library\Search;
use App\Models\Dataset;
use Solarium\QueryType\Update\Query\Document\Document;
/**
 * Fills Solarium update documents with the fields of the Solr XML document
 * that the parent class derives from a dataset via XSLT.
 */
class SolariumDocument extends SolrDocumentXslt
{
    /**
     * @param array $options options forwarded unchanged to the parent constructor
     */
    public function __construct($options)
    {
        parent::__construct($options);
    }

    /**
     * Populates the given Solarium update document with the fields derived
     * from the given dataset.
     *
     * @param Dataset $rdrDataset dataset to be indexed
     * @param Document $solrDoc Solarium update document to fill; existing fields are cleared
     * @return Document the same instance as $solrDoc, now holding the derived fields
     * @throws \Exception if the transformation result contains no <doc> element
     */
    public function toSolrUpdateDocument(Dataset $rdrDataset, Document $solrDoc)
    {
        // convert dataset to Solr XML document for supporting custom transformations
        $solrDomDoc = parent::toSolrDocument($rdrDataset, new \DOMDocument());

        // read back fields from generated Solr XML document
        $solrXml = simplexml_import_dom($solrDomDoc);
        if (!($solrXml instanceof \SimpleXMLElement) || !isset($solrXml->doc[0])) {
            // without this guard an empty document would be sent to Solr
            throw new \Exception('transformed Solr XML document does not contain a doc element');
        }
        $solrXmlDoc = $solrXml->doc[0];

        $solrDoc->clear();
        foreach ($solrXmlDoc->field as $field) {
            $solrDoc->addField(strval($field['name']), strval($field));
        }

        return $solrDoc;
    }
}

View file

@ -0,0 +1,79 @@
<?php
namespace App\Library\Search;
use App\Models\Dataset;
/**
 * Derives Solr XML documents from datasets by transforming the XML
 * representation of a dataset with an XSLT stylesheet.
 */
class SolrDocumentXslt
{
    /**
     * @var \XSLTProcessor
     */
    protected $processor;

    /**
     * Loads the XSLT stylesheet used for deriving Solr documents.
     *
     * @param array $options options; key 'xsltfile' is the path of the stylesheet
     * @throws \Exception if the stylesheet cannot be loaded or imported
     */
    public function __construct($options)
    {
        try {
            $xslt = new \DOMDocument();
            // load() and importStyleSheet() report failure via their return
            // value, so it has to be checked explicitly
            if ($xslt->load($options['xsltfile']) === false) {
                throw new \Exception('unable to load XSLT file');
            }

            $this->processor = new \XSLTProcessor();
            if ($this->processor->importStyleSheet($xslt) === false) {
                throw new \Exception('unable to import XSLT stylesheet');
            }
        } catch (\Exception $e) {
            throw new \Exception('invalid XSLT file for deriving Solr documents', 0, $e);
        }
    }

    /**
     * Transforms the given dataset into a Solr XML document.
     *
     * @param Dataset $rdrDataset dataset to transform
     * @param \DOMDocument $solrDoc document receiving the transformation result
     * @return \DOMDocument the same instance as $solrDoc, loaded with the Solr XML
     * @throws \Exception if the transformation fails or yields unparsable XML
     */
    public function toSolrDocument(Dataset $rdrDataset, \DOMDocument $solrDoc)
    {
        $modelXml = $this->getModelXml($rdrDataset);

        $transformed = $this->processor->transformToXML($modelXml);
        if (!is_string($transformed) || $transformed === '') {
            throw new \Exception('XSLT transformation of dataset failed');
        }

        $solrDoc->preserveWhiteSpace = false;
        if ($solrDoc->loadXML($transformed) === false) {
            throw new \Exception('XSLT transformation of dataset did not yield well-formed XML');
        }

        return $solrDoc;
    }

    /**
     * Retrieves XML describing model data of provided RDR dataset.
     *
     * NOTE: full-text content of attached files is not added to the XML here.
     *
     * @param Dataset $rdrDataset
     * @return \DOMDocument
     */
    protected function getModelXml(Dataset $rdrDataset)
    {
        $rdrDataset->fetchValues();

        // Set up caching xml-model and get XML representation of document.
        $xmlModel = new \App\Library\Xml\XmlModel();
        $xmlModel->setModel($rdrDataset);
        $xmlModel->excludeEmptyFields();

        // reuse the dataset's cache entry if there is one, else start a new one
        $cache = $rdrDataset->xmlCache ?: new \App\Models\XmlCache();
        $xmlModel->setXmlCache($cache);

        return $xmlModel->getDomDocument();
    }
}

View file

@ -133,12 +133,21 @@ class XmlModel
return $domDocument;
} else {
//create cache relation
$this->cache->fill(array(
'document_id' => $dataset->id,
'xml_version' => (int)$this->strategy->getVersion(),
'server_date_modified' => $dataset->server_date_modified,
'xml_data' => $domDocument->saveXML()
));
// $this->cache->updateOrCreate(array(
// 'document_id' => $dataset->id,
// 'xml_version' => (int)$this->strategy->getVersion(),
// 'server_date_modified' => $dataset->server_date_modified,
// 'xml_data' => $domDocument->saveXML()
// ));
if (!$this->cache->document_id) {
$this->cache->document_id = $dataset->id;
}
$this->cache->xml_version = (int)$this->strategy->getVersion();
$this->cache->server_date_modified = $dataset->server_date_modified;
$this->cache->xml_data = $domDocument->saveXML();
$this->cache->save();
Log::debug(__METHOD__ . ' cache refreshed for ' . get_class($dataset) . '#' . $dataset->id);
@ -161,20 +170,35 @@ class XmlModel
Log::debug(__METHOD__ . ' skipping cache for ' . get_class($dataset));
return null;
}
//$cached = $this->cache->hasValidEntry(
// $dataset->id,
// (int) $this->strategy->getVersion(),
// $dataset->server_date_modified
//);
//$cached = false;
$cache = XmlCache::where('document_id', $dataset->id)
->first();// model or null
if (!$cache) {
$actuallyCached = $this->cache->hasValidEntry(
$dataset->id,
$dataset->server_date_modified
);
//no actual cache
if (true !== $actuallyCached) {
Log::debug(__METHOD__ . ' cache miss for ' . get_class($dataset) . '#' . $dataset->id);
return null;
} else {
return $cache->getDomDocument();
}
//cache is actual return it for oai:
Log::debug(__METHOD__ . ' cache hit for ' . get_class($dataset) . '#' . $dataset->id);
try {
//return $this->_cache->get($model->getId(), (int) $this->_strategy->getVersion());
$cache = XmlCache::where('document_id', $dataset->id)->first();
return $cache->getDomDocument();
} catch (Exception $e) {
Log::warning(__METHOD__ . " Access to XML cache failed on " . get_class($dataset) . '#' . $dataset->id . ". Trying to recover.");
}
return null;
// // $cache = XmlCache::where('document_id', $dataset->id)
// // ->first();// model or null
// if (!$cache) {
// Log::debug(__METHOD__ . ' cache miss for ' . get_class($dataset) . '#' . $dataset->id);
// return null;
// } else {
// return $cache->getDomDocument();
// }
}
}