update solr index

This commit is contained in:
Arno Kaimbacher 2019-02-14 15:09:11 +01:00
parent 7c6654398d
commit 535a9363cc
18 changed files with 773 additions and 90 deletions

View file

@ -3,9 +3,12 @@
namespace App\Library\Search;
//use App\Library\Util\SolrSearchQuery;
use App\Library\Util\SearchParameter;
use App\Library\Search\SearchResult;
use App\Library\Util\SearchParameter;
use Illuminate\Support\Facades\Log;
use App\Library\Search\SolariumDocument;
use App\Models\Dataset;
use \Solarium\QueryType\Select\Query\Query;
class SolariumAdapter
{
@ -50,7 +53,55 @@ class SolariumAdapter
return 'solr';
}
public function createQuery() : SearchParameter
/**
 * Adds the provided dataset(s) to the Solr index.
 *
 * The datasets are validated via normalizeDocuments(), split into batches of
 * at most 16 and every batch is sent to Solr in its own update request that
 * also carries a commit.
 *
 * @param Dataset|Dataset[] $datasets single dataset or array of datasets
 * @return $this fluent interface
 */
public function addDatasetsToIndex($datasets)
{
    $validated = $this->normalizeDocuments($datasets);
    $documentBuilder = new SolariumDocument($this->options);

    // one update request (including its commit) per batch of datasets
    foreach (array_chunk($validated, 16) as $batch) {
        $updateQuery = $this->client->createUpdate();

        // convert every dataset of the batch into a Solarium update document
        $solrDocuments = [];
        foreach ($batch as $dataset) {
            $solrDocuments[] = $documentBuilder->toSolrUpdateDocument(
                $dataset,
                $updateQuery->createDocument()
            );
        }

        $updateQuery->addDocuments($solrDocuments);
        $updateQuery->addCommit();
        $this->client->update($updateQuery);
    }

    return $this;
}
/**
 * Normalizes the provided dataset(s) into an array of Dataset instances.
 *
 * A single value is wrapped into a one-element array; every element of the
 * resulting array must be a Dataset.
 *
 * @param Dataset|Dataset[] $documents single dataset or array of datasets
 * @return Dataset[] the validated datasets
 * @throws \InvalidArgumentException if any element is not a Dataset
 */
protected function normalizeDocuments($documents)
{
    if (!is_array($documents)) {
        $documents = [$documents];
    }

    foreach ($documents as $document) {
        if (!($document instanceof Dataset)) {
            // Fully qualified on purpose: this file lives in a namespace, so an
            // unqualified name would resolve to a non-existent class in
            // App\Library\Search and fail with "Class not found".
            throw new \InvalidArgumentException("invalid dataset in provided set");
        }
    }

    return $documents;
}
/**
 * Creates a new, empty set of search parameters.
 *
 * @return SearchParameter newly constructed parameter object
 */
public function createQuery(): SearchParameter
{
    $parameters = new SearchParameter();

    return $parameters;
}
@ -63,13 +114,14 @@ class SolariumAdapter
return $searchResult;
}
protected function applyParametersToSolariumQuery(\Solarium\QueryType\Select\Query\Query $query, SearchParameter $parameters = null, $preferOriginalQuery = false)
protected function applyParametersToSolariumQuery(Query $query, SearchParameter $parameters, $preferOriginalQuery)
{
if ($parameters) {
//$subfilters = $parameters->getSubFilters();
//if ( $subfilters !== null ) {
// foreach ( $subfilters as $name => $subfilter ) {
// if ( $subfilter instanceof Opus_Search_Solr_Filter_Raw || $subfilter instanceof Opus_Search_Solr_Solarium_Filter_Complex ) {
// if ( $subfilter instanceof Opus_Search_Solr_Filter_Raw
//|| $subfilter instanceof Opus_Search_Solr_Solarium_Filter_Complex ) {
// $query->createFilterQuery( $name )
// ->setQuery( $subfilter->compile( $query ) );
// }
@ -87,14 +139,13 @@ class SolariumAdapter
// }
// }
$filter = $parameters->getFilter();//"aa" all: '*:*'
$filter = $parameters->getFilter(); //"aa" all: '*:*'
if ($filter !== null) {
//$query->setStart( intval( $start ) );
//$query->setQuery('%P1%', array($filter));
$query->setQuery($filter);
}
$start = $parameters->getStart();
if ($start !== null) {
$query->setStart(intval($start));
@ -154,7 +205,7 @@ class SolariumAdapter
// }
}
protected function processQuery(\Solarium\QueryType\Select\Query\Query $query) : SearchResult
protected function processQuery(\Solarium\QueryType\Select\Query\Query $query): SearchResult
{
// send search query to service
$request = $this->execute($query, 'failed querying search engine');

View file

@ -0,0 +1,34 @@
<?php
namespace App\Library\Search;
use App\Models\Dataset;
use Solarium\QueryType\Update\Query\Document\Document;
/**
 * Builds Solarium update documents from datasets by reusing the XSLT based
 * transformation of the parent class.
 */
class SolariumDocument extends SolrDocumentXslt
{
    /**
     * @param mixed $options options forwarded unchanged to the parent constructor
     */
    public function __construct($options)
    {
        parent::__construct($options);
    }

    /**
     * Fills the provided Solarium update document with the fields derived from
     * the given dataset.
     *
     * The dataset is first transformed into a Solr XML document by the parent
     * class; the <field> elements of its first <doc> element are then copied
     * into the (cleared) Solarium document.
     *
     * @param Dataset $rdrDataset dataset to be indexed
     * @param Document $solrDoc Solarium document to populate; existing fields are cleared
     * @return Document the populated document (same instance as $solrDoc)
     * @throws \Exception if the transformation result contains no <doc> element
     */
    public function toSolrUpdateDocument(Dataset $rdrDataset, Document $solrDoc)
    {
        // The type declaration already guarantees a Solarium update document,
        // so no additional instanceof check is required here.

        // convert dataset to Solr XML document for supporting custom transformations
        $solrDomDoc = parent::toSolrDocument($rdrDataset, new \DOMDocument());

        // read back fields from generated Solr XML document
        $solrXml = simplexml_import_dom($solrDomDoc);
        if (!($solrXml instanceof \SimpleXMLElement) || !isset($solrXml->doc)) {
            // Without this guard a missing <doc> element would only raise
            // warnings and silently produce an empty Solr document.
            throw new \Exception('transformed Solr XML document does not contain any doc element');
        }
        $solrXmlDoc = $solrXml->doc[0];

        $solrDoc->clear();
        foreach ($solrXmlDoc->field as $field) {
            $solrDoc->addField(strval($field['name']), strval($field));
        }

        return $solrDoc;
    }
}

View file

@ -0,0 +1,79 @@
<?php
namespace App\Library\Search;
use App\Models\Dataset;
/**
 * Derives Solr XML documents from datasets by running the XML model of a
 * dataset through an XSLT stylesheet.
 */
class SolrDocumentXslt
{
    /**
     * XSLT processor loaded with the stylesheet named by option 'xsltfile'.
     *
     * @var \XSLTProcessor
     */
    protected $processor;

    /**
     * Loads the XSLT stylesheet used for deriving Solr documents.
     *
     * @param array|\ArrayAccess $options must provide the stylesheet path in key 'xsltfile'
     * @throws \Exception if the stylesheet is missing or cannot be loaded/imported
     */
    public function __construct($options)
    {
        //parent::__construct($options);
        if (!isset($options['xsltfile']) || $options['xsltfile'] === '') {
            throw new \Exception('invalid XSLT file for deriving Solr documents');
        }

        try {
            // DOMDocument::load() and XSLTProcessor::importStylesheet() report
            // failure by returning false, so both results are checked explicitly.
            $xslt = new \DOMDocument();
            if ($xslt->load($options['xsltfile']) === false) {
                throw new \Exception('unable to load XSLT file');
            }

            $processor = new \XSLTProcessor();
            if ($processor->importStylesheet($xslt) === false) {
                throw new \Exception('unable to import XSLT stylesheet');
            }
        } catch (\Exception $e) {
            // Exception is fully qualified: inside this namespace the bare name
            // would refer to a non-existent App\Library\Search\Exception.
            throw new \Exception('invalid XSLT file for deriving Solr documents', 0, $e);
        }

        $this->processor = $processor;
    }

    /**
     * Transforms the XML model of the given dataset into a Solr XML document.
     *
     * @param Dataset $rdrDataset dataset to transform
     * @param \DOMDocument $solrDoc document receiving the transformation result
     * @return \DOMDocument the provided document, loaded with the generated Solr XML
     * @throws \Exception if the XSLT transformation fails or yields unparsable XML
     */
    public function toSolrDocument(Dataset $rdrDataset, \DOMDocument $solrDoc)
    {
        // The type declaration already guarantees a DOMDocument instance.
        $modelXml = $this->getModelXml($rdrDataset);//->saveXML();

        // transformToXml() yields false/null on failure; never feed that to loadXML()
        $solrXml = $this->processor->transformToXml($modelXml);
        if (!is_string($solrXml) || $solrXml === '') {
            throw new \Exception('XSLT transformation of dataset did not yield a Solr XML document');
        }

        $solrDoc->preserveWhiteSpace = false;
        if ($solrDoc->loadXML($solrXml) === false) {
            throw new \Exception('XSLT transformation of dataset yielded malformed Solr XML');
        }

        // if (Opus_Config::get()->log->prepare->xml) {
        //     $modelXml->formatOutput = true;
        //     Opus_Log::get()->debug("input xml\n" . $modelXml->saveXML());
        //     $solrDoc->formatOutput = true;
        //     Opus_Log::get()->debug("transformed solr xml\n" . $solrDoc->saveXML());
        // }
        return $solrDoc;
    }

    /**
     * Retrieves XML describing model data of provided RDR dataset.
     *
     * @param Dataset $rdrDataset
     * @return \DOMDocument
     */
    protected function getModelXml(Dataset $rdrDataset)
    {
        $rdrDataset->fetchValues();

        // Set up caching xml-model and get XML representation of document.
        $xmlModel = new \App\Library\Xml\XmlModel();
        $xmlModel->setModel($rdrDataset);
        $xmlModel->excludeEmptyFields();
        //$xmlModel->setStrategy(new Opus_Model_Xml_Version1);

        // use the XML cache attached to the dataset, or a fresh one if there is none
        $cache = $rdrDataset->xmlCache ?: new \App\Models\XmlCache();
        $xmlModel->setXmlCache($cache);

        $modelXml = $xmlModel->getDomDocument();

        // extract fulltext from file and append it to the generated xml (currently disabled).
        //$this->attachFulltextToXml($modelXml, $opusDoc->getFile(), $opusDoc->getId());
        return $modelXml;
    }
}