update solr index

This commit is contained in:
Arno Kaimbacher 2019-02-14 15:09:11 +01:00
parent 7c6654398d
commit 535a9363cc
18 changed files with 773 additions and 90 deletions

View file

@ -0,0 +1,30 @@
<?php
namespace App\Events\Dataset;
use Illuminate\Queue\SerializesModels;
use Illuminate\Foundation\Events\Dispatchable;
use Illuminate\Broadcasting\InteractsWithSockets;
/**
 * Event object that hands a dataset to its listeners.
 *
 * The class name indicates it is meant to be raised after a dataset update;
 * the event itself only stores whatever value it is constructed with.
 */
class DatasetUpdated
{
    use Dispatchable;
    use InteractsWithSockets;
    use SerializesModels;

    /**
     * The value passed to the constructor, exposed to listeners.
     *
     * @var mixed
     */
    public $dataset;

    /**
     * Create a new event instance.
     *
     * @param mixed $dataset value stored on the event as $this->dataset
     */
    public function __construct($dataset)
    {
        $this->dataset = $dataset;
    }
}

View file

@ -1,7 +1,7 @@
<?php namespace App\Events;
abstract class Event {
//
<?php
namespace App\Events;
abstract class Event
{
//
}

View file

@ -32,11 +32,13 @@ class PagesController extends Controller
$dataset->load('titles');
$dataset->load('abstracts');
$authors = $dataset->authors()
$authors = $dataset->persons()
->wherePivot('role', 'author')
->orderBy('link_documents_persons.sort_order', 'desc')
->get();
$contributors = $dataset->contributors()
$contributors = $dataset->persons()
->wherePivot('role', 'contributor')
->orderBy('link_documents_persons.sort_order', 'desc')
->get();

View file

@ -449,7 +449,8 @@ class RequestController extends Controller
$xmlModel = new \App\Library\Xml\XmlModel();
$xmlModel->setModel($dataset);
$xmlModel->excludeEmptyFields();
$xmlModel->setXmlCache(new \App\Models\XmlCache());
$cache = ($dataset->xmlCache) ? $dataset->xmlCache : new \App\Models\XmlCache();
$xmlModel->setXmlCache($cache);
return $xmlModel->getDomDocument()->getElementsByTagName('Rdr_Dataset')->item(0);
}

View file

@ -12,6 +12,7 @@ use Illuminate\View\View;
use Illuminate\Http\RedirectResponse;
use Illuminate\Support\Facades\DB;
use Illuminate\Http\Request;
use App\Exceptions\GeneralException;
class DatasetController extends Controller
{
@ -22,7 +23,6 @@ class DatasetController extends Controller
public function index(Request $request) : View
{
$searchType = $request->input('searchtype');
$builder = Dataset::query();
//$registers = array();
@ -155,8 +155,11 @@ class DatasetController extends Controller
{
$dataset = Dataset::findOrFail($id);
//$input = $request->all();
$input = $request->except('licenses', 'titles');
$dataset->update($input);
$input = $request->except('abstracts', 'licenses', 'titles', '_method', '_token');
// foreach ($input as $key => $value) {
// $dataset[$key] = $value;
// }
//$dataset->update($input);
// $dataset->type = $input['type'];
// $dataset->thesis_year_accepted = $input['thesis_year_accepted'];
// $dataset->project_id = $input['project_id'];
@ -188,8 +191,17 @@ class DatasetController extends Controller
}
}
session()->flash('flash_message', 'You have updated 1 dataset!');
return redirect()->route('settings.document');
if (! $dataset->isDirty(dataset::UPDATED_AT)) {
$time = new \Illuminate\Support\Carbon();
$dataset->setUpdatedAt($time);
}
// $dataset->save();
if ($dataset->update($input)) {
//event(new DatasetUpdated($dataset));
session()->flash('flash_message', 'You have updated 1 dataset!');
return redirect()->route('settings.document');
}
throw new GeneralException(trans('exceptions.backend.dataset.update_error'));
}

View file

@ -3,9 +3,12 @@
namespace App\Library\Search;
//use App\Library\Util\SolrSearchQuery;
use App\Library\Util\SearchParameter;
use App\Library\Search\SearchResult;
use App\Library\Util\SearchParameter;
use Illuminate\Support\Facades\Log;
use App\Library\Search\SolariumDocument;
use App\Models\Dataset;
use \Solarium\QueryType\Select\Query\Query;
class SolariumAdapter
{
@ -50,7 +53,55 @@ class SolariumAdapter
return 'solr';
}
public function createQuery() : SearchParameter
/**
 * Add one or more datasets to the search index.
 *
 * The input is validated by normalizeDocuments(), converted to Solarium
 * update documents and sent in chunks of 16. Every chunk is sent in its own
 * update request, and each request carries its own commit.
 *
 * @param Dataset|Dataset[] $datasets a single dataset or an array of datasets
 * @return $this
 */
public function addDatasetsToIndex($datasets)
{
    $datasets = $this->normalizeDocuments($datasets);
    $builder = new SolariumDocument($this->options);

    // update documents of every chunk in a separate request
    foreach (array_chunk($datasets, 16) as $slice) {
        $update = $this->client->createUpdate();

        $updateDocs = array_map(function ($rdrDoc) use ($builder, $update) {
            return $builder->toSolrUpdateDocument($rdrDoc, $update->createDocument());
        }, $slice);

        // add the documents to the update query, then commit this chunk
        $update->addDocuments($updateDocs);
        $update->addCommit();

        // The result object was never inspected, so it is no longer kept in
        // an unused local variable.
        $this->client->update($update);
    }

    return $this;
}
/**
 * Normalize the given value into an array of Dataset instances.
 *
 * A non-array value is wrapped into a single-element array. Every element
 * must be a Dataset.
 *
 * @param Dataset|Dataset[] $documents a single dataset or an array of datasets
 * @return Dataset[]
 * @throws \InvalidArgumentException if any element is not a Dataset
 */
protected function normalizeDocuments($documents)
{
    if (!is_array($documents)) {
        $documents = array($documents);
    }

    foreach ($documents as $document) {
        if (!($document instanceof Dataset)) {
            // Fully qualified: an unqualified name would resolve inside this
            // file's namespace, where no such class exists.
            throw new \InvalidArgumentException("invalid dataset in provided set");
        }
    }

    return $documents;
}
/**
 * Create a new SearchParameter instance, constructed without arguments.
 *
 * @return SearchParameter
 */
public function createQuery(): SearchParameter
{
return new SearchParameter();
}
@ -63,13 +114,14 @@ class SolariumAdapter
return $searchResult;
}
protected function applyParametersToSolariumQuery(\Solarium\QueryType\Select\Query\Query $query, SearchParameter $parameters = null, $preferOriginalQuery = false)
protected function applyParametersToSolariumQuery(Query $query, SearchParameter $parameters, $preferOriginalQuery)
{
if ($parameters) {
//$subfilters = $parameters->getSubFilters();
//if ( $subfilters !== null ) {
// foreach ( $subfilters as $name => $subfilter ) {
// if ( $subfilter instanceof Opus_Search_Solr_Filter_Raw || $subfilter instanceof Opus_Search_Solr_Solarium_Filter_Complex ) {
// if ( $subfilter instanceof Opus_Search_Solr_Filter_Raw
//|| $subfilter instanceof Opus_Search_Solr_Solarium_Filter_Complex ) {
// $query->createFilterQuery( $name )
// ->setQuery( $subfilter->compile( $query ) );
// }
@ -87,14 +139,13 @@ class SolariumAdapter
// }
// }
$filter = $parameters->getFilter();//"aa" all: '*:*'
$filter = $parameters->getFilter(); //"aa" all: '*:*'
if ($filter !== null) {
//$query->setStart( intval( $start ) );
//$query->setQuery('%P1%', array($filter));
$query->setQuery($filter);
}
$start = $parameters->getStart();
if ($start !== null) {
$query->setStart(intval($start));
@ -154,7 +205,7 @@ class SolariumAdapter
// }
}
protected function processQuery(\Solarium\QueryType\Select\Query\Query $query) : SearchResult
protected function processQuery(\Solarium\QueryType\Select\Query\Query $query): SearchResult
{
// send search query to service
$request = $this->execute($query, 'failed querying search engine');

View file

@ -0,0 +1,34 @@
<?php
namespace App\Library\Search;
use App\Models\Dataset;
use Solarium\QueryType\Update\Query\Document\Document;
/**
 * Fills Solarium update documents from datasets, using the XSLT based
 * conversion inherited from SolrDocumentXslt.
 */
class SolariumDocument extends SolrDocumentXslt
{
    /**
     * @param mixed $options passed unchanged to SolrDocumentXslt::__construct()
     */
    public function __construct($options)
    {
        parent::__construct($options);
    }

    /**
     * Populate a Solarium update document with the fields of a dataset.
     *
     * The dataset is first transformed into a Solr XML document by the
     * parent class; the <field> elements of its first <doc> element are then
     * copied into the (cleared) Solarium document.
     *
     * @param Dataset  $rdrDataset dataset to convert
     * @param Document $solrDoc    Solarium update document to fill
     * @return Document the same $solrDoc instance, cleared and refilled
     * @throws \Exception if the transformed XML contains no <doc> element
     */
    public function toSolrUpdateDocument(Dataset $rdrDataset, Document $solrDoc)
    {
        // convert the dataset to a Solr XML document for supporting custom transformations
        $solrDomDoc = parent::toSolrDocument($rdrDataset, new \DOMDocument());

        // read back fields from the generated Solr XML document
        $solrXml = simplexml_import_dom($solrDomDoc);
        if (!($solrXml instanceof \SimpleXMLElement) || !isset($solrXml->doc[0])) {
            // Without this guard an empty document would be built from a
            // null <doc> node instead of reporting the failure.
            throw new \Exception('transformed Solr XML document does not contain a doc element');
        }

        $solrDoc->clear();
        foreach ($solrXml->doc[0]->field as $field) {
            $solrDoc->addField(strval($field['name']), strval($field));
        }

        return $solrDoc;
    }
}

View file

@ -0,0 +1,79 @@
<?php
namespace App\Library\Search;
use App\Models\Dataset;
/**
 * Converts datasets into Solr XML documents by applying an XSLT stylesheet
 * to the XML representation of the dataset.
 */
class SolrDocumentXslt
{
    /**
     * @var \XSLTProcessor
     */
    protected $processor;

    /**
     * Load the XSLT stylesheet used for deriving Solr documents.
     *
     * @param array $options must provide the stylesheet location under key 'xsltfile'
     * @throws \Exception if the stylesheet is missing, unreadable or not importable
     */
    public function __construct($options)
    {
        $xsltFile = isset($options['xsltfile']) ? $options['xsltfile'] : null;
        if (!is_string($xsltFile) || $xsltFile === '') {
            throw new \Exception('invalid XSLT file for deriving Solr documents');
        }

        // DOMDocument::load() and XSLTProcessor::importStylesheet() report
        // failure through their return value, so the results are checked
        // explicitly. libxml diagnostics are collected internally for the
        // duration of the load and the previous setting is restored afterwards.
        $previous = libxml_use_internal_errors(true);
        try {
            $xslt = new \DOMDocument();
            if ($xslt->load($xsltFile) === false) {
                throw new \Exception('invalid XSLT file for deriving Solr documents');
            }

            $processor = new \XSLTProcessor();
            if ($processor->importStylesheet($xslt) === false) {
                throw new \Exception('invalid XSLT file for deriving Solr documents');
            }

            $this->processor = $processor;
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }
    }

    /**
     * Transform a dataset into a Solr XML document.
     *
     * @param Dataset      $rdrDataset dataset to convert
     * @param \DOMDocument $solrDoc    document that receives the transformed XML
     * @return \DOMDocument the same $solrDoc instance, loaded with the result
     * @throws \Exception if the transformation yields no XML or unparsable XML
     */
    public function toSolrDocument(Dataset $rdrDataset, \DOMDocument $solrDoc)
    {
        $modelXml = $this->getModelXml($rdrDataset);

        $solrXml = $this->processor->transformToXml($modelXml);
        if (!is_string($solrXml) || $solrXml === '') {
            throw new \Exception('XSLT transformation of dataset XML failed');
        }

        $solrDoc->preserveWhiteSpace = false;
        if ($solrDoc->loadXML($solrXml) === false) {
            throw new \Exception('XSLT transformation of dataset XML did not produce valid XML');
        }

        return $solrDoc;
    }

    /**
     * Retrieves XML describing model data of provided RDR dataset.
     *
     * @param Dataset $rdrDataset
     * @return \DOMDocument
     */
    protected function getModelXml(Dataset $rdrDataset)
    {
        $rdrDataset->fetchValues();

        // Set up the XML model and get the XML representation of the dataset.
        $xmlModel = new \App\Library\Xml\XmlModel();
        $xmlModel->setModel($rdrDataset);
        $xmlModel->excludeEmptyFields();

        // Use the cache row related to the dataset when there is one,
        // otherwise hand the model a new XmlCache instance.
        $cache = $rdrDataset->xmlCache ?: new \App\Models\XmlCache();
        $xmlModel->setXmlCache($cache);

        // extract fulltext from file and append it to the generated xml.
        //$this->attachFulltextToXml($modelXml, $opusDoc->getFile(), $opusDoc->getId());
        return $xmlModel->getDomDocument();
    }
}

View file

@ -133,12 +133,21 @@ class XmlModel
return $domDocument;
} else {
//create cache relation
$this->cache->fill(array(
'document_id' => $dataset->id,
'xml_version' => (int)$this->strategy->getVersion(),
'server_date_modified' => $dataset->server_date_modified,
'xml_data' => $domDocument->saveXML()
));
// $this->cache->updateOrCreate(array(
// 'document_id' => $dataset->id,
// 'xml_version' => (int)$this->strategy->getVersion(),
// 'server_date_modified' => $dataset->server_date_modified,
// 'xml_data' => $domDocument->saveXML()
// ));
if (!$this->cache->document_id) {
$this->cache->document_id = $dataset->id;
}
$this->cache->xml_version = (int)$this->strategy->getVersion();
$this->cache->server_date_modified = $dataset->server_date_modified;
$this->cache->xml_data = $domDocument->saveXML();
$this->cache->save();
Log::debug(__METHOD__ . ' cache refreshed for ' . get_class($dataset) . '#' . $dataset->id);
@ -161,20 +170,35 @@ class XmlModel
Log::debug(__METHOD__ . ' skipping cache for ' . get_class($dataset));
return null;
}
//$cached = $this->cache->hasValidEntry(
// $dataset->id,
// (int) $this->strategy->getVersion(),
// $dataset->server_date_modified
//);
//$cached = false;
$cache = XmlCache::where('document_id', $dataset->id)
->first();// model or null
if (!$cache) {
$actuallyCached = $this->cache->hasValidEntry(
$dataset->id,
$dataset->server_date_modified
);
//no actual cache
if (true !== $actuallyCached) {
Log::debug(__METHOD__ . ' cache miss for ' . get_class($dataset) . '#' . $dataset->id);
return null;
} else {
return $cache->getDomDocument();
}
//cache is actual return it for oai:
Log::debug(__METHOD__ . ' cache hit for ' . get_class($dataset) . '#' . $dataset->id);
try {
//return $this->_cache->get($model->getId(), (int) $this->_strategy->getVersion());
$cache = XmlCache::where('document_id', $dataset->id)->first();
return $cache->getDomDocument();
} catch (Exception $e) {
Log::warning(__METHOD__ . " Access to XML cache failed on " . get_class($dataset) . '#' . $dataset->id . ". Trying to recover.");
}
return null;
// // $cache = XmlCache::where('document_id', $dataset->id)
// // ->first();// model or null
// if (!$cache) {
// Log::debug(__METHOD__ . ' cache miss for ' . get_class($dataset) . '#' . $dataset->id);
// return null;
// } else {
// return $cache->getDomDocument();
// }
}
}

View file

@ -0,0 +1,68 @@
<?php
namespace App\Listeners;
// use Illuminate\Queue\InteractsWithQueue;
// use Illuminate\Contracts\Queue\ShouldQueue;
use App\Events\Dataset\DatasetUpdated as DatasetUpdatedEvent;
use App\Models\Dataset;
use Illuminate\Support\Facades\Log;
use App\Library\Search\SolariumAdapter;
/**
 * Listener that adds the dataset carried by a DatasetUpdated event to the
 * search index, provided the dataset is published.
 */
class DatasetUpdated
{
    /**
     * Create the event listener.
     */
    public function __construct()
    {
        //
    }

    /**
     * Handle the event.
     *
     * @param DatasetUpdatedEvent $event
     * @return void
     */
    public function handle(DatasetUpdatedEvent $event)
    {
        $dataset = $event->dataset;

        // only index Dataset instances
        if (!($dataset instanceof Dataset)) {
            return;
        }

        // Datasets in any state other than 'published' are skipped here;
        // removing them from the index is not implemented.
        if ($dataset->server_state !== 'published') {
            return;
        }

        $this->addDatasetToIndex($dataset);
    }

    /**
     * Helper method to add dataset to index.
     *
     * Failures reported by the search client and invalid-argument errors are
     * logged instead of being propagated to the caller.
     *
     * @param Dataset $dataset
     * @return void
     */
    private function addDatasetToIndex(Dataset $dataset)
    {
        $datasetId = $dataset->id;
        Log::debug(__METHOD__ . ': ' . 'Adding index job for dataset ' . $datasetId . '.');

        try {
            $service = new SolariumAdapter("solr", config('solarium'));
            $service->addDatasetsToIndex($dataset);
        } catch (\Solarium\Exception\ExceptionInterface $e) {
            // Uses $datasetId: the variable previously referenced here was
            // never defined in this method.
            Log::debug(__METHOD__ . ': ' . 'Indexing document ' . $datasetId . ' failed: ' . $e->getMessage());
        } catch (\InvalidArgumentException $e) {
            Log::warning(__METHOD__ . ': ' . $e->getMessage());
        }
    }
}

View file

@ -26,16 +26,17 @@ class Dataset extends Model
const UPDATED_AT = 'server_date_modified';
const PUBLISHED_AT = 'server_date_published';
protected $fillable = [
'type',
'language',
'server_state',
'creating_corporation',
'project_id',
'embargo_date',
'belongs_to_bibliography',
];
/**
// protected $fillable = [
// 'type',
// 'language',
// 'server_state',
// 'creating_corporation',
// 'project_id',
// 'embargo_date',
// 'belongs_to_bibliography',
// ];
protected $guarded = [];
/**
* The attributes that should be mutated to dates.
*
* @var array
@ -54,6 +55,11 @@ class Dataset extends Model
// $this->_init();
}
// public function setUpdatedAt($value)
// {
// $this->{static::UPDATED_AT} = $value;
// }
/**
* Get the geolocation that owns the dataset.
*/
@ -62,6 +68,8 @@ class Dataset extends Model
return $this->hasOne(GeolocationBox::class, 'dataset_id', 'id');
}
/**
* Get the project that the dataset belongs to.
*/

View file

@ -4,6 +4,7 @@ namespace App\Models;
use Illuminate\Database\Eloquent\Model;
use App\Models\Dataset;
use Illuminate\Support\Facades\DB;
class XmlCache extends Model
{
@ -13,7 +14,7 @@ class XmlCache extends Model
* @var string
*/
protected $table = 'document_xml_cache';
public $timestamps = false;
public $timestamps = false;
/**
@ -22,7 +23,8 @@ class XmlCache extends Model
* @var integer
* @access protected
*/
protected $primaryKey = null;
//protected $primaryKey = null;
public $primaryKey = 'document_id';
public $incrementing = false;
/**
@ -61,22 +63,18 @@ class XmlCache extends Model
* @param mixed $serverDateModified
* @return bool Returns true on cached hit else false.
*/
//public function scopeHasValidEntry($query, $datasetId, $xmlVersion, $serverDateModified)
//{
// //$select = $this->_table->select()->from($this->_table);
// $query->where('document_id = ?', $datasetId)
// ->where('xml_version = ?', $xmlVersion)
// ->where('server_date_modified = ?', $serverDateModified);
public function hasValidEntry($datasetId, $serverDateModified)
{
    // Look for a cache row matching both the dataset id and its
    // modification date.
    $match = DB::table('document_xml_cache')
        ->where('document_id', '=', $datasetId)
        ->where('server_date_modified', '=', $serverDateModified)
        ->first();

    // first() yields null when no row matches.
    return null !== $match;
}
}

View file

@ -5,6 +5,7 @@ namespace App\Observers;
//php artisan make:observer DatasetObserver --model=Models\Dataset
use App\Models\Dataset;
use Illuminate\Support\Facades\Log;
use App\Library\Search\SolariumAdapter;
class DatasetObserver
{
@ -33,12 +34,12 @@ class DatasetObserver
if (false === ($dataset instanceof Dataset)) {
return;
}
// if ($dataset->getServerState() !== 'published') {
// if ($model->getServerState() !== 'temporary') {
// $this->removeDocumentFromIndexById($model->getId());
// }
// return;
// }
if ($dataset->server_state !== 'published') {
// if ($dataset->getServerState() !== 'temporary') {
// $this->removeDocumentFromIndexById($model->getId());
// }
return;
}
$this->addDatasetToIndex($dataset);
}
@ -85,6 +86,16 @@ class DatasetObserver
private function addDatasetToIndex(Dataset $dataset)
{
    $datasetId = $dataset->id;
    Log::debug(__METHOD__ . ': ' . 'Adding index job for dataset ' . $datasetId . '.');

    try {
        $service = new SolariumAdapter("solr", config('solarium'));
        $service->addDatasetsToIndex($dataset);
    } catch (\Solarium\Exception\ExceptionInterface $e) {
        // Failures reported by the search client are logged rather than
        // propagated. Uses $datasetId: the variable previously referenced
        // here was never defined in this method.
        Log::debug(__METHOD__ . ': ' . 'Indexing document ' . $datasetId . ' failed: ' . $e->getMessage());
    } catch (\InvalidArgumentException $e) {
        // Fully qualified so the catch refers to PHP's built-in exception
        // class and not to a name inside this file's namespace.
        Log::warning(__METHOD__ . ': ' . $e->getMessage());
    }
}
}

View file

@ -14,6 +14,9 @@ class EventServiceProvider extends ServiceProvider
'App\Events\Event' => [
'App\Listeners\EventListener',
],
\App\Events\Dataset\DatasetUpdated::class => [
\App\Listeners\DatasetUpdated::class,
],
];
/**