forked from geolba/tethys.frontend
384 lines
18 KiB
TypeScript
384 lines
18 KiB
TypeScript
import api from "../api/api";
|
|
// import { Observable, of } from "rxjs";
|
|
import { Observable } from "rxjs";
|
|
import { tap, map } from "rxjs/operators";
|
|
import { Dataset, DbDataset, Suggestion } from "@/models/dataset";
|
|
import { HitHighlight, OpenSearchResponse, SolrResponse } from "@/models/headers";
|
|
import { ActiveFilterCategories } from "@/models/solr";
|
|
import { VUE_API } from "@/constants";
|
|
import { deserialize } from "class-transformer";
|
|
|
|
class DatasetService {
    /**
     * Autocomplete search against an OpenSearch index.
     *
     * Sends a `bool` query that combines fuzzy `match` clauses (tolerant of
     * misspellings) with prefix `wildcard` clauses on the `title`, `author` and
     * `subjects` fields, boosted 3 / 2 / 1 respectively, and asks for the first
     * ten hits ordered by relevance score.
     *
     * @param term - Search term to query.
     * @param openCore - Index name; becomes the first path segment of the request URL.
     * @param openHost - Host of the OpenSearch server; the request always uses `https://`.
     * @returns Observable emitting, in hit order, the `_source` document and the
     *          `highlight` entry of every hit in the response.
     */
    public searchTerm(term: string, openCore: string, openHost: string): Observable<{ datasets: Dataset[], highlights: HitHighlight[] }> {
        const body = {
            query: this.buildTermQuery(term),
            size: 10,
            from: 0,
            sort: [{ _score: { order: "desc" } }], // Most relevant hits first
            track_scores: true, // Keep "_score" in the response even when a sort clause is present
            aggs: {
                language: { terms: { field: "language.keyword" } },
                subjects: { terms: { field: "subjects.keyword", size: 10 } }, // The Solr queries in this class call this field "subject"
            },
            highlight: {
                fields: {
                    title: {},
                    author: {},
                    subjects: {},
                },
            },
        };

        // Reduce the raw response to the two per-hit arrays the caller needs.
        return api.post<OpenSearchResponse>(this.openSearchUrl(openHost, openCore), body).pipe(
            map((response) => ({
                datasets: response.hits.hits.map((hit) => hit._source),
                highlights: response.hits.hits.map((hit) => hit.highlight),
            }))
        );
    }

    /**
     * Faceted search against an OpenSearch index.
     *
     * A plain string is searched with the same fuzzy + wildcard query as
     * {@link searchTerm}. A Suggestion object is searched with a single `match`
     * on the field named by its lower-cased `type`, requiring all terms of its
     * `value` to be present. Every entry of `activeFilterCategories` is added as
     * a `terms` filter on `<category>.keyword`.
     *
     * @param suggestion - Free-text term or a Suggestion (`type` + `value`).
     * @param activeFilterCategories - Facet category mapped to the values selected for it.
     * @param openCore - Index name; becomes the first path segment of the request URL.
     * @param openHost - Host of the OpenSearch server; the request always uses `https://`.
     * @param start - Offset of the first hit to return, as a decimal string.
     *                Missing, non-numeric or negative values fall back to 0.
     * @returns Observable of the unmodified OpenSearch response.
     */
    public facetedSearchOPEN(
        suggestion: Suggestion | string,
        activeFilterCategories: ActiveFilterCategories,
        openCore: string,
        openHost: string,
        start?: string,
    ): Observable<OpenSearchResponse> {
        const query =
            typeof suggestion === "string"
                ? this.buildTermQuery(suggestion)
                : {
                      match: {
                          [suggestion.type.toLowerCase()]: {
                              query: suggestion.value,
                              operator: "and", // All terms of the value must be present in the field
                          },
                      },
                  };

        // One "terms" filter per active facet category.
        const filters = Object.entries(activeFilterCategories).map(([category, values]) => ({
            terms: { [`${category}.keyword`]: values },
        }));

        const body = {
            query: {
                bool: {
                    must: query,
                    filter: filters,
                },
            },
            size: 10,
            from: this.parseOffset(start),
            sort: [{ _score: { order: "desc" } }],
            track_scores: true,
            // Facet aggregations. Very large "size" values may be inefficient; a
            // composite aggregation is the alternative if every unique term is needed.
            aggs: {
                subjects: { terms: { field: "subjects.keyword", size: 1000 } },
                language: { terms: { field: "language.keyword" } },
                author: { terms: { field: "author.keyword", size: 1000 } },
                year: { terms: { field: "year.keyword", size: 100 } },
            },
            highlight: {
                fields: {
                    title: {},
                    author: {},
                    subjects: {},
                },
            },
        };

        return api.post<OpenSearchResponse>(this.openSearchUrl(openHost, openCore), body);
    }

    /**
     * Faceted search against a Solr core.
     *
     * A plain string is searched as a prefix (`<term>*`) with operator `or` over
     * `title^3 author^2 subject^1`. Any other value is treated as a Suggestion
     * and searched as the exact phrase `<type>:"<value>"` with operator `and`.
     * Each selected facet value becomes one `fq` entry of the form
     * `<category>:("<value>")`. Double quotes and backslashes inside phrases are
     * backslash-escaped so they cannot terminate the phrase early.
     *
     * @param suggestion - Free-text term or a Suggestion (`type` + `value`).
     * @param activeFilterCategories - Facet category mapped to the values selected for it.
     * @param solrCore - Name of the Solr core to query.
     * @param solrHost - Host of the Solr server; the request always uses `https://`.
     * @param start - Offset of the first result, passed through as Solr's `start`; defaults to "0".
     * @returns Observable of the unmodified Solr response.
     */
    public facetedSearch(
        suggestion: Suggestion | string,
        activeFilterCategories: ActiveFilterCategories,
        solrCore: string,
        solrHost: string,
        start?: string,
    ): Observable<SolrResponse> {
        const base = "https://" + solrHost + "/solr/" + solrCore + "/select?";

        // Fields requested from Solr, sent as a comma-separated list.
        const fields = [
            "id",
            "licence",
            "server_date_published",
            "abstract_output",
            "identifier",
            "title_output",
            "title_additional",
            "author",
            "subject",
            "doctype",
        ].join(",");

        // Discriminate on typeof only (as facetedSearchOPEN does), so a Suggestion-shaped
        // plain object is handled the same way as a Suggestion class instance.
        const isFreeText = typeof suggestion === "string";
        const term =
            typeof suggestion === "string"
                ? suggestion + "*"
                : suggestion.type + ':"' + this.escapeSolrPhrase(suggestion.value) + '"';
        const queryOperator = isFreeText ? "or" : "and";
        const qfFields = isFreeText ? "title^3 author^2 subject^1" : undefined;

        // e.g. [ 'subject:("Vektordaten")', 'author:("GeoSphere Austria, ")' ]
        const filterFields: string[] = [];
        for (const [category, values] of Object.entries(activeFilterCategories)) {
            for (const value of values) {
                filterFields.push(category + ':("' + this.escapeSolrPhrase(value) + '")');
            }
        }

        // See https://solr.apache.org/guide/8_4/json-request-api.html
        const q_params = {
            "0": "fl=" + fields,
            q: term,
            "q.op": queryOperator,
            defType: "edismax",
            qf: qfFields,
            indent: "on",
            wt: "json",
            rows: 10,
            fq: filterFields,
            start: start ?? "0",
            sort: "server_date_published desc",
            facet: "on",
            "json.facet.language": '{ type: "terms", field: "language" }',
            "json.facet.subject": '{ type: "terms", field: "subject", limit: -1 }',
            "json.facet.year": '{ type: "terms", field: "year" }',
            "json.facet.author": '{ type: "terms", field: "author_facet", limit: -1 }',
        };

        return api.get<SolrResponse>(base, q_params);
    }

    /**
     * Fetches the list of years from `<VUE_API>/api/years`.
     *
     * @returns Observable of the years as strings.
     */
    public getYears(): Observable<string[]> {
        return api.get<string[]>(VUE_API + "/api/years");
    }

    /**
     * Fetches the datasets for one year from `<VUE_API>/api/sitelinks/<year>`.
     *
     * @param year - Year appended to the request path.
     * @returns Observable of the datasets returned by the endpoint.
     */
    public getDocuments(year: string): Observable<Array<DbDataset>> {
        return api.get<Array<DbDataset>>(VUE_API + "/api/sitelinks/" + year);
    }

    /**
     * Fetches a single dataset by its ID from `<VUE_API>/api/dataset/<id>`.
     *
     * @param id - Dataset ID appended to the request path.
     * @returns Observable of the dataset after it has passed through prepareDataset.
     */
    public getDataset(id: number): Observable<DbDataset> {
        const apiUrl = VUE_API + "/api/dataset/" + id;

        return api.get<DbDataset>(apiUrl).pipe(map((res) => this.prepareDataset(res)));
    }

    /**
     * Fetches a single dataset by DOI from `<VUE_API>/api/dataset/10.24341/tethys.<doi>`.
     *
     * @param doi - The part of the DOI that follows the fixed `10.24341/tethys.` prefix.
     * @returns Observable of the dataset after it has passed through prepareDataset.
     */
    public getDatasetByDoi(doi: string): Observable<DbDataset> {
        const apiUrl = VUE_API + "/api/dataset/10.24341/tethys." + doi;

        return api.get<DbDataset>(apiUrl).pipe(map((res) => this.prepareDataset(res)));
    }

    /**
     * Converts a raw API payload into a DbDataset instance and records the
     * current document URI on it.
     *
     * @param datasetObj - Dataset object as returned by the API.
     * @returns DbDataset produced by class-transformer, with `url` set to `document.documentURI`.
     */
    private prepareDataset(datasetObj: DbDataset): DbDataset {
        const dataset = deserialize<DbDataset>(DbDataset, JSON.stringify(datasetObj));
        dataset.url = document.documentURI;

        return dataset;
    }

    /** Builds the `_search` endpoint URL of an OpenSearch index; always https. */
    private openSearchUrl(openHost: string, openCore: string): string {
        return "https://" + openHost + "/" + openCore + "/_search";
    }

    /**
     * Builds the free-text query shared by searchTerm and facetedSearchOPEN:
     * fuzzy `match` clauses plus prefix `wildcard` clauses on title, author and
     * subjects, at least one of which must match.
     *
     * The `match` clauses receive the term unchanged. The `wildcard` clauses
     * receive it lower-cased, because wildcard queries are case-sensitive by
     * default while `match` is not.
     */
    private buildTermQuery(term: string): Record<string, unknown> {
        const prefix = `${term.toLowerCase()}*`;

        return {
            bool: {
                should: [
                    { match: { title: { query: term, fuzziness: "AUTO", boost: 3 } } },
                    { match: { author: { query: term, fuzziness: "AUTO", boost: 2 } } },
                    { match: { subjects: { query: term, fuzziness: "AUTO", boost: 1 } } },
                    { wildcard: { title: { value: prefix, boost: 3 } } },
                    { wildcard: { author: { value: prefix, boost: 2 } } },
                    { wildcard: { subjects: { value: prefix, boost: 1 } } },
                ],
                minimum_should_match: 1,
            },
        };
    }

    /**
     * Parses a paging offset given as a decimal string.
     * Returns 0 when the value is missing, empty, not a number or negative.
     */
    private parseOffset(start?: string): number {
        const offset = Number.parseInt(start ?? "", 10);

        return Number.isInteger(offset) && offset >= 0 ? offset : 0;
    }

    /** Backslash-escapes `"` and `\` so the value can sit inside a double-quoted Solr phrase. */
    private escapeSolrPhrase(value: string): string {
        return value.replace(/(["\\])/g, "\\$1");
    }
}
|
|
|
|
// Default export: a DatasetService instance created once when this module is evaluated.
export default new DatasetService();
|