/*
 * tethys.frontend/src/services/dataset.service.ts
 * frankporras 6f1b9f4c5f - Code cleaning for OpenSearch
 * - Added comments
 * - Facets menu small change
 * 2024-09-12 15:54:59 +02:00
 *
 * 388 lines
 * 18 KiB
 * TypeScript
 */

import api from "../api/api";
import { Observable } from "rxjs";
import { tap, map } from "rxjs/operators";
import { Dataset, DbDataset, Suggestion } from "@/models/dataset";
import { HitHighlight, OpenSearchResponse, SolrResponse } from "@/models/headers";
import { ActiveFilterCategories } from "@/models/solr";
import { VUE_API } from "@/constants";
import { deserialize } from "class-transformer";
/**
 * Service for retrieving datasets.
 *
 * `searchTerm` and `facetedSearch` POST a query body to the `_search` endpoint of an
 * OpenSearch index. The remaining public methods GET years, sitelinks and single
 * datasets from the REST API rooted at `VUE_API`.
 */
class DatasetService {
    /** Number of hits requested per search request. */
    private static readonly PAGE_SIZE = 10;

    /**
     * Fields used by the free-text query, each paired with its relevance boost.
     * Note: the field is called "subjects" here; in SOLR it was "subject".
     */
    private static readonly SEARCH_FIELDS: ReadonlyArray<readonly [string, number]> = [
        ["title", 3],
        ["author", 2],
        ["subjects", 1],
        ["doctype", 1],
    ];

    /**
     * Facet categories that are aggregated and filtered on the field itself instead of its
     * ".keyword" sub-field. With the ".keyword" suffix no buckets are obtained for them.
     */
    private static readonly RAW_FACET_FIELDS: ReadonlySet<string> = new Set(["language", "year", "doctype"]);

    /**
     * Search datasets with OpenSearch API, allowing for fuzzy search and boosting relevance in title, author, subject and doctype fields.
     * @param {string} term - Search query term
     * @param {string} openCore - The OpenSearch core to search in
     * @param {string} openHost - The OpenSearch host URL
     * @returns {Observable} - Observable emitting datasets and their highlights
     */
    public searchTerm(term: string, openCore: string, openHost: string): Observable<{ datasets: Dataset[], highlights: HitHighlight[] }> {
        // Request body defining search query logic
        const body = {
            query: this.buildFreeTextQuery(term),
            size: DatasetService.PAGE_SIZE, // Limit the number of results
            from: 0, // Pagination: start from the first result
            sort: [{ _score: { order: "desc" } }], // Sort by relevance (_score)
            // sort: [{ server_date_published: { order: "desc" } }],
            track_scores: true, // This ensures "_score" is included even when sorting by other criteria. Otherwise the relevance score is not calculated
            aggs: this.buildFacetAggregations(),
            highlight: this.buildHighlight(),
        };

        /**
         * The POST response lists the matching hits under `hits.hits`. Each hit carries the stored
         * document in `_source` and the highlighted fragments in `highlight`. The RxJS `map` operator
         * splits the hits into two parallel arrays: one of documents and one of highlights.
         */
        return api.post<OpenSearchResponse>(this.buildSearchUrl(openHost, openCore), body).pipe(
            map((response) => ({
                datasets: response.hits.hits.map((hit) => hit._source),
                highlights: response.hits.hits.map((hit) => hit.highlight),
            })),
        );
    }

    // // For the autocomplete search. Method to perform a search based on a term
    // public searchTermSOLR(term: string, solrCore: string, solrHost: string): Observable<Dataset[]> {
    // // SOLR endpoint
    // const host = "https://" + solrHost;
    // const path = "/solr/" + solrCore + "/select?";
    // const base = host + path;
    // //const fields = 'id,server_date_published,abstract_output,title_output,title_additional,author,subject'; // fields we want returned
    // const fields = [
    // "id",
    // "licence",
    // "server_date_published",
    // "abstract_output",
    // "title_output",
    // "title_additional",
    // "author",
    // "subject",
    // "doctype",
    // ].toString();
    // const qfFields = "title^3 author^2 subject^1";
    // const q_params = {
    // "0": "fl=" + fields,
    // q: term + "*",
    // defType: "edismax",
    // qf: qfFields,
    // indent: "on",
    // wt: "json",
    // };
    // // Make API call to Solr and return the result
    // /**
    // * When a GET request is made to the Solr server using the api.get<SolrResponse> method, the response received from Solr is an object that includes various details about the search results.
    // * One of the key properties of this response object is docs, which is an array of documents (datasets) that match the search criteria.
    // * It is used the pipe method to chain RxJS operators to the Observable returned by api.get. The map operator is used to transform the emitted items of the Observable.
    // */
    // const stations = api.get<SolrResponse>(base, q_params).pipe(map((res: SolrResponse) => res.response.docs));
    // return stations;
    // }

    /**
     * Perform faceted search with OpenSearch API using filters and suggestions
     * @param {Suggestion | string} suggestion - Search term or suggestion
     * @param {ActiveFilterCategories} activeFilterCategories - Active filters to apply
     * @param {string} openCore - The OpenSearch core to search in
     * @param {string} openHost - The OpenSearch host URL
     * @param {string} start - Optional: zero-based offset of the first hit to return (sent as "from").
     * Missing, non-numeric or negative values fall back to 0.
     * @returns {Observable<OpenSearchResponse>} - Observable emitting search results
     */
    public facetedSearch(
        suggestion: Suggestion | string,
        activeFilterCategories: ActiveFilterCategories,
        openCore: string,
        openHost: string,
        start?: string, // Offset of the first hit
    ): Observable<OpenSearchResponse> {
        /**
         * The query construction depends on whether the suggestion is a string or a Suggestion object.
         * - string: same fuzzy + prefix query as the autocomplete search
         * - Suggestion object: match on the suggested field only
         */
        const mainQuery = typeof suggestion === "string"
            ? this.buildFreeTextQuery(suggestion)
            : {
                match: {
                    [suggestion.type]: {
                        query: suggestion.value,
                        operator: "and", // all the terms in the query must be present in the field
                    },
                },
            };

        // Build one term filter per selected value of every active filter category
        const filters = Object.entries(activeFilterCategories).flatMap(([category, values]) => {
            const field = DatasetService.RAW_FACET_FIELDS.has(category) ? category : `${category}.keyword`;
            return values.map((value) => ({ term: { [field]: value } }));
        });

        // Request body for the faceted search
        const body = {
            query: {
                bool: {
                    must: [
                        mainQuery, // Ensure the main query must be satisfied
                        ...filters, // Ensure all filters must be satisfied
                    ],
                },
            },
            size: DatasetService.PAGE_SIZE,
            from: this.parseOffset(start),
            sort: [{ server_date_published: { order: "desc" } }], // Sort by publication date
            // sort: [{ _score: { order: "desc" } }], // Sort by _score in descending order
            track_scores: true,
            aggs: this.buildFacetAggregations(),
            highlight: this.buildHighlight(),
        };

        // API call and return observable of search results
        return api.post<OpenSearchResponse>(this.buildSearchUrl(openHost, openCore), body);
    }

    // /**
    // * This method performs a faceted search on a Solr core. Faceted search allows the user to filter search results based on various categories (facets)
    // */
    // public facetedSearchSOLR(
    // suggestion: Suggestion | string,
    // activeFilterCategories: ActiveFilterCategories,
    // solrCore: string,
    // solrHost: string,
    // start?: string, // Starting page
    // ): Observable<SolrResponse> {
    // // console.log("face:", suggestion);
    // // console.log(activeFilterCategories);
    // // console.log(solrCore);
    // // console.log(solrHost);
    // // console.log(start);
    // console.log("facetedsearchSOLR > suggestion entered:");
    // console.log(suggestion);
    // // Construct Solr query parameters
    // const host = "https://" + solrHost;
    // const path = "/solr/" + solrCore + "/select?";
    // const base = host + path;
    // const fields = [
    // "id",
    // "licence",
    // "server_date_published",
    // "abstract_output",
    // "identifier",
    // "title_output",
    // "title_additional",
    // "author",
    // "subject",
    // "doctype",
    // ].toString();
    // // Determine search term, query operator, and query fields based on the suggestion type. Depending on whether suggestion is a string or a Suggestion object, it constructs the search term and query fields differently.
    // let term, queryOperator, qfFields;
    // if (typeof suggestion === "string") { // If suggestion is a string, it appends a wildcard (*) for partial matches.
    // term = suggestion + "*";
    // queryOperator = "or";
    // qfFields = "title^3 author^2 subject^1";
    // } else if (suggestion instanceof Suggestion) { // If suggestion is a Suggestion object, it forms a more specific query based on the type and value of the suggestion.
    // term = suggestion.type + ':"' + suggestion.value + '"';
    // queryOperator = "and";
    // qfFields = undefined;
    // }
    // // Set default value for start if not provided
    // if (start === undefined) start = "0";
    // // Construct filter fields based on active filter categories
    // const filterFields = new Array<string>();
    // if (Object.keys(activeFilterCategories).length > 0) {
    // /* Declare variable prop with a type that is a key of the activeFilterCategories. The 'keyof typeof' activeFilterCategories type represents all possible keys
    // that can exist on the activeFilterCategories --> prop can only be assigned a value that is a key of the activeFilterCategories object */
    // let prop: keyof typeof activeFilterCategories;
    // for (prop in activeFilterCategories) {
    // const filterItems = activeFilterCategories[prop];
    // filterItems.forEach(function (value: string) {
    // filterFields.push(prop + ':("' + value + '")');
    // // e.g. Array [ 'subject:("Vektordaten")', 'author:("GeoSphere Austria, ")' ]
    // });
    // }
    // }
    // // https://solr.apache.org/guide/8_4/json-request-api.html
    // // Construct Solr query parameters
    // const q_params = {
    // "0": "fl=" + fields,
    // q: term,
    // "q.op": queryOperator,
    // defType: "edismax",
    // qf: qfFields,
    // // df: "title",
    // indent: "on",
    // wt: "json",
    // rows: 10,
    // // fq: ["subject:Steiermark", "language:de"],
    // fq: filterFields,
    // start: start,
    // sort: "server_date_published desc",
    // facet: "on",
    // // "facet.field": "language",
    // "json.facet.language": '{ type: "terms", field: "language" }',
    // "json.facet.subject": '{ type: "terms", field: "subject", limit: -1 }',
    // "json.facet.year": '{ type: "terms", field: "year" }',
    // "json.facet.author": '{ type: "terms", field: "author_facet", limit: -1 }',
    // };
    // /* E.g.
    // {"0":"fl=id,licence,server_date_published,abstract_output,identifier,title_output,title_additional,author,subject,doctype","q":"*","q.op":"or","defType":"edismax",
    // "qf":"title^3 author^2 subject^1",
    // "indent":"on","wt":"json","rows":10,
    // "fq":["subject:(\"Vektordaten\")","author:(\"GeoSphere Austria, \")"],
    // "start":"0","sort":"server_date_published desc","facet":"on",
    // "json.facet.language":"{ type: \"terms\", field: \"language\" }",
    // "json.facet.subject":"{ type: \"terms\", field: \"subject\", limit: -1 }",
    // "json.facet.year":"{ type: \"terms\", field: \"year\" }",
    // "json.facet.author":"{ type: \"terms\", field: \"author_facet\", limit: -1 }"}
    // */
    // // console.log(JSON.stringify(q_params));
    // // Make API call to Solr and return the result
    // const stations = api.get<SolrResponse>(base, q_params);
    // return stations;
    // }

    // Method to fetch years
    public getYears(): Observable<string[]> {
        return api.get<string[]>(VUE_API + "/api/years");
    }

    // Method to fetch documents for a specific year
    public getDocuments(year: string): Observable<Array<DbDataset>> {
        return api.get<Array<DbDataset>>(VUE_API + "/api/sitelinks/" + year);
    }

    // Method to fetch a dataset by its ID
    public getDataset(id: number): Observable<DbDataset> {
        const apiUrl = VUE_API + "/api/dataset/" + id;
        return api.get<DbDataset>(apiUrl).pipe(map((res) => this.prepareDataset(res)));
    }

    // Method to fetch a dataset by its DOI. `doi` is the part that follows the fixed "10.24341/tethys." prefix
    public getDatasetByDoi(doi: string): Observable<DbDataset> {
        const apiUrl = VUE_API + "/api/dataset/10.24341/tethys." + doi;
        return api.get<DbDataset>(apiUrl).pipe(map((res) => this.prepareDataset(res)));
    }

    // Builds the "_search" endpoint URL: <host>/<core>/_search
    private buildSearchUrl(openHost: string, openCore: string): string {
        return openHost + "/" + openCore + "/_search";
    }

    /**
     * Builds the free-text query shared by the autocomplete and the faceted search:
     * one fuzzy `match` clause and one prefix `wildcard` clause per search field, of which at least one must match.
     *
     * The match query is case-insensitive by default (the standard analyzer lowercases the terms).
     * The wildcard query is case-sensitive by default, so the term is lowercased before the "*" is appended.
     */
    private buildFreeTextQuery(term: string) {
        const prefixPattern = `${term.toLowerCase()}*`;
        const fuzzyClauses = DatasetService.SEARCH_FIELDS.map(([field, boost]) => ({
            match: { [field]: { query: term, fuzziness: "AUTO", boost } },
        }));
        const prefixClauses = DatasetService.SEARCH_FIELDS.map(([field, boost]) => ({
            wildcard: { [field]: { value: prefixPattern, boost } },
        }));
        return {
            bool: {
                should: [...fuzzyClauses, ...prefixClauses],
                minimum_should_match: 1, // Require at least one match
            },
        };
    }

    /**
     * Defines aggregations for facets. A fresh object is returned on every call.
     * terms: Aggregation type that returns the most common terms in a field.
     * !For a large number of terms setting an extremely large size might not be efficient
     * If you genuinely need all unique terms and expect a large number of them, consider using a composite aggregation for more efficient pagination of terms.
     */
    private buildFacetAggregations() {
        return {
            subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"!
            language: { terms: { field: "language" } }, // ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS
            author: { terms: { field: "author.keyword", size: 1000 } },
            year: { terms: { field: "year", size: 100 } }, // ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS
            doctype: { terms: { field: "doctype", size: 50 } }, // ".keyword" HAS TO BE REMOVED. OTHERWISE BUCKETS ARE NOT OBTAINED FOR THIS
        };
    }

    // Highlight matching terms in title, author, subjects and document type
    private buildHighlight() {
        return {
            fields: {
                title: {},
                author: {},
                subjects: {},
                doctype: {},
            },
        };
    }

    // Converts the optional `start` string into a valid "from" offset.
    // Anything that is missing, not a base-10 integer, or negative yields 0 so that the request body never carries NaN or a negative offset.
    private parseOffset(start?: string): number {
        if (!start) {
            return 0;
        }
        const offset = Number.parseInt(start, 10);
        return Number.isFinite(offset) && offset > 0 ? offset : 0;
    }

    // Prepare dataset object: round-trip the response through JSON and class-transformer's deserialize
    // to obtain a DbDataset instance, then set its url to the URI of the current document
    private prepareDataset(datasetObj: DbDataset): DbDataset {
        const dataset = deserialize<DbDataset>(DbDataset, JSON.stringify(datasetObj));
        dataset.url = document.documentURI;
        return dataset;
    }
}
export default new DatasetService();