// tethys.frontend/src/services/dataset.service.ts


import api from "../api/api";
// import { Observable, of } from "rxjs";
import { Observable } from "rxjs";
import { map } from "rxjs/operators";
import { Dataset, DbDataset, Suggestion } from "@/models/dataset";
import { SolrResponse } from "@/models/headers";
import { ActiveFilterCategories } from "@/models/solr";
import { VUE_API } from "@/constants";
import { deserialize } from "class-transformer";
/**
 * Data-access service for datasets.
 *
 * Talks to three backends:
 *  - the Solr `select` handler (autocomplete and faceted search),
 *  - the REST API rooted at `VUE_API` (years, site links, single datasets),
 *  - an OpenSearch `_search` endpoint (diagnostic only; results are just logged).
 */
class DatasetService {
    /**
     * OpenSearch endpoint used by {@link DatasetService.fetchDataFromOpenSearch}.
     * NOTE(review): hard-coded plain-HTTP address — presumably a development host;
     * confirm and move it to configuration before relying on it.
     */
    private static readonly OPEN_SEARCH_URL = "http://192.168.21.18/tethys-records/_search";

    /** Field weights passed to Solr's edismax parser as `qf`. */
    private static readonly SOLR_QUERY_FIELDS = "title^3 author^2 subject^1";

    /**
     * Escapes a value so it can be embedded between double quotes in a Solr query.
     * Only `\` and `"` are escaped; values without these characters are returned unchanged.
     *
     * @param value - Raw phrase text.
     * @returns The phrase with `\` and `"` backslash-escaped.
     */
    private static escapeSolrPhrase(value: string): string {
        return value.replace(/(["\\])/g, "\\$1");
    }

    /**
     * Builds the URL of the Solr `select` handler for the given host and core.
     *
     * @param solrHost - Host name without scheme; `https://` is prepended.
     * @param solrCore - Name of the Solr core.
     */
    private solrSelectUrl(solrHost: string, solrCore: string): string {
        return "https://" + solrHost + "/solr/" + solrCore + "/select?";
    }

    /**
     * Builds the OpenSearch request body for a search term.
     *
     * For each of title (boost 3), author (boost 2) and subject (boost 1) the query contains
     * a fuzzy `match` clause and a trailing-`*` `wildcard` clause; at least one clause must match.
     * Results are limited to the first 10 hits, sorted by `server_date_published` descending,
     * with `terms` aggregations for language and subject.
     *
     * @param searchTerm - The term to search for.
     */
    private buildOpenSearchQuery(searchTerm: string) {
        // Order matters: it determines the order of the generated `should` clauses.
        const boostedFields = [
            { field: "title", boost: 3 },
            { field: "author", boost: 2 },
            { field: "subject", boost: 1 },
        ];
        // Fuzzy matching tolerates misspellings of the search term.
        const fuzzyClauses = boostedFields.map(({ field, boost }) => ({
            match: { [field]: { query: searchTerm, fuzziness: "AUTO", boost } },
        }));
        // Wildcard matching finds terms that start with the search term.
        const prefixClauses = boostedFields.map(({ field, boost }) => ({
            wildcard: { [field]: { value: `${searchTerm}*`, boost } },
        }));

        return {
            query: {
                bool: {
                    should: [...fuzzyClauses, ...prefixClauses],
                    minimum_should_match: 1,
                },
            },
            size: 10,
            from: 0,
            sort: [{ server_date_published: { order: "desc" } }],
            aggs: {
                language: {
                    terms: { field: "language.keyword" },
                },
                subject: {
                    // NOTE(review): aggregates on "subjects.keyword" while the query clauses use
                    // "subject" — confirm against the index mapping which name is correct.
                    terms: { field: "subjects.keyword", size: 10 },
                },
            },
        };
    }

    /**
     * Sends a fuzzy + prefix search to the OpenSearch endpoint and logs the response.
     *
     * The response is only written to the console (full payload and total hit count);
     * nothing is returned. Failures, including non-2xx responses, are caught and logged,
     * so the returned promise does not reject.
     *
     * @param searchTerm - The search term to query.
     */
    async fetchDataFromOpenSearch(searchTerm: string): Promise<void> {
        const url = DatasetService.OPEN_SEARCH_URL;
        try {
            const response = await fetch(url, {
                method: "POST",
                headers: { "Content-Type": "application/json" },
                body: JSON.stringify(this.buildOpenSearchQuery(searchTerm)),
            });
            if (!response.ok) {
                throw new Error(`Failed to fetch data from ${url}, status: ${response.status}`);
            }
            const data = await response.json();
            console.log("Data from OpenSearch:", data);
            // Optional chaining: an unexpected payload shape should not be reported as a fetch error.
            console.log("Hits:", data?.hits?.total?.value);
        } catch (error) {
            console.error("Error fetching data:", error);
        }
    }

    /**
     * Autocomplete search: queries Solr for documents matching `term` as a prefix.
     *
     * @param term - Text typed by the user; `*` is appended before querying.
     * @param solrCore - Name of the Solr core.
     * @param solrHost - Solr host name without scheme.
     * @returns Observable emitting the `response.docs` array of the Solr response.
     */
    public searchTerm(term: string, solrCore: string, solrHost: string): Observable<Dataset[]> {
        // Diagnostic call to the OpenSearch endpoint. Deliberately fire-and-forget:
        // the method handles its own errors and its result is not used here.
        void this.fetchDataFromOpenSearch(term);

        // Fields we want returned.
        const fields = [
            "id",
            "licence",
            "server_date_published",
            "abstract_output",
            "title_output",
            "title_additional",
            "author",
            "subject",
            "doctype",
        ].toString();
        const queryParams = {
            "0": "fl=" + fields,
            q: term + "*",
            defType: "edismax",
            qf: DatasetService.SOLR_QUERY_FIELDS,
            indent: "on",
            wt: "json",
        };

        return api
            .get<SolrResponse>(this.solrSelectUrl(solrHost, solrCore), queryParams)
            .pipe(map((res: SolrResponse) => res.response.docs));
    }

    /* Example request without filters:
    https://tethys.at/solr/rdr_data/select?&0=fl%3Did%2Clicence%2Cserver_date_published%2Cabstract_output%2Cidentifier%2Ctitle_output%2Ctitle_additional%2Cauthor%2Csubject%2Cdoctype&q=%2A
    &q.op=or&defType=edismax&qf=title%5E3%20author%5E2%20subject%5E1&indent=on&wt=json&rows=10&start=0&sort=server_date_published%20desc&facet=on&json.facet.language=%7B%20type%3A%20%22
    terms%22%2C%20field%3A%20%22language%22%20%7D&json.facet.subject=%7B%20type%3A%20%22terms%22%2C%20field%3A%20%22subject%22%2C%20limit%3A%20-1%20%7D&json.facet.year=%7B%20type%3A%20%22
    terms%22%2C%20field%3A%20%22year%22%20%7D&json.facet.author=%7B%20type%3A%20%22terms%22%2C%20field%3A%20%22author_facet%22%2C%20limit%3A%20-1%20%7D
    */
    /* E.g. Only one facet => Author: Coric, Stjepan (16)
    https://tethys.at/solr/rdr_data/select?&0=fl%3Did%2Clicence%2Cserver_date_published%2Cabstract_output%2Cidentifier%2Ctitle_output%2Ctitle_additional%2Cauthor%2Csubject%2Cdoctype&q=%2A
    &q.op=or&defType=edismax&qf=title%5E3%20author%5E2%20subject%5E1&indent=on&wt=json&rows=10&fq=author%3A%28%22Coric%2C%20Stjepan%22%29&start=0&sort=server_date_published%20desc&facet=on
    &json.facet.language=%7B%20type%3A%20%22terms%22%2C%20field%3A%20%22language%22%20%7D
    &json.facet.subject=%7B%20type%3A%20%22terms%22%2C%20field%3A%20%22subject%22%2C%20limit%3A%20-1%20%7D
    &json.facet.year=%7B%20type%3A%20%22terms%22%2C%20field%3A%20%22year%22%20%7D
    &json.facet.author=%7B%20type%3A%20%22terms%22%2C%20field%3A%20%22author_facet%22%2C%20limit%3A%20-1%20%7D */
    /**
     * Faceted search against Solr.
     *
     * A string suggestion is searched as a prefix (`<text>*`) across the weighted query fields
     * with operator `or`. Any other suggestion is treated as a `Suggestion` and searched as the
     * exact phrase `<type>:"<value>"` with operator `and` and no `qf`. Quotes and backslashes in
     * the suggestion value and in filter values are escaped so they cannot break the phrase.
     *
     * @param suggestion - Free text, or a typed suggestion (field name + value).
     * @param activeFilterCategories - Map of field name to selected values; each value becomes one `fq` entry.
     * @param solrCore - Name of the Solr core.
     * @param solrHost - Solr host name without scheme.
     * @param start - Offset of the first result; defaults to `"0"`.
     * @returns Observable emitting the complete Solr response (10 rows, newest first, with facets).
     */
    public facetedSearch(
        suggestion: Suggestion | string,
        activeFilterCategories: ActiveFilterCategories,
        solrCore: string,
        solrHost: string,
        start?: string, // Starting page
    ): Observable<SolrResponse> {
        const fields = [
            "id",
            "licence",
            "server_date_published",
            "abstract_output",
            "identifier",
            "title_output",
            "title_additional",
            "author",
            "subject",
            "doctype",
        ].toString();

        // Determine search term, query operator and query fields based on the suggestion type.
        let term: string;
        let queryOperator: "or" | "and";
        let qfFields: string | undefined;
        if (typeof suggestion === "string") {
            term = suggestion + "*";
            queryOperator = "or";
            qfFields = DatasetService.SOLR_QUERY_FIELDS;
        } else {
            // Narrowed by type instead of `instanceof`, so a Suggestion-shaped plain object
            // is handled too rather than producing a request without `q`.
            term = suggestion.type + ':"' + DatasetService.escapeSolrPhrase(suggestion.value) + '"';
            queryOperator = "and";
            qfFields = undefined;
        }

        // One filter query per selected value,
        // e.g. [ 'subject:("Vektordaten")', 'author:("GeoSphere Austria, ")' ]
        const filterFields: string[] = [];
        for (const [category, values] of Object.entries(activeFilterCategories)) {
            for (const value of values) {
                filterFields.push(category + ':("' + DatasetService.escapeSolrPhrase(value) + '")');
            }
        }

        // https://solr.apache.org/guide/8_4/json-request-api.html
        const queryParams = {
            "0": "fl=" + fields,
            q: term,
            "q.op": queryOperator,
            defType: "edismax",
            qf: qfFields,
            indent: "on",
            wt: "json",
            rows: 10,
            // e.g. fq: ["subject:Steiermark", "language:de"]
            fq: filterFields,
            start: start ?? "0",
            sort: "server_date_published desc",
            facet: "on",
            "json.facet.language": '{ type: "terms", field: "language" }',
            "json.facet.subject": '{ type: "terms", field: "subject", limit: -1 }',
            "json.facet.year": '{ type: "terms", field: "year" }',
            "json.facet.author": '{ type: "terms", field: "author_facet", limit: -1 }',
        };
        /* E.g.
        {"0":"fl=id,licence,server_date_published,abstract_output,identifier,title_output,title_additional,author,subject,doctype","q":"*","q.op":"or","defType":"edismax",
        "qf":"title^3 author^2 subject^1",
        "indent":"on","wt":"json","rows":10,
        "fq":["subject:(\"Vektordaten\")","author:(\"GeoSphere Austria, \")"],
        "start":"0","sort":"server_date_published desc","facet":"on",
        "json.facet.language":"{ type: \"terms\", field: \"language\" }",
        "json.facet.subject":"{ type: \"terms\", field: \"subject\", limit: -1 }",
        "json.facet.year":"{ type: \"terms\", field: \"year\" }",
        "json.facet.author":"{ type: \"terms\", field: \"author_facet\", limit: -1 }"}
        */

        return api.get<SolrResponse>(this.solrSelectUrl(solrHost, solrCore), queryParams);
    }

    /**
     * Fetches the list of years from `/api/years`.
     *
     * @returns Observable emitting the years as strings.
     */
    public getYears(): Observable<string[]> {
        return api.get<string[]>(VUE_API + "/api/years");
    }

    /**
     * Fetches the documents for a specific year from `/api/sitelinks/<year>`.
     *
     * @param year - Year segment appended to the path as-is.
     */
    public getDocuments(year: string): Observable<Array<DbDataset>> {
        return api.get<Array<DbDataset>>(VUE_API + "/api/sitelinks/" + year);
    }

    /**
     * Fetches a dataset by its ID from `/api/dataset/<id>`.
     *
     * @param id - Numeric dataset identifier.
     * @returns Observable emitting the dataset after {@link DatasetService.prepareDataset} has been applied.
     */
    public getDataset(id: number): Observable<DbDataset> {
        const apiUrl = VUE_API + "/api/dataset/" + id;
        return api.get<DbDataset>(apiUrl).pipe(map((res) => this.prepareDataset(res)));
    }

    /**
     * Fetches a dataset by its DOI.
     *
     * @param doi - DOI suffix; the fixed prefix `10.24341/tethys.` is prepended.
     * @returns Observable emitting the dataset after {@link DatasetService.prepareDataset} has been applied.
     */
    public getDatasetByDoi(doi: string): Observable<DbDataset> {
        const apiUrl = VUE_API + "/api/dataset/10.24341/tethys." + doi;
        return api.get<DbDataset>(apiUrl).pipe(map((res) => this.prepareDataset(res)));
    }

    /**
     * Converts a raw API payload into a `DbDataset` instance.
     *
     * The payload is serialized back to JSON because `deserialize` takes a JSON string.
     * The dataset's `url` is then set to the URI of the current document.
     *
     * @param datasetObj - Dataset as returned by the API.
     */
    private prepareDataset(datasetObj: DbDataset): DbDataset {
        const dataset = deserialize<DbDataset>(DbDataset, JSON.stringify(datasetObj));
        dataset.url = document.documentURI;
        return dataset;
    }
}

export default new DatasetService();