import api from "../api/api";
import { Observable } from "rxjs";
import { tap, map } from "rxjs/operators";
import { Dataset, DbDataset, Suggestion } from "@/models/dataset";
import { HitHighlight, OpenSearchResponse, SolrResponse } from "@/models/headers";
import { ActiveFilterCategories } from "@/models/solr";
import { VUE_API } from "@/constants";
import { deserialize } from "class-transformer";

/**
 * Facet fields that are filtered on directly.
 * Every other facet category is filtered on its ".keyword" sub-field.
 */
const DIRECT_FILTER_FIELDS: ReadonlySet<string> = new Set(["language", "year", "doctype"]);

/** Build the "_search" endpoint URL for the given OpenSearch host and core. */
function buildSearchUrl(openCore: string, openHost: string): string {
    return openHost + "/" + openCore + "/_search";
}

/**
 * Build the free-text query shared by the autocomplete and the faceted search.
 *
 * The match clauses receive the term unchanged; the wildcard (prefix) clauses receive the
 * lower-cased term, because wildcard queries are case-sensitive by default while match
 * queries are typically lower-cased by the analyzer.
 * Boosts: title 3, author 2, subjects 1, doctype 1. At least one clause has to match.
 */
function buildTermQuery(term: string) {
    const lowercaseTerm = term.toLowerCase();
    return {
        bool: {
            should: [
                { match: { title: { query: term, fuzziness: "AUTO", boost: 3 } } },
                { match: { author: { query: term, fuzziness: "AUTO", boost: 2 } } },
                { match: { subjects: { query: term, fuzziness: "AUTO", boost: 1 } } }, // In SOLR is "subject"!
                { match: { doctype: { query: term, fuzziness: "AUTO", boost: 1 } } },
                { wildcard: { title: { value: `${lowercaseTerm}*`, boost: 3 } } },
                { wildcard: { author: { value: `${lowercaseTerm}*`, boost: 2 } } },
                { wildcard: { subjects: { value: `${lowercaseTerm}*`, boost: 1 } } }, // In SOLR is "subject"!
                { wildcard: { doctype: { value: `${lowercaseTerm}*`, boost: 1 } } },
            ],
            minimum_should_match: 1, // Require at least one match
        },
    };
}

/**
 * Build the terms aggregations used to render the facets.
 *
 * For "language", "year" and "doctype" the ".keyword" suffix HAS TO BE OMITTED,
 * otherwise no buckets are obtained for them.
 * For a large number of terms an extremely large size might not be efficient; if all unique
 * terms are genuinely needed, consider a composite aggregation to paginate them.
 */
function buildFacetAggregations() {
    return {
        subjects: { terms: { field: "subjects.keyword", size: 1000 } }, // In SOLR is "subject"!
        language: { terms: { field: "language" } },
        author: { terms: { field: "author.keyword", size: 1000 } },
        year: { terms: { field: "year", size: 100 } },
        doctype: { terms: { field: "doctype", size: 50 } },
    };
}

/** Build the highlight section: matching terms are highlighted in title, author, subjects and doctype. */
function buildHighlight() {
    return {
        fields: {
            title: {},
            author: {},
            subjects: {},
            doctype: {},
        },
    };
}

/**
 * Convert the optional "start" string into the numeric "from" offset of the request.
 * Missing, empty, non-numeric or negative values fall back to 0, so that the request body
 * never carries NaN (which would be serialised as null) or a negative offset.
 */
function parseOffset(start?: string): number {
    if (!start) {
        return 0;
    }
    const offset = Number.parseInt(start, 10);
    return Number.isNaN(offset) || offset < 0 ? 0 : offset;
}

class DatasetService {
    /**
     * Search datasets with the OpenSearch API, allowing for fuzzy search and boosting relevance
     * in the title, author, subjects and doctype fields. Used for the autocomplete search.
     *
     * The request asks for the first 10 hits sorted by relevance (_score).
     *
     * @param term - Search query term
     * @param openCore - The OpenSearch core to search in
     * @param openHost - The OpenSearch host URL
     * @returns Observable emitting the "_source" of every hit (datasets) and the "highlight" of every hit (highlights)
     */
    public searchTerm(term: string, openCore: string, openHost: string): Observable<{ datasets: Dataset[]; highlights: HitHighlight[] }> {
        const base = buildSearchUrl(openCore, openHost);

        // Request body defining the search query logic
        const body = {
            query: buildTermQuery(term),
            size: 10, // Limit to 10 results
            from: 0, // Pagination: start from the first result
            sort: [{ _score: { order: "desc" } }], // Sort by relevance (_score)
            // sort: [{ server_date_published: { order: "desc" } }],
            track_scores: true, // Ensures "_score" is included even when sorting by other criteria
            aggs: buildFacetAggregations(),
            highlight: buildHighlight(),
        };

        // POST the query and reduce every hit of the response to its document ("_source") and its highlight.
        return api.post<OpenSearchResponse>(base, body).pipe(
            map((response) => ({
                datasets: response.hits.hits.map((hit) => hit._source),
                highlights: response.hits.hits.map((hit) => hit.highlight),
            }))
        );
    }

    // Legacy SOLR implementation, kept for reference:
    // // For the autocomplete search. Method to perform a search based on a term
    // public searchTermSOLR(term: string, solrCore: string, solrHost: string): Observable {
    //     // SOLR endpoint
    //     const host = "https://" + solrHost;
    //     const path = "/solr/" + solrCore + "/select?";
    //     const base = host + path;
    //     //const fields = 'id,server_date_published,abstract_output,title_output,title_additional,author,subject'; // fields we want returned
    //     const fields = [
    //         "id",
    //         "licence",
    //         "server_date_published",
    //         "abstract_output",
    //         "title_output",
    //         "title_additional",
    //         "author",
    //         "subject",
    //         "doctype",
    //     ].toString();
    //     const qfFields = "title^3 author^2 subject^1";
    //     const q_params = {
    //         "0": "fl=" + fields,
    //         q: term + "*",
    //         defType: "edismax",
    //         qf: qfFields,
    //         indent: "on",
    //         wt: "json",
    //     };
    //     // Make API call to Solr and return the result
    //     /**
    //      * When a GET request is made to the Solr server using the api.get method, the response received from Solr is an object that includes various details about the search results.
    //      * One of the key properties of this response object is docs, which is an array of documents (datasets) that match the search criteria.
    //      * The pipe method is used to chain RxJS operators to the Observable returned by api.get. The map operator is used to transform the emitted items of the Observable.
    //      */
    //     const stations = api.get(base, q_params).pipe(map((res: SolrResponse) => res.response.docs));
    //     return stations;
    // }

    /**
     * Perform a faceted search with the OpenSearch API using filters and suggestions.
     *
     * A plain string is searched like in searchTerm (fuzzy match plus prefix wildcard).
     * A Suggestion object is matched against the field named by its type, and all terms of
     * its value must be present in that field. Every active filter value is added as a
     * mandatory term clause. Hits are sorted by publication date, newest first, 10 per request.
     *
     * @param suggestion - Search term or suggestion
     * @param activeFilterCategories - Active filters to apply (category name mapped to the selected values)
     * @param openCore - The OpenSearch core to search in
     * @param openHost - The OpenSearch host URL
     * @param start - Optional: offset of the first hit, sent as the "from" value of the request; invalid values are treated as 0
     * @returns Observable emitting the raw search response
     */
    public facetedSearch(
        suggestion: Suggestion | string,
        activeFilterCategories: ActiveFilterCategories,
        openCore: string,
        openHost: string,
        start?: string
    ): Observable<OpenSearchResponse> {
        const base = buildSearchUrl(openCore, openHost);

        // The query construction depends on whether the suggestion is a string or a Suggestion object.
        const mainQuery =
            typeof suggestion === "string"
                ? buildTermQuery(suggestion)
                : {
                      match: {
                          [suggestion.type]: {
                              query: suggestion.value,
                              operator: "and", // all the terms in the query must be present in the field
                          },
                      },
                  };

        // Build one term filter per selected value of every active filter category.
        const filters = Object.entries(activeFilterCategories).flatMap(([category, values]) => {
            const field = DIRECT_FILTER_FIELDS.has(category) ? category : `${category}.keyword`;
            return values.map((value) => ({ term: { [field]: value } }));
        });

        // Request body for the faceted search
        const body = {
            query: {
                bool: {
                    must: [
                        mainQuery, // Ensure the main query must be satisfied
                        ...filters, // Ensure all filters must be satisfied
                    ],
                },
            },
            size: 10,
            from: parseOffset(start),
            sort: [{ server_date_published: { order: "desc" } }], // Sort by publication date
            // sort: [{ _score: { order: "desc" } }], // Sort by _score in descending order
            track_scores: true,
            aggs: buildFacetAggregations(),
            highlight: buildHighlight(),
        };

        // API call; the observable of the search response is returned unchanged.
        return api.post<OpenSearchResponse>(base, body);
    }

    // Legacy SOLR implementation, kept for reference:
    // /**
    //  * This method performs a faceted search on a Solr core. Faceted search allows the user to filter search results based on various categories (facets)
    //  */
    // public facetedSearchSOLR(
    //     suggestion: Suggestion | string,
    //     activeFilterCategories: ActiveFilterCategories,
    //     solrCore: string,
    //     solrHost: string,
    //     start?: string, // Starting page
    // ): Observable {
    //     // console.log("face:", suggestion);
    //     // console.log(activeFilterCategories);
    //     // console.log(solrCore);
    //     // console.log(solrHost);
    //     // console.log(start);
    //     console.log("facetedsearchSOLR > suggestion entered:");
    //     console.log(suggestion);
    //     // Construct Solr query parameters
    //     const host = "https://" + solrHost;
    //     const path = "/solr/" + solrCore + "/select?";
    //     const base = host + path;
    //     const fields = [
    //         "id",
    //         "licence",
    //         "server_date_published",
    //         "abstract_output",
    //         "identifier",
    //         "title_output",
    //         "title_additional",
    //         "author",
    //         "subject",
    //         "doctype",
    //     ].toString();
    //     // Determine search term, query operator, and query fields based on the suggestion type. Depending on whether suggestion is a string or a Suggestion object, it constructs the search term and query fields differently.
    //     let term, queryOperator, qfFields;
    //     if (typeof suggestion === "string") { // If suggestion is a string, it appends a wildcard (*) for partial matches.
    //         term = suggestion + "*";
    //         queryOperator = "or";
    //         qfFields = "title^3 author^2 subject^1";
    //     } else if (suggestion instanceof Suggestion) { // If suggestion is a Suggestion object, it forms a more specific query based on the type and value of the suggestion.
    //         term = suggestion.type + ':"' + suggestion.value + '"';
    //         queryOperator = "and";
    //         qfFields = undefined;
    //     }
    //     // Set default value for start if not provided
    //     if (start === undefined) start = "0";
    //     // Construct filter fields based on active filter categories
    //     const filterFields = new Array();
    //     if (Object.keys(activeFilterCategories).length > 0) {
    //         /* Declare variable prop with a type that is a key of the activeFilterCategories. The 'keyof typeof' activeFilterCategories type represents all possible keys
    //            that can exist on the activeFilterCategories --> prop can only be assigned a value that is a key of the activeFilterCategories object */
    //         let prop: keyof typeof activeFilterCategories;
    //         for (prop in activeFilterCategories) {
    //             const filterItems = activeFilterCategories[prop];
    //             filterItems.forEach(function (value: string) {
    //                 filterFields.push(prop + ':("' + value + '")');
    //                 // e.g. Array [ 'subject:("Vektordaten")', 'author:("GeoSphere Austria, ")' ]
    //             });
    //         }
    //     }
    //     // https://solr.apache.org/guide/8_4/json-request-api.html
    //     // Construct Solr query parameters
    //     const q_params = {
    //         "0": "fl=" + fields,
    //         q: term,
    //         "q.op": queryOperator,
    //         defType: "edismax",
    //         qf: qfFields,
    //         // df: "title",
    //         indent: "on",
    //         wt: "json",
    //         rows: 10,
    //         // fq: ["subject:Steiermark", "language:de"],
    //         fq: filterFields,
    //         start: start,
    //         sort: "server_date_published desc",
    //         facet: "on",
    //         // "facet.field": "language",
    //         "json.facet.language": '{ type: "terms", field: "language" }',
    //         "json.facet.subject": '{ type: "terms", field: "subject", limit: -1 }',
    //         "json.facet.year": '{ type: "terms", field: "year" }',
    //         "json.facet.author": '{ type: "terms", field: "author_facet", limit: -1 }',
    //     };
    //     /* E.g.
    //     {"0":"fl=id,licence,server_date_published,abstract_output,identifier,title_output,title_additional,author,subject,doctype","q":"*","q.op":"or","defType":"edismax",
    //     "qf":"title^3 author^2 subject^1",
    //     "indent":"on","wt":"json","rows":10,
    //     "fq":["subject:(\"Vektordaten\")","author:(\"GeoSphere Austria, \")"],
    //     "start":"0","sort":"server_date_published desc","facet":"on",
    //     "json.facet.language":"{ type: \"terms\", field: \"language\" }",
    //     "json.facet.subject":"{ type: \"terms\", field: \"subject\", limit: -1 }",
    //     "json.facet.year":"{ type: \"terms\", field: \"year\" }",
    //     "json.facet.author":"{ type: \"terms\", field: \"author_facet\", limit: -1 }"}
    //     */
    //     // console.log(JSON.stringify(q_params));
    //     // Make API call to Solr and return the result
    //     const stations = api.get(base, q_params);
    //     return stations;
    // }

    /**
     * Fetch the years from the "/api/years" endpoint of the backend (VUE_API).
     *
     * @returns Observable emitting the response of the endpoint
     */
    // NOTE(review): the element type was lost in this copy of the file; string[] is assumed - confirm against the API.
    public getYears(): Observable<string[]> {
        return api.get<string[]>(VUE_API + "/api/years");
    }

    /**
     * Fetch the documents of a specific year from the "/api/sitelinks/<year>" endpoint.
     *
     * @param year - Year appended to the endpoint path
     * @returns Observable emitting the response of the endpoint
     */
    // NOTE(review): the element type was lost in this copy of the file; DbDataset[] is assumed - confirm against the API.
    public getDocuments(year: string): Observable<Array<DbDataset>> {
        const base = VUE_API + "/api/sitelinks/" + year;
        const documents: Observable<DbDataset[]> = api.get<Array<DbDataset>>(base);
        return documents;
    }

    /**
     * Fetch a dataset by its ID from the "/api/dataset/<id>" endpoint.
     *
     * @param id - ID appended to the endpoint path
     * @returns Observable emitting the dataset after it has been passed through prepareDataset
     */
    public getDataset(id: number): Observable<DbDataset> {
        const apiUrl = VUE_API + "/api/dataset/" + id;
        return api.get<DbDataset>(apiUrl).pipe(map((res) => this.prepareDataset(res)));
    }

    /**
     * Fetch a dataset by its DOI. The given value is appended to the fixed prefix
     * "10.24341/tethys.", so only the part after that prefix has to be passed.
     *
     * @param doi - Part of the DOI that follows "10.24341/tethys."
     * @returns Observable emitting the dataset after it has been passed through prepareDataset
     */
    public getDatasetByDoi(doi: string): Observable<DbDataset> {
        const apiUrl = VUE_API + "/api/dataset/10.24341/tethys." + doi;
        return api.get<DbDataset>(apiUrl).pipe(map((res) => this.prepareDataset(res)));
    }

    /**
     * Turn the plain response object into a DbDataset instance (by serialising it to JSON and
     * deserialising it with class-transformer) and set its url to the URI of the current document.
     *
     * @param datasetObj - Dataset object as received from the API
     * @returns The deserialised DbDataset with its url set
     */
    private prepareDataset(datasetObj: DbDataset): DbDataset {
        const dataset = deserialize<DbDataset>(DbDataset, JSON.stringify(datasetObj));
        dataset.url = document.documentURI;
        return dataset;
    }
}

export default new DatasetService();