// tethys.frontend/src/services/dataset.service.ts

import api from "../api/api";
// import { Observable, of } from "rxjs";
import { Observable } from "rxjs";
import { tap, map } from "rxjs/operators";
import { Dataset, DbDataset, Suggestion } from "@/models/dataset";
import { HitHighlight, OpenSearchResponse, SolrResponse } from "@/models/headers";
import { ActiveFilterCategories } from "@/models/solr";
import { VUE_API } from "@/constants";
import { deserialize } from "class-transformer";

/**
 * Frontend gateway to the dataset back ends.
 *
 * Builds the requests for three targets and hands them to the shared `api`
 * wrapper: the OpenSearch index (autocomplete and faceted search), the Solr
 * core (faceted search) and the REST API rooted at `VUE_API` (years, site
 * links, single datasets). No method subscribes to the Observable it returns.
 */
class DatasetService {
    /**
     * Builds the `_search` URL of an OpenSearch index.
     *
     * @param openCore - Index name; becomes the first path segment.
     * @param openHost - Host name of the OpenSearch server, without scheme.
     */
    private static openSearchUrl(openCore: string, openHost: string): string {
        const host = "https://" + openHost; // When using geoinformation.dev
        // const host = "http://" + openHost; // When using local OpenSearch dev endpoint
        return host + "/" + openCore + "/_search";
    }

    /**
     * Builds the free-text query shared by the autocomplete and the faceted search.
     *
     * The `match` clauses used for the title, author and subjects fields are
     * case-insensitive by default (the standard analyzer, which is typically
     * used, lower-cases the terms) and tolerate misspellings through
     * `fuzziness: "AUTO"`. The `wildcard` clauses are case-sensitive by
     * default, which is why the prefix pattern is built from the lower-cased
     * term. Matches are boosted title (3) > author (2) > subjects (1).
     *
     * NOTE(review): `*` and `?` typed by the user reach the wildcard clauses
     * unescaped and therefore act as wildcard operators — confirm this is intended.
     *
     * @param term - Search term exactly as entered by the user.
     */
    private static buildFreeTextQuery(term: string) {
        const prefixPattern = `${term.toLowerCase()}*`;
        return {
            bool: {
                should: [
                    { match: { title: { query: term, fuzziness: "AUTO", boost: 3 } } },
                    { match: { author: { query: term, fuzziness: "AUTO", boost: 2 } } },
                    { match: { subjects: { query: term, fuzziness: "AUTO", boost: 1 } } }, // In SOLR is "subject"!
                    { wildcard: { title: { value: prefixPattern, boost: 3 } } },
                    { wildcard: { author: { value: prefixPattern, boost: 2 } } },
                    { wildcard: { subjects: { value: prefixPattern, boost: 1 } } } // In SOLR is "subject"!
                ],
                minimum_should_match: 1
            }
        };
    }

    /** Highlight configuration requested by both OpenSearch searches. */
    private static highlightFields() {
        return {
            fields: {
                title: {},
                author: {},
                subjects: {}
            }
        };
    }

    /**
     * Converts the optional `start` string into the numeric `from` offset.
     *
     * Missing, empty, non-numeric or negative input yields 0 so that the
     * request never carries `NaN` (which JSON serialises as `null`) or a
     * negative offset.
     */
    private static toOffset(start?: string): number {
        const offset = start ? parseInt(start, 10) : 0;
        return isNaN(offset) || offset < 0 ? 0 : offset;
    }

    /**
     * Escapes `\` and `"` so that a value can be embedded in a double-quoted
     * Solr phrase without terminating the phrase early.
     */
    private static escapeSolrPhrase(value: string): string {
        // String() keeps the concatenation semantics for non-string values that slip through at runtime.
        return String(value).replace(/(["\\])/g, "\\$1");
    }

    /**
     * Autocomplete search against the OpenSearch endpoint with fuzzy search enabled.
     *
     * Allows for misspellings in the search term and boosts the relevance of
     * matches in the title, author and subjects fields. Requests the first 10
     * hits ordered by relevance, together with `language` and `subjects`
     * term aggregations and highlighting for title, author and subjects.
     *
     * @param term - The search term to query.
     * @param openCore - Name of the OpenSearch index.
     * @param openHost - Host name of the OpenSearch server, without scheme.
     * @returns Observable emitting, for all hits, the `_source` documents and
     * the `highlight` entries; both arrays follow the order of the hits.
     */
    public searchTerm(term: string, openCore: string, openHost: string): Observable<{ datasets: Dataset[], highlights: HitHighlight[] }> {
        const body = {
            query: DatasetService.buildFreeTextQuery(term),
            size: 10,
            from: 0,
            // sort: [{ server_date_published: { order: "desc" } }],
            sort: [{ _score: { order: "desc" } }], // Sort by _score in descending order
            track_scores: true, // This ensures "_score" is included even when sorting by other criteria. Otherwise the relevance score is not calculated
            aggs: {
                language: { terms: { field: "language.keyword" } },
                subjects: { terms: { field: "subjects.keyword", size: 10 } } // In SOLR is "subject"!
            },
            highlight: DatasetService.highlightFields()
        };

        // The response lists the matches under hits.hits; each hit carries the stored
        // document in `_source` and its highlighted fragments in `highlight`.
        // `map` reshapes the emitted response into the two parallel arrays.
        return api.post<OpenSearchResponse>(DatasetService.openSearchUrl(openCore, openHost), body).pipe(
            map(response => ({
                datasets: response.hits.hits.map(hit => hit._source),
                highlights: response.hits.hits.map(hit => hit.highlight)
            }))
        );
    }

    // Legacy Solr variant of the autocomplete search, kept for reference.
    // // For the autocomplete search. Method to perform a search based on a term
    // public searchTerm_SOLR(term: string, solrCore: string, solrHost: string): Observable<Dataset[]> {
    //     // SOLR endpoint
    //     const host = "https://" + solrHost;
    //     const path = "/solr/" + solrCore + "/select?";
    //     const base = host + path;

    //     //const fields = 'id,server_date_published,abstract_output,title_output,title_additional,author,subject'; // fields we want returned
    //     const fields = [
    //         "id",
    //         "licence",
    //         "server_date_published",
    //         "abstract_output",
    //         "title_output",
    //         "title_additional",
    //         "author",
    //         "subject",
    //         "doctype",
    //     ].toString();

    //     const qfFields = "title^3 author^2 subject^1";

    //     const q_params = {
    //         "0": "fl=" + fields,
    //         q: term + "*",
    //         defType: "edismax",
    //         qf: qfFields,
    //         indent: "on",
    //         wt: "json",
    //     };

    //     // Make API call to Solr and return the result
    //     /**
    //      * The response received from Solr is an object that includes various details about the search results.
    //      * One of its key properties is docs, an array of documents (datasets) that match the search criteria.
    //      * The map operator extracts that array from the emitted response.
    //      */
    //     const stations = api.get<SolrResponse>(base, q_params).pipe(map((res: SolrResponse) => res.response.docs));

    //     return stations;
    // }

    /**
     * Faceted search against the OpenSearch endpoint.
     *
     * A plain string is searched like in `searchTerm` (fuzzy and prefix
     * matching over title, author and subjects). A Suggestion object restricts
     * the search to the field named by its lower-cased `type` and requires all
     * terms of its `value` to be present. Every entry of
     * `activeFilterCategories` adds a `terms` filter on `<category>.keyword`.
     *
     * @param suggestion - Free-text term or a suggestion picked by the user.
     * @param activeFilterCategories - Selected facet values, keyed by category.
     * @param openCore - Name of the OpenSearch index.
     * @param openHost - Host name of the OpenSearch server, without scheme.
     * @param start - Offset of the first hit to return, as a decimal string;
     * sent as `from`. Missing or invalid values fall back to 0.
     * @returns Observable of the unmodified OpenSearch response.
     */
    public facetedSearchOPEN(
        suggestion: Suggestion | string,
        activeFilterCategories: ActiveFilterCategories,
        openCore: string,
        openHost: string,
        start?: string, // Offset of the first hit
    ): Observable<OpenSearchResponse> {
        console.log("facetedsearchOPEN > suggestion entered:");
        console.log(suggestion);

        // The query construction depends on whether the suggestion is a string or a Suggestion object.
        const query = typeof suggestion === "string"
            ? DatasetService.buildFreeTextQuery(suggestion)
            : {
                match: {
                    [suggestion.type.toLowerCase()]: {
                        query: suggestion.value,
                        operator: "and" // all the terms in the query must be present in the field
                    }
                }
            };

        // One `terms` filter per active category, matched against the exact (keyword) values.
        const filters = Object.entries(activeFilterCategories).map(([category, values]) => ({
            terms: { [`${category}.keyword`]: values }
        }));

        const body = {
            query: {
                bool: {
                    must: query, // Contains the main query constructed earlier.
                    filter: filters // Contains the filters constructed from activeFilterCategories.
                }
            },
            size: 10,
            from: DatasetService.toOffset(start),
            sort: [{ _score: { order: "desc" } }],
            track_scores: true,
            aggs: { // Defines aggregations for facets
                // terms: Aggregation type that returns the most common terms in a field.
                // !For a large number of terms setting an extremely large size might not be efficient
                // If you genuinely need all unique terms and expect a large number of them, consider using a composite aggregation for more efficient pagination of terms.
                subjects: { terms: { field: "subjects.keyword", size: 1000 } },
                language: { terms: { field: "language.keyword" } },
                author: { terms: { field: "author.keyword", size: 1000 } },
                year: { terms: { field: "year.keyword", size: 100 } }
            },
            highlight: DatasetService.highlightFields()
        };

        return api.post<OpenSearchResponse>(DatasetService.openSearchUrl(openCore, openHost), body);
    }

    /**
     * Faceted search on a Solr core. Faceted search allows the user to filter
     * search results based on various categories (facets).
     *
     * A plain string is searched as a prefix (`<term>*`) over title, author and
     * subject with the `or` operator. A Suggestion instance is searched as the
     * exact phrase `<type>:"<value>"` with the `and` operator. Every value of
     * `activeFilterCategories` becomes one `fq` entry of the form
     * `<category>:("<value>")`. Quotes and backslashes inside values are
     * escaped so they cannot break out of the quoted phrase.
     *
     * @param suggestion - Free-text term or a suggestion picked by the user.
     * @param activeFilterCategories - Selected facet values, keyed by category.
     * @param solrCore - Name of the Solr core.
     * @param solrHost - Host name of the Solr server, without scheme.
     * @param start - Offset of the first document to return; defaults to "0".
     * @returns Observable of the unmodified Solr response.
     */
    public facetedSearch(
        suggestion: Suggestion | string,
        activeFilterCategories: ActiveFilterCategories,
        solrCore: string,
        solrHost: string,
        start?: string, // Offset of the first document
    ): Observable<SolrResponse> {
        console.log("facetedsearchSOLR > suggestion entered:");
        console.log(suggestion);

        const base = "https://" + solrHost + "/solr/" + solrCore + "/select?";

        // Fields Solr should return for every document.
        const fields = [
            "id",
            "licence",
            "server_date_published",
            "abstract_output",
            "identifier",
            "title_output",
            "title_additional",
            "author",
            "subject",
            "doctype",
        ].toString();

        // Search term, query operator and query fields depend on the kind of suggestion.
        // NOTE(review): an object that is not a Suggestion instance (e.g. a plain
        // deserialised object) matches neither branch and leaves `q` undefined —
        // confirm callers always pass class instances.
        let term: string | undefined;
        let queryOperator: string | undefined;
        let qfFields: string | undefined;
        if (typeof suggestion === "string") { // A plain string gets a wildcard (*) appended for partial matches.
            term = suggestion + "*";
            queryOperator = "or";
            qfFields = "title^3 author^2 subject^1";
        } else if (suggestion instanceof Suggestion) { // A Suggestion forms a more specific query from its type and value.
            term = suggestion.type + ':"' + DatasetService.escapeSolrPhrase(suggestion.value) + '"';
            queryOperator = "and";
            qfFields = undefined;
        }

        // One filter query per selected facet value,
        // e.g. Array [ 'subject:("Vektordaten")', 'author:("GeoSphere Austria, ")' ]
        const filterFields: string[] = [];
        for (const [category, values] of Object.entries(activeFilterCategories)) {
            for (const value of values) {
                filterFields.push(category + ':("' + DatasetService.escapeSolrPhrase(value) + '")');
            }
        }

        // https://solr.apache.org/guide/8_4/json-request-api.html
        const q_params = {
            "0": "fl=" + fields,
            q: term,
            "q.op": queryOperator,
            defType: "edismax",
            qf: qfFields,
            // df: "title",
            indent: "on",
            wt: "json",
            rows: 10,
            // fq: ["subject:Steiermark", "language:de"],
            fq: filterFields,
            start: start === undefined ? "0" : start,
            sort: "server_date_published desc",
            facet: "on",
            // "facet.field": "language",
            "json.facet.language": '{ type: "terms", field: "language" }',
            "json.facet.subject": '{ type: "terms", field: "subject", limit: -1 }',
            "json.facet.year": '{ type: "terms", field: "year" }',
            "json.facet.author": '{ type: "terms", field: "author_facet", limit: -1 }',
        };
        /* E.g.
        {"0":"fl=id,licence,server_date_published,abstract_output,identifier,title_output,title_additional,author,subject,doctype","q":"*","q.op":"or","defType":"edismax",
        "qf":"title^3 author^2 subject^1",
        "indent":"on","wt":"json","rows":10,
        "fq":["subject:(\"Vektordaten\")","author:(\"GeoSphere Austria, \")"],
        "start":"0","sort":"server_date_published desc","facet":"on",
        "json.facet.language":"{ type: \"terms\", field: \"language\" }",
        "json.facet.subject":"{ type: \"terms\", field: \"subject\", limit: -1 }",
        "json.facet.year":"{ type: \"terms\", field: \"year\" }",
        "json.facet.author":"{ type: \"terms\", field: \"author_facet\", limit: -1 }"}

        Example of a request URL carrying these parameters (wrapped for readability):
        https://tethys.at/solr/rdr_data/select?&0=fl%3Did%2Clicence%2Cserver_date_published%2Cabstract_output%2Cidentifier%2Ctitle_output%2Ctitle_additional%2Cauthor%2Csubject%2Cdoctype&q=%2A
        &q.op=or&defType=edismax&qf=title%5E3%20author%5E2%20subject%5E1&indent=on&wt=json&rows=10&start=0&sort=server_date_published%20desc&facet=on&json.facet.language=%7B%20type%3A%20%22
        terms%22%2C%20field%3A%20%22language%22%20%7D&json.facet.subject=%7B%20type%3A%20%22terms%22%2C%20field%3A%20%22subject%22%2C%20limit%3A%20-1%20%7D&json.facet.year=%7B%20type%3A%20%22
        terms%22%2C%20field%3A%20%22year%22%20%7D&json.facet.author=%7B%20type%3A%20%22terms%22%2C%20field%3A%20%22author_facet%22%2C%20limit%3A%20-1%20%7D
         */

        return api.get<SolrResponse>(base, q_params);
    }

    /**
     * Fetches the list of years from `<VUE_API>/api/years`.
     */
    public getYears(): Observable<string[]> {
        return api.get<string[]>(VUE_API + "/api/years");
    }

    /**
     * Fetches the documents of one year from `<VUE_API>/api/sitelinks/<year>`.
     *
     * @param year - Year whose documents are requested; appended to the path as is.
     */
    public getDocuments(year: string): Observable<Array<DbDataset>> {
        return api.get<Array<DbDataset>>(VUE_API + "/api/sitelinks/" + year);
    }

    /**
     * Fetches a dataset by its ID from `<VUE_API>/api/dataset/<id>` and turns
     * the response into a `DbDataset` instance.
     *
     * @param id - Numeric identifier of the dataset.
     */
    public getDataset(id: number): Observable<DbDataset> {
        const apiUrl = VUE_API + "/api/dataset/" + id;
        return api.get<DbDataset>(apiUrl).pipe(map((res) => this.prepareDataset(res)));
    }

    /**
     * Fetches a dataset by its DOI and turns the response into a `DbDataset` instance.
     *
     * @param doi - The part of the DOI that follows the fixed prefix
     * `10.24341/tethys.`; the prefix is prepended here.
     */
    public getDatasetByDoi(doi: string): Observable<DbDataset> {
        const apiUrl = VUE_API + "/api/dataset/10.24341/tethys." + doi;
        return api.get<DbDataset>(apiUrl).pipe(map((res) => this.prepareDataset(res)));
    }

    /**
     * Re-creates the plain response object as a `DbDataset` instance (round
     * trip through JSON and class-transformer's `deserialize`) and stores the
     * URI of the current browser document in its `url` property.
     */
    private prepareDataset(datasetObj: DbDataset): DbDataset {
        const dataset = deserialize<DbDataset>(DbDataset, JSON.stringify(datasetObj));
        dataset.url = document.documentURI;

        return dataset;
    }
}

// Single shared service instance, created once when this module is first loaded.
const datasetService = new DatasetService();

export default datasetService;