OpenSearch progress.

- Term search works
- Pending: faceted search
- Ongoing: highlighting of fuzzy results
2024-06-07 17:44:13 +02:00 · 2024-06-07 17:44:13 +02:00 · 4f53411d07
commit 4f53411d07
parent 6d1c1b28c3
5 changed files with 292 additions and 204 deletions
--- a/src/services/dataset.service.ts
+++ b/src/services/dataset.service.ts
@ -1,7 +1,7 @@
 import api from "../api/api";
 // import { Observable, of } from "rxjs";
 import { Observable } from "rxjs";
-import { map } from "rxjs/operators";
+import { tap, map } from "rxjs/operators";
 import { Dataset, DbDataset, Suggestion } from "@/models/dataset";
 import { OpenSearchResponse, SolrResponse } from "@/models/headers";
 import { ActiveFilterCategories } from "@/models/solr";
@ -16,130 +16,130 @@ class DatasetService {
     * 
     * @param {string} searchTerm - The search term to query.
     */
-    async fetchDataFromOpenSearch(searchTerm: string): Promise<void> {
-        // Define the OpenSearch endpoint URL
-        const url = "http://opensearch.geoinformation.dev/tethys-records/_search";
+    // async fetchDataFromOpenSearch(searchTerm: string): Promise<void> {
+    //     // Define the OpenSearch endpoint URL
+    //     const url = "http://opensearch.geoinformation.dev/tethys-records/_search";
        
-        // Set the headers for the POST request
-        const headers = {
-            "Content-Type": "application/json",
-        };
+    //     // Set the headers for the POST request
+    //     const headers = {
+    //         "Content-Type": "application/json",
+    //     };

-        // Construct the body of the POST request
-        const body = {
-            query: {
-                bool: {
-                    // The `should` clause specifies that at least one of these conditions must match
-                    should: [
-                        {
-                            // Match the search term in the title field with fuzziness enabled and a boost of 3
-                            match: {
-                                title: {
-                                    query: searchTerm,
-                                    fuzziness: "AUTO", // Enable fuzzy search
-                                    boost: 3 // Boosting the relevance of title matches
-                                }
-                            }
-                        },
-                        {
-                            // Match the search term in the author field with fuzziness enabled and a boost of 2
-                            match: {
-                                author: {
-                                    query: searchTerm,
-                                    fuzziness: "AUTO", // Enable fuzzy search
-                                    boost: 2 // Boosting the relevance of author matches
-                                }
-                            }
-                        },
-                        {
-                            // Match the search term in the subject field with fuzziness enabled and a boost of 1
-                            match: {
-                                subject: {
-                                    query: searchTerm,
-                                    fuzziness: "AUTO", // Enable fuzzy search
-                                    boost: 1 // Boosting the relevance of subject matches
-                                }
-                            }
-                        },
-                        {
-                            // Match the search term in the title field with a wildcard
-                            wildcard: {
-                                title: {
-                                    value: `${searchTerm}*`, // Wildcard search for terms starting with searchTerm
-                                    boost: 3 // Boosting the relevance of title matches
-                                }
-                            }
-                        },
-                        {
-                            // Match the search term in the author field with a wildcard
-                            wildcard: {
-                                author: {
-                                    value: `${searchTerm}*`, // Wildcard search for terms starting with searchTerm
-                                    boost: 2 // Boosting the relevance of author matches
-                                }
-                            }
-                        },
-                        {
-                            // Match the search term in the subject field with a wildcard
-                            wildcard: {
-                                subject: {
-                                    value: `${searchTerm}*`, // Wildcard search for terms starting with searchTerm
-                                    boost: 1 // Boosting the relevance of subject matches
-                                }
-                            }
-                        }
-                    ],
-                    // Ensure that at least one of the `should` clauses must match
-                    minimum_should_match: 1
-                }
-            },
-            // Limit the number of search results to 10
-            size: 10,
-            // Start from the first result (pagination)
-            from: 0,
-            // Sort the results by the `server_date_published` field in descending order
-            sort: [
-                { server_date_published: { order: "desc" } }
-            ],
-            // Aggregations to provide facets for the `language` and `subject` fields
-            aggs: {
-                language: {
-                    terms: {
-                        field: "language.keyword" // Aggregate by the exact values of the `language` field
-                    }
-                },
-                subject: {
-                    terms: {
-                        field: "subjects.keyword", // Aggregate by the exact values of the `subjects` field
-                        size: 10 // Limit the number of aggregation buckets to 10
-                    }
-                }
-            }
-        };
+    //     // Construct the body of the POST request
+    //     const body = {
+    //         query: {
+    //             bool: {
+    //                 // The `should` clause specifies that at least one of these conditions must match
+    //                 should: [
+    //                     {
+    //                         // Match the search term in the title field with fuzziness enabled and a boost of 3
+    //                         match: {
+    //                             title: {
+    //                                 query: searchTerm,
+    //                                 fuzziness: "AUTO", // Enable fuzzy search
+    //                                 boost: 3 // Boosting the relevance of title matches
+    //                             }
+    //                         }
+    //                     },
+    //                     {
+    //                         // Match the search term in the author field with fuzziness enabled and a boost of 2
+    //                         match: {
+    //                             author: {
+    //                                 query: searchTerm,
+    //                                 fuzziness: "AUTO", // Enable fuzzy search
+    //                                 boost: 2 // Boosting the relevance of author matches
+    //                             }
+    //                         }
+    //                     },
+    //                     {
+    //                         // Match the search term in the subject field with fuzziness enabled and a boost of 1
+    //                         match: {
+    //                             subject: {
+    //                                 query: searchTerm,
+    //                                 fuzziness: "AUTO", // Enable fuzzy search
+    //                                 boost: 1 // Boosting the relevance of subject matches
+    //                             }
+    //                         }
+    //                     },
+    //                     {
+    //                         // Match the search term in the title field with a wildcard
+    //                         wildcard: {
+    //                             title: {
+    //                                 value: `${searchTerm}*`, // Wildcard search for terms starting with searchTerm
+    //                                 boost: 3 // Boosting the relevance of title matches
+    //                             }
+    //                         }
+    //                     },
+    //                     {
+    //                         // Match the search term in the author field with a wildcard
+    //                         wildcard: {
+    //                             author: {
+    //                                 value: `${searchTerm}*`, // Wildcard search for terms starting with searchTerm
+    //                                 boost: 2 // Boosting the relevance of author matches
+    //                             }
+    //                         }
+    //                     },
+    //                     {
+    //                         // Match the search term in the subject field with a wildcard
+    //                         wildcard: {
+    //                             subject: {
+    //                                 value: `${searchTerm}*`, // Wildcard search for terms starting with searchTerm
+    //                                 boost: 1 // Boosting the relevance of subject matches
+    //                             }
+    //                         }
+    //                     }
+    //                 ],
+    //                 // Ensure that at least one of the `should` clauses must match
+    //                 minimum_should_match: 1
+    //             }
+    //         },
+    //         // Limit the number of search results to 10
+    //         size: 10,
+    //         // Start from the first result (pagination)
+    //         from: 0,
+    //         // Sort the results by the `server_date_published` field in descending order
+    //         sort: [
+    //             { server_date_published: { order: "desc" } }
+    //         ],
+    //         // Aggregations to provide facets for the `language` and `subject` fields
+    //         aggs: {
+    //             language: {
+    //                 terms: {
+    //                     field: "language.keyword" // Aggregate by the exact values of the `language` field
+    //                 }
+    //             },
+    //             subject: {
+    //                 terms: {
+    //                     field: "subjects.keyword", // Aggregate by the exact values of the `subjects` field
+    //                     size: 10 // Limit the number of aggregation buckets to 10
+    //                 }
+    //             }
+    //         }
+    //     };

-        try {
-            // Send the POST request to the OpenSearch endpoint
-            const response = await fetch(url, {
-                method: "POST",
-                headers: headers,
-                body: JSON.stringify(body),
-            });
+    //     try {
+    //         // Send the POST request to the OpenSearch endpoint
+    //         const response = await fetch(url, {
+    //             method: "POST",
+    //             headers: headers,
+    //             body: JSON.stringify(body),
+    //         });

-            // Check if the response is not successful
-            if (!response.ok) {
-                throw new Error(`Failed to fetch data from ${url}, status: ${response.status}`);
-            }
+    //         // Check if the response is not successful
+    //         if (!response.ok) {
+    //             throw new Error(`Failed to fetch data from ${url}, status: ${response.status}`);
+    //         }

-            // Parse the response JSON
-            const data = await response.json();
-            // Log the data from OpenSearch
-            console.log("Data from OpenSearch:", data);
-            console.log("Hits:", data.hits.total.value);
-        } catch (error) {
-            // Log any errors that occur during the fetch process
-            console.error("Error fetching data:", error);
-        }
-    }
+    //         // Parse the response JSON
+    //         const data = await response.json();
+    //         // Log the data from OpenSearch
+    //         console.log("Data from OpenSearch:", data);
+    //         console.log("Hits:", data.hits.total.value);
+    //     } catch (error) {
+    //         // Log any errors that occur during the fetch process
+    //         console.error("Error fetching data:", error);
+    //     }
+    // }

    /* https://tethys.at/solr/rdr_data/select?&0=fl%3Did%2Clicence%2Cserver_date_published%2Cabstract_output%2Cidentifier%2Ctitle_output%2Ctitle_additional%2Cauthor%2Csubject%2Cdoctype&q=%2A
    &q.op=or&defType=edismax&qf=title%5E3%20author%5E2%20subject%5E1&indent=on&wt=json&rows=10&start=0&sort=server_date_published%20desc&facet=on&json.facet.language=%7B%20type%3A%20%22
@ -156,25 +156,53 @@ class DatasetService {
                    should: [
                        { match: { title: { query: term, fuzziness: "AUTO", boost: 3 } } },
                        { match: { author: { query: term, fuzziness: "AUTO", boost: 2 } } },
-                        { match: { subject: { query: term, fuzziness: "AUTO", boost: 1 } } },
+                        { match: { subjects: { query: term, fuzziness: "AUTO", boost: 1 } } }, // In SOLR is "subject"!
                        { wildcard: { title: { value: `${term}*`, boost: 3 } } },
                        { wildcard: { author: { value: `${term}*`, boost: 2 } } },
-                        { wildcard: { subject: { value: `${term}*`, boost: 1 } } }
+                        { wildcard: { subjects: { value: `${term}*`, boost: 1 } } } // In SOLR is "subject"!
                    ],
                    minimum_should_match: 1
                }
            },
            size: 10,
            from: 0,
-            sort: [{ server_date_published: { order: "desc" } }],
+            // sort: [{ server_date_published: { order: "desc" } }],
+            sort: [{ _score: { order: "desc" } }], // Sort by _score in descending order
+            track_scores: true, // This ensures "_score" is included even when sorting by other criteria. Otherwise the relevance score is not calculated
            aggs: {
                language: { terms: { field: "language.keyword" } },
-                subject: { terms: { field: "subjects.keyword", size: 10 } }
+                subjects: { terms: { field: "subjects.keyword", size: 10 } } // In SOLR is "subject"!
+            },
+            highlight: {
+                fields: {
+                    title: {},
+                    author: {},
+                    subjects: {}
+                }
            }
        };

+        // Make API call to OpenSearch and return the result
+        /**
+         * When a POST request is made to the OpenSearch server using the api.post<OpenSearchResponse> method, the response received from OpenSearch is an object that includes various details about the search results.
+         * The matching documents are located under hits.hits, an array of hit objects; each hit carries the matching document (dataset) in its _source property.
+         * The pipe method is used to chain RxJS operators to the Observable returned by api.post. The map operator transforms each emitted response into the array of _source documents.
+         */
        return api.post<OpenSearchResponse>(this.openSearchUrl, body).pipe(
+            // tap(response => console.log("OpenSearchResponse:", response)), // Log the complete response
+            // tap(response => console.log("Aggre:", response.aggregations?.subjects.buckets[0])), // log the first subject of the array of subjects returned
+            // tap(response => console.log("Hits:", response.hits)), // log the hits section of the response (total count and matching documents)
+            
            map(response => response.hits.hits.map(hit => hit._source))
+            
+            // map(response => response.hits.hits.map(hit => {
+            //     const source = hit._source;
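+            //     const highlights = hit.highlight || {}; // NOTE(review): OpenSearch returns per-hit highlights under "highlight" (no leading underscore) - confirm against the OpenSearchResponse type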
+            //     return {
+            //         ...source,
+            //         highlights
+            //     };
+            // }))
        );
    }

@ -219,7 +247,7 @@ class DatasetService {
         * The pipe method is used to chain RxJS operators to the Observable returned by api.get. The map operator is used to transform the emitted items of the Observable.
         */
        const stations = api.get<SolrResponse>(base, q_params).pipe(map((res: SolrResponse) => res.response.docs));
-
+ 
        return stations;
    }