OpenSearch progress.

- Term search works
- Pending faceted search
- Ongoing: highlighting of fuzzy results
This commit is contained in:
Porras-Bernardez 2024-06-07 17:44:13 +02:00
parent 6d1c1b28c3
commit 4f53411d07
5 changed files with 292 additions and 204 deletions

View file

@ -1,7 +1,7 @@
import api from "../api/api";
// import { Observable, of } from "rxjs";
import { Observable } from "rxjs";
import { map } from "rxjs/operators";
import { tap, map } from "rxjs/operators";
import { Dataset, DbDataset, Suggestion } from "@/models/dataset";
import { OpenSearchResponse, SolrResponse } from "@/models/headers";
import { ActiveFilterCategories } from "@/models/solr";
@ -16,130 +16,130 @@ class DatasetService {
*
* @param {string} searchTerm - The search term to query.
*/
async fetchDataFromOpenSearch(searchTerm: string): Promise<void> {
// Define the OpenSearch endpoint URL
const url = "http://opensearch.geoinformation.dev/tethys-records/_search";
// async fetchDataFromOpenSearch(searchTerm: string): Promise<void> {
// // Define the OpenSearch endpoint URL
// const url = "http://opensearch.geoinformation.dev/tethys-records/_search";
// Set the headers for the POST request
const headers = {
"Content-Type": "application/json",
};
// // Set the headers for the POST request
// const headers = {
// "Content-Type": "application/json",
// };
// Construct the body of the POST request
const body = {
query: {
bool: {
// The `should` clause specifies that at least one of these conditions must match
should: [
{
// Match the search term in the title field with fuzziness enabled and a boost of 3
match: {
title: {
query: searchTerm,
fuzziness: "AUTO", // Enable fuzzy search
boost: 3 // Boosting the relevance of title matches
}
}
},
{
// Match the search term in the author field with fuzziness enabled and a boost of 2
match: {
author: {
query: searchTerm,
fuzziness: "AUTO", // Enable fuzzy search
boost: 2 // Boosting the relevance of author matches
}
}
},
{
// Match the search term in the subject field with fuzziness enabled and a boost of 1
match: {
subject: {
query: searchTerm,
fuzziness: "AUTO", // Enable fuzzy search
boost: 1 // Boosting the relevance of subject matches
}
}
},
{
// Match the search term in the title field with a wildcard
wildcard: {
title: {
value: `${searchTerm}*`, // Wildcard search for terms starting with searchTerm
boost: 3 // Boosting the relevance of title matches
}
}
},
{
// Match the search term in the author field with a wildcard
wildcard: {
author: {
value: `${searchTerm}*`, // Wildcard search for terms starting with searchTerm
boost: 2 // Boosting the relevance of author matches
}
}
},
{
// Match the search term in the subject field with a wildcard
wildcard: {
subject: {
value: `${searchTerm}*`, // Wildcard search for terms starting with searchTerm
boost: 1 // Boosting the relevance of subject matches
}
}
}
],
// Ensure that at least one of the `should` clauses must match
minimum_should_match: 1
}
},
// Limit the number of search results to 10
size: 10,
// Start from the first result (pagination)
from: 0,
// Sort the results by the `server_date_published` field in descending order
sort: [
{ server_date_published: { order: "desc" } }
],
// Aggregations to provide facets for the `language` and `subject` fields
aggs: {
language: {
terms: {
field: "language.keyword" // Aggregate by the exact values of the `language` field
}
},
subject: {
terms: {
field: "subjects.keyword", // Aggregate by the exact values of the `subjects` field
size: 10 // Limit the number of aggregation buckets to 10
}
}
}
};
// // Construct the body of the POST request
// const body = {
// query: {
// bool: {
// // The `should` clause specifies that at least one of these conditions must match
// should: [
// {
// // Match the search term in the title field with fuzziness enabled and a boost of 3
// match: {
// title: {
// query: searchTerm,
// fuzziness: "AUTO", // Enable fuzzy search
// boost: 3 // Boosting the relevance of title matches
// }
// }
// },
// {
// // Match the search term in the author field with fuzziness enabled and a boost of 2
// match: {
// author: {
// query: searchTerm,
// fuzziness: "AUTO", // Enable fuzzy search
// boost: 2 // Boosting the relevance of author matches
// }
// }
// },
// {
// // Match the search term in the subject field with fuzziness enabled and a boost of 1
// match: {
// subject: {
// query: searchTerm,
// fuzziness: "AUTO", // Enable fuzzy search
// boost: 1 // Boosting the relevance of subject matches
// }
// }
// },
// {
// // Match the search term in the title field with a wildcard
// wildcard: {
// title: {
// value: `${searchTerm}*`, // Wildcard search for terms starting with searchTerm
// boost: 3 // Boosting the relevance of title matches
// }
// }
// },
// {
// // Match the search term in the author field with a wildcard
// wildcard: {
// author: {
// value: `${searchTerm}*`, // Wildcard search for terms starting with searchTerm
// boost: 2 // Boosting the relevance of author matches
// }
// }
// },
// {
// // Match the search term in the subject field with a wildcard
// wildcard: {
// subject: {
// value: `${searchTerm}*`, // Wildcard search for terms starting with searchTerm
// boost: 1 // Boosting the relevance of subject matches
// }
// }
// }
// ],
// // Ensure that at least one of the `should` clauses must match
// minimum_should_match: 1
// }
// },
// // Limit the number of search results to 10
// size: 10,
// // Start from the first result (pagination)
// from: 0,
// // Sort the results by the `server_date_published` field in descending order
// sort: [
// { server_date_published: { order: "desc" } }
// ],
// // Aggregations to provide facets for the `language` and `subject` fields
// aggs: {
// language: {
// terms: {
// field: "language.keyword" // Aggregate by the exact values of the `language` field
// }
// },
// subject: {
// terms: {
// field: "subjects.keyword", // Aggregate by the exact values of the `subjects` field
// size: 10 // Limit the number of aggregation buckets to 10
// }
// }
// }
// };
try {
// Send the POST request to the OpenSearch endpoint
const response = await fetch(url, {
method: "POST",
headers: headers,
body: JSON.stringify(body),
});
// try {
// // Send the POST request to the OpenSearch endpoint
// const response = await fetch(url, {
// method: "POST",
// headers: headers,
// body: JSON.stringify(body),
// });
// Check if the response is not successful
if (!response.ok) {
throw new Error(`Failed to fetch data from ${url}, status: ${response.status}`);
}
// // Check if the response is not successful
// if (!response.ok) {
// throw new Error(`Failed to fetch data from ${url}, status: ${response.status}`);
// }
// Parse the response JSON
const data = await response.json();
// Log the data from OpenSearch
console.log("Data from OpenSearch:", data);
console.log("Hits:", data.hits.total.value);
} catch (error) {
// Log any errors that occur during the fetch process
console.error("Error fetching data:", error);
}
}
// // Parse the response JSON
// const data = await response.json();
// // Log the data from OpenSearch
// console.log("Data from OpenSearch:", data);
// console.log("Hits:", data.hits.total.value);
// } catch (error) {
// // Log any errors that occur during the fetch process
// console.error("Error fetching data:", error);
// }
// }
/* https://tethys.at/solr/rdr_data/select?&0=fl%3Did%2Clicence%2Cserver_date_published%2Cabstract_output%2Cidentifier%2Ctitle_output%2Ctitle_additional%2Cauthor%2Csubject%2Cdoctype&q=%2A
&q.op=or&defType=edismax&qf=title%5E3%20author%5E2%20subject%5E1&indent=on&wt=json&rows=10&start=0&sort=server_date_published%20desc&facet=on&json.facet.language=%7B%20type%3A%20%22
@ -156,25 +156,53 @@ class DatasetService {
should: [
{ match: { title: { query: term, fuzziness: "AUTO", boost: 3 } } },
{ match: { author: { query: term, fuzziness: "AUTO", boost: 2 } } },
{ match: { subject: { query: term, fuzziness: "AUTO", boost: 1 } } },
{ match: { subjects: { query: term, fuzziness: "AUTO", boost: 1 } } }, // In SOLR is "subject"!
{ wildcard: { title: { value: `${term}*`, boost: 3 } } },
{ wildcard: { author: { value: `${term}*`, boost: 2 } } },
{ wildcard: { subject: { value: `${term}*`, boost: 1 } } }
{ wildcard: { subjects: { value: `${term}*`, boost: 1 } } } // In SOLR is "subject"!
],
minimum_should_match: 1
}
},
size: 10,
from: 0,
sort: [{ server_date_published: { order: "desc" } }],
// sort: [{ server_date_published: { order: "desc" } }],
sort: [{ _score: { order: "desc" } }], // Sort by _score in descending order
track_scores: true, // This ensures "_score" is included even when sorting by other criteria. Otherwise the relevance score is not calculated
aggs: {
language: { terms: { field: "language.keyword" } },
subject: { terms: { field: "subjects.keyword", size: 10 } }
subjects: { terms: { field: "subjects.keyword", size: 10 } } // In SOLR is "subject"!
},
highlight: {
fields: {
title: {},
author: {},
subjects: {}
}
}
};
// Make API call to OpenSearch and return the result
/**
* When a POST request is made to the OpenSearch server using the api.post<OpenSearchResponse> method, the response received from OpenSearch is an object that includes various details about the search results.
* The matching entries are in response.hits.hits, an array of hits; each hit carries the matched document (dataset) in its _source property.
* The pipe method is used to chain RxJS operators to the Observable returned by api.post. The map operator transforms each emitted response into the array of _source documents taken from its hits.
*/
return api.post<OpenSearchResponse>(this.openSearchUrl, body).pipe(
// tap(response => console.log("OpenSearchResponse:", response)), // Log the complete response
// tap(response => console.log("Aggre:", response.aggregations?.subjects.buckets[0])), // log the first subject of the array of subjects returned
// tap(response => console.log("Hits:", response.hits)), // Log the hits object of the response
map(response => response.hits.hits.map(hit => hit._source))
// map(response => response.hits.hits.map(hit => {
// const source = hit._source;
// const highlights = hit._highlight || {};
// return {
// ...source,
// highlights
// };
// }))
);
}
@ -219,7 +247,7 @@ class DatasetService {
* The pipe method is used to chain RxJS operators to the Observable returned by api.get. The map operator is used to transform the emitted items of the Observable.
*/
const stations = api.get<SolrResponse>(base, q_params).pipe(map((res: SolrResponse) => res.response.docs));
return stations;
}