### Major Features

- Add comprehensive ORCID validation with checksum verification
- Implement unsaved changes detection and auto-save functionality
- Enhance form component reactivity and state management

### ORCID Implementation

- Create custom VineJS ORCID validation rule with MOD-11-2 algorithm
- Add ORCID fields to Person model and TablePersons component
- Update dataset validators to include ORCID validation
- Add descriptive placeholder text for ORCID input fields

### UI/UX Improvements

- Add UnsavedChangesWarning component with detailed change tracking
- Improve FormCheckRadio and FormCheckRadioGroup reactivity
- Enhance BaseButton with proper disabled state handling
- Improve error handling and user feedback in file validation

### Data Management

- Implement sophisticated change detection for all dataset fields
- Add proper handling of array ordering for authors/contributors
- Improve license selection with better state management
- Enhance subject/keyword processing with duplicate detection

### Technical Improvements

- Optimize search indexing with conditional updates based on modification dates
- Update person model column mapping for ORCID
- Improve validation error messages and user guidance
- Improve handling of file uploads and deletion tracking

### Dependencies

- Update various npm packages (AWS SDK, Babel, Vite, etc.)
- Add baseline-browser-mapping for better browser compatibility

### Bug Fixes

- Fix form reactivity issues with checkbox/radio groups
- Improve error handling in file validation rules
- Improve handling of edge cases in change detection
179 lines
7.3 KiB
TypeScript
// podman exec -it tethys_backend_1 node ace validate:checksum
|
|
// sudo crontab -u www-data -e
|
|
// */5 * * * * podman exec -u www-data tethys_backend_1 node ace validate:checksum
|
|
import { XMLBuilder } from 'xmlbuilder2/lib/interfaces.js';
|
|
import { create } from 'xmlbuilder2';
|
|
import Dataset from '#models/dataset';
|
|
import XmlModel from '#app/Library/XmlModel';
|
|
import { readFileSync } from 'fs';
|
|
import SaxonJS from 'saxon-js';
|
|
import { Client } from '@opensearch-project/opensearch';
|
|
import { getDomain } from '#app/utils/utility-functions';
|
|
import { BaseCommand, flags } from '@adonisjs/core/ace';
|
|
import { CommandOptions } from '@adonisjs/core/types/ace';
|
|
import env from '#start/env';
|
|
import logger from '@adonisjs/core/services/logger';
|
|
import { DateTime } from 'luxon';
|
|
|
|
const opensearchNode = env.get('OPENSEARCH_HOST', 'localhost');
|
|
const client = new Client({ node: `${opensearchNode}` }); // replace with your OpenSearch endpoint
|
|
|
|
export default class IndexDatasets extends BaseCommand {
|
|
static commandName = 'index:datasets';
|
|
static description = 'Index datasets based on publish_id';
|
|
|
|
public static needsApplication = true;
|
|
|
|
@flags.number({ alias: 'p' })
|
|
public publish_id: number;
|
|
|
|
public static options: CommandOptions = {
|
|
startApp: true, // Ensures the IoC container is ready to use
|
|
staysAlive: false, // Command exits after running
|
|
};
|
|
|
|
async run() {
|
|
logger.debug('Hello world!');
|
|
// const { default: Dataset } = await import('#models/dataset');
|
|
// const datasets = await Dataset.query().where('server_state', 'published').exec(); //this.getDatasets();
|
|
const datasets = await this.getDatasets();
|
|
const proc = readFileSync('public/assets2/solr.sef.json');
|
|
const index_name = 'tethys-records';
|
|
|
|
for (var dataset of datasets) {
|
|
const shouldUpdate = await this.shouldUpdateDataset(dataset, index_name);
|
|
if (shouldUpdate) {
|
|
await this.indexDocument(dataset, index_name, proc);
|
|
} else {
|
|
logger.info(`Dataset with publish_id ${dataset.publish_id} is up to date, skipping indexing`);
|
|
}
|
|
}
|
|
}
|
|
|
|
private async getDatasets(): Promise<any[]> {
|
|
// const { default: Dataset } = await import('#models/dataset');
|
|
// const Dataset = (await import('#models/dataset')).default
|
|
// const Dataset = (
|
|
// await this.app.container.make('#models/dataset')
|
|
// ).default;
|
|
// const query: ModelQueryBuilder<Dataset, any> = db.from(Dataset);
|
|
const query = Dataset.query().preload('xmlCache').where('server_state', 'published');
|
|
if (this.publish_id) {
|
|
query.where('publish_id', this.publish_id);
|
|
}
|
|
return await query.exec();
|
|
}
|
|
|
|
private async shouldUpdateDataset(dataset: Dataset, index_name: string): Promise<boolean> {
|
|
try {
|
|
// Check if publish_id exists before proceeding
|
|
if (!dataset.publish_id) {
|
|
// Return true to update since document doesn't exist in OpenSearch yet
|
|
return true;
|
|
}
|
|
// Get the existing document from OpenSearch
|
|
const response = await client.get({
|
|
index: index_name,
|
|
id: dataset.publish_id?.toString(),
|
|
});
|
|
|
|
const existingDoc = response.body._source;
|
|
|
|
// Compare server_date_modified
|
|
if (existingDoc && existingDoc.server_date_modified) {
|
|
// Convert Unix timestamp (seconds) to milliseconds for DateTime.fromMillis()
|
|
const existingModified = DateTime.fromMillis(Number(existingDoc.server_date_modified) * 1000);
|
|
const currentModified = dataset.server_date_modified;
|
|
|
|
// Only update if the dataset has been modified more recently
|
|
if (currentModified <= existingModified) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
} catch (error) {
|
|
// If document doesn't exist or other error, we should index it
|
|
if (error.statusCode === 404) {
|
|
logger.info(`Dataset with publish_id ${dataset.publish_id} not found in index, will create new document`);
|
|
return true;
|
|
}
|
|
|
|
logger.warn(`Error checking existing document for publish_id ${dataset.publish_id}: ${error.message}`);
|
|
return true; // Index anyway if we can't determine the status
|
|
}
|
|
}
|
|
|
|
private async indexDocument(dataset: Dataset, index_name: string, proc: Buffer): Promise<void> {
|
|
try {
|
|
const doc = await this.getJsonString(dataset, proc);
|
|
|
|
let document = JSON.parse(doc);
|
|
await client.index({
|
|
id: dataset.publish_id?.toString(),
|
|
index: index_name,
|
|
body: document,
|
|
refresh: true,
|
|
});
|
|
logger.info(`dataset with publish_id ${dataset.publish_id} successfully indexed`);
|
|
} catch (error) {
|
|
logger.error(`An error occurred while indexing dataset with publish_id ${dataset.publish_id}.
|
|
Error: ${error.message}`);
|
|
}
|
|
}
|
|
|
|
private async getJsonString(dataset: Dataset, proc: Buffer) {
|
|
let xml = create({ version: '1.0', encoding: 'UTF-8', standalone: true }, '<root></root>');
|
|
const datasetNode = xml.root().ele('Dataset');
|
|
await this.createXmlRecord(dataset, datasetNode);
|
|
const xmlString = xml.end({ prettyPrint: false });
|
|
|
|
try {
|
|
const result = await SaxonJS.transform({
|
|
stylesheetText: proc,
|
|
destination: 'serialized',
|
|
sourceText: xmlString,
|
|
});
|
|
return result.principalResult;
|
|
} catch (error) {
|
|
logger.error(`An error occurred while creating the user, error: ${error.message},`);
|
|
return '';
|
|
}
|
|
}
|
|
|
|
private async createXmlRecord(dataset: Dataset, datasetNode: XMLBuilder): Promise<void> {
|
|
const domNode = await this.getDatasetXmlDomNode(dataset);
|
|
if (domNode) {
|
|
dataset.publish_id && this.addLandingPageAttribute(domNode, dataset.publish_id.toString());
|
|
this.addSpecInformation(domNode, 'data-type:' + dataset.type);
|
|
datasetNode.import(domNode);
|
|
}
|
|
}
|
|
|
|
private async getDatasetXmlDomNode(dataset: Dataset): Promise<XMLBuilder | null> {
|
|
const xmlModel = new XmlModel(dataset);
|
|
// xmlModel.setModel(dataset);
|
|
xmlModel.excludeEmptyFields();
|
|
xmlModel.caching = true;
|
|
// const cache = dataset.xmlCache ? dataset.xmlCache : null;
|
|
// dataset.load('xmlCache');
|
|
if (dataset.xmlCache) {
|
|
xmlModel.xmlCache = dataset.xmlCache;
|
|
}
|
|
|
|
// return cache.getDomDocument();
|
|
const domDocument: XMLBuilder | null = await xmlModel.getDomDocument();
|
|
return domDocument;
|
|
}
|
|
|
|
private addSpecInformation(domNode: XMLBuilder, information: string) {
|
|
domNode.ele('SetSpec').att('Value', information);
|
|
}
|
|
|
|
private addLandingPageAttribute(domNode: XMLBuilder, dataid: string) {
|
|
const baseDomain = process.env.OAI_BASE_DOMAIN || 'localhost';
|
|
const url = 'https://' + getDomain(baseDomain) + '/dataset/' + dataid;
|
|
// add attribute du dataset xml element
|
|
domNode.att('landingpage', url);
|
|
}
|
|
}
|