Compare commits
4 commits
8f67839f93
...
6757bdb77c
| Author | SHA1 | Date | |
|---|---|---|---|
| 6757bdb77c | |||
| 4c8cce27da | |||
| 2f079e6fdd | |||
| c049b22723 |
22 changed files with 2870 additions and 919 deletions
77
Dockerfile
77
Dockerfile
|
|
@ -1,55 +1,61 @@
|
||||||
################## First Stage - Creating base #########################
|
################## First Stage - Creating base #########################
|
||||||
|
|
||||||
# Created a variable to hold our node base image
|
# Created a variable to hold our node base image
|
||||||
ARG NODE_IMAGE=node:22-bookworm-slim
|
ARG NODE_IMAGE=node:22-trixie-slim
|
||||||
|
|
||||||
FROM $NODE_IMAGE AS base
|
FROM $NODE_IMAGE AS base
|
||||||
|
|
||||||
# Install dumb-init and ClamAV, and perform ClamAV database update
|
# Install dumb-init and ClamAV, and perform ClamAV database update
|
||||||
RUN apt update \
|
RUN apt-get update \
|
||||||
&& apt-get install -y dumb-init clamav clamav-daemon nano \
|
&& apt-get install -y --no-install-recommends \
|
||||||
|
dumb-init \
|
||||||
|
clamav \
|
||||||
|
clamav-daemon \
|
||||||
|
ca-certificates \
|
||||||
&& rm -rf /var/lib/apt/lists/* \
|
&& rm -rf /var/lib/apt/lists/* \
|
||||||
# Creating folders and changing ownerships
|
# Creating folders and changing ownerships
|
||||||
&& mkdir -p /home/node/app && chown node:node /home/node/app \
|
&& mkdir -p /home/node/app \
|
||||||
&& mkdir -p /var/lib/clamav \
|
&& mkdir -p /var/lib/clamav \
|
||||||
&& mkdir /usr/local/share/clamav \
|
&& mkdir /usr/local/share/clamav \
|
||||||
&& chown -R node:clamav /var/lib/clamav /usr/local/share/clamav /etc/clamav \
|
|
||||||
# permissions
|
|
||||||
&& mkdir /var/run/clamav \
|
&& mkdir /var/run/clamav \
|
||||||
&& chown node:clamav /var/run/clamav \
|
&& mkdir -p /var/log/clamav \
|
||||||
&& chmod 750 /var/run/clamav
|
&& mkdir -p /tmp/clamav-logs \
|
||||||
# -----------------------------------------------
|
|
||||||
# --- ClamAV & FeshClam -------------------------
|
|
||||||
# -----------------------------------------------
|
|
||||||
# RUN \
|
|
||||||
# chmod 644 /etc/clamav/freshclam.conf && \
|
|
||||||
# freshclam && \
|
|
||||||
# mkdir /var/run/clamav && \
|
|
||||||
# chown -R clamav:root /var/run/clamav
|
|
||||||
|
|
||||||
# # initial update of av databases
|
# Set ownership and permissions
|
||||||
# RUN freshclam
|
&& chown node:node /home/node/app \
|
||||||
|
# && chown -R node:clamav /var/lib/clamav /usr/local/share/clamav /etc/clamav /var/run/clamav \
|
||||||
|
&& chown -R clamav:clamav /var/lib/clamav /usr/local/share/clamav /etc/clamav /var/run/clamav /var/log/clamav \
|
||||||
|
&& chmod 755 /tmp/clamav-logs \
|
||||||
|
&& chmod 750 /var/run/clamav \
|
||||||
|
&& chmod 755 /var/lib/clamav \
|
||||||
|
&& chmod 755 /var/log/clamav \
|
||||||
|
# Add node user to clamav group and allow sudo for clamav commands
|
||||||
|
&& usermod -a -G clamav node \
|
||||||
|
&& chmod g+w /var/run/clamav /var/lib/clamav /var/log/clamav /tmp/clamav-logs
|
||||||
|
|
||||||
# Configure Clam AV...
|
|
||||||
COPY --chown=node:clamav ./*.conf /etc/clamav/
|
|
||||||
|
|
||||||
# # permissions
|
# Configure ClamAV - copy config files before switching user
|
||||||
# RUN mkdir /var/run/clamav && \
|
# COPY --chown=node:clamav ./*.conf /etc/clamav/
|
||||||
# chown node:clamav /var/run/clamav && \
|
COPY --chown=clamav:clamav ./*.conf /etc/clamav/
|
||||||
# chmod 750 /var/run/clamav
|
|
||||||
|
# Copy entrypoint script
|
||||||
|
COPY --chown=node:node docker-entrypoint.sh /home/node/app/docker-entrypoint.sh
|
||||||
|
RUN chmod +x /home/node/app/docker-entrypoint.sh
|
||||||
|
|
||||||
|
ENV TZ="Europe/Vienna"
|
||||||
|
|
||||||
# Setting the working directory
|
# Setting the working directory
|
||||||
WORKDIR /home/node/app
|
WORKDIR /home/node/app
|
||||||
# Changing the current active user to "node"
|
# Changing the current active user to "node"
|
||||||
|
|
||||||
|
# Download initial ClamAV database as root before switching users
|
||||||
|
USER root
|
||||||
|
RUN freshclam --quiet || echo "Initial database download failed - will retry at runtime"
|
||||||
|
|
||||||
USER node
|
USER node
|
||||||
|
|
||||||
# initial update of av databases
|
# Initial update of AV databases (moved after USER directive)
|
||||||
RUN freshclam
|
# RUN freshclam || true
|
||||||
|
|
||||||
# VOLUME /var/lib/clamav
|
|
||||||
COPY --chown=node:clamav docker-entrypoint.sh /home/node/app/docker-entrypoint.sh
|
|
||||||
RUN chmod +x /home/node/app/docker-entrypoint.sh
|
|
||||||
ENV TZ="Europe/Vienna"
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
################## Second Stage - Installing dependencies ##########
|
################## Second Stage - Installing dependencies ##########
|
||||||
|
|
@ -70,14 +76,13 @@ ENV NODE_ENV=production
|
||||||
# We run "node ace build" to build the app (dist folder) for production
|
# We run "node ace build" to build the app (dist folder) for production
|
||||||
RUN node ace build --ignore-ts-errors
|
RUN node ace build --ignore-ts-errors
|
||||||
# RUN node ace build --production
|
# RUN node ace build --production
|
||||||
# RUN node ace build --ignore-ts-errors
|
|
||||||
|
|
||||||
|
|
||||||
################## Final Stage - Production #########################
|
################## Final Stage - Production #########################
|
||||||
# In this final stage, we will start running the application
|
# In this final stage, we will start running the application
|
||||||
FROM base AS production
|
FROM base AS production
|
||||||
# Here, we include all the required environment variables
|
# Here, we include all the required environment variables
|
||||||
# ENV NODE_ENV=production
|
ENV NODE_ENV=production
|
||||||
# ENV PORT=$PORT
|
# ENV PORT=$PORT
|
||||||
# ENV HOST=0.0.0.0
|
# ENV HOST=0.0.0.0
|
||||||
|
|
||||||
|
|
@ -91,4 +96,4 @@ COPY --chown=node:node --from=build /home/node/app/build .
|
||||||
EXPOSE 3333
|
EXPOSE 3333
|
||||||
ENTRYPOINT ["/home/node/app/docker-entrypoint.sh"]
|
ENTRYPOINT ["/home/node/app/docker-entrypoint.sh"]
|
||||||
# Run the command to start the server using "dumb-init"
|
# Run the command to start the server using "dumb-init"
|
||||||
CMD [ "dumb-init", "node", "bin/server.js" ]
|
CMD [ "node", "bin/server.js" ]
|
||||||
22
LICENSE
Normal file
22
LICENSE
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
|
||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2025 Tethys Research Repository
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE
|
||||||
|
|
@ -30,9 +30,9 @@ export default defineConfig({
|
||||||
() => import('#start/rules/unique'),
|
() => import('#start/rules/unique'),
|
||||||
() => import('#start/rules/translated_language'),
|
() => import('#start/rules/translated_language'),
|
||||||
() => import('#start/rules/unique_person'),
|
() => import('#start/rules/unique_person'),
|
||||||
() => import('#start/rules/file_length'),
|
// () => import('#start/rules/file_length'),
|
||||||
() => import('#start/rules/file_scan'),
|
// () => import('#start/rules/file_scan'),
|
||||||
() => import('#start/rules/allowed_extensions_mimetypes'),
|
// () => import('#start/rules/allowed_extensions_mimetypes'),
|
||||||
() => import('#start/rules/dependent_array_min_length'),
|
() => import('#start/rules/dependent_array_min_length'),
|
||||||
() => import('#start/rules/referenceValidation'),
|
() => import('#start/rules/referenceValidation'),
|
||||||
() => import('#start/rules/valid_mimetype'),
|
() => import('#start/rules/valid_mimetype'),
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,15 @@
|
||||||
import type { HttpContext } from '@adonisjs/core/http';
|
import type { HttpContext } from '@adonisjs/core/http';
|
||||||
// import Person from 'App/Models/Person';
|
|
||||||
import Dataset from '#models/dataset';
|
import Dataset from '#models/dataset';
|
||||||
import { StatusCodes } from 'http-status-codes';
|
import { StatusCodes } from 'http-status-codes';
|
||||||
|
|
||||||
// node ace make:controller Author
|
// node ace make:controller Author
|
||||||
export default class DatasetController {
|
export default class DatasetController {
|
||||||
public async index({}: HttpContext) {
|
/**
|
||||||
// Select datasets with server_state 'published' or 'deleted' and sort by the last published date
|
* GET /api/datasets
|
||||||
|
* Find all published datasets
|
||||||
|
*/
|
||||||
|
public async index({ response }: HttpContext) {
|
||||||
|
try {
|
||||||
const datasets = await Dataset.query()
|
const datasets = await Dataset.query()
|
||||||
.where(function (query) {
|
.where(function (query) {
|
||||||
query.where('server_state', 'published').orWhere('server_state', 'deleted');
|
query.where('server_state', 'published').orWhere('server_state', 'deleted');
|
||||||
|
|
@ -15,9 +18,18 @@ export default class DatasetController {
|
||||||
.preload('identifier')
|
.preload('identifier')
|
||||||
.orderBy('server_date_published', 'desc');
|
.orderBy('server_date_published', 'desc');
|
||||||
|
|
||||||
return datasets;
|
return response.status(StatusCodes.OK).json(datasets);
|
||||||
|
} catch (error) {
|
||||||
|
return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
|
||||||
|
message: error.message || 'Some error occurred while retrieving datasets.',
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /api/dataset
|
||||||
|
* Find all published datasets
|
||||||
|
*/
|
||||||
public async findAll({ response }: HttpContext) {
|
public async findAll({ response }: HttpContext) {
|
||||||
try {
|
try {
|
||||||
const datasets = await Dataset.query()
|
const datasets = await Dataset.query()
|
||||||
|
|
@ -33,11 +45,16 @@ export default class DatasetController {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public async findOne({ params }: HttpContext) {
|
/**
|
||||||
const datasets = await Dataset.query()
|
* GET /api/dataset/:publish_id
|
||||||
|
* Find one dataset by publish_id
|
||||||
|
*/
|
||||||
|
public async findOne({ response, params }: HttpContext) {
|
||||||
|
try {
|
||||||
|
const dataset = await Dataset.query()
|
||||||
.where('publish_id', params.publish_id)
|
.where('publish_id', params.publish_id)
|
||||||
.preload('titles')
|
.preload('titles')
|
||||||
.preload('descriptions')
|
.preload('descriptions') // Using 'descriptions' instead of 'abstracts'
|
||||||
.preload('user', (builder) => {
|
.preload('user', (builder) => {
|
||||||
builder.select(['id', 'firstName', 'lastName', 'avatar', 'login']);
|
builder.select(['id', 'firstName', 'lastName', 'avatar', 'login']);
|
||||||
})
|
})
|
||||||
|
|
@ -73,8 +90,97 @@ export default class DatasetController {
|
||||||
builder.preload('hashvalues');
|
builder.preload('hashvalues');
|
||||||
})
|
})
|
||||||
.preload('identifier')
|
.preload('identifier')
|
||||||
.firstOrFail();
|
.first(); // Use first() instead of firstOrFail() to handle not found gracefully
|
||||||
|
|
||||||
return datasets;
|
if (!dataset) {
|
||||||
|
return response.status(StatusCodes.NOT_FOUND).json({
|
||||||
|
message: `Cannot find Dataset with publish_id=${params.publish_id}.`,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return response.status(StatusCodes.OK).json(dataset);
|
||||||
|
} catch (error) {
|
||||||
|
return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
|
||||||
|
message: error.message || `Error retrieving Dataset with publish_id=${params.publish_id}.`,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GET /:prefix/:value
|
||||||
|
* Find dataset by identifier (e.g., https://doi.tethys.at/10.24341/tethys.99.2)
|
||||||
|
*/
|
||||||
|
public async findByIdentifier({ response, params }: HttpContext) {
|
||||||
|
const identifierValue = `${params.prefix}/${params.value}`;
|
||||||
|
|
||||||
|
// Optional: Validate DOI format
|
||||||
|
if (!identifierValue.match(/^10\.\d+\/[a-zA-Z0-9._-]+\.[0-9]+(?:\.[0-9]+)*$/)) {
|
||||||
|
return response.status(StatusCodes.BAD_REQUEST).json({
|
||||||
|
message: `Invalid DOI format: ${identifierValue}`,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Method 1: Using subquery with whereIn (most similar to your original)
|
||||||
|
const dataset = await Dataset.query()
|
||||||
|
// .whereIn('id', (subQuery) => {
|
||||||
|
// subQuery.select('dataset_id').from('dataset_identifiers').where('value', identifierValue);
|
||||||
|
// })
|
||||||
|
.whereHas('identifier', (builder) => {
|
||||||
|
builder.where('value', identifierValue);
|
||||||
|
})
|
||||||
|
.preload('titles')
|
||||||
|
.preload('descriptions') // Using 'descriptions' instead of 'abstracts'
|
||||||
|
.preload('user', (builder) => {
|
||||||
|
builder.select(['id', 'firstName', 'lastName', 'avatar', 'login']);
|
||||||
|
})
|
||||||
|
.preload('authors', (builder) => {
|
||||||
|
builder
|
||||||
|
.select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
|
||||||
|
.withCount('datasets', (query) => {
|
||||||
|
query.as('datasets_count');
|
||||||
|
})
|
||||||
|
.pivotColumns(['role', 'sort_order'])
|
||||||
|
.wherePivot('role', 'author')
|
||||||
|
.orderBy('pivot_sort_order', 'asc');
|
||||||
|
})
|
||||||
|
.preload('contributors', (builder) => {
|
||||||
|
builder
|
||||||
|
.select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
|
||||||
|
.withCount('datasets', (query) => {
|
||||||
|
query.as('datasets_count');
|
||||||
|
})
|
||||||
|
.pivotColumns(['role', 'sort_order', 'contributor_type'])
|
||||||
|
.wherePivot('role', 'contributor')
|
||||||
|
.orderBy('pivot_sort_order', 'asc');
|
||||||
|
})
|
||||||
|
.preload('subjects')
|
||||||
|
.preload('coverage')
|
||||||
|
.preload('licenses')
|
||||||
|
.preload('references')
|
||||||
|
.preload('project')
|
||||||
|
.preload('referenced_by', (builder) => {
|
||||||
|
builder.preload('dataset', (builder) => {
|
||||||
|
builder.preload('identifier');
|
||||||
|
});
|
||||||
|
})
|
||||||
|
.preload('files', (builder) => {
|
||||||
|
builder.preload('hashvalues');
|
||||||
|
})
|
||||||
|
.preload('identifier')
|
||||||
|
.first();
|
||||||
|
|
||||||
|
if (!dataset) {
|
||||||
|
return response.status(StatusCodes.NOT_FOUND).json({
|
||||||
|
message: `Cannot find Dataset with identifier=${identifierValue}.`,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return response.status(StatusCodes.OK).json(dataset);
|
||||||
|
} catch (error) {
|
||||||
|
return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
|
||||||
|
message: error.message || `Error retrieving Dataset with identifier=${identifierValue}.`,
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -235,6 +235,7 @@ export default class DatasetController {
|
||||||
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
|
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
|
||||||
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
|
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
|
||||||
last_name: vine.string().trim().minLength(3).maxLength(255),
|
last_name: vine.string().trim().minLength(3).maxLength(255),
|
||||||
|
identifier_orcid: vine.string().trim().maxLength(255).orcid().optional(),
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
.minLength(1)
|
.minLength(1)
|
||||||
|
|
@ -251,6 +252,7 @@ export default class DatasetController {
|
||||||
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
|
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
|
||||||
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
|
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
|
||||||
last_name: vine.string().trim().minLength(3).maxLength(255),
|
last_name: vine.string().trim().minLength(3).maxLength(255),
|
||||||
|
identifier_orcid: vine.string().trim().maxLength(255).orcid().optional(),
|
||||||
pivot_contributor_type: vine.enum(Object.keys(ContributorTypes)),
|
pivot_contributor_type: vine.enum(Object.keys(ContributorTypes)),
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
|
|
@ -326,6 +328,7 @@ export default class DatasetController {
|
||||||
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
|
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
|
||||||
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
|
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
|
||||||
last_name: vine.string().trim().minLength(3).maxLength(255),
|
last_name: vine.string().trim().minLength(3).maxLength(255),
|
||||||
|
identifier_orcid: vine.string().trim().maxLength(255).orcid().optional(),
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
.minLength(1)
|
.minLength(1)
|
||||||
|
|
@ -342,6 +345,7 @@ export default class DatasetController {
|
||||||
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
|
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
|
||||||
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
|
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
|
||||||
last_name: vine.string().trim().minLength(3).maxLength(255),
|
last_name: vine.string().trim().minLength(3).maxLength(255),
|
||||||
|
identifier_orcid: vine.string().trim().maxLength(255).orcid().optional(),
|
||||||
pivot_contributor_type: vine.enum(Object.keys(ContributorTypes)),
|
pivot_contributor_type: vine.enum(Object.keys(ContributorTypes)),
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,3 @@
|
||||||
// import { Client } from 'guzzle';
|
|
||||||
// import { Log } from '@adonisjs/core/build/standalone';
|
|
||||||
// import { DoiInterface } from './interfaces/DoiInterface';
|
|
||||||
import DoiClientContract from '#app/Library/Doi/DoiClientContract';
|
import DoiClientContract from '#app/Library/Doi/DoiClientContract';
|
||||||
import DoiClientException from '#app/exceptions/DoiClientException';
|
import DoiClientException from '#app/exceptions/DoiClientException';
|
||||||
import { StatusCodes } from 'http-status-codes';
|
import { StatusCodes } from 'http-status-codes';
|
||||||
|
|
@ -12,14 +9,14 @@ export class DoiClient implements DoiClientContract {
|
||||||
public username: string;
|
public username: string;
|
||||||
public password: string;
|
public password: string;
|
||||||
public serviceUrl: string;
|
public serviceUrl: string;
|
||||||
|
public apiUrl: string;
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
// const datacite_environment = process.env.DATACITE_ENVIRONMENT || 'debug';
|
// const datacite_environment = process.env.DATACITE_ENVIRONMENT || 'debug';
|
||||||
this.username = process.env.DATACITE_USERNAME || '';
|
this.username = process.env.DATACITE_USERNAME || '';
|
||||||
this.password = process.env.DATACITE_PASSWORD || '';
|
this.password = process.env.DATACITE_PASSWORD || '';
|
||||||
this.serviceUrl = process.env.DATACITE_SERVICE_URL || '';
|
this.serviceUrl = process.env.DATACITE_SERVICE_URL || '';
|
||||||
// this.prefix = process.env.DATACITE_PREFIX || '';
|
this.apiUrl = process.env.DATACITE_API_URL || 'https://api.datacite.org';
|
||||||
// this.base_domain = process.env.BASE_DOMAIN || '';
|
|
||||||
|
|
||||||
if (this.username === '' || this.password === '' || this.serviceUrl === '') {
|
if (this.username === '' || this.password === '' || this.serviceUrl === '') {
|
||||||
const message = 'issing configuration settings to properly initialize DOI client';
|
const message = 'issing configuration settings to properly initialize DOI client';
|
||||||
|
|
@ -90,4 +87,240 @@ export class DoiClient implements DoiClientContract {
|
||||||
throw new DoiClientException(error.response.status, error.response.data);
|
throw new DoiClientException(error.response.status, error.response.data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves DOI information from DataCite REST API
|
||||||
|
*
|
||||||
|
* @param doiValue The DOI identifier e.g. '10.5072/tethys.999'
|
||||||
|
* @returns Promise with DOI information or null if not found
|
||||||
|
*/
|
||||||
|
public async getDoiInfo(doiValue: string): Promise<any | null> {
|
||||||
|
try {
|
||||||
|
// Use configurable DataCite REST API URL
|
||||||
|
const dataciteApiUrl = `${this.apiUrl}/dois/${doiValue}`;
|
||||||
|
const response = await axios.get(dataciteApiUrl, {
|
||||||
|
headers: {
|
||||||
|
Accept: 'application/vnd.api+json',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
if (response.status === 200 && response.data.data) {
|
||||||
|
return {
|
||||||
|
created: response.data.data.attributes.created,
|
||||||
|
registered: response.data.data.attributes.registered,
|
||||||
|
updated: response.data.data.attributes.updated,
|
||||||
|
published: response.data.data.attributes.published,
|
||||||
|
state: response.data.data.attributes.state,
|
||||||
|
url: response.data.data.attributes.url,
|
||||||
|
metadata: response.data.data.attributes,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
if (error.response?.status === 404) {
|
||||||
|
logger.debug(`DOI ${doiValue} not found in DataCite`);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.debug(`DataCite REST API failed for ${doiValue}: ${error.message}`);
|
||||||
|
|
||||||
|
// Fallback to MDS API
|
||||||
|
return await this.getDoiInfoFromMds(doiValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fallback method to get DOI info from MDS API
|
||||||
|
*
|
||||||
|
* @param doiValue The DOI identifier
|
||||||
|
* @returns Promise with basic DOI information or null
|
||||||
|
*/
|
||||||
|
private async getDoiInfoFromMds(doiValue: string): Promise<any | null> {
|
||||||
|
try {
|
||||||
|
const auth = {
|
||||||
|
username: this.username,
|
||||||
|
password: this.password,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Get DOI URL
|
||||||
|
const doiResponse = await axios.get(`${this.serviceUrl}/doi/${doiValue}`, { auth });
|
||||||
|
|
||||||
|
if (doiResponse.status === 200) {
|
||||||
|
// Get metadata if available
|
||||||
|
try {
|
||||||
|
const metadataResponse = await axios.get(`${this.serviceUrl}/metadata/${doiValue}`, {
|
||||||
|
auth,
|
||||||
|
headers: {
|
||||||
|
Accept: 'application/xml',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
return {
|
||||||
|
url: doiResponse.data.trim(),
|
||||||
|
metadata: metadataResponse.data,
|
||||||
|
created: new Date().toISOString(), // MDS doesn't provide creation dates
|
||||||
|
registered: new Date().toISOString(), // Use current time as fallback
|
||||||
|
source: 'mds',
|
||||||
|
};
|
||||||
|
} catch (metadataError) {
|
||||||
|
// Return basic info even if metadata fetch fails
|
||||||
|
return {
|
||||||
|
url: doiResponse.data.trim(),
|
||||||
|
created: new Date().toISOString(),
|
||||||
|
registered: new Date().toISOString(),
|
||||||
|
source: 'mds',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
if (error.response?.status === 404) {
|
||||||
|
logger.debug(`DOI ${doiValue} not found in DataCite MDS`);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.debug(`DataCite MDS API failed for ${doiValue}: ${error.message}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if a DOI exists in DataCite
|
||||||
|
*
|
||||||
|
* @param doiValue The DOI identifier
|
||||||
|
* @returns Promise<boolean> True if DOI exists
|
||||||
|
*/
|
||||||
|
public async doiExists(doiValue: string): Promise<boolean> {
|
||||||
|
const doiInfo = await this.getDoiInfo(doiValue);
|
||||||
|
return doiInfo !== null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the last modification date of a DOI
|
||||||
|
*
|
||||||
|
* @param doiValue The DOI identifier
|
||||||
|
* @returns Promise<Date | null> Last modification date or creation date if never updated, null if not found
|
||||||
|
*/
|
||||||
|
public async getDoiLastModified(doiValue: string): Promise<Date | null> {
|
||||||
|
const doiInfo = await this.getDoiInfo(doiValue);
|
||||||
|
|
||||||
|
if (doiInfo) {
|
||||||
|
// Use updated date if available, otherwise fall back to created/registered date
|
||||||
|
const dateToUse = doiInfo.updated || doiInfo.registered || doiInfo.created;
|
||||||
|
|
||||||
|
if (dateToUse) {
|
||||||
|
logger.debug(
|
||||||
|
`DOI ${doiValue}: Using ${doiInfo.updated ? 'updated' : doiInfo.registered ? 'registered' : 'created'} date: ${dateToUse}`,
|
||||||
|
);
|
||||||
|
return new Date(dateToUse);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Makes a DOI unfindable (registered but not discoverable)
|
||||||
|
* Note: DOIs cannot be deleted, only made unfindable
|
||||||
|
* await doiClient.makeDoiUnfindable('10.21388/tethys.231');
|
||||||
|
*
|
||||||
|
* @param doiValue The DOI identifier e.g. '10.5072/tethys.999'
|
||||||
|
* @returns Promise<AxiosResponse<any>> The http response
|
||||||
|
*/
|
||||||
|
public async makeDoiUnfindable(doiValue: string): Promise<AxiosResponse<any>> {
|
||||||
|
const auth = {
|
||||||
|
username: this.username,
|
||||||
|
password: this.password,
|
||||||
|
};
|
||||||
|
|
||||||
|
try {
|
||||||
|
// First, check if DOI exists
|
||||||
|
const exists = await this.doiExists(doiValue);
|
||||||
|
if (!exists) {
|
||||||
|
throw new DoiClientException(404, `DOI ${doiValue} not found`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete the DOI URL mapping to make it unfindable
|
||||||
|
// This removes the URL but keeps the metadata registered
|
||||||
|
const response = await axios.delete(`${this.serviceUrl}/doi/${doiValue}`, { auth });
|
||||||
|
|
||||||
|
// Response Codes for DELETE /doi/{doi}
|
||||||
|
// 200 OK: operation successful
|
||||||
|
// 401 Unauthorized: no login
|
||||||
|
// 403 Forbidden: login problem, quota exceeded
|
||||||
|
// 404 Not Found: DOI does not exist
|
||||||
|
if (response.status !== 200) {
|
||||||
|
const message = `Unexpected DataCite MDS response code ${response.status}`;
|
||||||
|
logger.error(message);
|
||||||
|
throw new DoiClientException(response.status, message);
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(`DOI ${doiValue} successfully made unfindable`);
|
||||||
|
return response;
|
||||||
|
} catch (error) {
|
||||||
|
logger.error(`Failed to make DOI ${doiValue} unfindable: ${error.message}`);
|
||||||
|
if (error instanceof DoiClientException) {
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
throw new DoiClientException(error.response?.status || 500, error.response?.data || error.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Makes a DOI findable again by re-registering the URL
|
||||||
|
* await doiClient.makeDoiFindable(
|
||||||
|
* '10.21388/tethys.231',
|
||||||
|
* 'https://doi.dev.tethys.at/10.21388/tethys.231'
|
||||||
|
* );
|
||||||
|
*
|
||||||
|
* @param doiValue The DOI identifier e.g. '10.5072/tethys.999'
|
||||||
|
* @param landingPageUrl The landing page URL
|
||||||
|
* @returns Promise<AxiosResponse<any>> The http response
|
||||||
|
*/
|
||||||
|
public async makeDoiFindable(doiValue: string, landingPageUrl: string): Promise<AxiosResponse<any>> {
|
||||||
|
const auth = {
|
||||||
|
username: this.username,
|
||||||
|
password: this.password,
|
||||||
|
};
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Re-register the DOI with its URL to make it findable again
|
||||||
|
const response = await axios.put(`${this.serviceUrl}/doi/${doiValue}`, `doi=${doiValue}\nurl=${landingPageUrl}`, { auth });
|
||||||
|
|
||||||
|
// Response Codes for PUT /doi/{doi}
|
||||||
|
// 201 Created: operation successful
|
||||||
|
// 400 Bad Request: request body must be exactly two lines: DOI and URL
|
||||||
|
// 401 Unauthorized: no login
|
||||||
|
// 403 Forbidden: login problem, quota exceeded
|
||||||
|
// 412 Precondition failed: metadata must be uploaded first
|
||||||
|
if (response.status !== 201) {
|
||||||
|
const message = `Unexpected DataCite MDS response code ${response.status}`;
|
||||||
|
logger.error(message);
|
||||||
|
throw new DoiClientException(response.status, message);
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(`DOI ${doiValue} successfully made findable again`);
|
||||||
|
return response;
|
||||||
|
} catch (error) {
|
||||||
|
logger.error(`Failed to make DOI ${doiValue} findable: ${error.message}`);
|
||||||
|
if (error instanceof DoiClientException) {
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
throw new DoiClientException(error.response?.status || 500, error.response?.data || error.message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the current state of a DOI (draft, registered, findable)
|
||||||
|
* const state = await doiClient.getDoiState('10.21388/tethys.231');
|
||||||
|
* console.log(`Current state: ${state}`); // 'findable'
|
||||||
|
*
|
||||||
|
* @param doiValue The DOI identifier
|
||||||
|
* @returns Promise<string | null> The DOI state or null if not found
|
||||||
|
*/
|
||||||
|
public async getDoiState(doiValue: string): Promise<string | null> {
|
||||||
|
const doiInfo = await this.getDoiInfo(doiValue);
|
||||||
|
return doiInfo?.state || null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
380
commands/fix_dataset_cross_references.ts
Normal file
380
commands/fix_dataset_cross_references.ts
Normal file
|
|
@ -0,0 +1,380 @@
|
||||||
|
/*
|
||||||
|
|--------------------------------------------------------------------------
|
||||||
|
| node ace make:command fix-dataset-cross-references
|
||||||
|
| DONE: create commands/fix_dataset_cross_references.ts
|
||||||
|
|--------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
import { BaseCommand, flags } from '@adonisjs/core/ace';
|
||||||
|
import type { CommandOptions } from '@adonisjs/core/types/ace';
|
||||||
|
import { DateTime } from 'luxon';
|
||||||
|
import Dataset from '#models/dataset';
|
||||||
|
import DatasetReference from '#models/dataset_reference';
|
||||||
|
// import env from '#start/env';
|
||||||
|
|
||||||
|
/**
 * One detected gap: a source dataset holds a reference to a target dataset,
 * but the matching reverse reference on the target side was not found.
 */
interface MissingCrossReference {
    // --- source side: the dataset that owns the existing reference ---
    sourceDatasetId: number;
    sourcePublishId: number | null;
    sourceDoi: string | null;

    // --- target side: the dataset the existing reference points to ---
    targetDatasetId: number;
    targetPublishId: number | null;
    targetDoi: string | null;

    // --- the existing (forward) reference ---
    // Type of the existing reference (the command queries 'DOI' and 'URL').
    referenceType: string;
    // Relation of the existing reference, e.g. 'IsNewVersionOf'.
    relation: string;
    // Raw value of the existing reference (a DOI or dataset URL).
    doi: string | null;

    // Relation the missing reverse reference should carry.
    reverseRelation: string;
}
|
||||||
|
|
||||||
|
export default class DetectMissingCrossReferences extends BaseCommand {
|
||||||
|
static commandName = 'detect:missing-cross-references';
|
||||||
|
static description = 'Detect missing bidirectional cross-references between versioned datasets';
|
||||||
|
|
||||||
|
public static needsApplication = true;
|
||||||
|
|
||||||
|
@flags.boolean({ alias: 'f', description: 'Fix missing cross-references automatically' })
|
||||||
|
public fix: boolean = false;
|
||||||
|
|
||||||
|
@flags.boolean({ alias: 'v', description: 'Verbose output' })
|
||||||
|
public verbose: boolean = false;
|
||||||
|
|
||||||
|
@flags.number({ alias: 'p', description: 'Filter by specific publish_id (source or target dataset)' })
|
||||||
|
public publish_id?: number;
|
||||||
|
|
||||||
|
// example: node ace detect:missing-cross-references --verbose -p 227 //if you want to filter by specific publish_id with details
|
||||||
|
// example: node ace detect:missing-cross-references --verbose
|
||||||
|
// example: node ace detect:missing-cross-references --fix -p 227 //if you want to filter by specific publish_id and fix it
|
||||||
|
// example: node ace detect:missing-cross-references
|
||||||
|
|
||||||
|
public static options: CommandOptions = {
|
||||||
|
startApp: true,
|
||||||
|
staysAlive: false,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Define the allowed relations that we want to process
|
||||||
|
private readonly ALLOWED_RELATIONS = ['IsNewVersionOf', 'IsPreviousVersionOf', 'IsVariantFormOf', 'IsOriginalFormOf'];
|
||||||
|
|
||||||
|
/**
 * Command entry point.
 *
 * Detects missing reverse cross-references, prints them (brief or verbose),
 * and, when --fix is given, creates the missing reverse references.
 * Any failure is reported with its message and signalled through a non-zero
 * exit code.
 */
async run() {
    this.logger.info('🔍 Detecting missing cross-references...');
    this.logger.info(`📋 Processing only these relations: ${this.ALLOWED_RELATIONS.join(', ')}`);

    if (this.publish_id) {
        this.logger.info(`Filtering by publish_id: ${this.publish_id}`);
    }

    try {
        const missingReferences = await this.findMissingCrossReferences();

        if (missingReferences.length === 0) {
            const filterMsg = this.publish_id ? ` for publish_id ${this.publish_id}` : '';
            this.logger.success(`All cross-references are properly linked for the specified relations${filterMsg}!`);
            return;
        }

        const filterMsg = this.publish_id ? ` (filtered by publish_id ${this.publish_id})` : '';
        this.logger.warning(`Found ${missingReferences.length} missing cross-reference(s)${filterMsg}:`);

        if (!this.verbose) {
            // Brief mode: one line per missing reverse reference.
            for (const missing of missingReferences) {
                const sourceDoi = missing.sourceDoi ? ` DOI: ${missing.sourceDoi}` : '';
                const targetDoi = missing.targetDoi ? ` DOI: ${missing.targetDoi}` : '';

                this.logger.info(
                    `Dataset ${missing.sourceDatasetId} (Publish ID: ${missing.sourcePublishId}${sourceDoi}) ${missing.relation} Dataset ${missing.targetDatasetId} (Publish ID: ${missing.targetPublishId}${targetDoi}) → missing reverse: ${missing.reverseRelation}`,
                );
            }
        } else {
            // Verbose mode: show the details of the existing forward reference.
            for (const missing of missingReferences) {
                this.logger.info(
                    `Dataset ${missing.sourceDatasetId} references ${missing.targetDatasetId}, but reverse reference is missing`,
                );
                this.logger.info(` - Reference type: ${missing.referenceType}`);
                this.logger.info(` - Relation: ${missing.relation}`);
                this.logger.info(` - DOI: ${missing.doi}`);
            }
        }

        if (this.fix) {
            await this.fixMissingReferences(missingReferences);
            // fixMissingReferences logs its own created/error counts; do not
            // claim that everything was fixed, because individual items may
            // have been skipped or may have failed.
            this.logger.success('Finished creating missing cross-references (see the summary above for any errors)');
        } else {
            if (this.verbose) {
                this.printMissingReferencesList(missingReferences);
            }
            this.logger.info('💡 Run with --fix flag to automatically create missing cross-references');
            if (this.publish_id) {
                this.logger.info(`🎯 Currently filtering by publish_id: ${this.publish_id}`);
            }
        }
    } catch (error) {
        // Put the failure reason into the message itself: the logger's second
        // argument is an options object, so an Error passed there is not printed.
        const reason = error instanceof Error ? error.message : String(error);
        this.logger.error(`Error detecting missing cross-references: ${reason}`);

        // Report failure through the command's exit code instead of killing the
        // process, so the framework can still shut the application down.
        this.exitCode = 1;
    }
}
|
||||||
|
|
||||||
|
/**
 * Collects every allowed-relation reference from a published dataset to another
 * published Tethys dataset for which no reverse reference was found.
 *
 * Honors the optional publish_id flag by restricting the source datasets.
 *
 * @returns one entry per existing forward reference whose reverse is missing
 */
private async findMissingCrossReferences(): Promise<MissingCrossReference[]> {
    const gaps: MissingCrossReference[] = [];

    this.logger.info('📊 Querying dataset references...');

    // References (DOI or URL) whose value points at a Tethys dataset, limited to
    // the allowed relations and to source datasets that are published.
    const referenceQuery = DatasetReference.query()
        .whereIn('type', ['DOI', 'URL'])
        .whereIn('relation', this.ALLOWED_RELATIONS)
        .where((valueQuery) => {
            valueQuery.where('value', 'like', '%doi.org/10.24341/tethys.%').orWhere('value', 'like', '%tethys.at/dataset/%');
        })
        .preload('dataset', (datasetQuery) => {
            datasetQuery.preload('identifier');
        })
        .whereHas('dataset', (datasetQuery) => {
            datasetQuery.where('server_state', 'published');
        });

    if (typeof this.publish_id === 'number') {
        referenceQuery.whereHas('dataset', (datasetQuery) => {
            datasetQuery.where('publish_id', this.publish_id as number);
        });
    }

    const references = await referenceQuery.exec();

    this.logger.info(`🔗 Found ${references.length} Tethys references from published datasets (allowed relations only)`);

    let processedCount = 0;
    let skippedCount = 0;

    for (const reference of references) {
        processedCount += 1;

        if (this.verbose && processedCount % 10 === 0) {
            this.logger.info(`📈 Processed ${processedCount}/${references.length} references...`);
        }

        // Safety net: the query already filters by relation.
        if (!this.ALLOWED_RELATIONS.includes(reference.relation)) {
            skippedCount += 1;
            if (this.verbose) {
                this.logger.info(`⏭️ Skipping relation "${reference.relation}" - not in allowed list`);
            }
            continue;
        }

        // Resolve the target's publish_id from the reference value (DOI or URL).
        const targetPublishId = this.extractDatasetPublishIdFromReference(reference.value);
        if (!targetPublishId) {
            if (this.verbose) {
                this.logger.warning(`⚠️ Could not extract publish ID from: ${reference.value}`);
            }
            continue;
        }

        // The target must exist and be published as well.
        const targetDataset = await Dataset.query()
            .where('publish_id', targetPublishId)
            .where('server_state', 'published')
            .preload('identifier')
            .first();

        if (!targetDataset) {
            if (this.verbose) {
                this.logger.warning(`⚠️ Target dataset with publish_id ${targetPublishId} not found or not published`);
            }
            continue;
        }

        // The source dataset must have been preloaded with the reference.
        if (!reference.dataset) {
            this.logger.warning(`⚠️ Source dataset ${reference.document_id} not properly loaded, skipping...`);
            continue;
        }

        const reverseExists = await this.checkReverseReferenceExists(targetDataset.id, reference.relation);
        if (reverseExists) {
            continue;
        }

        // Only record the gap when a reverse relation is defined for this relation.
        const reverseRelation = this.getReverseRelation(reference.relation);
        if (!reverseRelation) {
            continue;
        }

        const sourceDataset = reference.dataset;
        gaps.push({
            sourceDatasetId: reference.document_id,
            targetDatasetId: targetDataset.id,
            sourcePublishId: sourceDataset.publish_id || null,
            targetPublishId: targetDataset.publish_id || null,
            referenceType: reference.type,
            relation: reference.relation,
            doi: reference.value,
            reverseRelation: reverseRelation,
            sourceDoi: sourceDataset.identifier ? sourceDataset.identifier.value : null,
            targetDoi: targetDataset.identifier ? targetDataset.identifier.value : null,
        });
    }

    this.logger.info(`✅ Processed ${processedCount} references (${skippedCount} skipped due to relation filtering)`);

    return gaps;
}
|
||||||
|
|
||||||
|
/**
 * Extracts the numeric dataset publish_id from a reference value.
 *
 * Supported forms (the DOI form is tried first):
 *   https://doi.org/10.24341/tethys.107  -> 107
 *   https://tethys.at/dataset/107        -> 107
 *
 * @param value reference value (DOI or URL)
 * @returns the publish_id, or null when neither form matches
 */
private extractDatasetPublishIdFromReference(value: string): number | null {
    const idPatterns = [/10\.24341\/tethys\.(\d+)/, /tethys\.at\/dataset\/(\d+)/];

    for (const pattern of idPatterns) {
        const hit = value.match(pattern);
        if (hit) {
            // The capture group holds decimal digits only.
            return parseInt(hit[1], 10);
        }
    }

    return null;
}
|
||||||
|
|
||||||
|
/**
 * Checks whether the target dataset already has an incoming reference that
 * carries the reverse of the given relation.
 *
 * The lookup matches on related_document_id and relation only; it does not
 * filter by the referencing (source) dataset.
 * NOTE(review): because the source is not part of the filter, one existing
 * reverse reference on a target presumably satisfies the check for every
 * source that points at that target — confirm this is intended.
 *
 * @param targetDatasetId id of the dataset that should receive the reverse reference
 * @param originalRelation relation of the existing forward reference
 * @returns true when a matching reference exists, or when no reverse relation
 *          is defined for originalRelation (so the caller skips it)
 */
private async checkReverseReferenceExists(targetDatasetId: number, originalRelation: string): Promise<boolean> {
    const reverseRelation = this.getReverseRelation(originalRelation);

    // Nothing to look for: report "exists" so the caller skips this reference.
    if (!reverseRelation) {
        return true;
    }

    const existing = await DatasetReference.query()
        // Intentionally not filtered by source document_id:
        // .where('document_id', sourceDatasetId)
        .where('related_document_id', targetDatasetId)
        .where('relation', reverseRelation)
        .first();

    return Boolean(existing);
}
|
||||||
|
|
||||||
|
/**
 * Returns the reverse of a relation, or null when none is defined.
 *
 * Relations are compared explicitly rather than looked up on a plain object,
 * so names that collide with inherited object members (for example
 * 'constructor' or 'toString') correctly yield null.
 *
 * @param relation relation name, e.g. 'IsNewVersionOf'
 * @returns the reverse relation name, or null for unknown relations
 */
private getReverseRelation(relation: string): string | null {
    // Each pair lists two relations that are each other's reverse.
    const reversePairs: ReadonlyArray<readonly [string, string]> = [
        ['IsNewVersionOf', 'IsPreviousVersionOf'],
        ['IsVariantFormOf', 'IsOriginalFormOf'],
    ];

    for (const [forward, backward] of reversePairs) {
        if (relation === forward) {
            return backward;
        }
        if (relation === backward) {
            return forward;
        }
    }

    return null;
}
|
||||||
|
|
||||||
|
/**
 * Prints a boxed, numbered report of all missing reverse references to stdout,
 * followed by a summary box with the total and the processed relations.
 *
 * @param missingReferences entries produced by findMissingCrossReferences
 */
private printMissingReferencesList(missingReferences: MissingCrossReference[]) {
    // Report header
    console.log('┌─────────────────────────────────────────────────────────────────────────────────┐');
    console.log('│ MISSING CROSS-REFERENCES REPORT │');
    console.log('│ (Published Datasets Only - Filtered Relations) │');
    console.log('└─────────────────────────────────────────────────────────────────────────────────┘');
    console.log();

    // One numbered entry per missing reverse reference
    for (const [position, missing] of missingReferences.entries()) {
        console.log(
            `${position + 1}. Dataset ${missing.sourceDatasetId} (Publish ID: ${missing.sourcePublishId} Identifier: ${missing.sourceDoi})
${missing.relation} Dataset ${missing.targetDatasetId} (Publish ID: ${missing.targetPublishId} Identifier: ${missing.targetDoi})`,
        );
        console.log(` ├─ Current relation: "${missing.relation}"`);
        console.log(` ├─ Missing reverse relation: "${missing.reverseRelation}"`);
        console.log(` ├─ Reference type: ${missing.referenceType}`);
        console.log(` └─ DOI/URL: ${missing.doi}`);
        console.log();
    }

    // Summary footer
    console.log('┌─────────────────────────────────────────────────────────────────────────────────┐');
    console.log(`│ SUMMARY: ${missingReferences.length} missing reverse reference(s) detected │`);
    console.log(`│ Processed relations: ${this.ALLOWED_RELATIONS.join(', ')} │`);
    console.log('└─────────────────────────────────────────────────────────────────────────────────┘');
}
|
||||||
|
|
||||||
|
/**
 * Creates the missing reverse references in the database.
 *
 * For each entry, both datasets are re-read (published only). A new reference
 * is stored as an incoming reference on the target dataset, with its value
 * pointing at the source dataset. Entries whose datasets cannot be loaded, or
 * whose save fails, are counted as errors and do not stop the run.
 *
 * @param missingReferences entries produced by findMissingCrossReferences
 */
private async fixMissingReferences(missingReferences: MissingCrossReference[]) {
    this.logger.info('🔧 Creating missing cross-references in database...');

    let fixedCount = 0;
    let errorCount = 0;

    for (const [index, missing] of missingReferences.entries()) {
        try {
            // Re-read both datasets; only published ones are processed.
            const sourceDataset = await Dataset.query()
                .where('id', missing.sourceDatasetId)
                .where('server_state', 'published')
                .preload('identifier')
                // NOTE(review): mainTitle is presumably derived from the titles
                // relation (the datacite listing command preloads it before
                // reading mainTitle); without it the label would always fall
                // back to "Dataset <id>" — confirm against the Dataset model.
                .preload('titles')
                .first();

            const targetDataset = await Dataset.query().where('id', missing.targetDatasetId).where('server_state', 'published').first();

            if (!sourceDataset) {
                this.logger.warning(`⚠️ Source dataset ${missing.sourceDatasetId} not found or not published, skipping...`);
                errorCount++;
                continue;
            }

            if (!targetDataset) {
                this.logger.warning(`⚠️ Target dataset ${missing.targetDatasetId} not found or not published, skipping...`);
                errorCount++;
                continue;
            }

            // Example: if Dataset 297 IsNewVersionOf Dataset 144, an incoming
            // reference is created for Dataset 144 with relation IsPreviousVersionOf.
            // document_id is deliberately left unset; the link to the target is
            // made through related_document_id.
            const reverseReference = new DatasetReference();
            reverseReference.related_document_id = missing.targetDatasetId;
            reverseReference.relation = missing.reverseRelation;

            if (sourceDataset.identifier?.value) {
                // Preferred: point at the source dataset's DOI.
                reverseReference.type = 'DOI';
                reverseReference.value = `https://doi.org/${sourceDataset.identifier.value}`;
            } else {
                // No DOI: fall back to the dataset URL and label the type
                // accordingly, so a plain URL is not stored as a DOI.
                reverseReference.type = 'URL';
                reverseReference.value = `https://tethys.at/dataset/${sourceDataset.publish_id || missing.sourceDatasetId}`;
            }

            reverseReference.label = sourceDataset.mainTitle || `Dataset ${missing.sourceDatasetId}`;

            // Touch the target first so downstream updates (e.g. search index)
            // are triggered. If the reference save below fails, the entry stays
            // detectable and a re-run retries it.
            targetDataset.server_date_modified = DateTime.now();
            await targetDataset.save();

            await reverseReference.save();
            fixedCount++;

            if (this.verbose) {
                this.logger.info(
                    `✅ [${index + 1}/${missingReferences.length}] Created reverse reference: Dataset ${missing.sourceDatasetId} -> ${missing.targetDatasetId} (${missing.reverseRelation})`,
                );
            } else if ((index + 1) % 10 === 0) {
                this.logger.info(`📈 Fixed ${fixedCount}/${missingReferences.length} references...`);
            }
        } catch (error) {
            // Put the reason into the message itself: the logger's second
            // argument is an options object, so an Error passed there is not printed.
            const reason = error instanceof Error ? error.message : String(error);
            this.logger.error(
                `❌ Error creating reverse reference for datasets ${missing.targetDatasetId} -> ${missing.sourceDatasetId}: ${reason}`,
            );
            errorCount++;
        }
    }

    this.logger.info(`📊 Fix completed: ${fixedCount} created, ${errorCount} errors`);
}
|
||||||
|
}
|
||||||
346
commands/list_updatable_datacite.ts
Normal file
346
commands/list_updatable_datacite.ts
Normal file
|
|
@ -0,0 +1,346 @@
|
||||||
|
/*
|
||||||
|
|--------------------------------------------------------------------------
|
||||||
|
| node ace make:command list-updateable-datacite
|
||||||
|
| DONE: create commands/list_updatable_datacite.ts
|
||||||
|
|--------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
import { BaseCommand, flags } from '@adonisjs/core/ace';
|
||||||
|
import { CommandOptions } from '@adonisjs/core/types/ace';
|
||||||
|
import Dataset from '#models/dataset';
|
||||||
|
import { DoiClient } from '#app/Library/Doi/DoiClient';
|
||||||
|
import env from '#start/env';
|
||||||
|
import logger from '@adonisjs/core/services/logger';
|
||||||
|
import { DateTime } from 'luxon';
|
||||||
|
import pLimit from 'p-limit';
|
||||||
|
|
||||||
|
export default class ListUpdateableDatacite extends BaseCommand {
|
||||||
|
static commandName = 'list:updateable-datacite';
|
||||||
|
static description = 'List all datasets that need DataCite DOI updates';
|
||||||
|
|
||||||
|
public static needsApplication = true;
|
||||||
|
|
||||||
|
// private chunkSize = 100; // Set chunk size for pagination
|
||||||
|
|
||||||
|
@flags.boolean({ alias: 'v', description: 'Verbose output showing detailed information' })
|
||||||
|
public verbose: boolean = false;
|
||||||
|
|
||||||
|
@flags.boolean({ alias: 'c', description: 'Show only count of updatable datasets' })
|
||||||
|
public countOnly: boolean = false;
|
||||||
|
|
||||||
|
@flags.boolean({ alias: 'i', description: 'Show only publish IDs (useful for scripting)' })
|
||||||
|
public idsOnly: boolean = false;
|
||||||
|
|
||||||
|
@flags.number({ description: 'Chunk size for processing datasets (default: 50)' })
|
||||||
|
public chunkSize: number = 50;
|
||||||
|
|
||||||
|
//example: node ace list:updateable-datacite
|
||||||
|
//example: node ace list:updateable-datacite --verbose
|
||||||
|
//example: node ace list:updateable-datacite --count-only
|
||||||
|
//example: node ace list:updateable-datacite --ids-only
|
||||||
|
//example: node ace list:updateable-datacite --chunk-size 50
|
||||||
|
|
||||||
|
// Ace command options: boot the application before running the command and
// do not keep the process alive after run() completes.
// (The option is spelled `staysAlive`; `stayAlive` is not a CommandOptions key.)
public static options: CommandOptions = {
    startApp: true,
    staysAlive: false,
};
|
||||||
|
|
||||||
|
/**
 * Command entry point.
 *
 * Pages through all published datasets that have a DOI identifier, collects
 * those that need a DataCite update and prints them in the format selected by
 * the flags (count only, ids only, verbose, or the default simple list).
 *
 * Configuration errors and conflicting flags are reported and signalled with
 * a non-zero exit code, so scripts using --count-only / --ids-only can detect
 * the failure.
 */
async run() {
    const prefix = env.get('DATACITE_PREFIX', '');
    const base_domain = env.get('BASE_DOMAIN', '');

    if (!prefix || !base_domain) {
        logger.error('Missing DATACITE_PREFIX or BASE_DOMAIN environment variables');
        this.exitCode = 1;
        return;
    }

    // Prevent conflicting flags
    if (this.verbose && (this.countOnly || this.idsOnly)) {
        logger.error('Flags --verbose cannot be combined with --count-only or --ids-only');
        this.exitCode = 1;
        return;
    }

    // Fall back to the default for missing, non-integer or non-positive sizes.
    const chunkSize = Number.isInteger(this.chunkSize) && this.chunkSize > 0 ? this.chunkSize : 50;

    // Progress messages are suppressed in the machine-readable output modes.
    const showProgress = !this.countOnly && !this.idsOnly;

    let totalProcessed = 0;
    const updatableDatasets: Dataset[] = [];

    if (showProgress) {
        logger.info(`Processing datasets in chunks of ${chunkSize}...`);
    }

    for (let page = 1; ; page += 1) {
        const datasets = await this.getDatasets(page, chunkSize);

        if (datasets.length === 0) {
            break;
        }

        if (showProgress) {
            logger.info(`Processing chunk ${page} (${datasets.length} datasets)...`);
        }

        const chunkUpdatableDatasets = await this.processChunk(datasets);
        updatableDatasets.push(...chunkUpdatableDatasets);
        totalProcessed += datasets.length;

        // A short page means this was the last one.
        if (datasets.length < chunkSize) {
            break;
        }
    }

    if (showProgress) {
        logger.info(`Processed ${totalProcessed} datasets total, found ${updatableDatasets.length} that need updates`);
    }

    if (this.countOnly) {
        console.log(updatableDatasets.length);
    } else if (this.idsOnly) {
        updatableDatasets.forEach((dataset) => console.log(dataset.publish_id));
    } else if (this.verbose) {
        await this.showVerboseOutput(updatableDatasets);
    } else {
        this.showSimpleOutput(updatableDatasets);
    }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Processes a chunk of datasets to determine which ones need DataCite updates
|
||||||
|
*
|
||||||
|
* This method handles parallel processing of datasets within a chunk, providing
|
||||||
|
* efficient error handling and filtering of results.
|
||||||
|
*
|
||||||
|
* @param datasets - Array of Dataset objects to process
|
||||||
|
* @returns Promise<Dataset[]> - Array of datasets that need updates
|
||||||
|
*/
|
||||||
|
// private async processChunk(datasets: Dataset[]): Promise<Dataset[]> {
|
||||||
|
// // Process datasets in parallel using Promise.allSettled for better error handling
|
||||||
|
// //
|
||||||
|
// // Why Promise.allSettled vs Promise.all?
|
||||||
|
// // - Promise.all fails fast: if ANY promise rejects, the entire operation fails
|
||||||
|
// // - Promise.allSettled waits for ALL promises: some can fail, others succeed
|
||||||
|
// // - This is crucial for batch processing where we don't want one bad dataset
|
||||||
|
// // to stop processing of the entire chunk
|
||||||
|
// const results = await Promise.allSettled(
|
||||||
|
// datasets.map(async (dataset) => {
|
||||||
|
// try {
|
||||||
|
// // Check if this specific dataset needs a DataCite update
|
||||||
|
// const needsUpdate = await this.shouldUpdateDataset(dataset);
|
||||||
|
|
||||||
|
// // Return the dataset if it needs update, null if it doesn't
|
||||||
|
// // This creates a sparse array that we'll filter later
|
||||||
|
// return needsUpdate ? dataset : null;
|
||||||
|
// } catch (error) {
|
||||||
|
// // Error handling for individual dataset checks
|
||||||
|
// //
|
||||||
|
// // Log warnings only if we're not in silent modes (count-only or ids-only)
|
||||||
|
// // This prevents log spam when running automated scripts
|
||||||
|
// if (!this.countOnly && !this.idsOnly) {
|
||||||
|
// logger.warn(`Error checking dataset ${dataset.publish_id}: ${error.message}`);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// // IMPORTANT DECISION: Return the dataset anyway if we can't determine status
|
||||||
|
// //
|
||||||
|
// // Why? It's safer to include a dataset that might not need updating
|
||||||
|
// // than to miss one that actually does need updating. This follows the
|
||||||
|
// // "fail-safe" principle - if we're unsure, err on the side of caution
|
||||||
|
// return dataset;
|
||||||
|
// }
|
||||||
|
// }),
|
||||||
|
// );
|
||||||
|
|
||||||
|
// // Filter and extract results from Promise.allSettled response
|
||||||
|
// //
|
||||||
|
// // Promise.allSettled returns an array of objects with this structure:
|
||||||
|
// // - { status: 'fulfilled', value: T } for successful promises
|
||||||
|
// // - { status: 'rejected', reason: Error } for failed promises
|
||||||
|
// //
|
||||||
|
// // We need to:
|
||||||
|
// // 1. Only get fulfilled results (rejected ones are already handled above)
|
||||||
|
// // 2. Filter out null values (datasets that don't need updates)
|
||||||
|
// // 3. Extract the actual Dataset objects from the wrapper
|
||||||
|
// return results
|
||||||
|
// .filter(
|
||||||
|
// (result): result is PromiseFulfilledResult<Dataset | null> =>
|
||||||
|
// // Type guard: only include fulfilled results that have actual values
|
||||||
|
// // This filters out:
|
||||||
|
// // - Rejected promises (shouldn't happen due to try/catch, but safety first)
|
||||||
|
// // - Fulfilled promises that returned null (datasets that don't need updates)
|
||||||
|
// result.status === 'fulfilled' && result.value !== null,
|
||||||
|
// )
|
||||||
|
// .map((result) => result.value!); // Extract the Dataset from the wrapper
|
||||||
|
// // The ! is safe here because we filtered out null values above
|
||||||
|
// }
|
||||||
|
|
||||||
|
/**
 * Checks one chunk of datasets and returns those that need a DataCite update.
 *
 * At most five checks run at the same time. A dataset whose check throws is
 * kept in the result (fail-safe) and, outside the count-only / ids-only modes,
 * a warning is logged.
 *
 * @param datasets datasets of the current chunk
 * @returns the datasets that need an update, in input order
 */
private async processChunk(datasets: Dataset[]): Promise<Dataset[]> {
    // Limit concurrency to avoid API flooding (max 5 at once)
    const limit = pLimit(5);
    const quietMode = this.countOnly || this.idsOnly;

    const checkOne = async (dataset: Dataset): Promise<Dataset | null> => {
        try {
            return (await this.shouldUpdateDataset(dataset)) ? dataset : null;
        } catch (error) {
            if (!quietMode) {
                logger.warn(
                    `Error checking dataset ${dataset.publish_id}: ${error instanceof Error ? error.message : JSON.stringify(error)}`,
                );
            }
            // Fail-safe: include dataset if uncertain
            return dataset;
        }
    };

    const outcomes = await Promise.allSettled(datasets.map((dataset) => limit(() => checkOne(dataset))));

    // Keep fulfilled, non-null results only.
    const needingUpdate: Dataset[] = [];
    for (const outcome of outcomes) {
        if (outcome.status === 'fulfilled' && outcome.value !== null) {
            needingUpdate.push(outcome.value);
        }
    }

    return needingUpdate;
}
|
||||||
|
|
||||||
|
/**
 * Loads one page of published datasets that have a DOI identifier, ordered by
 * publish_id, with identifier, xmlCache and titles preloaded.
 *
 * @param page page number to load
 * @param chunkSize number of datasets per page
 * @returns the datasets of the requested page
 */
private async getDatasets(page: number, chunkSize: number): Promise<Dataset[]> {
    const publishedWithDoi = Dataset.query()
        .orderBy('publish_id', 'asc')
        .preload('identifier')
        .preload('xmlCache')
        .preload('titles')
        .where('server_state', 'published')
        .whereHas('identifier', (identifierQuery) => {
            identifierQuery.where('type', 'doi');
        });

    // Fetch the datasets for the current page
    return await publishedWithDoi.forPage(page, chunkSize);
}
|
||||||
|
|
||||||
|
/**
 * Decides whether a dataset needs a DataCite update.
 *
 * Returns false when the dataset has no DOI identifier, when its modification
 * date lies in the future, or when it was not modified more than the tolerance
 * (600 seconds) after the DOI's last modification.
 * Returns true when the dataset is newer than the DOI beyond the tolerance, and
 * whenever the answer is uncertain (missing or invalid dates, DOI lookup failed
 * or timed out, unexpected error).
 *
 * @param dataset dataset to check
 * @returns true when the dataset should be included for update
 */
private async shouldUpdateDataset(dataset: Dataset): Promise<boolean> {
    try {
        let doiIdentifier = dataset.identifier;
        if (!doiIdentifier) {
            await dataset.load('identifier');
            doiIdentifier = dataset.identifier;
        }

        if (!doiIdentifier || doiIdentifier.type !== 'doi') {
            return false;
        }

        // A missing modification date is uncertain: include the dataset.
        const rawModified = dataset.server_date_modified;
        if (!rawModified) {
            return true;
        }

        const datasetModified = rawModified instanceof DateTime ? rawModified : DateTime.fromJSDate(rawModified);

        // A DateTime object is always truthy, so validity must be checked
        // explicitly; an invalid date compares false against everything and
        // would otherwise silently yield "no update needed".
        if (!datasetModified.isValid) {
            return true;
        }

        if (datasetModified > DateTime.now()) {
            return false;
        }

        const doiClient = new DoiClient();
        // NOTE(review): 300 ms is short for a remote API call; every lookup
        // that exceeds it marks the dataset as needing an update — confirm.
        const DOI_CHECK_TIMEOUT = 300; // ms

        // Lookup failures and timeouts both collapse to null.
        const doiLastModified = await Promise.race([
            doiClient.getDoiLastModified(doiIdentifier.value),
            this.createTimeoutPromise(DOI_CHECK_TIMEOUT),
        ]).catch(() => null);

        if (!doiLastModified) {
            // If uncertain, better include dataset for update
            return true;
        }

        const doiModified = DateTime.fromJSDate(doiLastModified);
        if (!doiModified.isValid) {
            // An unusable DOI timestamp is uncertain as well.
            return true;
        }

        if (datasetModified > doiModified) {
            // Ignore small differences between the two timestamps.
            const diffInSeconds = Math.abs(datasetModified.diff(doiModified, 'seconds').seconds);
            const toleranceSeconds = 600;
            return diffInSeconds > toleranceSeconds;
        }

        return false;
    } catch (error) {
        return true; // safer: include dataset if unsure
    }
}
|
||||||
|
|
||||||
|
/**
 * Creates a promise that never resolves and rejects with a timeout error
 * after the given delay. Intended to be raced against an API call.
 *
 * @param timeoutMs delay in milliseconds before the rejection
 */
private createTimeoutPromise(timeoutMs: number): Promise<never> {
    return new Promise<never>((_resolve, reject) => {
        const onTimeout = () => reject(new Error(`API call timeout after ${timeoutMs}ms`));
        setTimeout(onTimeout, timeoutMs);
    });
}
|
||||||
|
|
||||||
|
/**
 * Prints the default output: one line per dataset that needs a DataCite
 * update, followed by a hint on how to run the update command.
 *
 * @param updatableDatasets datasets that need an update
 */
private showSimpleOutput(updatableDatasets: Dataset[]): void {
    const total = updatableDatasets.length;

    if (total === 0) {
        console.log('No datasets need DataCite updates.');
        return;
    }

    console.log(`\nFound ${total} dataset(s) that need DataCite updates:\n`);

    for (const dataset of updatableDatasets) {
        console.log(`publish_id ${dataset.publish_id} needs update - ${dataset.mainTitle || 'Untitled'}`);
    }

    // Usage hints
    console.log(`\nTo update these datasets, run:`);
    console.log(` node ace update:datacite`);
    console.log(`\nOr update specific datasets:`);
    console.log(` node ace update:datacite -p <publish_id>`);
}
|
||||||
|
|
||||||
|
/**
 * Prints the verbose output: a detail box per dataset (one after another),
 * followed by a summary line.
 *
 * @param updatableDatasets datasets that need an update
 */
private async showVerboseOutput(updatableDatasets: Dataset[]): Promise<void> {
    const total = updatableDatasets.length;

    if (total === 0) {
        console.log('No datasets need DataCite updates.');
        return;
    }

    console.log(`\nFound ${total} dataset(s) that need DataCite updates:\n`);

    // Details are fetched and printed sequentially, in list order.
    for (const dataset of updatableDatasets) {
        await this.showDatasetDetails(dataset);
    }

    console.log(`\nSummary: ${total} datasets need updates`);
}
|
||||||
|
|
||||||
|
/**
 * Prints a detail box for one dataset: title, DOI, DOI state and the
 * modification timestamps of the dataset and of the DOI at DataCite.
 *
 * When the DOI lookup fails, an error box with the failure reason is printed
 * instead; the method itself does not throw for lookup errors.
 *
 * @param dataset dataset to describe
 */
private async showDatasetDetails(dataset: Dataset): Promise<void> {
    try {
        let doiIdentifier = dataset.identifier;

        if (!doiIdentifier) {
            await dataset.load('identifier');
            doiIdentifier = dataset.identifier;
        }

        const doiValue = doiIdentifier?.value || 'N/A';
        const datasetModified = dataset.server_date_modified;

        // Get DOI info from DataCite
        const doiClient = new DoiClient();
        const doiLastModified = await doiClient.getDoiLastModified(doiValue);
        const doiState = await doiClient.getDoiState(doiValue);

        console.log(`┌─ Dataset ${dataset.publish_id} ───────────────────────────────────────────────────────────────`);
        console.log(`│ Title: ${dataset.mainTitle || 'Untitled'}`);
        console.log(`│ DOI: ${doiValue}`);
        console.log(`│ DOI State: ${doiState || 'Unknown'}`);
        console.log(`│ Dataset Modified: ${datasetModified ? datasetModified.toISO() : 'N/A'}`);
        console.log(`│ DOI Modified: ${doiLastModified ? DateTime.fromJSDate(doiLastModified).toISO() : 'N/A'}`);
        console.log(`│ Status: NEEDS UPDATE`);
        console.log(`└─────────────────────────────────────────────────────────────────────────────────────────────\n`);
    } catch (error) {
        // Thrown values are not guaranteed to be Error instances; derive the
        // reason the same way processChunk does, so it never prints "undefined".
        const reason = error instanceof Error ? error.message : JSON.stringify(error);

        console.log(`┌─ Dataset ${dataset.publish_id} ───────────────────────────────────────────────────────────────`);
        console.log(`│ Title: ${dataset.mainTitle || 'Untitled'}`);
        console.log(`│ DOI: ${dataset.identifier?.value || 'N/A'}`);
        console.log(`│ Error: ${reason}`);
        console.log(`│ Status: NEEDS UPDATE (Error checking)`);
        console.log(`└─────────────────────────────────────────────────────────────────────────────────────────────\n`);
    }
}
|
||||||
|
}
|
||||||
266
commands/update_datacite.ts
Normal file
266
commands/update_datacite.ts
Normal file
|
|
@ -0,0 +1,266 @@
|
||||||
|
/*
|
||||||
|
|--------------------------------------------------------------------------
|
||||||
|
| node ace make:command update-datacite
|
||||||
|
| DONE: create commands/update_datacite.ts
|
||||||
|
|--------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
import { BaseCommand, flags } from '@adonisjs/core/ace';
|
||||||
|
import { CommandOptions } from '@adonisjs/core/types/ace';
|
||||||
|
import Dataset from '#models/dataset';
|
||||||
|
import { DoiClient } from '#app/Library/Doi/DoiClient';
|
||||||
|
import DoiClientException from '#app/exceptions/DoiClientException';
|
||||||
|
import Index from '#app/Library/Utils/Index';
|
||||||
|
import env from '#start/env';
|
||||||
|
import logger from '@adonisjs/core/services/logger';
|
||||||
|
import { DateTime } from 'luxon';
|
||||||
|
import { getDomain } from '#app/utils/utility-functions';
|
||||||
|
|
||||||
|
/**
 * Ace command `update:datacite`.
 *
 * Walks over published datasets that own a DOI identifier and re-registers
 * their metadata with DataCite when the dataset was modified more than a
 * minute after the DOI record was last modified.
 *
 * Flags:
 *  - `-p` : restrict the run to a single publish_id
 *  - `-f` : update every selected dataset, skipping the date comparison
 *  - `-d` : dry run; only log what would be updated
 *  - `-s` : stats mode; print details for datasets needing an update, change nothing
 */
export default class UpdateDatacite extends BaseCommand {
    static commandName = 'update:datacite';
    static description = 'Update DataCite DOI records for published datasets';

    public static needsApplication = true;

    @flags.number({ alias: 'p', description: 'Specific publish_id to update' })
    public publish_id: number;

    @flags.boolean({ alias: 'f', description: 'Force update all records regardless of modification date' })
    public force: boolean = false;

    @flags.boolean({ alias: 'd', description: 'Dry run - show what would be updated without making changes' })
    public dryRun: boolean = false;

    @flags.boolean({ alias: 's', description: 'Show detailed stats for each dataset that needs updating' })
    public stats: boolean = false;

    //example: node ace update:datacite -p 123 --force --dry-run

    public static options: CommandOptions = {
        startApp: true, // Whether to boot the application before running the command
        stayAlive: false, // Whether to keep the process alive after the command has executed
    };

    /**
     * Command entry point: selects the datasets, decides per dataset whether
     * an update is needed, and then reports, simulates, or performs the update
     * depending on the flags. A failure on one dataset is logged and counted;
     * it does not abort the remaining datasets.
     */
    async run() {
        logger.info('Starting DataCite update process...');

        const prefix = env.get('DATACITE_PREFIX', '');
        const base_domain = env.get('BASE_DOMAIN', '');
        const apiUrl = env.get('DATACITE_API_URL', 'https://api.datacite.org');

        if (!prefix || !base_domain) {
            logger.error('Missing DATACITE_PREFIX or BASE_DOMAIN environment variables');
            return;
        }

        logger.info(`Using DataCite API: ${apiUrl}`);

        const datasets = await this.getDatasets();
        logger.info(`Found ${datasets.length} datasets to process`);

        let updated = 0;
        let skipped = 0;
        let errors = 0;

        for (const dataset of datasets) {
            try {
                // --force short-circuits the (remote) modification-date comparison.
                const shouldUpdate = this.force || (await this.shouldUpdateDataset(dataset));

                if (this.stats) {
                    // Stats mode: show detailed information for datasets that need updating
                    if (shouldUpdate) {
                        await this.showDatasetStats(dataset);
                        updated++;
                    } else {
                        skipped++;
                    }
                    continue;
                }

                if (!shouldUpdate) {
                    logger.info(`Dataset ${dataset.publish_id}: Up to date, skipping`);
                    skipped++;
                    continue;
                }

                if (this.dryRun) {
                    logger.info(`Dataset ${dataset.publish_id}: Would update DataCite record (dry run)`);
                    updated++;
                    continue;
                }

                await this.updateDataciteRecord(dataset, prefix, base_domain);
                logger.info(`Dataset ${dataset.publish_id}: Successfully updated DataCite record`);
                updated++;
            } catch (error) {
                logger.error(`Dataset ${dataset.publish_id}: Failed to update - ${this.describeError(error)}`);
                errors++;
            }
        }

        if (this.stats) {
            logger.info(`\nDataCite Stats Summary: ${updated} datasets need updating, ${skipped} are up to date`);
        } else {
            logger.info(`DataCite update completed. Updated: ${updated}, Skipped: ${skipped}, Errors: ${errors}`);
        }
    }

    /**
     * Loads the published datasets that have a DOI identifier, with their
     * `identifier` and `xmlCache` relations preloaded. When `publish_id` was
     * given on the command line, the result is limited to that dataset.
     */
    private async getDatasets(): Promise<Dataset[]> {
        const query = Dataset.query()
            .preload('identifier')
            .preload('xmlCache')
            .where('server_state', 'published')
            .whereHas('identifier', (identifierQuery) => {
                identifierQuery.where('type', 'doi');
            });

        if (this.publish_id) {
            query.where('publish_id', this.publish_id);
        }

        return await query.exec();
    }

    /**
     * Returns the dataset's `identifier` relation, loading it first when it
     * has not been preloaded. May still be missing after the load.
     */
    private async resolveDoiIdentifier(dataset: Dataset) {
        if (!dataset.identifier) {
            await dataset.load('identifier');
        }
        return dataset.identifier;
    }

    /**
     * Turns a caught value into printable text. A thrown value is not
     * guaranteed to be an Error, so `.message` cannot be relied on.
     */
    private describeError(error: unknown): string {
        return error instanceof Error ? error.message : String(error);
    }

    /**
     * Decides whether the DataCite record of a dataset is stale.
     *
     * @returns `true` when the dataset has no modification date, or when it was
     * modified more than 60 seconds after the DOI's last modification at
     * DataCite; `false` when there is no DOI identifier, the dataset date lies
     * in the future, the DOI date cannot be obtained, or any error occurs.
     */
    private async shouldUpdateDataset(dataset: Dataset): Promise<boolean> {
        try {
            const doiIdentifier = await this.resolveDoiIdentifier(dataset);

            if (!doiIdentifier || doiIdentifier.type !== 'doi') {
                return false;
            }

            const datasetModified = dataset.server_date_modified;
            const now = DateTime.now();

            if (!datasetModified) {
                return true; // Update if modification date is missing
            }

            if (datasetModified > now) {
                return false; // Skip invalid future dates
            }

            // Check DataCite DOI modification date
            const doiClient = new DoiClient();
            const doiLastModified = await doiClient.getDoiLastModified(doiIdentifier.value);

            if (!doiLastModified) {
                return false; // Do not update if we can't get DOI info
            }

            const doiModified = DateTime.fromJSDate(doiLastModified);

            // Tolerance threshold (60 seconds = 1 minute): prevents unnecessary
            // updates for minor timestamp differences.
            const toleranceSeconds = 60;

            // Positive only when the dataset is newer than the DOI record, so a
            // single comparison covers both "older/equal" and "within tolerance".
            const secondsNewer = datasetModified.diff(doiModified, 'seconds').seconds;
            return secondsNewer > toleranceSeconds;
        } catch (error) {
            // Still best-effort (no update when the status can't be determined),
            // but leave a trace so lookup failures are not mistaken for "up to date".
            logger.warn(`Dataset ${dataset.publish_id}: Could not determine DataCite status - ${this.describeError(error)}`);
            return false;
        }
    }

    /**
     * Regenerates the DataCite XML for a dataset and registers it, together
     * with the landing page URL, under the dataset's existing DOI.
     *
     * @param dataset - dataset whose DOI record is updated
     * @param prefix - DataCite prefix from the environment (currently unused here:
     *                 the stored DOI value is used as-is)
     * @param base_domain - base domain used to build the landing page URL
     * @throws DoiClientException when DataCite answers with a status other than 201
     *         (exceptions of this type raised further down are passed through unchanged)
     * @throws Error for every other failure, prefixed with "Failed to update DataCite record"
     */
    private async updateDataciteRecord(dataset: Dataset, prefix: string, base_domain: string): Promise<void> {
        try {
            // Get the DOI identifier (HasOne relationship)
            const doiIdentifier = await this.resolveDoiIdentifier(dataset);

            if (!doiIdentifier || doiIdentifier.type !== 'doi') {
                throw new Error('No DOI identifier found for dataset');
            }

            // Generate XML metadata
            const xmlMeta = (await Index.getDoiRegisterString(dataset)) as string;
            if (!xmlMeta) {
                throw new Error('Failed to generate XML metadata');
            }

            // Construct DOI value and landing page URL
            const doiValue = doiIdentifier.value; // Use existing DOI value
            const landingPageUrl = `https://doi.${getDomain(base_domain)}/${doiValue}`;

            // Update DataCite record
            const doiClient = new DoiClient();
            const dataciteResponse = await doiClient.registerDoi(doiValue, xmlMeta, landingPageUrl);

            if (dataciteResponse?.status === 201) {
                // Intentionally disabled: touching the dataset's modification date
                // and refreshing the search index after a DataCite update.
                // // Update dataset modification date
                // dataset.server_date_modified = DateTime.now();
                // await dataset.save();

                // // Update search index
                // const index_name = 'tethys-records';
                // await Index.indexDocument(dataset, index_name);

                // Only the DataCite record is touched here, so the message must not
                // claim that the search index was updated as well.
                logger.debug(`Dataset ${dataset.publish_id}: DataCite record updated successfully`);
            } else {
                throw new DoiClientException(
                    dataciteResponse?.status || 500,
                    `Unexpected DataCite response code: ${dataciteResponse?.status}`,
                );
            }
        } catch (error) {
            if (error instanceof DoiClientException) {
                throw error;
            }
            throw new Error(`Failed to update DataCite record: ${this.describeError(error)}`);
        }
    }

    /**
     * Shows detailed statistics for a dataset that needs updating.
     *
     * Prints the DOI value, the DOI status stored in the database, the DOI
     * state and last-modified date reported by DataCite, and the dataset's own
     * modification date. On failure a reduced box with the error message is
     * printed instead; the method never throws.
     */
    private async showDatasetStats(dataset: Dataset): Promise<void> {
        try {
            const doiIdentifier = await this.resolveDoiIdentifier(dataset);

            const hasDoi = Boolean(doiIdentifier?.value);
            const doiValue = doiIdentifier?.value || 'N/A';
            const doiStatus = doiIdentifier?.status || 'N/A';
            const datasetModified = dataset.server_date_modified;

            // Get DOI info from DataCite - but only when there is a real DOI to ask about.
            // Previously the 'N/A' placeholder itself was sent to DataCite as if it were a DOI.
            const doiClient = new DoiClient();
            const doiLastModified = hasDoi ? await doiClient.getDoiLastModified(doiValue) : null;
            const doiState = hasDoi ? await doiClient.getDoiState(doiValue) : null;

            console.log(`
┌─ Dataset ${dataset.publish_id} ───────────────────────────────────────────────────────────────
│ DOI Value: ${doiValue}
│ DOI Status (DB): ${doiStatus}
│ DOI State (DataCite): ${doiState || 'Unknown'}
│ Dataset Modified: ${datasetModified ? datasetModified.toISO() : 'N/A'}
│ DOI Modified: ${doiLastModified ? DateTime.fromJSDate(doiLastModified).toISO() : 'N/A'}
│ Needs Update: YES - Dataset newer than DOI
└─────────────────────────────────────────────────────────────────────────────────────────────`);
        } catch (error) {
            console.log(`
┌─ Dataset ${dataset.publish_id} ───────────────────────────────────────────────────────────────
│ DOI Value: ${dataset.identifier?.value || 'N/A'}
│ Error: ${this.describeError(error)}
│ Needs Update: YES - Error checking status
└─────────────────────────────────────────────────────────────────────────────────────────────`);
        }
    }
}
|
||||||
|
|
@ -1,47 +1,61 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
set -e
|
||||||
|
|
||||||
# # Run freshclam to update virus definitions
|
echo "Starting ClamAV services..."
|
||||||
# freshclam
|
|
||||||
|
|
||||||
# # Sleep for a few seconds to give ClamAV time to start
|
|
||||||
# sleep 5
|
|
||||||
|
|
||||||
# # Start the ClamAV daemon
|
# Try to download database if missing
|
||||||
# /etc/init.d/clamav-daemon start
|
if [ ! "$(ls -A /var/lib/clamav 2>/dev/null)" ]; then
|
||||||
|
echo "Downloading ClamAV database (this may take a while)..."
|
||||||
|
|
||||||
# bootstrap clam av service and clam av database updater
|
# Simple freshclam run without complex config
|
||||||
set -m
|
if sg clamav -c "freshclam --datadir=/var/lib/clamav --quiet"; then
|
||||||
|
echo "✓ Database downloaded successfully"
|
||||||
function process_file() {
|
else
|
||||||
if [[ ! -z "$1" ]]; then
|
echo "⚠ Database download failed - creating minimal setup"
|
||||||
local SETTING_LIST=$(echo "$1" | tr ',' '\n' | grep "^[A-Za-z][A-Za-z]*=.*$")
|
# Create a dummy file so clamd doesn't immediately fail
|
||||||
local SETTING
|
sg clamav -c "touch /var/lib/clamav/.dummy"
|
||||||
|
|
||||||
for SETTING in ${SETTING_LIST}; do
|
|
||||||
# Remove any existing copies of this setting. We do this here so that
|
|
||||||
# settings with multiple values (e.g. ExtraDatabase) can still be added
|
|
||||||
# multiple times below
|
|
||||||
local KEY=${SETTING%%=*}
|
|
||||||
sed -i $2 -e "/^${KEY} /d"
|
|
||||||
done
|
|
||||||
|
|
||||||
for SETTING in ${SETTING_LIST}; do
|
|
||||||
# Split on first '='
|
|
||||||
local KEY=${SETTING%%=*}
|
|
||||||
local VALUE=${SETTING#*=}
|
|
||||||
echo "${KEY} ${VALUE}" >> "$2"
|
|
||||||
done
|
|
||||||
fi
|
fi
|
||||||
}
|
fi
|
||||||
|
|
||||||
# process_file "${CLAMD_SETTINGS_CSV}" /etc/clamav/clamd.conf
|
# Start freshclam daemon for automatic updates
|
||||||
# process_file "${FRESHCLAM_SETTINGS_CSV}" /etc/clamav/freshclam.conf
|
echo "Starting freshclam daemon for automatic updates..."
|
||||||
|
sg clamav -c "freshclam -d" &
|
||||||
|
|
||||||
# start in background
|
|
||||||
freshclam -d &
|
|
||||||
# /etc/init.d/clamav-freshclam start &
|
# /etc/init.d/clamav-freshclam start &
|
||||||
clamd
|
# Start clamd in background
|
||||||
|
# clamd is started in the background below; dumb-init supervises the main application instead
|
||||||
# /etc/init.d/clamav-daemon start &
|
# /etc/init.d/clamav-daemon start &
|
||||||
|
|
||||||
# change back to CMD of dockerfile
|
# Start clamd daemon in background using sg
|
||||||
exec "$@"
|
echo "Starting ClamAV daemon..."
|
||||||
|
# sg clamav -c "clamd" &
|
||||||
|
# Use sg to run clamd with proper group permissions
|
||||||
|
# sg clamav -c "clamd" &
|
||||||
|
sg clamav -c "clamd --config-file=/etc/clamav/clamd.conf" &
|
||||||
|
|
||||||
|
|
||||||
|
# Give services time to start
|
||||||
|
echo "Waiting for services to initialize..."
|
||||||
|
sleep 8
|
||||||
|
|
||||||
|
# simple check
|
||||||
|
if pgrep clamd > /dev/null; then
|
||||||
|
echo "✓ ClamAV daemon is running"
|
||||||
|
else
|
||||||
|
echo "⚠ ClamAV daemon status uncertain, but continuing..."
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Check if freshclam daemon is running
|
||||||
|
if pgrep freshclam > /dev/null; then
|
||||||
|
echo "✓ Freshclam daemon is running"
|
||||||
|
else
|
||||||
|
echo "⚠ Freshclam daemon status uncertain, but continuing..."
|
||||||
|
fi
|
||||||
|
|
||||||
|
# # change back to CMD of dockerfile
|
||||||
|
# exec "$@"
|
||||||
|
|
||||||
|
echo "✓ ClamAV setup complete"
|
||||||
|
echo "Starting main application..."
|
||||||
|
exec dumb-init -- "$@"
|
||||||
278
docs/commands/index-datasets.md
Normal file
278
docs/commands/index-datasets.md
Normal file
|
|
@ -0,0 +1,278 @@
|
||||||
|
# Dataset Indexing Command
|
||||||
|
|
||||||
|
AdonisJS Ace command for indexing and synchronizing published datasets with OpenSearch for search functionality.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The `index:datasets` command processes published datasets and creates/updates corresponding search index documents in OpenSearch. It intelligently compares modification timestamps to only re-index datasets when necessary, optimizing performance while maintaining search index accuracy.
|
||||||
|
|
||||||
|
## Command Syntax
|
||||||
|
|
||||||
|
```bash
|
||||||
|
node ace index:datasets [options]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Options
|
||||||
|
|
||||||
|
| Flag | Alias | Description |
|
||||||
|
|------|-------|-------------|
|
||||||
|
| `--publish_id <number>` | `-p` | Index a specific dataset by publish_id |
|
||||||
|
|
||||||
|
## Usage Examples
|
||||||
|
|
||||||
|
### Basic Operations
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Index all published datasets that have been modified since last indexing
|
||||||
|
node ace index:datasets
|
||||||
|
|
||||||
|
# Index a specific dataset by publish_id
|
||||||
|
node ace index:datasets --publish_id 231
|
||||||
|
node ace index:datasets -p 231
|
||||||
|
```
|
||||||
|
|
||||||
|
## How It Works
|
||||||
|
|
||||||
|
### 1. **Dataset Selection**
|
||||||
|
The command processes datasets that meet these criteria:
|
||||||
|
- `server_state = 'published'` - Only published datasets
|
||||||
|
- Has preloaded `xmlCache` relationship for metadata transformation
|
||||||
|
- Optionally filtered by specific `publish_id`
|
||||||
|
|
||||||
|
### 2. **Smart Update Detection**
|
||||||
|
For each dataset, the command:
|
||||||
|
- Checks if the dataset exists in the OpenSearch index
|
||||||
|
- Compares `server_date_modified` timestamps
|
||||||
|
- Only re-indexes if the dataset is newer than the indexed version
|
||||||
|
|
||||||
|
### 3. **Document Processing**
|
||||||
|
The indexing process involves:
|
||||||
|
1. **XML Generation**: Creates structured XML from dataset metadata
|
||||||
|
2. **XSLT Transformation**: Converts XML to JSON using Saxon-JS processor
|
||||||
|
3. **Index Update**: Updates or creates the document in OpenSearch
|
||||||
|
4. **Logging**: Records success/failure for each operation
|
||||||
|
|
||||||
|
## Index Structure
|
||||||
|
|
||||||
|
### Index Configuration
|
||||||
|
- **Index Name**: `tethys-records`
|
||||||
|
- **Document ID**: Dataset `publish_id`
|
||||||
|
- **Refresh**: `true` (immediate availability)
|
||||||
|
|
||||||
|
### Document Fields
|
||||||
|
The indexed documents contain:
|
||||||
|
- **Metadata Fields**: Title, description, authors, keywords
|
||||||
|
- **Identifiers**: DOI, publish_id, and other identifiers
|
||||||
|
- **Temporal Data**: Publication dates, coverage periods
|
||||||
|
- **Geographic Data**: Spatial coverage information
|
||||||
|
- **Technical Details**: Data formats, access information
|
||||||
|
- **Timestamps**: Creation and modification dates
|
||||||
|
|
||||||
|
## Example Output
|
||||||
|
|
||||||
|
### Successful Run
|
||||||
|
```bash
|
||||||
|
node ace index:datasets
|
||||||
|
```
|
||||||
|
```
|
||||||
|
Found 150 published datasets to process
|
||||||
|
Dataset with publish_id 231 successfully indexed
|
||||||
|
Dataset with publish_id 245 is up to date, skipping indexing
|
||||||
|
Dataset with publish_id 267 successfully indexed
|
||||||
|
An error occurred while indexing dataset with publish_id 289. Error: Invalid XML metadata
|
||||||
|
Processing completed: 148 indexed, 1 skipped, 1 error
|
||||||
|
```
|
||||||
|
|
||||||
|
### Specific Dataset
|
||||||
|
```bash
|
||||||
|
node ace index:datasets --publish_id 231
|
||||||
|
```
|
||||||
|
```
|
||||||
|
Found 1 published dataset to process
|
||||||
|
Dataset with publish_id 231 successfully indexed
|
||||||
|
Processing completed: 1 indexed, 0 skipped, 0 errors
|
||||||
|
```
|
||||||
|
|
||||||
|
## Update Logic
|
||||||
|
|
||||||
|
The command uses intelligent indexing to avoid unnecessary processing:
|
||||||
|
|
||||||
|
| Condition | Action | Reason |
|
||||||
|
|-----------|--------|--------|
|
||||||
|
| Dataset not in index | ✅ Index | New dataset needs indexing |
|
||||||
|
| Dataset newer than indexed version | ✅ Re-index | Dataset has been updated |
|
||||||
|
| Dataset same/older than indexed version | ❌ Skip | Already up to date |
|
||||||
|
| OpenSearch document check fails | ✅ Index | Better safe than sorry |
|
||||||
|
| Invalid XML metadata | ❌ Skip + Log Error | Cannot process invalid data |
|
||||||
|
|
||||||
|
### Timestamp Comparison
|
||||||
|
```typescript
|
||||||
|
// Example comparison logic
|
||||||
|
const existingModified = DateTime.fromMillis(Number(existingDoc.server_date_modified) * 1000);
|
||||||
|
const currentModified = dataset.server_date_modified;
|
||||||
|
|
||||||
|
if (currentModified <= existingModified) {
|
||||||
|
// Skip - already up to date
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Proceed with indexing
|
||||||
|
```
|
||||||
|
|
||||||
|
## XML Transformation Process
|
||||||
|
|
||||||
|
### 1. **XML Generation**
|
||||||
|
```xml
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||||
|
<root>
|
||||||
|
<Dataset>
|
||||||
|
<!-- Dataset metadata fields -->
|
||||||
|
<title>Research Dataset Title</title>
|
||||||
|
<description>Dataset description...</description>
|
||||||
|
<!-- Additional metadata -->
|
||||||
|
</Dataset>
|
||||||
|
</root>
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. **XSLT Processing**
|
||||||
|
The command uses Saxon-JS with a compiled stylesheet (`solr.sef.json`) to transform XML to JSON:
|
||||||
|
```javascript
|
||||||
|
const result = await SaxonJS.transform({
|
||||||
|
stylesheetText: proc,
|
||||||
|
destination: 'serialized',
|
||||||
|
sourceText: xmlString,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. **Final JSON Document**
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "231",
|
||||||
|
"title": "Research Dataset Title",
|
||||||
|
"description": "Dataset description...",
|
||||||
|
"authors": ["Author Name"],
|
||||||
|
"server_date_modified": 1634567890,
|
||||||
|
"publish_id": 231
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration Requirements
|
||||||
|
|
||||||
|
### Environment Variables
|
||||||
|
```bash
|
||||||
|
# OpenSearch Configuration
|
||||||
|
OPENSEARCH_HOST=localhost:9200
|
||||||
|
|
||||||
|
# For production:
|
||||||
|
# OPENSEARCH_HOST=your-opensearch-cluster:9200
|
||||||
|
```
|
||||||
|
|
||||||
|
### Required Files
|
||||||
|
- **XSLT Stylesheet**: `public/assets2/solr.sef.json` - Compiled Saxon-JS stylesheet for XML transformation
|
||||||
|
|
||||||
|
### Database Relationships
|
||||||
|
The command expects these model relationships:
|
||||||
|
```typescript
|
||||||
|
// Dataset model must have:
|
||||||
|
@hasOne(() => XmlCache, { foreignKey: 'dataset_id' })
|
||||||
|
public xmlCache: HasOne<typeof XmlCache>
|
||||||
|
```
|
||||||
|
|
||||||
|
## Error Handling
|
||||||
|
|
||||||
|
The command handles various error scenarios gracefully:
|
||||||
|
|
||||||
|
### Common Errors and Solutions
|
||||||
|
|
||||||
|
| Error | Cause | Solution |
|
||||||
|
|-------|-------|----------|
|
||||||
|
| `XSLT transformation failed` | Invalid XML or missing stylesheet | Check XML structure and stylesheet path |
|
||||||
|
| `OpenSearch connection error` | Service unavailable | Verify OpenSearch is running and accessible |
|
||||||
|
| `JSON parse error` | Malformed transformation result | Check XSLT stylesheet output format |
|
||||||
|
| `Missing xmlCache relationship` | Data integrity issue | Ensure xmlCache exists for dataset |
|
||||||
|
|
||||||
|
### Error Logging
|
||||||
|
```bash
|
||||||
|
# Typical error log entry
|
||||||
|
An error occurred while indexing dataset with publish_id 231.
|
||||||
|
Error: XSLT transformation failed: Invalid XML structure at line 15
|
||||||
|
```
|
||||||
|
|
||||||
|
## Performance Considerations
|
||||||
|
|
||||||
|
### Batch Processing
|
||||||
|
- Processes datasets sequentially to avoid overwhelming OpenSearch
|
||||||
|
- Each dataset is committed individually for reliability
|
||||||
|
- Failed indexing of one dataset doesn't stop processing others
|
||||||
|
|
||||||
|
### Resource Usage
|
||||||
|
- **Memory**: XML/JSON transformations require temporary memory
|
||||||
|
- **Network**: OpenSearch API calls for each dataset
|
||||||
|
- **CPU**: XSLT transformations are CPU-intensive
|
||||||
|
|
||||||
|
### Optimization Tips
|
||||||
|
```bash
|
||||||
|
# Index only recently modified datasets (run regularly)
|
||||||
|
node ace index:datasets
|
||||||
|
|
||||||
|
# Index specific datasets when needed
|
||||||
|
node ace index:datasets --publish_id 231
|
||||||
|
|
||||||
|
# Consider running during off-peak hours for large batches
|
||||||
|
```
|
||||||
|
|
||||||
|
## Integration with Other Systems
|
||||||
|
|
||||||
|
### Search Functionality
|
||||||
|
The indexed documents power:
|
||||||
|
- **Dataset Search**: Full-text search across metadata
|
||||||
|
- **Faceted Browsing**: Filter by authors, keywords, dates
|
||||||
|
- **Geographic Search**: Spatial query capabilities
|
||||||
|
- **Auto-complete**: Suggest dataset titles and keywords
|
||||||
|
|
||||||
|
### Related Commands
|
||||||
|
- [`update:datacite`](update-datacite.md) - Often run after indexing to sync DOI metadata
|
||||||
|
- **Database migrations** - May require re-indexing after schema changes
|
||||||
|
|
||||||
|
### API Integration
|
||||||
|
The indexed data is consumed by:
|
||||||
|
- **Search API**: `/api/search` endpoints
|
||||||
|
- **Browse API**: `/api/datasets` with filtering
|
||||||
|
- **Recommendations**: Related dataset suggestions
|
||||||
|
|
||||||
|
## Monitoring and Maintenance
|
||||||
|
|
||||||
|
### Regular Tasks
|
||||||
|
```bash
|
||||||
|
# Daily indexing (recommended cron job)
|
||||||
|
0 2 * * * cd /path/to/project && node ace index:datasets
|
||||||
|
|
||||||
|
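# Weekly full re-index (if needed) - NOTE: `--force` is not listed under Options above; confirm the command supports it before scheduling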
|
||||||
|
0 3 * * 0 cd /path/to/project && node ace index:datasets --force
|
||||||
|
```
|
||||||
|
|
||||||
|
### Health Checks
|
||||||
|
- Monitor OpenSearch cluster health
|
||||||
|
- Check for failed indexing operations in logs
|
||||||
|
- Verify search functionality is working
|
||||||
|
- Compare dataset counts between database and index
|
||||||
|
|
||||||
|
### Troubleshooting
|
||||||
|
```bash
|
||||||
|
# Check specific dataset indexing
|
||||||
|
node ace index:datasets --publish_id 231
|
||||||
|
|
||||||
|
# Verify OpenSearch connectivity
|
||||||
|
curl -X GET "localhost:9200/_cluster/health"
|
||||||
|
|
||||||
|
# Check index statistics
|
||||||
|
curl -X GET "localhost:9200/tethys-records/_stats"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Best Practices
|
||||||
|
|
||||||
|
1. **Regular Scheduling**: Run the command regularly (daily) to keep the search index current
|
||||||
|
2. **Monitor Logs**: Watch for transformation errors or OpenSearch issues
|
||||||
|
3. **Backup Strategy**: Include OpenSearch indices in backup procedures
|
||||||
|
4. **Resource Management**: Monitor OpenSearch cluster resources during bulk operations
|
||||||
|
5. **Testing**: Verify search functionality after major indexing operations
|
||||||
|
6. **Coordination**: Run indexing before DataCite updates when both are needed
|
||||||
216
docs/commands/update-datacite.md
Normal file
216
docs/commands/update-datacite.md
Normal file
|
|
@ -0,0 +1,216 @@
|
||||||
|
# DataCite Update Command
|
||||||
|
|
||||||
|
AdonisJS Ace command for updating DataCite DOI records for published datasets.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The `update:datacite` command synchronizes your local dataset metadata with DataCite DOI records. It compares modification dates so that a record is updated only when necessary, which avoids unnecessary API calls and keeps the data consistent.
|
||||||
|
|
||||||
|
## Command Syntax
|
||||||
|
|
||||||
|
```bash
|
||||||
|
node ace update:datacite [options]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Options
|
||||||
|
|
||||||
|
| Flag | Alias | Description |
|
||||||
|
|------|-------|-------------|
|
||||||
|
| `--publish_id <number>` | `-p` | Update a specific dataset by publish_id |
|
||||||
|
| `--force` | `-f` | Force update all records regardless of modification date |
|
||||||
|
| `--dry-run` | `-d` | Preview what would be updated without making changes |
|
||||||
|
| `--stats` | `-s` | Show detailed statistics for datasets that need updating |
|
||||||
|
|
||||||
|
## Usage Examples
|
||||||
|
|
||||||
|
### Basic Operations
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Update all datasets that have been modified since their DOI was last updated
|
||||||
|
node ace update:datacite
|
||||||
|
|
||||||
|
# Update a specific dataset
|
||||||
|
node ace update:datacite --publish_id 231
|
||||||
|
node ace update:datacite -p 231
|
||||||
|
|
||||||
|
# Force update all datasets with DOIs (ignores modification dates)
|
||||||
|
node ace update:datacite --force
|
||||||
|
```
|
||||||
|
|
||||||
|
### Preview and Analysis
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Preview what would be updated (dry run)
|
||||||
|
node ace update:datacite --dry-run
|
||||||
|
|
||||||
|
# Show detailed statistics for datasets that need updating
|
||||||
|
node ace update:datacite --stats
|
||||||
|
|
||||||
|
# Show stats for a specific dataset
|
||||||
|
node ace update:datacite --stats --publish_id 231
|
||||||
|
```
|
||||||
|
|
||||||
|
### Combined Options
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Dry run for a specific dataset
|
||||||
|
node ace update:datacite --dry-run --publish_id 231
|
||||||
|
|
||||||
|
# Show stats for all datasets (including up-to-date ones)
|
||||||
|
node ace update:datacite --stats --force
|
||||||
|
```
|
||||||
|
|
||||||
|
## Command Modes
|
||||||
|
|
||||||
|
### 1. **Normal Mode** (Default)
|
||||||
|
Updates DataCite records for datasets that have been modified since their DOI was last updated.
|
||||||
|
|
||||||
|
**Example Output:**
|
||||||
|
```
|
||||||
|
Using DataCite API: https://api.test.datacite.org
|
||||||
|
Found 50 datasets to process
|
||||||
|
Dataset 231: Successfully updated DataCite record
|
||||||
|
Dataset 245: Up to date, skipping
|
||||||
|
Dataset 267: Successfully updated DataCite record
|
||||||
|
DataCite update completed. Updated: 15, Skipped: 35, Errors: 0
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. **Dry Run Mode** (`--dry-run`)
|
||||||
|
Shows what would be updated without making any changes to DataCite.
|
||||||
|
|
||||||
|
**Use Case:** Preview updates before running the actual command.
|
||||||
|
|
||||||
|
**Example Output:**
|
||||||
|
```
|
||||||
|
Dataset 231: Would update DataCite record (dry run)
|
||||||
|
Dataset 267: Would update DataCite record (dry run)
|
||||||
|
Dataset 245: Up to date, skipping
|
||||||
|
DataCite update completed. Updated: 2, Skipped: 1, Errors: 0
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. **Stats Mode** (`--stats`)
|
||||||
|
Shows detailed information for each dataset that needs updating, including why it needs updating.
|
||||||
|
|
||||||
|
**Use Case:** Debug synchronization issues, monitor dataset/DOI status, generate reports.
|
||||||
|
|
||||||
|
**Example Output:**
|
||||||
|
```
|
||||||
|
┌─ Dataset 231 ─────────────────────────────────────────────────────────
|
||||||
|
│ DOI Value: 10.21388/tethys.231
|
||||||
|
│ DOI Status (DB): findable
|
||||||
|
│ DOI State (DataCite): findable
|
||||||
|
│ Dataset Modified: 2024-09-15T10:30:00.000Z
|
||||||
|
│ DOI Modified: 2024-09-10T08:15:00.000Z
|
||||||
|
│ Needs Update: YES - Dataset newer than DOI
|
||||||
|
└───────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
┌─ Dataset 267 ─────────────────────────────────────────────────────────
|
||||||
|
│ DOI Value: 10.21388/tethys.267
|
||||||
|
│ DOI Status (DB): findable
|
||||||
|
│ DOI State (DataCite): findable
|
||||||
|
│ Dataset Modified: 2024-09-18T14:20:00.000Z
|
||||||
|
│ DOI Modified: 2024-09-16T12:45:00.000Z
|
||||||
|
│ Needs Update: YES - Dataset newer than DOI
|
||||||
|
└───────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
DataCite Stats Summary: 2 datasets need updating, 48 are up to date
|
||||||
|
```
|
||||||
|
|
||||||
|
## Update Logic
|
||||||
|
|
||||||
|
The command uses intelligent update detection:
|
||||||
|
|
||||||
|
1. **Compares modification dates**: Dataset `server_date_modified` vs DOI last modification date from DataCite
|
||||||
|
2. **Validates data integrity**: Checks for missing or future dates
|
||||||
|
3. **Handles API failures gracefully**: Updates anyway if DataCite info can't be retrieved
|
||||||
|
4. **Uses dual API approach**: DataCite REST API (primary) with MDS API fallback
|
||||||
|
|
||||||
|
### When Updates Happen
|
||||||
|
|
||||||
|
| Condition | Action | Reason |
|
||||||
|
|-----------|--------|--------|
|
||||||
|
| Dataset modified > DOI modified | ✅ Update | Dataset has newer changes |
|
||||||
|
| Dataset modified ≤ DOI modified | ❌ Skip | DOI is up to date |
|
||||||
|
| Dataset date in future | ❌ Skip | Invalid data, needs investigation |
|
||||||
|
| Dataset date missing | ✅ Update | Can't determine staleness |
|
||||||
|
| DataCite API error | ✅ Update | Better safe than sorry |
|
||||||
|
| `--force` flag used | ✅ Update | Override all logic |
|
||||||
|
|
||||||
|
## Environment Configuration
|
||||||
|
|
||||||
|
Required environment variables:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# DataCite Credentials
|
||||||
|
DATACITE_USERNAME=your_username
|
||||||
|
DATACITE_PASSWORD=your_password
|
||||||
|
|
||||||
|
# API Endpoints (environment-specific: set only ONE of the two pairs below)
|
||||||
|
DATACITE_API_URL=https://api.test.datacite.org # Test environment
|
||||||
|
DATACITE_SERVICE_URL=https://mds.test.datacite.org # Test MDS
|
||||||
|
|
||||||
|
DATACITE_API_URL=https://api.datacite.org # Production
|
||||||
|
DATACITE_SERVICE_URL=https://mds.datacite.org # Production MDS
|
||||||
|
|
||||||
|
# Project Configuration
|
||||||
|
DATACITE_PREFIX=10.21388 # Your DOI prefix
|
||||||
|
BASE_DOMAIN=tethys.at # Your domain
|
||||||
|
```
|
||||||
|
|
||||||
|
## Error Handling
|
||||||
|
|
||||||
|
The command handles various error scenarios:
|
||||||
|
|
||||||
|
- **Invalid modification dates**: Logs errors but continues processing other datasets
|
||||||
|
- **DataCite API failures**: Falls back to MDS API, then to safe update
|
||||||
|
- **Missing DOI identifiers**: Skips datasets without DOI identifiers
|
||||||
|
- **Network issues**: Continues with next dataset after logging error
|
||||||
|
|
||||||
|
## Integration
|
||||||
|
|
||||||
|
The command integrates with:
|
||||||
|
|
||||||
|
- **Dataset Model**: Uses `server_date_modified` for change detection
|
||||||
|
- **DatasetIdentifier Model**: Reads DOI values and status
|
||||||
|
- **OpenSearch Index**: Updates search index after DataCite update
|
||||||
|
- **DoiClient**: Handles all DataCite API interactions
|
||||||
|
|
||||||
|
## Common Workflows
|
||||||
|
|
||||||
|
### Daily Maintenance
|
||||||
|
```bash
|
||||||
|
# Update any datasets modified since their DOI was last updated
|
||||||
|
node ace update:datacite
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pre-Deployment Check
|
||||||
|
```bash
|
||||||
|
# Check what would be updated before deployment
|
||||||
|
node ace update:datacite --dry-run
|
||||||
|
```
|
||||||
|
|
||||||
|
### Debugging Sync Issues
|
||||||
|
```bash
|
||||||
|
# Investigate why specific dataset isn't syncing
|
||||||
|
node ace update:datacite --stats --publish_id 231
|
||||||
|
```
|
||||||
|
|
||||||
|
### Full Resync
|
||||||
|
```bash
|
||||||
|
# Force update all DOI records (use with caution)
|
||||||
|
node ace update:datacite --force
|
||||||
|
```
|
||||||
|
|
||||||
|
### Monitoring Report
|
||||||
|
```bash
|
||||||
|
# Generate sync status report
|
||||||
|
node ace update:datacite --stats > datacite-sync-report.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
## Best Practices
|
||||||
|
|
||||||
|
1. **Regular Updates**: Run daily or after bulk dataset modifications
|
||||||
|
2. **Test First**: Use `--dry-run` or `--stats` before bulk operations
|
||||||
|
3. **Monitor Logs**: Check for data integrity warnings
|
||||||
|
4. **Environment Separation**: Use correct API URLs for test vs production
|
||||||
|
5. **Rate Limiting**: The command handles DataCite rate limits automatically
|
||||||
222
freshclam.conf
222
freshclam.conf
|
|
@ -1,229 +1,47 @@
|
||||||
##
|
##
|
||||||
## Example config file for freshclam
|
## Container-optimized freshclam configuration
|
||||||
## Please read the freshclam.conf(5) manual before editing this file.
|
|
||||||
##
|
##
|
||||||
|
|
||||||
|
# Database directory
|
||||||
# Comment or remove the line below.
|
|
||||||
|
|
||||||
# Path to the database directory.
|
|
||||||
# WARNING: It must match clamd.conf's directive!
|
|
||||||
# Default: hardcoded (depends on installation options)
|
|
||||||
DatabaseDirectory /var/lib/clamav
|
DatabaseDirectory /var/lib/clamav
|
||||||
|
|
||||||
# Path to the log file (make sure it has proper permissions)
|
# Log to stdout for container logging (disabled by default; uncomment UpdateLogFile to enable)
|
||||||
# Default: disabled
|
|
||||||
# UpdateLogFile /dev/stdout
|
# UpdateLogFile /dev/stdout
|
||||||
|
|
||||||
# Maximum size of the log file.
|
# Basic logging settings
|
||||||
# Value of 0 disables the limit.
|
|
||||||
# You may use 'M' or 'm' for megabytes (1M = 1m = 1048576 bytes)
|
|
||||||
# and 'K' or 'k' for kilobytes (1K = 1k = 1024 bytes).
|
|
||||||
# in bytes just don't use modifiers. If LogFileMaxSize is enabled,
|
|
||||||
# log rotation (the LogRotate option) will always be enabled.
|
|
||||||
# Default: 1M
|
|
||||||
#LogFileMaxSize 2M
|
|
||||||
|
|
||||||
# Log time with each message.
|
|
||||||
# Default: no
|
|
||||||
LogTime yes
|
LogTime yes
|
||||||
|
LogVerbose no
|
||||||
# Enable verbose logging.
|
|
||||||
# Default: no
|
|
||||||
LogVerbose yes
|
|
||||||
|
|
||||||
# Use system logger (can work together with UpdateLogFile).
|
|
||||||
# Default: no
|
|
||||||
LogSyslog no
|
LogSyslog no
|
||||||
|
|
||||||
# Specify the type of syslog messages - please refer to 'man syslog'
|
# PID file location
|
||||||
# for facility names.
|
|
||||||
# Default: LOG_LOCAL6
|
|
||||||
#LogFacility LOG_MAIL
|
|
||||||
|
|
||||||
# Enable log rotation. Always enabled when LogFileMaxSize is enabled.
|
|
||||||
# Default: no
|
|
||||||
#LogRotate yes
|
|
||||||
|
|
||||||
# This option allows you to save the process identifier of the daemon
|
|
||||||
# Default: disabled
|
|
||||||
#PidFile /var/run/freshclam.pid
|
|
||||||
PidFile /var/run/clamav/freshclam.pid
|
PidFile /var/run/clamav/freshclam.pid
|
||||||
|
|
||||||
# By default when started freshclam drops privileges and switches to the
|
# Database owner
|
||||||
# "clamav" user. This directive allows you to change the database owner.
|
DatabaseOwner clamav
|
||||||
# Default: clamav (may depend on installation options)
|
|
||||||
DatabaseOwner node
|
|
||||||
|
|
||||||
# Use DNS to verify virus database version. Freshclam uses DNS TXT records
|
# Mirror settings for Austria
|
||||||
# to verify database and software versions. With this directive you can change
|
|
||||||
# the database verification domain.
|
|
||||||
# WARNING: Do not touch it unless you're configuring freshclam to use your
|
|
||||||
# own database verification domain.
|
|
||||||
# Default: current.cvd.clamav.net
|
|
||||||
#DNSDatabaseInfo current.cvd.clamav.net
|
|
||||||
|
|
||||||
# Uncomment the following line and replace XY with your country
|
|
||||||
# code. See http://www.iana.org/cctld/cctld-whois.htm for the full list.
|
|
||||||
# You can use db.XY.ipv6.clamav.net for IPv6 connections.
|
|
||||||
DatabaseMirror db.at.clamav.net
|
DatabaseMirror db.at.clamav.net
|
||||||
|
|
||||||
# database.clamav.net is a round-robin record which points to our most
|
|
||||||
# reliable mirrors. It's used as a fall back in case db.XY.clamav.net is
|
|
||||||
# not working. DO NOT TOUCH the following line unless you know what you
|
|
||||||
# are doing.
|
|
||||||
DatabaseMirror database.clamav.net
|
DatabaseMirror database.clamav.net
|
||||||
|
|
||||||
# How many attempts to make before giving up.
|
|
||||||
# Default: 3 (per mirror)
|
|
||||||
#MaxAttempts 5
|
|
||||||
|
|
||||||
# With this option you can control scripted updates. It's highly recommended
|
# With this option you can control scripted updates. It's highly recommended
|
||||||
# to keep it enabled.
|
# to keep it enabled.
|
||||||
# Default: yes
|
# Default: yes
|
||||||
#ScriptedUpdates yes
|
# Update settings
|
||||||
|
ScriptedUpdates yes
|
||||||
# By default freshclam will keep the local databases (.cld) uncompressed to
|
|
||||||
# make their handling faster. With this option you can enable the compression;
|
|
||||||
# the change will take effect with the next database update.
|
|
||||||
# Default: no
|
|
||||||
#CompressLocalDatabase no
|
|
||||||
|
|
||||||
# With this option you can provide custom sources (http:// or file://) for
|
|
||||||
# database files. This option can be used multiple times.
|
|
||||||
# Default: no custom URLs
|
|
||||||
#DatabaseCustomURL http://myserver.com/mysigs.ndb
|
|
||||||
#DatabaseCustomURL file:///mnt/nfs/local.hdb
|
|
||||||
|
|
||||||
# This option allows you to easily point freshclam to private mirrors.
|
|
||||||
# If PrivateMirror is set, freshclam does not attempt to use DNS
|
|
||||||
# to determine whether its databases are out-of-date, instead it will
|
|
||||||
# use the If-Modified-Since request or directly check the headers of the
|
|
||||||
# remote database files. For each database, freshclam first attempts
|
|
||||||
# to download the CLD file. If that fails, it tries to download the
|
|
||||||
# CVD file. This option overrides DatabaseMirror, DNSDatabaseInfo
|
|
||||||
# and ScriptedUpdates. It can be used multiple times to provide
|
|
||||||
# fall-back mirrors.
|
|
||||||
# Default: disabled
|
|
||||||
#PrivateMirror mirror1.mynetwork.com
|
|
||||||
#PrivateMirror mirror2.mynetwork.com
|
|
||||||
|
|
||||||
# Number of database checks per day.
|
# Number of database checks per day.
|
||||||
# Default: 12 (every two hours)
|
# Default: 12 (every two hours)
|
||||||
#Checks 24
|
Checks 12
|
||||||
|
|
||||||
# Proxy settings
|
# Fork into the background ("Foreground yes" would keep freshclam in the foreground, e.g. as a container's main process)
|
||||||
# Default: disabled
|
|
||||||
#HTTPProxyServer myproxy.com
|
|
||||||
#HTTPProxyPort 1234
|
|
||||||
#HTTPProxyUsername myusername
|
|
||||||
#HTTPProxyPassword mypass
|
|
||||||
|
|
||||||
# If your servers are behind a firewall/proxy which applies User-Agent
|
|
||||||
# filtering you can use this option to force the use of a different
|
|
||||||
# User-Agent header.
|
|
||||||
# Default: clamav/version_number
|
|
||||||
#HTTPUserAgent SomeUserAgentIdString
|
|
||||||
|
|
||||||
# Use aaa.bbb.ccc.ddd as client address for downloading databases. Useful for
|
|
||||||
# multi-homed systems.
|
|
||||||
# Default: Use OS'es default outgoing IP address.
|
|
||||||
#LocalIPAddress aaa.bbb.ccc.ddd
|
|
||||||
|
|
||||||
# Send the RELOAD command to clamd.
|
|
||||||
# Default: no
|
|
||||||
#NotifyClamd /path/to/clamd.conf
|
|
||||||
|
|
||||||
# Run command after successful database update.
|
|
||||||
# Default: disabled
|
|
||||||
#OnUpdateExecute command
|
|
||||||
|
|
||||||
# Run command when database update process fails.
|
|
||||||
# Default: disabled
|
|
||||||
#OnErrorExecute command
|
|
||||||
|
|
||||||
# Run command when freshclam reports outdated version.
|
|
||||||
# In the command string %v will be replaced by the new version number.
|
|
||||||
# Default: disabled
|
|
||||||
#OnOutdatedExecute command
|
|
||||||
|
|
||||||
# Don't fork into background.
|
|
||||||
# Default: no
|
|
||||||
Foreground no
|
Foreground no
|
||||||
|
|
||||||
# Enable debug messages in libclamav.
|
# Connection timeouts
|
||||||
# Default: no
|
ConnectTimeout 60
|
||||||
#Debug yes
|
ReceiveTimeout 60
|
||||||
|
|
||||||
# Timeout in seconds when connecting to database server.
|
# Test databases before using them
|
||||||
# Default: 30
|
TestDatabases yes
|
||||||
#ConnectTimeout 60
|
|
||||||
|
|
||||||
# Timeout in seconds when reading from database server.
|
# Enable bytecode signatures
|
||||||
# Default: 30
|
Bytecode yes
|
||||||
#ReceiveTimeout 60
|
|
||||||
|
|
||||||
# With this option enabled, freshclam will attempt to load new
|
|
||||||
# databases into memory to make sure they are properly handled
|
|
||||||
# by libclamav before replacing the old ones.
|
|
||||||
# Default: yes
|
|
||||||
#TestDatabases yes
|
|
||||||
|
|
||||||
# When enabled freshclam will submit statistics to the ClamAV Project about
|
|
||||||
# the latest virus detections in your environment. The ClamAV maintainers
|
|
||||||
# will then use this data to determine what types of malware are the most
|
|
||||||
# detected in the field and in what geographic area they are.
|
|
||||||
# Freshclam will connect to clamd in order to get recent statistics.
|
|
||||||
# Default: no
|
|
||||||
#SubmitDetectionStats /path/to/clamd.conf
|
|
||||||
|
|
||||||
# Country of origin of malware/detection statistics (for statistical
|
|
||||||
# purposes only). The statistics collector at ClamAV.net will look up
|
|
||||||
# your IP address to determine the geographical origin of the malware
|
|
||||||
# reported by your installation. If this installation is mainly used to
|
|
||||||
# scan data which comes from a different location, please enable this
|
|
||||||
# option and enter a two-letter code (see http://www.iana.org/domains/root/db/)
|
|
||||||
# of the country of origin.
|
|
||||||
# Default: disabled
|
|
||||||
#DetectionStatsCountry country-code
|
|
||||||
|
|
||||||
# This option enables support for our "Personal Statistics" service.
|
|
||||||
# When this option is enabled, the information on malware detected by
|
|
||||||
# your clamd installation is made available to you through our website.
|
|
||||||
# To get your HostID, log on http://www.stats.clamav.net and add a new
|
|
||||||
# host to your host list. Once you have the HostID, uncomment this option
|
|
||||||
# and paste the HostID here. As soon as your freshclam starts submitting
|
|
||||||
# information to our stats collecting service, you will be able to view
|
|
||||||
# the statistics of this clamd installation by logging into
|
|
||||||
# http://www.stats.clamav.net with the same credentials you used to
|
|
||||||
# generate the HostID. For more information refer to:
|
|
||||||
# http://www.clamav.net/documentation.html#cctts
|
|
||||||
# This feature requires SubmitDetectionStats to be enabled.
|
|
||||||
# Default: disabled
|
|
||||||
#DetectionStatsHostID unique-id
|
|
||||||
|
|
||||||
# This option enables support for Google Safe Browsing. When activated for
|
|
||||||
# the first time, freshclam will download a new database file (safebrowsing.cvd)
|
|
||||||
# which will be automatically loaded by clamd and clamscan during the next
|
|
||||||
# reload, provided that the heuristic phishing detection is turned on. This
|
|
||||||
# database includes information about websites that may be phishing sites or
|
|
||||||
# possible sources of malware. When using this option, it's mandatory to run
|
|
||||||
# freshclam at least every 30 minutes.
|
|
||||||
# Freshclam uses the ClamAV's mirror infrastructure to distribute the
|
|
||||||
# database and its updates but all the contents are provided under Google's
|
|
||||||
# terms of use. See http://www.google.com/transparencyreport/safebrowsing
|
|
||||||
# and http://www.clamav.net/documentation.html#safebrowsing
|
|
||||||
# for more information.
|
|
||||||
# Default: disabled
|
|
||||||
#SafeBrowsing yes
|
|
||||||
|
|
||||||
# This option enables downloading of bytecode.cvd, which includes additional
|
|
||||||
# detection mechanisms and improvements to the ClamAV engine.
|
|
||||||
# Default: enabled
|
|
||||||
#Bytecode yes
|
|
||||||
|
|
||||||
# Download an additional 3rd party signature database distributed through
|
|
||||||
# the ClamAV mirrors.
|
|
||||||
# This option can be used multiple times.
|
|
||||||
#ExtraDatabase dbname1
|
|
||||||
#ExtraDatabase dbname2
|
|
||||||
1044
package-lock.json
generated
1044
package-lock.json
generated
File diff suppressed because it is too large
Load diff
|
|
@ -59,7 +59,6 @@
|
||||||
"hot-hook": "^0.4.0",
|
"hot-hook": "^0.4.0",
|
||||||
"numeral": "^2.0.6",
|
"numeral": "^2.0.6",
|
||||||
"pinia": "^3.0.2",
|
"pinia": "^3.0.2",
|
||||||
"pino-pretty": "^13.0.0",
|
|
||||||
"postcss-loader": "^8.1.1",
|
"postcss-loader": "^8.1.1",
|
||||||
"prettier": "^3.4.2",
|
"prettier": "^3.4.2",
|
||||||
"supertest": "^6.3.3",
|
"supertest": "^6.3.3",
|
||||||
|
|
@ -115,7 +114,9 @@
|
||||||
"node-2fa": "^2.0.3",
|
"node-2fa": "^2.0.3",
|
||||||
"node-exceptions": "^4.0.1",
|
"node-exceptions": "^4.0.1",
|
||||||
"notiwind": "^2.0.0",
|
"notiwind": "^2.0.0",
|
||||||
|
"p-limit": "^7.1.1",
|
||||||
"pg": "^8.9.0",
|
"pg": "^8.9.0",
|
||||||
|
"pino-pretty": "^13.0.0",
|
||||||
"qrcode": "^1.5.3",
|
"qrcode": "^1.5.3",
|
||||||
"redis": "^5.0.0",
|
"redis": "^5.0.0",
|
||||||
"reflect-metadata": "^0.2.1",
|
"reflect-metadata": "^0.2.1",
|
||||||
|
|
|
||||||
|
|
@ -6,17 +6,16 @@
|
||||||
import type { ApplicationService } from '@adonisjs/core/types';
|
import type { ApplicationService } from '@adonisjs/core/types';
|
||||||
import vine, { symbols, BaseLiteralType, Vine } from '@vinejs/vine';
|
import vine, { symbols, BaseLiteralType, Vine } from '@vinejs/vine';
|
||||||
import type { FieldContext, FieldOptions } from '@vinejs/vine/types';
|
import type { FieldContext, FieldOptions } from '@vinejs/vine/types';
|
||||||
// import type { MultipartFile, FileValidationOptions } from '@adonisjs/bodyparser/types';
|
|
||||||
import type { MultipartFile } from '@adonisjs/core/bodyparser';
|
import type { MultipartFile } from '@adonisjs/core/bodyparser';
|
||||||
import type { FileValidationOptions } from '@adonisjs/core/types/bodyparser';
|
import type { FileValidationOptions } from '@adonisjs/core/types/bodyparser';
|
||||||
import { Request, RequestValidator } from '@adonisjs/core/http';
|
import { Request, RequestValidator } from '@adonisjs/core/http';
|
||||||
import MimeType from '#models/mime_type';
|
import MimeType from '#models/mime_type';
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validation options accepted by the "file" rule
|
* Validation options accepted by the "file" rule
|
||||||
*/
|
*/
|
||||||
export type FileRuleValidationOptions = Partial<FileValidationOptions> | ((field: FieldContext) => Partial<FileValidationOptions>);
|
export type FileRuleValidationOptions = Partial<FileValidationOptions> | ((field: FieldContext) => Partial<FileValidationOptions>);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extend VineJS
|
* Extend VineJS
|
||||||
*/
|
*/
|
||||||
|
|
@ -25,6 +24,7 @@ declare module '@vinejs/vine' {
|
||||||
myfile(options?: FileRuleValidationOptions): VineMultipartFile;
|
myfile(options?: FileRuleValidationOptions): VineMultipartFile;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extend HTTP request class
|
* Extend HTTP request class
|
||||||
*/
|
*/
|
||||||
|
|
@ -36,19 +36,54 @@ declare module '@adonisjs/core/http' {
|
||||||
* Checks if the value is an instance of multipart file
|
* Checks if the value is an instance of multipart file
|
||||||
* from bodyparser.
|
* from bodyparser.
|
||||||
*/
|
*/
|
||||||
export function isBodyParserFile(file: MultipartFile | unknown): boolean {
|
export function isBodyParserFile(file: MultipartFile | unknown): file is MultipartFile {
|
||||||
return !!(file && typeof file === 'object' && 'isMultipartFile' in file);
|
return !!(file && typeof file === 'object' && 'isMultipartFile' in file);
|
||||||
}
|
}
|
||||||
export async function getEnabledExtensions() {
|
|
||||||
const enabledExtensions = await MimeType.query().select('file_extension').where('enabled', true).exec();
|
|
||||||
const extensions = enabledExtensions
|
|
||||||
.map((extension) => {
|
|
||||||
return extension.file_extension.split('|');
|
|
||||||
})
|
|
||||||
.flat();
|
|
||||||
|
|
||||||
return extensions;
|
/**
|
||||||
|
* Cache for enabled extensions to reduce database queries
|
||||||
|
*/
|
||||||
|
let extensionsCache: string[] | null = null;
|
||||||
|
let cacheTimestamp = 0;
|
||||||
|
const CACHE_DURATION = 5 * 60 * 1000; // 5 minutes
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get enabled extensions with caching
|
||||||
|
*/
|
||||||
|
export async function getEnabledExtensions(): Promise<string[]> {
|
||||||
|
const now = Date.now();
|
||||||
|
|
||||||
|
if (extensionsCache && now - cacheTimestamp < CACHE_DURATION) {
|
||||||
|
return extensionsCache;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const enabledExtensions = await MimeType.query().select('file_extension').where('enabled', true).exec();
|
||||||
|
|
||||||
|
const extensions = enabledExtensions
|
||||||
|
.map((extension) => extension.file_extension.split('|'))
|
||||||
|
.flat()
|
||||||
|
.map((ext) => ext.toLowerCase().trim())
|
||||||
|
.filter((ext) => ext.length > 0);
|
||||||
|
|
||||||
|
extensionsCache = [...new Set(extensions)]; // Remove duplicates
|
||||||
|
cacheTimestamp = now;
|
||||||
|
|
||||||
|
return extensionsCache;
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error fetching enabled extensions:', error);
|
||||||
|
return extensionsCache || [];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clear extensions cache
|
||||||
|
*/
|
||||||
|
export function clearExtensionsCache(): void {
|
||||||
|
extensionsCache = null;
|
||||||
|
cacheTimestamp = 0;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* VineJS validation rule that validates the file to be an
|
* VineJS validation rule that validates the file to be an
|
||||||
* instance of BodyParser MultipartFile class.
|
* instance of BodyParser MultipartFile class.
|
||||||
|
|
@ -65,6 +100,7 @@ const isMultipartFile = vine.createRule(async (file: MultipartFile | unknown, op
|
||||||
// At this point, you can use type assertion to explicitly tell TypeScript that file is of type MultipartFile
|
// At this point, you can use type assertion to explicitly tell TypeScript that file is of type MultipartFile
|
||||||
const validatedFile = file as MultipartFile;
|
const validatedFile = file as MultipartFile;
|
||||||
const validationOptions = typeof options === 'function' ? options(field) : options;
|
const validationOptions = typeof options === 'function' ? options(field) : options;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set size when it's defined in the options and missing
|
* Set size when it's defined in the options and missing
|
||||||
* on the file instance
|
* on the file instance
|
||||||
|
|
@ -72,30 +108,29 @@ const isMultipartFile = vine.createRule(async (file: MultipartFile | unknown, op
|
||||||
if (validatedFile.sizeLimit === undefined && validationOptions.size) {
|
if (validatedFile.sizeLimit === undefined && validationOptions.size) {
|
||||||
validatedFile.sizeLimit = validationOptions.size;
|
validatedFile.sizeLimit = validationOptions.size;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set extensions when it's defined in the options and missing
|
* Set extensions when it's defined in the options and missing
|
||||||
* on the file instance
|
* on the file instance
|
||||||
*/
|
*/
|
||||||
// if (validatedFile.allowedExtensions === undefined && validationOptions.extnames) {
|
if (validatedFile.allowedExtensions === undefined) {
|
||||||
// validatedFile.allowedExtensions = validationOptions.extnames;
|
if (validationOptions.extnames !== undefined) {
|
||||||
// }
|
validatedFile.allowedExtensions = validationOptions.extnames;
|
||||||
if (validatedFile.allowedExtensions === undefined && validationOptions.extnames !== undefined) {
|
} else {
|
||||||
validatedFile.allowedExtensions = validationOptions.extnames; // await getEnabledExtensions();
|
|
||||||
} else if (validatedFile.allowedExtensions === undefined && validationOptions.extnames === undefined) {
|
|
||||||
validatedFile.allowedExtensions = await getEnabledExtensions();
|
validatedFile.allowedExtensions = await getEnabledExtensions();
|
||||||
}
|
}
|
||||||
/**
|
}
|
||||||
* wieder löschen
|
|
||||||
* Set extensions when it's defined in the options and missing
|
|
||||||
* on the file instance
|
|
||||||
*/
|
|
||||||
// if (file.clientNameSizeLimit === undefined && validationOptions.clientNameSizeLimit) {
|
|
||||||
// file.clientNameSizeLimit = validationOptions.clientNameSizeLimit;
|
|
||||||
// }
|
|
||||||
/**
|
/**
|
||||||
* Validate file
|
* Validate file
|
||||||
*/
|
*/
|
||||||
|
try {
|
||||||
validatedFile.validate();
|
validatedFile.validate();
|
||||||
|
} catch (error) {
|
||||||
|
field.report(`File validation failed: ${error.message}`, 'file.validation_error', field, validationOptions);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Report errors
|
* Report errors
|
||||||
*/
|
*/
|
||||||
|
|
@ -107,36 +142,37 @@ const isMultipartFile = vine.createRule(async (file: MultipartFile | unknown, op
|
||||||
const MULTIPART_FILE: typeof symbols.SUBTYPE = symbols.SUBTYPE;
|
const MULTIPART_FILE: typeof symbols.SUBTYPE = symbols.SUBTYPE;
|
||||||
|
|
||||||
export class VineMultipartFile extends BaseLiteralType<MultipartFile, MultipartFile, MultipartFile> {
|
export class VineMultipartFile extends BaseLiteralType<MultipartFile, MultipartFile, MultipartFile> {
|
||||||
|
|
||||||
[MULTIPART_FILE]: string;
|
[MULTIPART_FILE]: string;
|
||||||
// constructor(validationOptions?: FileRuleValidationOptions, options?: FieldOptions) {
|
public validationOptions?: FileRuleValidationOptions;
|
||||||
// super(options, [isMultipartFile(validationOptions || {})]);
|
|
||||||
// this.validationOptions = validationOptions;
|
|
||||||
// this.#private = true;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// clone(): this {
|
|
||||||
// return new VineMultipartFile(this.validationOptions, this.cloneOptions()) as this;
|
|
||||||
// }
|
|
||||||
// #private;
|
|
||||||
// constructor(validationOptions?: FileRuleValidationOptions, options?: FieldOptions, validations?: Validation<any>[]);
|
|
||||||
// clone(): this;
|
|
||||||
|
|
||||||
public validationOptions;
|
|
||||||
// extnames: (18) ['gpkg', 'htm', 'html', 'csv', 'txt', 'asc', 'c', 'cc', 'h', 'srt', 'tiff', 'pdf', 'png', 'zip', 'jpg', 'jpeg', 'jpe', 'xlsx']
|
// extnames: (18) ['gpkg', 'htm', 'html', 'csv', 'txt', 'asc', 'c', 'cc', 'h', 'srt', 'tiff', 'pdf', 'png', 'zip', 'jpg', 'jpeg', 'jpe', 'xlsx']
|
||||||
// size: '512mb'
|
// size: '512mb'
|
||||||
|
|
||||||
// public constructor(validationOptions?: FileRuleValidationOptions, options?: FieldOptions, validations?: Validation<any>[]) {
|
|
||||||
public constructor(validationOptions?: FileRuleValidationOptions, options?: FieldOptions) {
|
public constructor(validationOptions?: FileRuleValidationOptions, options?: FieldOptions) {
|
||||||
// super(options, validations);
|
|
||||||
super(options, [isMultipartFile(validationOptions || {})]);
|
super(options, [isMultipartFile(validationOptions || {})]);
|
||||||
this.validationOptions = validationOptions;
|
this.validationOptions = validationOptions;
|
||||||
}
|
}
|
||||||
|
|
||||||
public clone(): any {
|
public clone(): any {
|
||||||
// return new VineMultipartFile(this.validationOptions, this.cloneOptions(), this.cloneValidations());
|
|
||||||
return new VineMultipartFile(this.validationOptions, this.cloneOptions());
|
return new VineMultipartFile(this.validationOptions, this.cloneOptions());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set maximum file size
|
||||||
|
*/
|
||||||
|
public maxSize(size: string | number): this {
|
||||||
|
const newOptions = { ...this.validationOptions, size };
|
||||||
|
return new VineMultipartFile(newOptions, this.cloneOptions()) as this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set allowed extensions
|
||||||
|
*/
|
||||||
|
public extensions(extnames: string[]): this {
|
||||||
|
const newOptions = { ...this.validationOptions, extnames };
|
||||||
|
return new VineMultipartFile(newOptions, this.cloneOptions()) as this;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export default class VinejsProvider {
|
export default class VinejsProvider {
|
||||||
|
|
@ -155,13 +191,8 @@ export default class VinejsProvider {
|
||||||
/**
|
/**
|
||||||
* The container bindings have booted
|
* The container bindings have booted
|
||||||
*/
|
*/
|
||||||
|
|
||||||
boot(): void {
|
boot(): void {
|
||||||
// VineString.macro('translatedLanguage', function (this: VineString, options: Options) {
|
Vine.macro('myfile', function (this: Vine, options?: FileRuleValidationOptions) {
|
||||||
// return this.use(translatedLanguageRule(options));
|
|
||||||
// });
|
|
||||||
|
|
||||||
Vine.macro('myfile', function (this: Vine, options) {
|
|
||||||
return new VineMultipartFile(options);
|
return new VineMultipartFile(options);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
@ -175,6 +206,41 @@ export default class VinejsProvider {
|
||||||
}
|
}
|
||||||
return new RequestValidator(this.ctx).validateUsing(...args);
|
return new RequestValidator(this.ctx).validateUsing(...args);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Ensure the MIME validation, file scan and file length macros are loaded
|
||||||
|
this.loadMimeValidationMacros();
|
||||||
|
this.loadFileScanMacros();
|
||||||
|
this.loadFileLengthMacros();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load MIME validation macros - called during boot to ensure they're available
|
||||||
|
*/
|
||||||
|
private async loadMimeValidationMacros(): Promise<void> {
|
||||||
|
try {
|
||||||
|
// Dynamically import the MIME validation rule to ensure macros are registered
|
||||||
|
await import('#start/rules/allowed_extensions_mimetypes');
|
||||||
|
} catch (error) {
|
||||||
|
console.warn('Could not load MIME validation macros:', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private async loadFileScanMacros(): Promise<void> {
|
||||||
|
try {
|
||||||
|
// Dynamically import the file scan rule to ensure macros are registered
|
||||||
|
await import('#start/rules/file_scan');
|
||||||
|
} catch (error) {
|
||||||
|
console.warn('Could not load MIME validation macros:', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private async loadFileLengthMacros(): Promise<void> {
|
||||||
|
try {
|
||||||
|
// Dynamically import the file length rule to ensure macros are registered
|
||||||
|
await import('#start/rules/file_length');
|
||||||
|
} catch (error) {
|
||||||
|
console.warn('Could not load MIME validation macros:', error);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -190,5 +256,7 @@ export default class VinejsProvider {
|
||||||
/**
|
/**
|
||||||
* Preparing to shutdown the app
|
* Preparing to shutdown the app
|
||||||
*/
|
*/
|
||||||
async shutdown() {}
|
async shutdown() {
|
||||||
|
clearExtensionsCache();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
174
readme.md
174
readme.md
|
|
@ -11,6 +11,8 @@ Welcome to the Tethys Research Repository Backend System! This is the backend co
|
||||||
- [Configuration](#configuration)
|
- [Configuration](#configuration)
|
||||||
- [Database](#database)
|
- [Database](#database)
|
||||||
- [API Documentation](#api-documentation)
|
- [API Documentation](#api-documentation)
|
||||||
|
- [Commands](#commands)
|
||||||
|
- [Documentation](#documentation)
|
||||||
- [Contributing](#contributing)
|
- [Contributing](#contributing)
|
||||||
- [License](#license)
|
- [License](#license)
|
||||||
|
|
||||||
|
|
@ -29,5 +31,175 @@ Before you begin, ensure you have met the following requirements:
|
||||||
1. Clone this repository:
|
1. Clone this repository:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git clone https://gitea.geologie.ac.at/geolba/tethys.backend.git
|
git clone https://gitea.geologie.ac.at/geolba/tethys.backend.git
|
||||||
|
cd tethys.backend
|
||||||
```
|
```
|
||||||
|
|
||||||
|
2. Install dependencies:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm install
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Configure environment variables (see [Configuration](#configuration))
|
||||||
|
|
||||||
|
4. Run database migrations:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
node ace migration:run
|
||||||
|
```
|
||||||
|
|
||||||
|
5. Start the development server:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
npm run dev
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
The Tethys Backend provides RESTful APIs for managing research datasets, user authentication, DOI registration, and search functionality.
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
Copy the `.env.example` file to `.env` and configure the following variables:
|
||||||
|
|
||||||
|
### Database Configuration
|
||||||
|
```bash
|
||||||
|
DB_CONNECTION=pg
|
||||||
|
DB_HOST=localhost
|
||||||
|
DB_PORT=5432
|
||||||
|
DB_USER=your_username
|
||||||
|
DB_PASSWORD=your_password
|
||||||
|
DB_DATABASE=tethys_db
|
||||||
|
```
|
||||||
|
|
||||||
|
### DataCite Configuration
|
||||||
|
```bash
|
||||||
|
# DataCite Credentials
|
||||||
|
DATACITE_USERNAME=your_datacite_username
|
||||||
|
DATACITE_PASSWORD=your_datacite_password
|
||||||
|
DATACITE_PREFIX=10.21388
|
||||||
|
|
||||||
|
# Environment-specific API endpoints
|
||||||
|
DATACITE_API_URL=https://api.test.datacite.org # Test environment
|
||||||
|
DATACITE_SERVICE_URL=https://mds.test.datacite.org # Test MDS
|
||||||
|
|
||||||
|
# For production:
|
||||||
|
# DATACITE_API_URL=https://api.datacite.org
|
||||||
|
# DATACITE_SERVICE_URL=https://mds.datacite.org
|
||||||
|
```
|
||||||
|
|
||||||
|
### OpenSearch Configuration
|
||||||
|
```bash
|
||||||
|
OPENSEARCH_HOST=localhost:9200
|
||||||
|
```
|
||||||
|
|
||||||
|
### Application Configuration
|
||||||
|
```bash
|
||||||
|
BASE_DOMAIN=tethys.at
|
||||||
|
APP_KEY=your_app_key
|
||||||
|
```
|
||||||
|
|
||||||
|
## Database
|
||||||
|
|
||||||
|
The system uses PostgreSQL with Lucid ORM. Key models include:
|
||||||
|
|
||||||
|
- **Dataset**: Research dataset metadata
|
||||||
|
- **DatasetIdentifier**: DOI and other identifiers for datasets
|
||||||
|
- **User**: User management and authentication
|
||||||
|
- **XmlCache**: Cached XML metadata
|
||||||
|
|
||||||
|
Run migrations and seeders:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Run migrations
|
||||||
|
node ace migration:run
|
||||||
|
|
||||||
|
# Run seeders (if available)
|
||||||
|
node ace db:seed
|
||||||
|
```
|
||||||
|
|
||||||
|
## API Documentation
|
||||||
|
|
||||||
|
API endpoints are available for:
|
||||||
|
|
||||||
|
- Dataset management (`/api/datasets`)
|
||||||
|
- User authentication (`/api/auth`)
|
||||||
|
- DOI registration (`/api/doi`)
|
||||||
|
- Search functionality (`/api/search`)
|
||||||
|
|
||||||
|
*Detailed API documentation can be found in the `/docs/api` directory.*
|
||||||
|
|
||||||
|
## Commands
|
||||||
|
|
||||||
|
The system includes several Ace commands for maintenance and data management:
|
||||||
|
|
||||||
|
### Dataset Indexing
|
||||||
|
```bash
|
||||||
|
# Index all published datasets to OpenSearch
|
||||||
|
node ace index:datasets
|
||||||
|
|
||||||
|
# Index a specific dataset
|
||||||
|
node ace index:datasets --publish_id 123
|
||||||
|
```
|
||||||
|
|
||||||
|
### DataCite DOI Management
|
||||||
|
```bash
|
||||||
|
# Update DataCite records for modified datasets
|
||||||
|
node ace update:datacite
|
||||||
|
|
||||||
|
# Show detailed statistics for datasets needing updates
|
||||||
|
node ace update:datacite --stats
|
||||||
|
|
||||||
|
# Preview what would be updated (dry run)
|
||||||
|
node ace update:datacite --dry-run
|
||||||
|
|
||||||
|
# Force update all DOI records
|
||||||
|
node ace update:datacite --force
|
||||||
|
|
||||||
|
# Update a specific dataset
|
||||||
|
node ace update:datacite --publish_id 123
|
||||||
|
```
|
||||||
|
|
||||||
|
*For detailed command documentation, see the [Commands Documentation](docs/commands/)*
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
|
||||||
|
Comprehensive documentation is available in the `/docs` directory:
|
||||||
|
|
||||||
|
- **[Commands Documentation](docs/commands/)** - Detailed guides for Ace commands
|
||||||
|
- [DataCite Update Command](docs/commands/update-datacite.md) - DOI synchronization and management
|
||||||
|
- [Dataset Indexing Command](docs/commands/index-datasets.md) - Search index management
|
||||||
|
- **[API Documentation](docs/api/)** - REST API endpoints and usage
|
||||||
|
- **[Deployment Guide](docs/deployment/)** - Production deployment instructions
|
||||||
|
- **[Configuration Guide](docs/configuration/)** - Environment setup and configuration options
|
||||||
|
|
||||||
|
## Contributing
|
||||||
|
|
||||||
|
1. Fork the repository
|
||||||
|
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
|
||||||
|
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
|
||||||
|
4. Push to the branch (`git push origin feature/amazing-feature`)
|
||||||
|
5. Open a Pull Request
|
||||||
|
|
||||||
|
### Development Guidelines
|
||||||
|
|
||||||
|
- Follow the existing code style and conventions
|
||||||
|
- Write tests for new features
|
||||||
|
- Update documentation for any API changes
|
||||||
|
- Ensure all commands and migrations work properly
|
||||||
|
|
||||||
|
### Testing Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Run tests
|
||||||
|
npm test
|
||||||
|
|
||||||
|
# Test specific commands
|
||||||
|
node ace update:datacite --dry-run --publish_id 123
|
||||||
|
node ace index:datasets --publish_id 123
|
||||||
|
```
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
This project is licensed under the [MIT License](LICENSE).
|
||||||
|
|
@ -163,7 +163,7 @@
|
||||||
</div>
|
</div>
|
||||||
</FormControl>
|
</FormControl>
|
||||||
</FormField>
|
</FormField>
|
||||||
<FormField label="Main Title Language*" help="required: main abstract language"
|
<FormField label="Main Description Language*" help="required: main abstract language"
|
||||||
:class="{ 'text-red-400': form.errors['descriptions.0.language'] }"
|
:class="{ 'text-red-400': form.errors['descriptions.0.language'] }"
|
||||||
class="w-full ml-1 flex-1">
|
class="w-full ml-1 flex-1">
|
||||||
<FormControl required v-model="form.descriptions[0].language" type="text"
|
<FormControl required v-model="form.descriptions[0].language" type="text"
|
||||||
|
|
|
||||||
|
|
@ -725,7 +725,7 @@ Removes a selected keyword
|
||||||
</div>
|
</div>
|
||||||
</FormControl>
|
</FormControl>
|
||||||
</FormField>
|
</FormField>
|
||||||
<FormField label="Main Title Language*" help="required: main abstract language"
|
<FormField label="Main Description Language*" help="required: main abstract language"
|
||||||
:class="{ 'text-red-400': form.errors['descriptions.0.language'] }"
|
:class="{ 'text-red-400': form.errors['descriptions.0.language'] }"
|
||||||
class="w-full mx-2 flex-1">
|
class="w-full mx-2 flex-1">
|
||||||
<FormControl required v-model="form.descriptions[0].language" type="text"
|
<FormControl required v-model="form.descriptions[0].language" type="text"
|
||||||
|
|
|
||||||
|
|
@ -272,7 +272,7 @@
|
||||||
</FormControl>
|
</FormControl>
|
||||||
</FormField>
|
</FormField>
|
||||||
<FormField
|
<FormField
|
||||||
label="Main Title Language*"
|
label="Main Description Language*"
|
||||||
help="required: main abstract language"
|
help="required: main abstract language"
|
||||||
:class="{ 'text-red-400': form.errors['descriptions.0.language'] }"
|
:class="{ 'text-red-400': form.errors['descriptions.0.language'] }"
|
||||||
class="w-full ml-1 flex-1"
|
class="w-full ml-1 flex-1"
|
||||||
|
|
|
||||||
|
|
@ -8,14 +8,24 @@ import AvatarController from '#controllers/Http/Api/AvatarController';
|
||||||
import UserController from '#controllers/Http/Api/UserController';
|
import UserController from '#controllers/Http/Api/UserController';
|
||||||
import CollectionsController from '#controllers/Http/Api/collections_controller';
|
import CollectionsController from '#controllers/Http/Api/collections_controller';
|
||||||
import { middleware } from '../kernel.js';
|
import { middleware } from '../kernel.js';
|
||||||
// API
|
|
||||||
|
// Clean DOI URL routes (no /api prefix)
|
||||||
|
|
||||||
|
// API routes with /api prefix
|
||||||
router
|
router
|
||||||
.group(() => {
|
.group(() => {
|
||||||
router.get('clients', [UserController, 'getSubmitters']).as('client.index').use(middleware.auth());;
|
router.get('clients', [UserController, 'getSubmitters']).as('client.index').use(middleware.auth());
|
||||||
router.get('authors', [AuthorsController, 'index']).as('author.index').use(middleware.auth());;
|
router.get('authors', [AuthorsController, 'index']).as('author.index').use(middleware.auth());
|
||||||
router.get('datasets', [DatasetController, 'index']).as('dataset.index');
|
router.get('datasets', [DatasetController, 'index']).as('dataset.index');
|
||||||
router.get('persons', [AuthorsController, 'persons']).as('author.persons');
|
router.get('persons', [AuthorsController, 'persons']).as('author.persons');
|
||||||
|
|
||||||
|
// This should come BEFORE any other routes that might conflict
|
||||||
|
router
|
||||||
|
.get('/dataset/:prefix/:value', [DatasetController, 'findByIdentifier'])
|
||||||
|
.where('prefix', /^10\.\d+$/) // Match DOI prefix pattern (10.xxxx)
|
||||||
|
.where('value', /^[a-zA-Z0-9._-]+\.[0-9]+(?:\.[0-9]+)*$/) // Match DOI suffix pattern
|
||||||
|
.as('dataset.findByIdentifier');
|
||||||
|
|
||||||
router.get('/dataset', [DatasetController, 'findAll']).as('dataset.findAll');
|
router.get('/dataset', [DatasetController, 'findAll']).as('dataset.findAll');
|
||||||
router.get('/dataset/:publish_id', [DatasetController, 'findOne']).as('dataset.findOne');
|
router.get('/dataset/:publish_id', [DatasetController, 'findOne']).as('dataset.findOne');
|
||||||
router.get('/sitelinks/:year', [HomeController, 'findDocumentsPerYear']);
|
router.get('/sitelinks/:year', [HomeController, 'findDocumentsPerYear']);
|
||||||
|
|
@ -35,7 +45,7 @@ router
|
||||||
.as('apps.twofactor_backupcodes.create')
|
.as('apps.twofactor_backupcodes.create')
|
||||||
.use(middleware.auth());
|
.use(middleware.auth());
|
||||||
|
|
||||||
router.get('collections/:id', [CollectionsController, 'show']).as('collection.show')
|
router.get('collections/:id', [CollectionsController, 'show']).as('collection.show');
|
||||||
})
|
})
|
||||||
// .namespace('App/Controllers/Http/Api')
|
// .namespace('App/Controllers/Http/Api')
|
||||||
.prefix('api');
|
.prefix('api');
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
/*
|
/*
|
||||||
|--------------------------------------------------------------------------
|
|--------------------------------------------------------------------------
|
||||||
| Preloaded File - node ace make:preload rules/orcid
|
| Preloaded File - node ace make:preload rules/orcid
|
||||||
| ❯ Do you want to register the preload file in .adonisrc.ts file? (y/N) · true
|
| Do you want to register the preload file in .adonisrc.ts file? (y/N) · true
|
||||||
| DONE: create start/rules/orcid.ts
|
| DONE: create start/rules/orcid.ts
|
||||||
| DONE: update adonisrc.ts file
|
| DONE: update adonisrc.ts file
|
||||||
|--------------------------------------------------------------------------
|
|--------------------------------------------------------------------------
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue