Compare commits
No commits in common. "6757bdb77c14cb25020c80c8134f26519b3d3633" and "8f67839f93d91981c686ea04d25d2d0586fc7a44" have entirely different histories.
6757bdb77c ... 8f67839f93
22 changed files with 919 additions and 2870 deletions
Dockerfile (77 changed lines)
@@ -1,61 +1,55 @@
 ################## First Stage - Creating base #########################

 # Created a variable to hold our node base image
-ARG NODE_IMAGE=node:22-trixie-slim
+ARG NODE_IMAGE=node:22-bookworm-slim

 FROM $NODE_IMAGE AS base

 # Install dumb-init and ClamAV, and perform ClamAV database update
-RUN apt-get update \
-    && apt-get install -y --no-install-recommends \
-    dumb-init \
-    clamav \
-    clamav-daemon \
-    ca-certificates \
+RUN apt update \
+    && apt-get install -y dumb-init clamav clamav-daemon nano \
     && rm -rf /var/lib/apt/lists/* \
     # Creating folders and changing ownerships
-    && mkdir -p /home/node/app \
+    && mkdir -p /home/node/app && chown node:node /home/node/app \
     && mkdir -p /var/lib/clamav \
     && mkdir /usr/local/share/clamav \
+    && chown -R node:clamav /var/lib/clamav /usr/local/share/clamav /etc/clamav \
+    # permissions
     && mkdir /var/run/clamav \
-    && mkdir -p /var/log/clamav \
-    && mkdir -p /tmp/clamav-logs \
+    && chown node:clamav /var/run/clamav \
+    && chmod 750 /var/run/clamav
+
+# -----------------------------------------------
+# --- ClamAV & FeshClam -------------------------
+# -----------------------------------------------
+# RUN \
+#     chmod 644 /etc/clamav/freshclam.conf && \
+#     freshclam && \
+#     mkdir /var/run/clamav && \
+#     chown -R clamav:root /var/run/clamav

-    # Set ownership and permissions
-    && chown node:node /home/node/app \
-    # && chown -R node:clamav /var/lib/clamav /usr/local/share/clamav /etc/clamav /var/run/clamav \
-    && chown -R clamav:clamav /var/lib/clamav /usr/local/share/clamav /etc/clamav /var/run/clamav /var/log/clamav \
-    && chmod 755 /tmp/clamav-logs \
-    && chmod 750 /var/run/clamav \
-    && chmod 755 /var/lib/clamav \
-    && chmod 755 /var/log/clamav \
-    # Add node user to clamav group and allow sudo for clamav commands
-    && usermod -a -G clamav node \
-    && chmod g+w /var/run/clamav /var/lib/clamav /var/log/clamav /tmp/clamav-logs
+# # initial update of av databases
+# RUN freshclam

+# Configure Clam AV...
+COPY --chown=node:clamav ./*.conf /etc/clamav/

-# Configure ClamAV - copy config files before switching user
-# COPY --chown=node:clamav ./*.conf /etc/clamav/
-COPY --chown=clamav:clamav ./*.conf /etc/clamav/
+# # permissions
+# RUN mkdir /var/run/clamav && \
+#     chown node:clamav /var/run/clamav && \
+#     chmod 750 /var/run/clamav

-# Copy entrypoint script
-COPY --chown=node:node docker-entrypoint.sh /home/node/app/docker-entrypoint.sh
-RUN chmod +x /home/node/app/docker-entrypoint.sh
-
-ENV TZ="Europe/Vienna"

 # Setting the working directory
 WORKDIR /home/node/app
 # Changing the current active user to "node"

-# Download initial ClamAV database as root before switching users
-USER root
-RUN freshclam --quiet || echo "Initial database download failed - will retry at runtime"
-
 USER node

-# Initial update of AV databases (moved after USER directive)
-# RUN freshclam || true
+# initial update of av databases
+RUN freshclam

+# VOLUME /var/lib/clamav
+COPY --chown=node:clamav docker-entrypoint.sh /home/node/app/docker-entrypoint.sh
+RUN chmod +x /home/node/app/docker-entrypoint.sh
+ENV TZ="Europe/Vienna"



 ################## Second Stage - Installing dependencies ##########
@@ -76,13 +70,14 @@ ENV NODE_ENV=production
 # We run "node ace build" to build the app (dist folder) for production
 RUN node ace build --ignore-ts-errors
 # RUN node ace build --production
+# RUN node ace build --ignore-ts-errors


 ################## Final Stage - Production #########################
 # In this final stage, we will start running the application
 FROM base AS production
 # Here, we include all the required environment variables
-ENV NODE_ENV=production
+# ENV NODE_ENV=production
 # ENV PORT=$PORT
 # ENV HOST=0.0.0.0
@@ -96,4 +91,4 @@ COPY --chown=node:node --from=build /home/node/app/build .
 EXPOSE 3333
 ENTRYPOINT ["/home/node/app/docker-entrypoint.sh"]
 # Run the command to start the server using "dumb-init"
-CMD [ "node", "bin/server.js" ]
+CMD [ "dumb-init", "node", "bin/server.js" ]
LICENSE (22 changed lines, file deleted)
@@ -1,22 +0,0 @@
-
-MIT License
-
-Copyright (c) 2025 Tethys Research Repository
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE
@@ -30,9 +30,9 @@ export default defineConfig({
         () => import('#start/rules/unique'),
         () => import('#start/rules/translated_language'),
         () => import('#start/rules/unique_person'),
-        // () => import('#start/rules/file_length'),
-        // () => import('#start/rules/file_scan'),
-        // () => import('#start/rules/allowed_extensions_mimetypes'),
+        () => import('#start/rules/file_length'),
+        () => import('#start/rules/file_scan'),
+        () => import('#start/rules/allowed_extensions_mimetypes'),
         () => import('#start/rules/dependent_array_min_length'),
         () => import('#start/rules/referenceValidation'),
         () => import('#start/rules/valid_mimetype'),
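The rules switched from commented-out to active in the hunk above are custom VineJS rules registered from #start/rules. As a rough, hypothetical sketch only (it assumes VineJS's vine.createRule API and FieldContext.report signature, and is not the repository's actual rule code), one of those modules could look roughly like this:

import vine from '@vinejs/vine';
import type { FieldContext } from '@vinejs/vine/types';

// Hypothetical validation function: reject file names longer than the configured limit
async function fileLength(value: unknown, options: { maxLength: number }, field: FieldContext) {
    if (typeof value !== 'string') {
        return; // let other rules (e.g. string()) report the type error
    }
    if (value.length > options.maxLength) {
        field.report('The {{ field }} field exceeds {{ maxLength }} characters', 'fileLength', field, options);
    }
}

// createRule wraps the function so it can be chained onto a schema,
// e.g. vine.string().use(fileLengthRule({ maxLength: 100 }))
export const fileLengthRule = vine.createRule(fileLength);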
@@ -1,35 +1,23 @@
 import type { HttpContext } from '@adonisjs/core/http';
+// import Person from 'App/Models/Person';
 import Dataset from '#models/dataset';
 import { StatusCodes } from 'http-status-codes';

 // node ace make:controller Author
 export default class DatasetController {
-    /**
-     * GET /api/datasets
-     * Find all published datasets
-     */
-    public async index({ response }: HttpContext) {
-        try {
-            const datasets = await Dataset.query()
-                .where(function (query) {
-                    query.where('server_state', 'published').orWhere('server_state', 'deleted');
-                })
-                .preload('titles')
-                .preload('identifier')
-                .orderBy('server_date_published', 'desc');
-
-            return response.status(StatusCodes.OK).json(datasets);
-        } catch (error) {
-            return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
-                message: error.message || 'Some error occurred while retrieving datasets.',
-            });
-        }
+    public async index({}: HttpContext) {
+        // Select datasets with server_state 'published' or 'deleted' and sort by the last published date
+        const datasets = await Dataset.query()
+            .where(function (query) {
+                query.where('server_state', 'published').orWhere('server_state', 'deleted');
+            })
+            .preload('titles')
+            .preload('identifier')
+            .orderBy('server_date_published', 'desc');
+
+        return datasets;
     }

-    /**
-     * GET /api/dataset
-     * Find all published datasets
-     */
     public async findAll({ response }: HttpContext) {
         try {
             const datasets = await Dataset.query()
@@ -45,142 +33,48 @@ export default class DatasetController {
         }
     }

-    /**
-     * GET /api/dataset/:publish_id
-     * Find one dataset by publish_id
-     */
-    public async findOne({ response, params }: HttpContext) {
-        try {
-            const dataset = await Dataset.query()
-                .where('publish_id', params.publish_id)
-                .preload('titles')
-                .preload('descriptions') // Using 'descriptions' instead of 'abstracts'
-                .preload('user', (builder) => {
-                    builder.select(['id', 'firstName', 'lastName', 'avatar', 'login']);
-                })
-                .preload('authors', (builder) => {
-                    builder
-                        .select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
-                        .withCount('datasets', (query) => {
-                            query.as('datasets_count');
-                        })
-                        .pivotColumns(['role', 'sort_order'])
-                        .orderBy('pivot_sort_order', 'asc');
-                })
-                .preload('contributors', (builder) => {
-                    builder
-                        .select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
-                        .withCount('datasets', (query) => {
-                            query.as('datasets_count');
-                        })
-                        .pivotColumns(['role', 'sort_order', 'contributor_type'])
-                        .orderBy('pivot_sort_order', 'asc');
-                })
-                .preload('subjects')
-                .preload('coverage')
-                .preload('licenses')
-                .preload('references')
-                .preload('project')
-                .preload('referenced_by', (builder) => {
-                    builder.preload('dataset', (builder) => {
-                        builder.preload('identifier');
-                    });
-                })
-                .preload('files', (builder) => {
-                    builder.preload('hashvalues');
-                })
-                .preload('identifier')
-                .first(); // Use first() instead of firstOrFail() to handle not found gracefully
-
-            if (!dataset) {
-                return response.status(StatusCodes.NOT_FOUND).json({
-                    message: `Cannot find Dataset with publish_id=${params.publish_id}.`,
-                });
-            }
-
-            return response.status(StatusCodes.OK).json(dataset);
-        } catch (error) {
-            return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
-                message: error.message || `Error retrieving Dataset with publish_id=${params.publish_id}.`,
-            });
-        }
-    }
-
-    /**
-     * GET /:prefix/:value
-     * Find dataset by identifier (e.g., https://doi.tethys.at/10.24341/tethys.99.2)
-     */
-    public async findByIdentifier({ response, params }: HttpContext) {
-        const identifierValue = `${params.prefix}/${params.value}`;
-
-        // Optional: Validate DOI format
-        if (!identifierValue.match(/^10\.\d+\/[a-zA-Z0-9._-]+\.[0-9]+(?:\.[0-9]+)*$/)) {
-            return response.status(StatusCodes.BAD_REQUEST).json({
-                message: `Invalid DOI format: ${identifierValue}`,
-            });
-        }
-
-        try {
-            // Method 1: Using subquery with whereIn (most similar to your original)
-            const dataset = await Dataset.query()
-                // .whereIn('id', (subQuery) => {
-                //     subQuery.select('dataset_id').from('dataset_identifiers').where('value', identifierValue);
-                // })
-                .whereHas('identifier', (builder) => {
-                    builder.where('value', identifierValue);
-                })
-                .preload('titles')
-                .preload('descriptions') // Using 'descriptions' instead of 'abstracts'
-                .preload('user', (builder) => {
-                    builder.select(['id', 'firstName', 'lastName', 'avatar', 'login']);
-                })
-                .preload('authors', (builder) => {
-                    builder
-                        .select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
-                        .withCount('datasets', (query) => {
-                            query.as('datasets_count');
-                        })
-                        .pivotColumns(['role', 'sort_order'])
-                        .wherePivot('role', 'author')
-                        .orderBy('pivot_sort_order', 'asc');
-                })
-                .preload('contributors', (builder) => {
-                    builder
-                        .select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
-                        .withCount('datasets', (query) => {
-                            query.as('datasets_count');
-                        })
-                        .pivotColumns(['role', 'sort_order', 'contributor_type'])
-                        .wherePivot('role', 'contributor')
-                        .orderBy('pivot_sort_order', 'asc');
-                })
-                .preload('subjects')
-                .preload('coverage')
-                .preload('licenses')
-                .preload('references')
-                .preload('project')
-                .preload('referenced_by', (builder) => {
-                    builder.preload('dataset', (builder) => {
-                        builder.preload('identifier');
-                    });
-                })
-                .preload('files', (builder) => {
-                    builder.preload('hashvalues');
-                })
-                .preload('identifier')
-                .first();
-
-            if (!dataset) {
-                return response.status(StatusCodes.NOT_FOUND).json({
-                    message: `Cannot find Dataset with identifier=${identifierValue}.`,
-                });
-            }
-
-            return response.status(StatusCodes.OK).json(dataset);
-        } catch (error) {
-            return response.status(StatusCodes.INTERNAL_SERVER_ERROR).json({
-                message: error.message || `Error retrieving Dataset with identifier=${identifierValue}.`,
-            });
-        }
+    public async findOne({ params }: HttpContext) {
+        const datasets = await Dataset.query()
+            .where('publish_id', params.publish_id)
+            .preload('titles')
+            .preload('descriptions')
+            .preload('user', (builder) => {
+                builder.select(['id', 'firstName', 'lastName', 'avatar', 'login']);
+            })
+            .preload('authors', (builder) => {
+                builder
+                    .select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
+                    .withCount('datasets', (query) => {
+                        query.as('datasets_count');
+                    })
+                    .pivotColumns(['role', 'sort_order'])
+                    .orderBy('pivot_sort_order', 'asc');
+            })
+            .preload('contributors', (builder) => {
+                builder
+                    .select(['id', 'academic_title', 'first_name', 'last_name', 'identifier_orcid', 'status', 'name_type'])
+                    .withCount('datasets', (query) => {
+                        query.as('datasets_count');
+                    })
+                    .pivotColumns(['role', 'sort_order', 'contributor_type'])
+                    .orderBy('pivot_sort_order', 'asc');
+            })
+            .preload('subjects')
+            .preload('coverage')
+            .preload('licenses')
+            .preload('references')
+            .preload('project')
+            .preload('referenced_by', (builder) => {
+                builder.preload('dataset', (builder) => {
+                    builder.preload('identifier');
+                });
+            })
+            .preload('files', (builder) => {
+                builder.preload('hashvalues');
+            })
+            .preload('identifier')
+            .firstOrFail();
+
+        return datasets;
     }
 }
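As a small illustration (not part of the diff), the DOI check in the removed findByIdentifier action above accepts prefix/value pairs of the form used by Tethys DOIs and rejects anything else:

// The same pattern the removed controller code matches against
const doiPattern = /^10\.\d+\/[a-zA-Z0-9._-]+\.[0-9]+(?:\.[0-9]+)*$/;

console.log(doiPattern.test('10.24341/tethys.99.2')); // true  - prefix/value with a version suffix
console.log(doiPattern.test('10.24341/tethys.99'));   // true
console.log(doiPattern.test('doi:10.24341/tethys'));  // false - scheme prefix and missing numeric suffix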
@@ -235,7 +235,6 @@ export default class DatasetController {
                     .isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
                 first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
                 last_name: vine.string().trim().minLength(3).maxLength(255),
-                identifier_orcid: vine.string().trim().maxLength(255).orcid().optional(),
             }),
         )
         .minLength(1)
@@ -252,7 +251,6 @@ export default class DatasetController {
                     .isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
                 first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
                 last_name: vine.string().trim().minLength(3).maxLength(255),
-                identifier_orcid: vine.string().trim().maxLength(255).orcid().optional(),
                 pivot_contributor_type: vine.enum(Object.keys(ContributorTypes)),
             }),
         )
@@ -328,7 +326,6 @@ export default class DatasetController {
                     .isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
                 first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
                 last_name: vine.string().trim().minLength(3).maxLength(255),
-                identifier_orcid: vine.string().trim().maxLength(255).orcid().optional(),
             }),
         )
         .minLength(1)
@@ -345,7 +342,6 @@ export default class DatasetController {
                     .isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
                 first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
                 last_name: vine.string().trim().minLength(3).maxLength(255),
-                identifier_orcid: vine.string().trim().maxLength(255).orcid().optional(),
                 pivot_contributor_type: vine.enum(Object.keys(ContributorTypes)),
             }),
         )
@@ -1,3 +1,6 @@
+// import { Client } from 'guzzle';
+// import { Log } from '@adonisjs/core/build/standalone';
+// import { DoiInterface } from './interfaces/DoiInterface';
 import DoiClientContract from '#app/Library/Doi/DoiClientContract';
 import DoiClientException from '#app/exceptions/DoiClientException';
 import { StatusCodes } from 'http-status-codes';
@@ -9,14 +12,14 @@ export class DoiClient implements DoiClientContract {
     public username: string;
     public password: string;
     public serviceUrl: string;
-    public apiUrl: string;

     constructor() {
         // const datacite_environment = process.env.DATACITE_ENVIRONMENT || 'debug';
         this.username = process.env.DATACITE_USERNAME || '';
         this.password = process.env.DATACITE_PASSWORD || '';
         this.serviceUrl = process.env.DATACITE_SERVICE_URL || '';
-        this.apiUrl = process.env.DATACITE_API_URL || 'https://api.datacite.org';
+        // this.prefix = process.env.DATACITE_PREFIX || '';
+        // this.base_domain = process.env.BASE_DOMAIN || '';

         if (this.username === '' || this.password === '' || this.serviceUrl === '') {
             const message = 'issing configuration settings to properly initialize DOI client';
@@ -87,240 +90,4 @@ export class DoiClient implements DoiClientContract {
             throw new DoiClientException(error.response.status, error.response.data);
         }
     }
-
-    /**
-     * Retrieves DOI information from DataCite REST API
-     *
-     * @param doiValue The DOI identifier e.g. '10.5072/tethys.999'
-     * @returns Promise with DOI information or null if not found
-     */
-    public async getDoiInfo(doiValue: string): Promise<any | null> {
-        try {
-            // Use configurable DataCite REST API URL
-            const dataciteApiUrl = `${this.apiUrl}/dois/${doiValue}`;
-            const response = await axios.get(dataciteApiUrl, {
-                headers: {
-                    Accept: 'application/vnd.api+json',
-                },
-            });
-
-            if (response.status === 200 && response.data.data) {
-                return {
-                    created: response.data.data.attributes.created,
-                    registered: response.data.data.attributes.registered,
-                    updated: response.data.data.attributes.updated,
-                    published: response.data.data.attributes.published,
-                    state: response.data.data.attributes.state,
-                    url: response.data.data.attributes.url,
-                    metadata: response.data.data.attributes,
-                };
-            }
-        } catch (error) {
-            if (error.response?.status === 404) {
-                logger.debug(`DOI ${doiValue} not found in DataCite`);
-                return null;
-            }
-
-            logger.debug(`DataCite REST API failed for ${doiValue}: ${error.message}`);
-
-            // Fallback to MDS API
-            return await this.getDoiInfoFromMds(doiValue);
-        }
-
-        return null;
-    }
-
-    /**
-     * Fallback method to get DOI info from MDS API
-     *
-     * @param doiValue The DOI identifier
-     * @returns Promise with basic DOI information or null
-     */
-    private async getDoiInfoFromMds(doiValue: string): Promise<any | null> {
-        try {
-            const auth = {
-                username: this.username,
-                password: this.password,
-            };
-
-            // Get DOI URL
-            const doiResponse = await axios.get(`${this.serviceUrl}/doi/${doiValue}`, { auth });
-
-            if (doiResponse.status === 200) {
-                // Get metadata if available
-                try {
-                    const metadataResponse = await axios.get(`${this.serviceUrl}/metadata/${doiValue}`, {
-                        auth,
-                        headers: {
-                            Accept: 'application/xml',
-                        },
-                    });
-
-                    return {
-                        url: doiResponse.data.trim(),
-                        metadata: metadataResponse.data,
-                        created: new Date().toISOString(), // MDS doesn't provide creation dates
-                        registered: new Date().toISOString(), // Use current time as fallback
-                        source: 'mds',
-                    };
-                } catch (metadataError) {
-                    // Return basic info even if metadata fetch fails
-                    return {
-                        url: doiResponse.data.trim(),
-                        created: new Date().toISOString(),
-                        registered: new Date().toISOString(),
-                        source: 'mds',
-                    };
-                }
-            }
-        } catch (error) {
-            if (error.response?.status === 404) {
-                logger.debug(`DOI ${doiValue} not found in DataCite MDS`);
-                return null;
-            }
-
-            logger.debug(`DataCite MDS API failed for ${doiValue}: ${error.message}`);
-        }
-
-        return null;
-    }
-
-    /**
-     * Checks if a DOI exists in DataCite
-     *
-     * @param doiValue The DOI identifier
-     * @returns Promise<boolean> True if DOI exists
-     */
-    public async doiExists(doiValue: string): Promise<boolean> {
-        const doiInfo = await this.getDoiInfo(doiValue);
-        return doiInfo !== null;
-    }
-
-    /**
-     * Gets the last modification date of a DOI
-     *
-     * @param doiValue The DOI identifier
-     * @returns Promise<Date | null> Last modification date or creation date if never updated, null if not found
-     */
-    public async getDoiLastModified(doiValue: string): Promise<Date | null> {
-        const doiInfo = await this.getDoiInfo(doiValue);
-
-        if (doiInfo) {
-            // Use updated date if available, otherwise fall back to created/registered date
-            const dateToUse = doiInfo.updated || doiInfo.registered || doiInfo.created;
-
-            if (dateToUse) {
-                logger.debug(
-                    `DOI ${doiValue}: Using ${doiInfo.updated ? 'updated' : doiInfo.registered ? 'registered' : 'created'} date: ${dateToUse}`,
-                );
-                return new Date(dateToUse);
-            }
-        }
-
-        return null;
-    }
-
-    /**
-     * Makes a DOI unfindable (registered but not discoverable)
-     * Note: DOIs cannot be deleted, only made unfindable
-     * await doiClient.makeDoiUnfindable('10.21388/tethys.231');
-     *
-     * @param doiValue The DOI identifier e.g. '10.5072/tethys.999'
-     * @returns Promise<AxiosResponse<any>> The http response
-     */
-    public async makeDoiUnfindable(doiValue: string): Promise<AxiosResponse<any>> {
-        const auth = {
-            username: this.username,
-            password: this.password,
-        };
-
-        try {
-            // First, check if DOI exists
-            const exists = await this.doiExists(doiValue);
-            if (!exists) {
-                throw new DoiClientException(404, `DOI ${doiValue} not found`);
-            }
-
-            // Delete the DOI URL mapping to make it unfindable
-            // This removes the URL but keeps the metadata registered
-            const response = await axios.delete(`${this.serviceUrl}/doi/${doiValue}`, { auth });
-
-            // Response Codes for DELETE /doi/{doi}
-            // 200 OK: operation successful
-            // 401 Unauthorized: no login
-            // 403 Forbidden: login problem, quota exceeded
-            // 404 Not Found: DOI does not exist
-            if (response.status !== 200) {
-                const message = `Unexpected DataCite MDS response code ${response.status}`;
-                logger.error(message);
-                throw new DoiClientException(response.status, message);
-            }
-
-            logger.info(`DOI ${doiValue} successfully made unfindable`);
-            return response;
-        } catch (error) {
-            logger.error(`Failed to make DOI ${doiValue} unfindable: ${error.message}`);
-            if (error instanceof DoiClientException) {
-                throw error;
-            }
-            throw new DoiClientException(error.response?.status || 500, error.response?.data || error.message);
-        }
-    }
-
-    /**
-     * Makes a DOI findable again by re-registering the URL
-     * await doiClient.makeDoiFindable(
-     *     '10.21388/tethys.231',
-     *     'https://doi.dev.tethys.at/10.21388/tethys.231'
-     * );
-     *
-     * @param doiValue The DOI identifier e.g. '10.5072/tethys.999'
-     * @param landingPageUrl The landing page URL
-     * @returns Promise<AxiosResponse<any>> The http response
-     */
-    public async makeDoiFindable(doiValue: string, landingPageUrl: string): Promise<AxiosResponse<any>> {
-        const auth = {
-            username: this.username,
-            password: this.password,
-        };
-
-        try {
-            // Re-register the DOI with its URL to make it findable again
-            const response = await axios.put(`${this.serviceUrl}/doi/${doiValue}`, `doi=${doiValue}\nurl=${landingPageUrl}`, { auth });
-
-            // Response Codes for PUT /doi/{doi}
-            // 201 Created: operation successful
-            // 400 Bad Request: request body must be exactly two lines: DOI and URL
-            // 401 Unauthorized: no login
-            // 403 Forbidden: login problem, quota exceeded
-            // 412 Precondition failed: metadata must be uploaded first
-            if (response.status !== 201) {
-                const message = `Unexpected DataCite MDS response code ${response.status}`;
-                logger.error(message);
-                throw new DoiClientException(response.status, message);
-            }
-
-            logger.info(`DOI ${doiValue} successfully made findable again`);
-            return response;
-        } catch (error) {
-            logger.error(`Failed to make DOI ${doiValue} findable: ${error.message}`);
-            if (error instanceof DoiClientException) {
-                throw error;
-            }
-            throw new DoiClientException(error.response?.status || 500, error.response?.data || error.message);
-        }
-    }
-
-    /**
-     * Gets the current state of a DOI (draft, registered, findable)
-     * const state = await doiClient.getDoiState('10.21388/tethys.231');
-     * console.log(`Current state: ${state}`); // 'findable'
-     *
-     * @param doiValue The DOI identifier
-     * @returns Promise<string | null> The DOI state or null if not found
-     */
-    public async getDoiState(doiValue: string): Promise<string | null> {
-        const doiInfo = await this.getDoiInfo(doiValue);
-        return doiInfo?.state || null;
-    }
 }
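For orientation, a hedged usage sketch of the helper methods removed above, based only on the doc-comment examples they carried (it assumes DATACITE_USERNAME, DATACITE_PASSWORD and DATACITE_SERVICE_URL are configured, and wraps the calls in an async function since they return promises):

import { DoiClient } from '#app/Library/Doi/DoiClient';

async function inspectDoi() {
    const doiClient = new DoiClient();

    if (await doiClient.doiExists('10.21388/tethys.231')) {
        const state = await doiClient.getDoiState('10.21388/tethys.231'); // e.g. 'findable'
        const lastModified = await doiClient.getDoiLastModified('10.21388/tethys.231');
        console.log(state, lastModified?.toISOString());
    }
}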
||||||
|
|
@ -1,380 +0,0 @@
|
||||||
/*
|
|
||||||
|--------------------------------------------------------------------------
|
|
||||||
| node ace make:command fix-dataset-cross-references
|
|
||||||
| DONE: create commands/fix_dataset_cross_references.ts
|
|
||||||
|--------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
import { BaseCommand, flags } from '@adonisjs/core/ace';
|
|
||||||
import type { CommandOptions } from '@adonisjs/core/types/ace';
|
|
||||||
import { DateTime } from 'luxon';
|
|
||||||
import Dataset from '#models/dataset';
|
|
||||||
import DatasetReference from '#models/dataset_reference';
|
|
||||||
// import env from '#start/env';
|
|
||||||
|
|
||||||
interface MissingCrossReference {
|
|
||||||
sourceDatasetId: number;
|
|
||||||
targetDatasetId: number;
|
|
||||||
sourcePublishId: number | null;
|
|
||||||
targetPublishId: number | null;
|
|
||||||
sourceDoi: string | null;
|
|
||||||
targetDoi: string | null;
|
|
||||||
referenceType: string;
|
|
||||||
relation: string;
|
|
||||||
doi: string | null;
|
|
||||||
reverseRelation: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
export default class DetectMissingCrossReferences extends BaseCommand {
|
|
||||||
static commandName = 'detect:missing-cross-references';
|
|
||||||
static description = 'Detect missing bidirectional cross-references between versioned datasets';
|
|
||||||
|
|
||||||
public static needsApplication = true;
|
|
||||||
|
|
||||||
@flags.boolean({ alias: 'f', description: 'Fix missing cross-references automatically' })
|
|
||||||
public fix: boolean = false;
|
|
||||||
|
|
||||||
@flags.boolean({ alias: 'v', description: 'Verbose output' })
|
|
||||||
public verbose: boolean = false;
|
|
||||||
|
|
||||||
@flags.number({ alias: 'p', description: 'Filter by specific publish_id (source or target dataset)' })
|
|
||||||
public publish_id?: number;
|
|
||||||
|
|
||||||
// example: node ace detect:missing-cross-references --verbose -p 227 //if you want to filter by specific publish_id with details
|
|
||||||
// example: node ace detect:missing-cross-references --verbose
|
|
||||||
// example: node ace detect:missing-cross-references --fix -p 227 //if you want to filter by specific publish_id and fix it
|
|
||||||
// example: node ace detect:missing-cross-references
|
|
||||||
|
|
||||||
public static options: CommandOptions = {
|
|
||||||
startApp: true,
|
|
||||||
staysAlive: false,
|
|
||||||
};
|
|
||||||
|
|
||||||
// Define the allowed relations that we want to process
|
|
||||||
private readonly ALLOWED_RELATIONS = ['IsNewVersionOf', 'IsPreviousVersionOf', 'IsVariantFormOf', 'IsOriginalFormOf'];
|
|
||||||
|
|
||||||
async run() {
|
|
||||||
this.logger.info('🔍 Detecting missing cross-references...');
|
|
||||||
this.logger.info(`📋 Processing only these relations: ${this.ALLOWED_RELATIONS.join(', ')}`);
|
|
||||||
|
|
||||||
if (this.publish_id) {
|
|
||||||
this.logger.info(`Filtering by publish_id: ${this.publish_id}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
const missingReferences = await this.findMissingCrossReferences();
|
|
||||||
|
|
||||||
if (missingReferences.length === 0) {
|
|
||||||
const filterMsg = this.publish_id ? ` for publish_id ${this.publish_id}` : '';
|
|
||||||
this.logger.success(`All cross-references are properly linked for the specified relations${filterMsg}!`);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const filterMsg = this.publish_id ? ` (filtered by publish_id ${this.publish_id})` : '';
|
|
||||||
this.logger.warning(`Found ${missingReferences.length} missing cross-reference(s)${filterMsg}:`);
|
|
||||||
|
|
||||||
// Show brief list if not verbose mode
|
|
||||||
if (!this.verbose) {
|
|
||||||
for (const missing of missingReferences) {
|
|
||||||
const sourceDoi = missing.sourceDoi ? ` DOI: ${missing.sourceDoi}` : '';
|
|
||||||
const targetDoi = missing.targetDoi ? ` DOI: ${missing.targetDoi}` : '';
|
|
||||||
|
|
||||||
this.logger.info(
|
|
||||||
`Dataset ${missing.sourceDatasetId} (Publish ID: ${missing.sourcePublishId}${sourceDoi}) ${missing.relation} Dataset ${missing.targetDatasetId} (Publish ID: ${missing.targetPublishId}${targetDoi}) → missing reverse: ${missing.reverseRelation}`,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Verbose mode - show detailed info
|
|
||||||
for (const missing of missingReferences) {
|
|
||||||
this.logger.info(
|
|
||||||
`Dataset ${missing.sourceDatasetId} references ${missing.targetDatasetId}, but reverse reference is missing`,
|
|
||||||
);
|
|
||||||
this.logger.info(` - Reference type: ${missing.referenceType}`);
|
|
||||||
this.logger.info(` - Relation: ${missing.relation}`);
|
|
||||||
this.logger.info(` - DOI: ${missing.doi}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this.fix) {
|
|
||||||
await this.fixMissingReferences(missingReferences);
|
|
||||||
this.logger.success('All missing cross-references have been fixed!');
|
|
||||||
} else {
|
|
||||||
if (this.verbose) {
|
|
||||||
this.printMissingReferencesList(missingReferences);
|
|
||||||
}
|
|
||||||
this.logger.info('💡 Run with --fix flag to automatically create missing cross-references');
|
|
||||||
if (this.publish_id) {
|
|
||||||
this.logger.info(`🎯 Currently filtering by publish_id: ${this.publish_id}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
this.logger.error('Error detecting missing cross-references:', error);
|
|
||||||
process.exit(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private async findMissingCrossReferences(): Promise<MissingCrossReference[]> {
|
|
||||||
const missingReferences: {
|
|
||||||
sourceDatasetId: number;
|
|
||||||
targetDatasetId: number;
|
|
||||||
sourcePublishId: number | null;
|
|
||||||
targetPublishId: number | null;
|
|
||||||
sourceDoi: string | null;
|
|
||||||
targetDoi: string | null;
|
|
||||||
referenceType: string;
|
|
||||||
relation: string;
|
|
||||||
doi: string | null;
|
|
||||||
reverseRelation: string;
|
|
||||||
}[] = [];
|
|
||||||
|
|
||||||
this.logger.info('📊 Querying dataset references...');
|
|
||||||
|
|
||||||
// Find all references that point to Tethys datasets (DOI or URL containing tethys DOI)
|
|
||||||
// Only from datasets that are published AND only for allowed relations
|
|
||||||
const tethysReferencesQuery = DatasetReference.query()
|
|
||||||
.whereIn('type', ['DOI', 'URL'])
|
|
||||||
.whereIn('relation', this.ALLOWED_RELATIONS) // Only process allowed relations
|
|
||||||
.where((query) => {
|
|
||||||
query.where('value', 'like', '%doi.org/10.24341/tethys.%').orWhere('value', 'like', '%tethys.at/dataset/%');
|
|
||||||
})
|
|
||||||
.preload('dataset', (datasetQuery) => {
|
|
||||||
datasetQuery.preload('identifier');
|
|
||||||
})
|
|
||||||
.whereHas('dataset', (datasetQuery) => {
|
|
||||||
datasetQuery.where('server_state', 'published');
|
|
||||||
});
|
|
||||||
if (typeof this.publish_id === 'number') {
|
|
||||||
tethysReferencesQuery.whereHas('dataset', (datasetQuery) => {
|
|
||||||
datasetQuery.where('publish_id', this.publish_id as number);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
const tethysReferences = await tethysReferencesQuery.exec();
|
|
||||||
|
|
||||||
this.logger.info(`🔗 Found ${tethysReferences.length} Tethys references from published datasets (allowed relations only)`);
|
|
||||||
|
|
||||||
let processedCount = 0;
|
|
||||||
let skippedCount = 0;
|
|
||||||
|
|
||||||
for (const reference of tethysReferences) {
|
|
||||||
processedCount++;
|
|
||||||
|
|
||||||
if (this.verbose && processedCount % 10 === 0) {
|
|
||||||
this.logger.info(`📈 Processed ${processedCount}/${tethysReferences.length} references...`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Double-check that this relation is in our allowed list (safety check)
|
|
||||||
if (!this.ALLOWED_RELATIONS.includes(reference.relation)) {
|
|
||||||
skippedCount++;
|
|
||||||
if (this.verbose) {
|
|
||||||
this.logger.info(`⏭️ Skipping relation "${reference.relation}" - not in allowed list`);
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract dataset publish_id from DOI or URL
|
|
||||||
const targetDatasetPublish = this.extractDatasetPublishIdFromReference(reference.value);
|
|
||||||
|
|
||||||
if (!targetDatasetPublish) {
|
|
||||||
if (this.verbose) {
|
|
||||||
this.logger.warning(`⚠️ Could not extract publish ID from: ${reference.value}`);
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if target dataset exists and is published
|
|
||||||
const targetDataset = await Dataset.query()
|
|
||||||
.where('publish_id', targetDatasetPublish)
|
|
||||||
.where('server_state', 'published')
|
|
||||||
.preload('identifier')
|
|
||||||
.first();
|
|
||||||
|
|
||||||
if (!targetDataset) {
|
|
||||||
if (this.verbose) {
|
|
||||||
this.logger.warning(`⚠️ Target dataset with publish_id ${targetDatasetPublish} not found or not published`);
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Ensure we have a valid source dataset with proper preloading
|
|
||||||
if (!reference.dataset) {
|
|
||||||
this.logger.warning(`⚠️ Source dataset ${reference.document_id} not properly loaded, skipping...`);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if reverse reference exists
|
|
||||||
const reverseReferenceExists = await this.checkReverseReferenceExists(
|
|
||||||
targetDataset.id,
|
|
||||||
// reference.document_id,
|
|
||||||
reference.relation,
|
|
||||||
);
|
|
||||||
|
|
||||||
if (!reverseReferenceExists) {
|
|
||||||
const reverseRelation = this.getReverseRelation(reference.relation);
|
|
||||||
if (reverseRelation) {
|
|
||||||
// Only add if we have a valid reverse relation
|
|
||||||
missingReferences.push({
|
|
||||||
sourceDatasetId: reference.document_id,
|
|
||||||
targetDatasetId: targetDataset.id,
|
|
||||||
sourcePublishId: reference.dataset.publish_id || null,
|
|
||||||
targetPublishId: targetDataset.publish_id || null,
|
|
||||||
referenceType: reference.type,
|
|
||||||
relation: reference.relation,
|
|
||||||
doi: reference.value,
|
|
||||||
reverseRelation: reverseRelation,
|
|
||||||
sourceDoi: reference.dataset.identifier ? reference.dataset.identifier.value : null,
|
|
||||||
targetDoi: targetDataset.identifier ? targetDataset.identifier.value : null,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
this.logger.info(`✅ Processed ${processedCount} references (${skippedCount} skipped due to relation filtering)`);
|
|
||||||
return missingReferences;
|
|
||||||
}
|
|
||||||
|
|
||||||
private extractDatasetPublishIdFromReference(value: string): number | null {
|
|
||||||
// Extract from DOI: https://doi.org/10.24341/tethys.107 -> 107
|
|
||||||
const doiMatch = value.match(/10\.24341\/tethys\.(\d+)/);
|
|
||||||
if (doiMatch) {
|
|
||||||
return parseInt(doiMatch[1]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract from URL: https://tethys.at/dataset/107 -> 107
|
|
||||||
const urlMatch = value.match(/tethys\.at\/dataset\/(\d+)/);
|
|
||||||
if (urlMatch) {
|
|
||||||
return parseInt(urlMatch[1]);
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private async checkReverseReferenceExists(targetDatasetId: number, originalRelation: string): Promise<boolean> {
|
|
||||||
const reverseRelation = this.getReverseRelation(originalRelation);
|
|
||||||
|
|
||||||
if (!reverseRelation) {
|
|
||||||
return true; // If no reverse relation is defined, consider it as "exists" to skip processing
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only check for reverse references where the source dataset is also published
|
|
||||||
const reverseReference = await DatasetReference.query()
|
|
||||||
// We don't filter by source document_id here to find any incoming reference from any published dataset
|
|
||||||
// .where('document_id', sourceDatasetId)
|
|
||||||
.where('related_document_id', targetDatasetId)
|
|
||||||
.where('relation', reverseRelation)
|
|
||||||
.first();
|
|
||||||
|
|
||||||
return !!reverseReference;
|
|
||||||
}
|
|
||||||
|
|
||||||
private getReverseRelation(relation: string): string | null {
|
|
||||||
const relationMap: Record<string, string> = {
|
|
||||||
IsNewVersionOf: 'IsPreviousVersionOf',
|
|
||||||
IsPreviousVersionOf: 'IsNewVersionOf',
|
|
||||||
IsVariantFormOf: 'IsOriginalFormOf',
|
|
||||||
IsOriginalFormOf: 'IsVariantFormOf',
|
|
||||||
};
|
|
||||||
|
|
||||||
// Only return reverse relation if it exists in our map, otherwise return null
|
|
||||||
return relationMap[relation] || null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private printMissingReferencesList(missingReferences: MissingCrossReference[]) {
|
|
||||||
console.log('┌─────────────────────────────────────────────────────────────────────────────────┐');
|
|
||||||
console.log('│ MISSING CROSS-REFERENCES REPORT │');
|
|
||||||
console.log('│ (Published Datasets Only - Filtered Relations) │');
|
|
||||||
console.log('└─────────────────────────────────────────────────────────────────────────────────┘');
|
|
||||||
console.log();
|
|
||||||
|
|
||||||
missingReferences.forEach((missing, index) => {
|
|
||||||
console.log(
|
|
||||||
`${index + 1}. Dataset ${missing.sourceDatasetId} (Publish ID: ${missing.sourcePublishId} Identifier: ${missing.sourceDoi})
|
|
||||||
${missing.relation} Dataset ${missing.targetDatasetId} (Publish ID: ${missing.targetPublishId} Identifier: ${missing.targetDoi})`,
|
|
||||||
);
|
|
||||||
console.log(` ├─ Current relation: "${missing.relation}"`);
|
|
||||||
console.log(` ├─ Missing reverse relation: "${missing.reverseRelation}"`);
|
|
||||||
console.log(` ├─ Reference type: ${missing.referenceType}`);
|
|
||||||
console.log(` └─ DOI/URL: ${missing.doi}`);
|
|
||||||
console.log();
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log('┌─────────────────────────────────────────────────────────────────────────────────┐');
|
|
||||||
console.log(`│ SUMMARY: ${missingReferences.length} missing reverse reference(s) detected │`);
|
|
||||||
console.log(`│ Processed relations: ${this.ALLOWED_RELATIONS.join(', ')} │`);
|
|
||||||
console.log('└─────────────────────────────────────────────────────────────────────────────────┘');
|
|
||||||
}
|
|
||||||
|
|
||||||
private async fixMissingReferences(missingReferences: MissingCrossReference[]) {
|
|
||||||
this.logger.info('🔧 Creating missing cross-references in database...');
|
|
||||||
|
|
||||||
let fixedCount = 0;
|
|
||||||
let errorCount = 0;
|
|
||||||
|
|
||||||
for (const [index, missing] of missingReferences.entries()) {
|
|
||||||
try {
|
|
||||||
// Get both source and target datasets
|
|
||||||
const sourceDataset = await Dataset.query()
|
|
||||||
.where('id', missing.sourceDatasetId)
|
|
||||||
.where('server_state', 'published')
|
|
||||||
.preload('identifier')
|
|
||||||
.first();
|
|
||||||
|
|
||||||
const targetDataset = await Dataset.query().where('id', missing.targetDatasetId).where('server_state', 'published').first();
|
|
||||||
|
|
||||||
if (!sourceDataset) {
|
|
||||||
this.logger.warning(`⚠️ Source dataset ${missing.sourceDatasetId} not found or not published, skipping...`);
|
|
||||||
errorCount++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!targetDataset) {
|
|
||||||
this.logger.warning(`⚠️ Target dataset ${missing.targetDatasetId} not found or not published, skipping...`);
|
|
||||||
errorCount++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create the reverse reference using the referenced_by relationship
|
|
||||||
// Example: If Dataset 297 IsNewVersionOf Dataset 144
|
|
||||||
// We create an incoming reference for Dataset 144 that shows Dataset 297 IsPreviousVersionOf it
|
|
||||||
const reverseReference = new DatasetReference();
|
|
||||||
// Don't set document_id - this creates an incoming reference via related_document_id
|
|
||||||
reverseReference.related_document_id = missing.targetDatasetId; // 144 (dataset receiving the incoming reference)
|
|
||||||
reverseReference.type = 'DOI';
|
|
||||||
reverseReference.relation = missing.reverseRelation;
|
|
||||||
|
|
||||||
// Use the source dataset's DOI for the value (what's being referenced)
|
|
||||||
if (sourceDataset.identifier?.value) {
|
|
||||||
reverseReference.value = `https://doi.org/${sourceDataset.identifier.value}`;
|
|
||||||
} else {
|
|
||||||
// Fallback to dataset URL if no DOI
|
|
||||||
reverseReference.value = `https://tethys.at/dataset/${sourceDataset.publish_id || missing.sourceDatasetId}`;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use the source dataset's main title for the label
|
|
||||||
reverseReference.label = sourceDataset.mainTitle || `Dataset ${missing.sourceDatasetId}`;
|
|
||||||
|
|
||||||
// Also save 'server_date_modified' on target dataset to trigger any downstream updates (e.g. search index)
|
|
||||||
targetDataset.server_date_modified = DateTime.now();
|
|
||||||
await targetDataset.save();
|
|
||||||
|
|
||||||
await reverseReference.save();
|
|
||||||
fixedCount++;
|
|
||||||
|
|
||||||
if (this.verbose) {
|
|
||||||
this.logger.info(
|
|
||||||
`✅ [${index + 1}/${missingReferences.length}] Created reverse reference: Dataset ${missing.sourceDatasetId} -> ${missing.targetDatasetId} (${missing.reverseRelation})`,
|
|
||||||
);
|
|
||||||
} else if ((index + 1) % 10 === 0) {
|
|
||||||
this.logger.info(`📈 Fixed ${fixedCount}/${missingReferences.length} references...`);
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
this.logger.error(
|
|
||||||
`❌ Error creating reverse reference for datasets ${missing.targetDatasetId} -> ${missing.sourceDatasetId}:`,
|
|
||||||
error,
|
|
||||||
);
|
|
||||||
errorCount++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
this.logger.info(`📊 Fix completed: ${fixedCount} created, ${errorCount} errors`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,346 +0,0 @@
|
||||||
/*
|
|
||||||
|--------------------------------------------------------------------------
|
|
||||||
| node ace make:command list-updateable-datacite
|
|
||||||
| DONE: create commands/list_updeatable_datacite.ts
|
|
||||||
|--------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
import { BaseCommand, flags } from '@adonisjs/core/ace';
|
|
||||||
import { CommandOptions } from '@adonisjs/core/types/ace';
|
|
||||||
import Dataset from '#models/dataset';
|
|
||||||
import { DoiClient } from '#app/Library/Doi/DoiClient';
|
|
||||||
import env from '#start/env';
|
|
||||||
import logger from '@adonisjs/core/services/logger';
|
|
||||||
import { DateTime } from 'luxon';
|
|
||||||
import pLimit from 'p-limit';
|
|
||||||
|
|
||||||
export default class ListUpdateableDatacite extends BaseCommand {
|
|
||||||
static commandName = 'list:updateable-datacite';
|
|
||||||
static description = 'List all datasets that need DataCite DOI updates';
|
|
||||||
|
|
||||||
public static needsApplication = true;
|
|
||||||
|
|
||||||
// private chunkSize = 100; // Set chunk size for pagination
|
|
||||||
|
|
||||||
@flags.boolean({ alias: 'v', description: 'Verbose output showing detailed information' })
|
|
||||||
public verbose: boolean = false;
|
|
||||||
|
|
||||||
@flags.boolean({ alias: 'c', description: 'Show only count of updatable datasets' })
|
|
||||||
public countOnly: boolean = false;
|
|
||||||
|
|
||||||
@flags.boolean({ alias: 'i', description: 'Show only publish IDs (useful for scripting)' })
|
|
||||||
public idsOnly: boolean = false;
|
|
||||||
|
|
||||||
@flags.number({ description: 'Chunk size for processing datasets (default: 50)' })
|
|
||||||
public chunkSize: number = 50;
|
|
||||||
|
|
||||||
//example: node ace list:updateable-datacite
|
|
||||||
//example: node ace list:updateable-datacite --verbose
|
|
||||||
//example: node ace list:updateable-datacite --count-only
|
|
||||||
//example: node ace list:updateable-datacite --ids-only
|
|
||||||
//example: node ace list:updateable-datacite --chunk-size 50
|
|
||||||
|
|
||||||
public static options: CommandOptions = {
|
|
||||||
startApp: true,
|
|
||||||
stayAlive: false,
|
|
||||||
};
|
|
||||||
|
|
||||||
async run() {
|
|
||||||
const prefix = env.get('DATACITE_PREFIX', '');
|
|
||||||
const base_domain = env.get('BASE_DOMAIN', '');
|
|
||||||
|
|
||||||
if (!prefix || !base_domain) {
|
|
||||||
logger.error('Missing DATACITE_PREFIX or BASE_DOMAIN environment variables');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Prevent conflicting flags
|
|
||||||
if ((this.verbose && this.countOnly) || (this.verbose && this.idsOnly)) {
|
|
||||||
logger.error('Flags --verbose cannot be combined with --count-only or --ids-only');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const chunkSize = this.chunkSize || 50;
|
|
||||||
let page = 1;
|
|
||||||
let hasMoreDatasets = true;
|
|
||||||
let totalProcessed = 0;
|
|
||||||
const updatableDatasets: Dataset[] = [];
|
|
||||||
|
|
||||||
if (!this.countOnly && !this.idsOnly) {
|
|
||||||
logger.info(`Processing datasets in chunks of ${chunkSize}...`);
|
|
||||||
}
|
|
||||||
|
|
||||||
while (hasMoreDatasets) {
|
|
||||||
const datasets = await this.getDatasets(page, chunkSize);
|
|
||||||
|
|
||||||
if (datasets.length === 0) {
|
|
||||||
hasMoreDatasets = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!this.countOnly && !this.idsOnly) {
|
|
||||||
logger.info(`Processing chunk ${page} (${datasets.length} datasets)...`);
|
|
||||||
}
|
|
||||||
|
|
||||||
const chunkUpdatableDatasets = await this.processChunk(datasets);
|
|
||||||
updatableDatasets.push(...chunkUpdatableDatasets);
|
|
||||||
totalProcessed += datasets.length;
|
|
||||||
|
|
||||||
page += 1;
|
|
||||||
if (datasets.length < chunkSize) {
|
|
||||||
hasMoreDatasets = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!this.countOnly && !this.idsOnly) {
|
|
||||||
logger.info(`Processed ${totalProcessed} datasets total, found ${updatableDatasets.length} that need updates`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this.countOnly) {
|
|
||||||
console.log(updatableDatasets.length);
|
|
||||||
} else if (this.idsOnly) {
|
|
||||||
updatableDatasets.forEach((dataset) => console.log(dataset.publish_id));
|
|
||||||
} else if (this.verbose) {
|
|
||||||
await this.showVerboseOutput(updatableDatasets);
|
|
||||||
} else {
|
|
||||||
this.showSimpleOutput(updatableDatasets);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Processes a chunk of datasets to determine which ones need DataCite updates
|
|
||||||
*
|
|
||||||
* This method handles parallel processing of datasets within a chunk, providing
|
|
||||||
* efficient error handling and filtering of results.
|
|
||||||
*
|
|
||||||
* @param datasets - Array of Dataset objects to process
|
|
||||||
* @returns Promise<Dataset[]> - Array of datasets that need updates
|
|
||||||
*/
|
|
||||||
// private async processChunk(datasets: Dataset[]): Promise<Dataset[]> {
|
|
||||||
// // Process datasets in parallel using Promise.allSettled for better error handling
|
|
||||||
// //
|
|
||||||
// // Why Promise.allSettled vs Promise.all?
|
|
||||||
// // - Promise.all fails fast: if ANY promise rejects, the entire operation fails
|
|
||||||
// // - Promise.allSettled waits for ALL promises: some can fail, others succeed
|
|
||||||
// // - This is crucial for batch processing where we don't want one bad dataset
|
|
||||||
// // to stop processing of the entire chunk
|
|
||||||
// const results = await Promise.allSettled(
|
|
||||||
// datasets.map(async (dataset) => {
|
|
||||||
// try {
|
|
||||||
// // Check if this specific dataset needs a DataCite update
|
|
||||||
// const needsUpdate = await this.shouldUpdateDataset(dataset);
|
|
||||||
|
|
||||||
// // Return the dataset if it needs update, null if it doesn't
|
|
||||||
// // This creates a sparse array that we'll filter later
|
|
||||||
// return needsUpdate ? dataset : null;
|
|
||||||
// } catch (error) {
|
|
||||||
// // Error handling for individual dataset checks
|
|
||||||
// //
|
|
||||||
// // Log warnings only if we're not in silent modes (count-only or ids-only)
|
|
||||||
// // This prevents log spam when running automated scripts
|
|
||||||
// if (!this.countOnly && !this.idsOnly) {
|
|
||||||
// logger.warn(`Error checking dataset ${dataset.publish_id}: ${error.message}`);
|
|
||||||
// }
|
|
||||||
|
|
||||||
// // IMPORTANT DECISION: Return the dataset anyway if we can't determine status
|
|
||||||
// //
|
|
||||||
// // Why? It's safer to include a dataset that might not need updating
|
|
||||||
// // than to miss one that actually does need updating. This follows the
|
|
||||||
// // "fail-safe" principle - if we're unsure, err on the side of caution
|
|
||||||
// return dataset;
|
|
||||||
// }
|
|
||||||
// }),
|
|
||||||
// );
|
|
||||||
|
|
||||||
// // Filter and extract results from Promise.allSettled response
|
|
||||||
// //
|
|
||||||
// // Promise.allSettled returns an array of objects with this structure:
|
|
||||||
// // - { status: 'fulfilled', value: T } for successful promises
|
|
||||||
// // - { status: 'rejected', reason: Error } for failed promises
|
|
||||||
// //
|
|
||||||
// // We need to:
|
|
||||||
// // 1. Only get fulfilled results (rejected ones are already handled above)
|
|
||||||
// // 2. Filter out null values (datasets that don't need updates)
|
|
||||||
// // 3. Extract the actual Dataset objects from the wrapper
|
|
||||||
// return results
|
|
||||||
// .filter(
|
|
||||||
// (result): result is PromiseFulfilledResult<Dataset | null> =>
|
|
||||||
// // Type guard: only include fulfilled results that have actual values
|
|
||||||
// // This filters out:
|
|
||||||
// // - Rejected promises (shouldn't happen due to try/catch, but safety first)
|
|
||||||
// // - Fulfilled promises that returned null (datasets that don't need updates)
|
|
||||||
// result.status === 'fulfilled' && result.value !== null,
|
|
||||||
// )
|
|
||||||
// .map((result) => result.value!); // Extract the Dataset from the wrapper
|
|
||||||
// // The ! is safe here because we filtered out null values above
|
|
||||||
// }
|
|
||||||
|
|
||||||
private async processChunk(datasets: Dataset[]): Promise<Dataset[]> {
|
|
||||||
// Limit concurrency to avoid API flooding (e.g., max 5 at once)
|
|
||||||
const limit = pLimit(5);
|
|
||||||
|
|
||||||
const tasks = datasets.map((dataset) =>
|
|
||||||
limit(async () => {
|
|
||||||
try {
|
|
||||||
const needsUpdate = await this.shouldUpdateDataset(dataset);
|
|
||||||
return needsUpdate ? dataset : null;
|
|
||||||
} catch (error) {
|
|
||||||
if (!this.countOnly && !this.idsOnly) {
|
|
||||||
logger.warn(
|
|
||||||
`Error checking dataset ${dataset.publish_id}: ${
|
|
||||||
error instanceof Error ? error.message : JSON.stringify(error)
|
|
||||||
}`,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
// Fail-safe: include dataset if uncertain
|
|
||||||
return dataset;
|
|
||||||
}
|
|
||||||
}),
|
|
||||||
);
|
|
||||||
|
|
||||||
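// Each limited task catches its own errors and resolves to either a Dataset or null,
// so Promise.allSettled is only a defensive wrapper here; fulfilled null values are
// datasets that are already up to date and get filtered out below.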
const results = await Promise.allSettled(tasks);
|
|
||||||
|
|
||||||
return results
|
|
||||||
.filter((result): result is PromiseFulfilledResult<Dataset | null> => result.status === 'fulfilled' && result.value !== null)
|
|
||||||
.map((result) => result.value!);
|
|
||||||
}
|
|
||||||
|
|
||||||
private async getDatasets(page: number, chunkSize: number): Promise<Dataset[]> {
|
|
||||||
return await Dataset.query()
|
|
||||||
.orderBy('publish_id', 'asc')
|
|
||||||
.preload('identifier')
|
|
||||||
.preload('xmlCache')
|
|
||||||
.preload('titles')
|
|
||||||
.where('server_state', 'published')
|
|
||||||
.whereHas('identifier', (identifierQuery) => {
|
|
||||||
identifierQuery.where('type', 'doi');
|
|
||||||
})
|
|
||||||
.forPage(page, chunkSize); // Fetch the datasets for the current page
|
|
||||||
}
|
|
||||||
|
|
||||||
private async shouldUpdateDataset(dataset: Dataset): Promise<boolean> {
|
|
||||||
try {
|
|
||||||
let doiIdentifier = dataset.identifier;
|
|
||||||
if (!doiIdentifier) {
|
|
||||||
await dataset.load('identifier');
|
|
||||||
doiIdentifier = dataset.identifier;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!doiIdentifier || doiIdentifier.type !== 'doi') {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
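// Normalize to a Luxon DateTime: depending on how the model was hydrated,
// server_date_modified may already be a DateTime or still a plain JS Date.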
const datasetModified =
|
|
||||||
dataset.server_date_modified instanceof DateTime
|
|
||||||
? dataset.server_date_modified
|
|
||||||
: DateTime.fromJSDate(dataset.server_date_modified);
|
|
||||||
|
|
||||||
if (!datasetModified) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (datasetModified > DateTime.now()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
const doiClient = new DoiClient();
|
|
||||||
const DOI_CHECK_TIMEOUT = 300; // ms
|
|
||||||
|
|
||||||
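// Race the DataCite lookup against a short timeout; on timeout or any other error the
// catch() below yields null, which is treated as "uncertain, include for update".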
const doiLastModified = await Promise.race([
|
|
||||||
doiClient.getDoiLastModified(doiIdentifier.value),
|
|
||||||
this.createTimeoutPromise(DOI_CHECK_TIMEOUT),
|
|
||||||
]).catch(() => null);
|
|
||||||
|
|
||||||
if (!doiLastModified) {
|
|
||||||
// If uncertain, better include dataset for update
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
const doiModified = DateTime.fromJSDate(doiLastModified);
|
|
||||||
if (datasetModified > doiModified) {
|
|
||||||
const diffInSeconds = Math.abs(datasetModified.diff(doiModified, 'seconds').seconds);
|
|
||||||
const toleranceSeconds = 600;
|
|
||||||
return diffInSeconds > toleranceSeconds;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
} catch (error) {
|
|
||||||
return true; // safer: include dataset if unsure
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a timeout promise for API calls
|
|
||||||
*/
|
|
||||||
private createTimeoutPromise(timeoutMs: number): Promise<never> {
|
|
||||||
return new Promise((_, reject) => {
|
|
||||||
setTimeout(() => reject(new Error(`API call timeout after ${timeoutMs}ms`)), timeoutMs);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
private showSimpleOutput(updatableDatasets: Dataset[]): void {
|
|
||||||
if (updatableDatasets.length === 0) {
|
|
||||||
console.log('No datasets need DataCite updates.');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(`\nFound ${updatableDatasets.length} dataset(s) that need DataCite updates:\n`);
|
|
||||||
|
|
||||||
updatableDatasets.forEach((dataset) => {
|
|
||||||
console.log(`publish_id ${dataset.publish_id} needs update - ${dataset.mainTitle || 'Untitled'}`);
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log(`\nTo update these datasets, run:`);
|
|
||||||
console.log(` node ace update:datacite`);
|
|
||||||
console.log(`\nOr update specific datasets:`);
|
|
||||||
console.log(` node ace update:datacite -p <publish_id>`);
|
|
||||||
}
|
|
||||||
|
|
||||||
private async showVerboseOutput(updatableDatasets: Dataset[]): Promise<void> {
|
|
||||||
if (updatableDatasets.length === 0) {
|
|
||||||
console.log('No datasets need DataCite updates.');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(`\nFound ${updatableDatasets.length} dataset(s) that need DataCite updates:\n`);
|
|
||||||
|
|
||||||
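// Sequential on purpose: awaiting each dataset keeps the printed blocks in order and
// avoids firing a burst of DataCite lookups at once.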
for (const dataset of updatableDatasets) {
|
|
||||||
await this.showDatasetDetails(dataset);
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(`\nSummary: ${updatableDatasets.length} datasets need updates`);
|
|
||||||
}
|
|
||||||
|
|
||||||
private async showDatasetDetails(dataset: Dataset): Promise<void> {
|
|
||||||
try {
|
|
||||||
let doiIdentifier = dataset.identifier;
|
|
||||||
|
|
||||||
if (!doiIdentifier) {
|
|
||||||
await dataset.load('identifier');
|
|
||||||
doiIdentifier = dataset.identifier;
|
|
||||||
}
|
|
||||||
|
|
||||||
const doiValue = doiIdentifier?.value || 'N/A';
|
|
||||||
const datasetModified = dataset.server_date_modified;
|
|
||||||
|
|
||||||
// Get DOI info from DataCite
|
|
||||||
const doiClient = new DoiClient();
|
|
||||||
const doiLastModified = await doiClient.getDoiLastModified(doiValue);
|
|
||||||
const doiState = await doiClient.getDoiState(doiValue);
|
|
||||||
|
|
||||||
console.log(`┌─ Dataset ${dataset.publish_id} ───────────────────────────────────────────────────────────────`);
|
|
||||||
console.log(`│ Title: ${dataset.mainTitle || 'Untitled'}`);
|
|
||||||
console.log(`│ DOI: ${doiValue}`);
|
|
||||||
console.log(`│ DOI State: ${doiState || 'Unknown'}`);
|
|
||||||
console.log(`│ Dataset Modified: ${datasetModified ? datasetModified.toISO() : 'N/A'}`);
|
|
||||||
console.log(`│ DOI Modified: ${doiLastModified ? DateTime.fromJSDate(doiLastModified).toISO() : 'N/A'}`);
|
|
||||||
console.log(`│ Status: NEEDS UPDATE`);
|
|
||||||
console.log(`└─────────────────────────────────────────────────────────────────────────────────────────────\n`);
|
|
||||||
} catch (error) {
|
|
||||||
console.log(`┌─ Dataset ${dataset.publish_id} ───────────────────────────────────────────────────────────────`);
|
|
||||||
console.log(`│ Title: ${dataset.mainTitle || 'Untitled'}`);
|
|
||||||
console.log(`│ DOI: ${dataset.identifier?.value || 'N/A'}`);
|
|
||||||
console.log(`│ Error: ${error.message}`);
|
|
||||||
console.log(`│ Status: NEEDS UPDATE (Error checking)`);
|
|
||||||
console.log(`└─────────────────────────────────────────────────────────────────────────────────────────────\n`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,266 +0,0 @@
|
||||||
/*
|
|
||||||
|--------------------------------------------------------------------------
|
|
||||||
| node ace make:command update-datacite
|
|
||||||
| DONE: create commands/update_datacite.ts
|
|
||||||
|--------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
import { BaseCommand, flags } from '@adonisjs/core/ace';
|
|
||||||
import { CommandOptions } from '@adonisjs/core/types/ace';
|
|
||||||
import Dataset from '#models/dataset';
|
|
||||||
import { DoiClient } from '#app/Library/Doi/DoiClient';
|
|
||||||
import DoiClientException from '#app/exceptions/DoiClientException';
|
|
||||||
import Index from '#app/Library/Utils/Index';
|
|
||||||
import env from '#start/env';
|
|
||||||
import logger from '@adonisjs/core/services/logger';
|
|
||||||
import { DateTime } from 'luxon';
|
|
||||||
import { getDomain } from '#app/utils/utility-functions';
|
|
||||||
|
|
||||||
export default class UpdateDatacite extends BaseCommand {
|
|
||||||
static commandName = 'update:datacite';
|
|
||||||
static description = 'Update DataCite DOI records for published datasets';
|
|
||||||
|
|
||||||
public static needsApplication = true;
|
|
||||||
|
|
||||||
@flags.number({ alias: 'p', description: 'Specific publish_id to update' })
|
|
||||||
public publish_id: number;
|
|
||||||
|
|
||||||
@flags.boolean({ alias: 'f', description: 'Force update all records regardless of modification date' })
|
|
||||||
public force: boolean = false;
|
|
||||||
|
|
||||||
@flags.boolean({ alias: 'd', description: 'Dry run - show what would be updated without making changes' })
|
|
||||||
public dryRun: boolean = false;
|
|
||||||
|
|
||||||
@flags.boolean({ alias: 's', description: 'Show detailed stats for each dataset that needs updating' })
|
|
||||||
public stats: boolean = false;
|
|
||||||
|
|
||||||
//example: node ace update:datacite -p 123 --force --dry-run
|
|
||||||
|
|
||||||
public static options: CommandOptions = {
|
|
||||||
startApp: true, // Whether to boot the application before running the command
|
|
||||||
stayAlive: false, // Whether to keep the process alive after the command has executed
|
|
||||||
};
|
|
||||||
|
|
||||||
async run() {
|
|
||||||
logger.info('Starting DataCite update process...');
|
|
||||||
|
|
||||||
const prefix = env.get('DATACITE_PREFIX', '');
|
|
||||||
const base_domain = env.get('BASE_DOMAIN', '');
|
|
||||||
const apiUrl = env.get('DATACITE_API_URL', 'https://api.datacite.org');
|
|
||||||
|
|
||||||
if (!prefix || !base_domain) {
|
|
||||||
logger.error('Missing DATACITE_PREFIX or BASE_DOMAIN environment variables');
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info(`Using DataCite API: ${apiUrl}`);
|
|
||||||
|
|
||||||
const datasets = await this.getDatasets();
|
|
||||||
logger.info(`Found ${datasets.length} datasets to process`);
|
|
||||||
|
|
||||||
let updated = 0;
|
|
||||||
let skipped = 0;
|
|
||||||
let errors = 0;
|
|
||||||
|
|
||||||
for (const dataset of datasets) {
|
|
||||||
try {
|
|
||||||
const shouldUpdate = this.force || (await this.shouldUpdateDataset(dataset));
|
|
||||||
|
|
||||||
if (this.stats) {
|
|
||||||
// Stats mode: show detailed information for datasets that need updating
|
|
||||||
if (shouldUpdate) {
|
|
||||||
await this.showDatasetStats(dataset);
|
|
||||||
updated++;
|
|
||||||
} else {
|
|
||||||
skipped++;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!shouldUpdate) {
|
|
||||||
logger.info(`Dataset ${dataset.publish_id}: Up to date, skipping`);
|
|
||||||
skipped++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this.dryRun) {
|
|
||||||
logger.info(`Dataset ${dataset.publish_id}: Would update DataCite record (dry run)`);
|
|
||||||
updated++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
await this.updateDataciteRecord(dataset, prefix, base_domain);
|
|
||||||
logger.info(`Dataset ${dataset.publish_id}: Successfully updated DataCite record`);
|
|
||||||
updated++;
|
|
||||||
} catch (error) {
|
|
||||||
logger.error(`Dataset ${dataset.publish_id}: Failed to update - ${error.message}`);
|
|
||||||
errors++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this.stats) {
|
|
||||||
logger.info(`\nDataCite Stats Summary: ${updated} datasets need updating, ${skipped} are up to date`);
|
|
||||||
} else {
|
|
||||||
logger.info(`DataCite update completed. Updated: ${updated}, Skipped: ${skipped}, Errors: ${errors}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private async getDatasets(): Promise<Dataset[]> {
|
|
||||||
const query = Dataset.query()
|
|
||||||
.preload('identifier')
|
|
||||||
.preload('xmlCache')
|
|
||||||
.where('server_state', 'published')
|
|
||||||
.whereHas('identifier', (identifierQuery) => {
|
|
||||||
identifierQuery.where('type', 'doi');
|
|
||||||
});
|
|
||||||
|
|
||||||
if (this.publish_id) {
|
|
||||||
query.where('publish_id', this.publish_id);
|
|
||||||
}
|
|
||||||
|
|
||||||
return await query.exec();
|
|
||||||
}
|
|
||||||
|
|
||||||
private async shouldUpdateDataset(dataset: Dataset): Promise<boolean> {
|
|
||||||
try {
|
|
||||||
let doiIdentifier = dataset.identifier;
|
|
||||||
|
|
||||||
if (!doiIdentifier) {
|
|
||||||
await dataset.load('identifier');
|
|
||||||
doiIdentifier = dataset.identifier;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!doiIdentifier || doiIdentifier.type !== 'doi') {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
const datasetModified = dataset.server_date_modified;
|
|
||||||
const now = DateTime.now();
|
|
||||||
|
|
||||||
if (!datasetModified) {
|
|
||||||
return true; // Update if modification date is missing
|
|
||||||
}
|
|
||||||
|
|
||||||
if (datasetModified > now) {
|
|
||||||
return false; // Skip invalid future dates
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check DataCite DOI modification date
|
|
||||||
const doiClient = new DoiClient();
|
|
||||||
const doiLastModified = await doiClient.getDoiLastModified(doiIdentifier.value);
|
|
||||||
|
|
||||||
if (!doiLastModified) {
|
|
||||||
return false; // do not update if we can't get DOI info
|
|
||||||
}
|
|
||||||
|
|
||||||
const doiModified = DateTime.fromJSDate(doiLastModified);
|
|
||||||
if (datasetModified > doiModified) {
|
|
||||||
// if dataset was modified after DOI creation
|
|
||||||
// Calculate the difference in seconds
|
|
||||||
const diffInSeconds = Math.abs(datasetModified.diff(doiModified, 'seconds').seconds);
|
|
||||||
|
|
||||||
// Define tolerance threshold (60 seconds = 1 minute)
|
|
||||||
const toleranceSeconds = 60;
|
|
||||||
|
|
||||||
// Only update if the difference is greater than the tolerance
|
|
||||||
// This prevents unnecessary updates for minor timestamp differences
|
|
||||||
return diffInSeconds > toleranceSeconds;
|
|
||||||
} else {
|
|
||||||
return false; // No update needed
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
return false; // do not update if we can't determine the status or another error occurs
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private async updateDataciteRecord(dataset: Dataset, prefix: string, base_domain: string): Promise<void> {
|
|
||||||
try {
|
|
||||||
// Get the DOI identifier (HasOne relationship)
|
|
||||||
let doiIdentifier = dataset.identifier;
|
|
||||||
|
|
||||||
if (!doiIdentifier) {
|
|
||||||
await dataset.load('identifier');
|
|
||||||
doiIdentifier = dataset.identifier;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!doiIdentifier || doiIdentifier.type !== 'doi') {
|
|
||||||
throw new Error('No DOI identifier found for dataset');
|
|
||||||
}
|
|
||||||
|
|
||||||
// Generate XML metadata
|
|
||||||
const xmlMeta = (await Index.getDoiRegisterString(dataset)) as string;
|
|
||||||
if (!xmlMeta) {
|
|
||||||
throw new Error('Failed to generate XML metadata');
|
|
||||||
}
|
|
||||||
|
|
||||||
// Construct DOI value and landing page URL
|
|
||||||
const doiValue = doiIdentifier.value; // Use existing DOI value
|
|
||||||
const landingPageUrl = `https://doi.${getDomain(base_domain)}/${doiValue}`;
|
|
||||||
|
|
||||||
// Update DataCite record
|
|
||||||
const doiClient = new DoiClient();
|
|
||||||
const dataciteResponse = await doiClient.registerDoi(doiValue, xmlMeta, landingPageUrl);
|
|
||||||
|
|
||||||
if (dataciteResponse?.status === 201) {
|
|
||||||
// // Update dataset modification date
|
|
||||||
// dataset.server_date_modified = DateTime.now();
|
|
||||||
// await dataset.save();
|
|
||||||
|
|
||||||
// // Update search index
|
|
||||||
// const index_name = 'tethys-records';
|
|
||||||
// await Index.indexDocument(dataset, index_name);
|
|
||||||
|
|
||||||
logger.debug(`Dataset ${dataset.publish_id}: DataCite record and search index updated successfully`);
|
|
||||||
} else {
|
|
||||||
throw new DoiClientException(
|
|
||||||
dataciteResponse?.status || 500,
|
|
||||||
`Unexpected DataCite response code: ${dataciteResponse?.status}`,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
if (error instanceof DoiClientException) {
|
|
||||||
throw error;
|
|
||||||
}
|
|
||||||
throw new Error(`Failed to update DataCite record: ${error.message}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Shows detailed statistics for a dataset that needs updating
|
|
||||||
*/
|
|
||||||
private async showDatasetStats(dataset: Dataset): Promise<void> {
|
|
||||||
try {
|
|
||||||
let doiIdentifier = dataset.identifier;
|
|
||||||
|
|
||||||
if (!doiIdentifier) {
|
|
||||||
await dataset.load('identifier');
|
|
||||||
doiIdentifier = dataset.identifier;
|
|
||||||
}
|
|
||||||
|
|
||||||
const doiValue = doiIdentifier?.value || 'N/A';
|
|
||||||
const doiStatus = doiIdentifier?.status || 'N/A';
|
|
||||||
const datasetModified = dataset.server_date_modified;
|
|
||||||
|
|
||||||
// Get DOI info from DataCite
|
|
||||||
const doiClient = new DoiClient();
|
|
||||||
const doiLastModified = await doiClient.getDoiLastModified(doiValue);
|
|
||||||
const doiState = await doiClient.getDoiState(doiValue);
|
|
||||||
|
|
||||||
console.log(`
|
|
||||||
┌─ Dataset ${dataset.publish_id} ───────────────────────────────────────────────────────────────
|
|
||||||
│ DOI Value: ${doiValue}
|
|
||||||
│ DOI Status (DB): ${doiStatus}
|
|
||||||
│ DOI State (DataCite): ${doiState || 'Unknown'}
|
|
||||||
│ Dataset Modified: ${datasetModified ? datasetModified.toISO() : 'N/A'}
|
|
||||||
│ DOI Modified: ${doiLastModified ? DateTime.fromJSDate(doiLastModified).toISO() : 'N/A'}
|
|
||||||
│ Needs Update: YES - Dataset newer than DOI
|
|
||||||
└─────────────────────────────────────────────────────────────────────────────────────────────`);
|
|
||||||
} catch (error) {
|
|
||||||
console.log(`
|
|
||||||
┌─ Dataset ${dataset.publish_id} ───────────────────────────────────────────────────────────────
|
|
||||||
│ DOI Value: ${dataset.identifier?.value || 'N/A'}
|
|
||||||
│ Error: ${error.message}
|
|
||||||
│ Needs Update: YES - Error checking status
|
|
||||||
└─────────────────────────────────────────────────────────────────────────────────────────────`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,61 +1,47 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
set -e
|
|
||||||
|
|
||||||
echo "Starting ClamAV services..."
|
# # Run freshclam to update virus definitions
|
||||||
|
# freshclam
|
||||||
|
|
||||||
|
# # Sleep for a few seconds to give ClamAV time to start
|
||||||
|
# sleep 5
|
||||||
|
|
||||||
# Try to download database if missing
|
# # Start the ClamAV daemon
|
||||||
if [ ! "$(ls -A /var/lib/clamav 2>/dev/null)" ]; then
|
# /etc/init.d/clamav-daemon start
|
||||||
echo "Downloading ClamAV database (this may take a while)..."
|
|
||||||
|
|
||||||
# Simple freshclam run without complex config
|
# bootstrap clam av service and clam av database updater
|
||||||
if sg clamav -c "freshclam --datadir=/var/lib/clamav --quiet"; then
|
set -m
|
||||||
echo "✓ Database downloaded successfully"
|
|
||||||
else
|
function process_file() {
|
||||||
echo "⚠ Database download failed - creating minimal setup"
|
if [[ ! -z "$1" ]]; then
|
||||||
# Create a dummy file so clamd doesn't immediately fail
|
local SETTING_LIST=$(echo "$1" | tr ',' '\n' | grep "^[A-Za-z][A-Za-z]*=.*$")
|
||||||
sg clamav -c "touch /var/lib/clamav/.dummy"
|
local SETTING
|
||||||
|
|
||||||
|
for SETTING in ${SETTING_LIST}; do
|
||||||
|
# Remove any existing copies of this setting. We do this here so that
|
||||||
|
# settings with multiple values (e.g. ExtraDatabase) can still be added
|
||||||
|
# multiple times below
|
||||||
|
local KEY=${SETTING%%=*}
|
||||||
|
sed -i $2 -e "/^${KEY} /d"
|
||||||
|
done
|
||||||
|
|
||||||
|
for SETTING in ${SETTING_LIST}; do
|
||||||
|
# Split on first '='
|
||||||
|
local KEY=${SETTING%%=*}
|
||||||
|
local VALUE=${SETTING#*=}
|
||||||
|
echo "${KEY} ${VALUE}" >> "$2"
|
||||||
|
done
|
||||||
fi
|
fi
|
||||||
fi
|
}
|
||||||
|
|
||||||
# Start freshclam daemon for automatic updates
|
# process_file "${CLAMD_SETTINGS_CSV}" /etc/clamav/clamd.conf
|
||||||
echo "Starting freshclam daemon for automatic updates..."
|
# process_file "${FRESHCLAM_SETTINGS_CSV}" /etc/clamav/freshclam.conf
|
||||||
sg clamav -c "freshclam -d" &
|
|
||||||
|
|
||||||
|
# start in background
|
||||||
|
freshclam -d &
|
||||||
# /etc/init.d/clamav-freshclam start &
|
# /etc/init.d/clamav-freshclam start &
|
||||||
# Start clamd in background
|
clamd
|
||||||
# Start clamd in foreground (so dumb-init can supervise it)
|
|
||||||
# /etc/init.d/clamav-daemon start &
|
# /etc/init.d/clamav-daemon start &
|
||||||
|
|
||||||
# Start clamd daemon in background using sg
|
# change back to CMD of dockerfile
|
||||||
echo "Starting ClamAV daemon..."
|
exec "$@"
|
||||||
# sg clamav -c "clamd" &
|
|
||||||
# Use sg to run clamd with proper group permissions
|
|
||||||
# sg clamav -c "clamd" &
|
|
||||||
sg clamav -c "clamd --config-file=/etc/clamav/clamd.conf" &
|
|
||||||
|
|
||||||
|
|
||||||
# Give services time to start
|
|
||||||
echo "Waiting for services to initialize..."
|
|
||||||
sleep 8
|
|
||||||
|
|
||||||
# simple check
|
|
||||||
if pgrep clamd > /dev/null; then
|
|
||||||
echo "✓ ClamAV daemon is running"
|
|
||||||
else
|
|
||||||
echo "⚠ ClamAV daemon status uncertain, but continuing..."
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Check if freshclam daemon is running
|
|
||||||
if pgrep freshclam > /dev/null; then
|
|
||||||
echo "✓ Freshclam daemon is running"
|
|
||||||
else
|
|
||||||
echo "⚠ Freshclam daemon status uncertain, but continuing..."
|
|
||||||
fi
|
|
||||||
|
|
||||||
# # change back to CMD of dockerfile
|
|
||||||
# exec "$@"
|
|
||||||
|
|
||||||
echo "✓ ClamAV setup complete"
|
|
||||||
echo "Starting main application..."
|
|
||||||
exec dumb-init -- "$@"
|
|
||||||
|
|
@ -1,278 +0,0 @@
|
||||||
# Dataset Indexing Command
|
|
||||||
|
|
||||||
AdonisJS Ace command for indexing and synchronizing published datasets with OpenSearch for search functionality.
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
The `index:datasets` command processes published datasets and creates/updates corresponding search index documents in OpenSearch. It intelligently compares modification timestamps to only re-index datasets when necessary, optimizing performance while maintaining search index accuracy.
|
|
||||||
|
|
||||||
## Command Syntax
|
|
||||||
|
|
||||||
```bash
|
|
||||||
node ace index:datasets [options]
|
|
||||||
```
|
|
||||||
|
|
||||||
## Options
|
|
||||||
|
|
||||||
| Flag | Alias | Description |
|
|
||||||
|------|-------|-------------|
|
|
||||||
| `--publish_id <number>` | `-p` | Index a specific dataset by publish_id |
|
|
||||||
|
|
||||||
## Usage Examples
|
|
||||||
|
|
||||||
### Basic Operations
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Index all published datasets that have been modified since last indexing
|
|
||||||
node ace index:datasets
|
|
||||||
|
|
||||||
# Index a specific dataset by publish_id
|
|
||||||
node ace index:datasets --publish_id 231
|
|
||||||
node ace index:datasets -p 231
|
|
||||||
```
|
|
||||||
|
|
||||||
## How It Works
|
|
||||||
|
|
||||||
### 1. **Dataset Selection**
|
|
||||||
The command processes datasets that meet these criteria:
|
|
||||||
- `server_state = 'published'` - Only published datasets
|
|
||||||
- Has preloaded `xmlCache` relationship for metadata transformation
|
|
||||||
- Optionally filtered by a specific `publish_id` (see the query sketch below)
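A minimal sketch of such a selection query, modelled on the `getDatasets()` helpers shown elsewhere in this diff; the `selectDatasets` wrapper and the optional `publishId` argument are illustrative assumptions:

```typescript
import Dataset from '#models/dataset';

// Hedged sketch: published datasets with their cached XML, optionally narrowed
// to a single publish_id (the -p / --publish_id flag).
async function selectDatasets(publishId?: number) {
    const query = Dataset.query()
        .preload('xmlCache')
        .where('server_state', 'published');

    if (publishId) {
        query.where('publish_id', publishId);
    }

    return await query.exec();
}
```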
|
|
||||||
|
|
||||||
### 2. **Smart Update Detection**
|
|
||||||
For each dataset, the command:
|
|
||||||
- Checks if the dataset exists in the OpenSearch index
|
|
||||||
- Compares `server_date_modified` timestamps
|
|
||||||
- Only re-indexes if the dataset is newer than the indexed version
|
|
||||||
|
|
||||||
### 3. **Document Processing**
|
|
||||||
The indexing process involves:
|
|
||||||
1. **XML Generation**: Creates structured XML from dataset metadata
|
|
||||||
2. **XSLT Transformation**: Converts XML to JSON using Saxon-JS processor
|
|
||||||
3. **Index Update**: Updates or creates the document in OpenSearch
|
|
||||||
4. **Logging**: Records success/failure for each operation
|
|
||||||
|
|
||||||
## Index Structure
|
|
||||||
|
|
||||||
### Index Configuration
|
|
||||||
- **Index Name**: `tethys-records`
|
|
||||||
- **Document ID**: Dataset `publish_id`
|
|
||||||
- **Refresh**: `true` (immediate availability)
|
|
||||||
|
|
||||||
### Document Fields
|
|
||||||
The indexed documents contain:
|
|
||||||
- **Metadata Fields**: Title, description, authors, keywords
|
|
||||||
- **Identifiers**: DOI, publish_id, and other identifiers
|
|
||||||
- **Temporal Data**: Publication dates, coverage periods
|
|
||||||
- **Geographic Data**: Spatial coverage information
|
|
||||||
- **Technical Details**: Data formats, access information
|
|
||||||
- **Timestamps**: Creation and modification dates
|
|
||||||
|
|
||||||
## Example Output
|
|
||||||
|
|
||||||
### Successful Run
|
|
||||||
```bash
|
|
||||||
node ace index:datasets
|
|
||||||
```
|
|
||||||
```
|
|
||||||
Found 150 published datasets to process
|
|
||||||
Dataset with publish_id 231 successfully indexed
|
|
||||||
Dataset with publish_id 245 is up to date, skipping indexing
|
|
||||||
Dataset with publish_id 267 successfully indexed
|
|
||||||
An error occurred while indexing dataset with publish_id 289. Error: Invalid XML metadata
|
|
||||||
Processing completed: 148 indexed, 1 skipped, 1 error
|
|
||||||
```
|
|
||||||
|
|
||||||
### Specific Dataset
|
|
||||||
```bash
|
|
||||||
node ace index:datasets --publish_id 231
|
|
||||||
```
|
|
||||||
```
|
|
||||||
Found 1 published dataset to process
|
|
||||||
Dataset with publish_id 231 successfully indexed
|
|
||||||
Processing completed: 1 indexed, 0 skipped, 0 errors
|
|
||||||
```
|
|
||||||
|
|
||||||
## Update Logic
|
|
||||||
|
|
||||||
The command uses intelligent indexing to avoid unnecessary processing:
|
|
||||||
|
|
||||||
| Condition | Action | Reason |
|
|
||||||
|-----------|--------|--------|
|
|
||||||
| Dataset not in index | ✅ Index | New dataset needs indexing |
|
|
||||||
| Dataset newer than indexed version | ✅ Re-index | Dataset has been updated |
|
|
||||||
| Dataset same/older than indexed version | ❌ Skip | Already up to date |
|
|
||||||
| OpenSearch document check fails | ✅ Index | Better safe than sorry |
|
|
||||||
| Invalid XML metadata | ❌ Skip + Log Error | Cannot process invalid data |
|
|
||||||
|
|
||||||
### Timestamp Comparison
|
|
||||||
```typescript
|
|
||||||
// Example comparison logic
|
|
||||||
const existingModified = DateTime.fromMillis(Number(existingDoc.server_date_modified) * 1000);
|
|
||||||
const currentModified = dataset.server_date_modified;
|
|
||||||
|
|
||||||
if (currentModified <= existingModified) {
|
|
||||||
// Skip - already up to date
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Proceed with indexing
|
|
||||||
```
|
|
||||||
|
|
||||||
## XML Transformation Process
|
|
||||||
|
|
||||||
### 1. **XML Generation**
|
|
||||||
```xml
|
|
||||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
||||||
<root>
|
|
||||||
<Dataset>
|
|
||||||
<!-- Dataset metadata fields -->
|
|
||||||
<title>Research Dataset Title</title>
|
|
||||||
<description>Dataset description...</description>
|
|
||||||
<!-- Additional metadata -->
|
|
||||||
</Dataset>
|
|
||||||
</root>
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2. **XSLT Processing**
|
|
||||||
The command uses Saxon-JS with a compiled stylesheet (`solr.sef.json`) to transform XML to JSON:
|
|
||||||
```javascript
|
|
||||||
const result = await SaxonJS.transform({
|
|
||||||
stylesheetText: proc,
|
|
||||||
destination: 'serialized',
|
|
||||||
sourceText: xmlString,
|
|
||||||
});
|
|
||||||
```
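As a rough sketch of where `proc` comes from and how the serialized result is turned back into an object — assuming the `saxon-js` npm package and the stylesheet path listed under Required Files; the helper name is made up:

```typescript
import { readFile } from 'node:fs/promises';
import SaxonJS from 'saxon-js';

// Hedged sketch: load the compiled stylesheet (SEF file) and transform dataset XML to JSON.
async function transformXmlToJson(xmlString: string): Promise<Record<string, unknown>> {
    const proc = await readFile('public/assets2/solr.sef.json', 'utf8');
    const result = await SaxonJS.transform({
        stylesheetText: proc,
        destination: 'serialized',
        sourceText: xmlString,
    });
    return JSON.parse(result.principalResult);
}
```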
|
|
||||||
|
|
||||||
### 3. **Final JSON Document**
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"id": "231",
|
|
||||||
"title": "Research Dataset Title",
|
|
||||||
"description": "Dataset description...",
|
|
||||||
"authors": ["Author Name"],
|
|
||||||
"server_date_modified": 1634567890,
|
|
||||||
"publish_id": 231
|
|
||||||
}
|
|
||||||
```
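To illustrate the final index update, a minimal sketch using the OpenSearch JavaScript client; the client construction and function name are assumptions, while the index name, document id and `refresh` setting follow the configuration above:

```typescript
import { Client } from '@opensearch-project/opensearch';

// Hypothetical client setup; OPENSEARCH_HOST is described under Configuration Requirements.
const client = new Client({ node: `http://${process.env.OPENSEARCH_HOST ?? 'localhost:9200'}` });

// Hedged sketch: write the transformed JSON document into the search index.
async function indexDocument(publishId: number, doc: Record<string, unknown>): Promise<void> {
    await client.index({
        index: 'tethys-records', // index name
        id: String(publishId),   // document id = publish_id
        body: doc,               // transformed JSON document
        refresh: true,           // immediate availability
    });
}
```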
|
|
||||||
|
|
||||||
## Configuration Requirements
|
|
||||||
|
|
||||||
### Environment Variables
|
|
||||||
```bash
|
|
||||||
# OpenSearch Configuration
|
|
||||||
OPENSEARCH_HOST=localhost:9200
|
|
||||||
|
|
||||||
# For production:
|
|
||||||
# OPENSEARCH_HOST=your-opensearch-cluster:9200
|
|
||||||
```
|
|
||||||
|
|
||||||
### Required Files
|
|
||||||
- **XSLT Stylesheet**: `public/assets2/solr.sef.json` - Compiled Saxon-JS stylesheet for XML transformation
|
|
||||||
|
|
||||||
### Database Relationships
|
|
||||||
The command expects these model relationships:
|
|
||||||
```typescript
|
|
||||||
// Dataset model must have:
|
|
||||||
@hasOne(() => XmlCache, { foreignKey: 'dataset_id' })
|
|
||||||
public xmlCache: HasOne<typeof XmlCache>
|
|
||||||
```
|
|
||||||
|
|
||||||
## Error Handling
|
|
||||||
|
|
||||||
The command handles various error scenarios gracefully:
|
|
||||||
|
|
||||||
### Common Errors and Solutions
|
|
||||||
|
|
||||||
| Error | Cause | Solution |
|
|
||||||
|-------|-------|----------|
|
|
||||||
| `XSLT transformation failed` | Invalid XML or missing stylesheet | Check XML structure and stylesheet path |
|
|
||||||
| `OpenSearch connection error` | Service unavailable | Verify OpenSearch is running and accessible |
|
|
||||||
| `JSON parse error` | Malformed transformation result | Check XSLT stylesheet output format |
|
|
||||||
| `Missing xmlCache relationship` | Data integrity issue | Ensure xmlCache exists for dataset |
|
|
||||||
|
|
||||||
### Error Logging
|
|
||||||
```bash
|
|
||||||
# Typical error log entry
|
|
||||||
An error occurred while indexing dataset with publish_id 231.
|
|
||||||
Error: XSLT transformation failed: Invalid XML structure at line 15
|
|
||||||
```
|
|
||||||
|
|
||||||
## Performance Considerations
|
|
||||||
|
|
||||||
### Batch Processing
|
|
||||||
- Processes datasets sequentially to avoid overwhelming OpenSearch
|
|
||||||
- Each dataset is committed individually for reliability
|
|
||||||
- Failed indexing of one dataset doesn't stop processing others
|
|
||||||
|
|
||||||
### Resource Usage
|
|
||||||
- **Memory**: XML/JSON transformations require temporary memory
|
|
||||||
- **Network**: OpenSearch API calls for each dataset
|
|
||||||
- **CPU**: XSLT transformations are CPU-intensive
|
|
||||||
|
|
||||||
### Optimization Tips
|
|
||||||
```bash
|
|
||||||
# Index only recently modified datasets (run regularly)
|
|
||||||
node ace index:datasets
|
|
||||||
|
|
||||||
# Index specific datasets when needed
|
|
||||||
node ace index:datasets --publish_id 231
|
|
||||||
|
|
||||||
# Consider running during off-peak hours for large batches
|
|
||||||
```
|
|
||||||
|
|
||||||
## Integration with Other Systems
|
|
||||||
|
|
||||||
### Search Functionality
|
|
||||||
The indexed documents power:
|
|
||||||
- **Dataset Search**: Full-text search across metadata
|
|
||||||
- **Faceted Browsing**: Filter by authors, keywords, dates
|
|
||||||
- **Geographic Search**: Spatial query capabilities
|
|
||||||
- **Auto-complete**: Suggest dataset titles and keywords
|
|
||||||
|
|
||||||
### Related Commands
|
|
||||||
- [`update:datacite`](update-datacite.md) - Often run after indexing to sync DOI metadata
|
|
||||||
- **Database migrations** - May require re-indexing after schema changes
|
|
||||||
|
|
||||||
### API Integration
|
|
||||||
The indexed data is consumed by:
|
|
||||||
- **Search API**: `/api/search` endpoints
|
|
||||||
- **Browse API**: `/api/datasets` with filtering
|
|
||||||
- **Recommendations**: Related dataset suggestions
|
|
||||||
|
|
||||||
## Monitoring and Maintenance
|
|
||||||
|
|
||||||
### Regular Tasks
|
|
||||||
```bash
|
|
||||||
# Daily indexing (recommended cron job)
|
|
||||||
0 2 * * * cd /path/to/project && node ace index:datasets
|
|
||||||
|
|
||||||
# Weekly safety-net run (only modified datasets are re-indexed; the command has no --force flag)
0 3 * * 0 cd /path/to/project && node ace index:datasets
|
|
||||||
```
|
|
||||||
|
|
||||||
### Health Checks
|
|
||||||
- Monitor OpenSearch cluster health
|
|
||||||
- Check for failed indexing operations in logs
|
|
||||||
- Verify search functionality is working
|
|
||||||
- Compare dataset counts between database and index (a comparison sketch follows below)
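A quick way to do that last check is to compare a database count with an index count. A hedged sketch — the client wiring and the exact response shape may differ in this project:

```typescript
import Dataset from '#models/dataset';
import { Client } from '@opensearch-project/opensearch';

// Hedged sketch: compare published datasets in the database with documents in the index.
async function compareCounts(client: Client): Promise<void> {
    const rows = await Dataset.query().where('server_state', 'published').count('* as total');
    const dbCount = Number(rows[0].$extras.total);

    const { body } = await client.count({ index: 'tethys-records' });
    console.log(`database: ${dbCount}, index: ${body.count}, drift: ${dbCount - body.count}`);
}
```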
|
|
||||||
|
|
||||||
### Troubleshooting
|
|
||||||
```bash
|
|
||||||
# Check specific dataset indexing
|
|
||||||
node ace index:datasets --publish_id 231
|
|
||||||
|
|
||||||
# Verify OpenSearch connectivity
|
|
||||||
curl -X GET "localhost:9200/_cluster/health"
|
|
||||||
|
|
||||||
# Check index statistics
|
|
||||||
curl -X GET "localhost:9200/tethys-records/_stats"
|
|
||||||
```
|
|
||||||
|
|
||||||
## Best Practices
|
|
||||||
|
|
||||||
1. **Regular Scheduling**: Run the command regularly (daily) to keep the search index current
|
|
||||||
2. **Monitor Logs**: Watch for transformation errors or OpenSearch issues
|
|
||||||
3. **Backup Strategy**: Include OpenSearch indices in backup procedures
|
|
||||||
4. **Resource Management**: Monitor OpenSearch cluster resources during bulk operations
|
|
||||||
5. **Testing**: Verify search functionality after major indexing operations
|
|
||||||
6. **Coordination**: Run indexing before DataCite updates when both are needed
|
|
||||||
|
|
@ -1,216 +0,0 @@
|
||||||
# DataCite Update Command
|
|
||||||
|
|
||||||
AdonisJS Ace command for updating DataCite DOI records for published datasets.
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
The `update:datacite` command synchronizes your local dataset metadata with DataCite DOI records. It intelligently compares modification dates to only update records when necessary, reducing unnecessary API calls and maintaining data consistency.
|
|
||||||
|
|
||||||
## Command Syntax
|
|
||||||
|
|
||||||
```bash
|
|
||||||
node ace update:datacite [options]
|
|
||||||
```
|
|
||||||
|
|
||||||
## Options
|
|
||||||
|
|
||||||
| Flag | Alias | Description |
|
|
||||||
|------|-------|-------------|
|
|
||||||
| `--publish_id <number>` | `-p` | Update a specific dataset by publish_id |
|
|
||||||
| `--force` | `-f` | Force update all records regardless of modification date |
|
|
||||||
| `--dry-run` | `-d` | Preview what would be updated without making changes |
|
|
||||||
| `--stats` | `-s` | Show detailed statistics for datasets that need updating |
|
|
||||||
|
|
||||||
## Usage Examples
|
|
||||||
|
|
||||||
### Basic Operations
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Update all datasets that have been modified since their DOI was last updated
|
|
||||||
node ace update:datacite
|
|
||||||
|
|
||||||
# Update a specific dataset
|
|
||||||
node ace update:datacite --publish_id 231
|
|
||||||
node ace update:datacite -p 231
|
|
||||||
|
|
||||||
# Force update all datasets with DOIs (ignores modification dates)
|
|
||||||
node ace update:datacite --force
|
|
||||||
```
|
|
||||||
|
|
||||||
### Preview and Analysis
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Preview what would be updated (dry run)
|
|
||||||
node ace update:datacite --dry-run
|
|
||||||
|
|
||||||
# Show detailed statistics for datasets that need updating
|
|
||||||
node ace update:datacite --stats
|
|
||||||
|
|
||||||
# Show stats for a specific dataset
|
|
||||||
node ace update:datacite --stats --publish_id 231
|
|
||||||
```
|
|
||||||
|
|
||||||
### Combined Options
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Dry run for a specific dataset
|
|
||||||
node ace update:datacite --dry-run --publish_id 231
|
|
||||||
|
|
||||||
# Show stats for all datasets (including up-to-date ones)
|
|
||||||
node ace update:datacite --stats --force
|
|
||||||
```
|
|
||||||
|
|
||||||
## Command Modes
|
|
||||||
|
|
||||||
### 1. **Normal Mode** (Default)
|
|
||||||
Updates DataCite records for datasets that have been modified since their DOI was last updated.
|
|
||||||
|
|
||||||
**Example Output:**
|
|
||||||
```
|
|
||||||
Using DataCite API: https://api.test.datacite.org
|
|
||||||
Found 50 datasets to process
|
|
||||||
Dataset 231: Successfully updated DataCite record
|
|
||||||
Dataset 245: Up to date, skipping
|
|
||||||
Dataset 267: Successfully updated DataCite record
|
|
||||||
DataCite update completed. Updated: 15, Skipped: 35, Errors: 0
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2. **Dry Run Mode** (`--dry-run`)
|
|
||||||
Shows what would be updated without making any changes to DataCite.
|
|
||||||
|
|
||||||
**Use Case:** Preview updates before running the actual command.
|
|
||||||
|
|
||||||
**Example Output:**
|
|
||||||
```
|
|
||||||
Dataset 231: Would update DataCite record (dry run)
|
|
||||||
Dataset 267: Would update DataCite record (dry run)
|
|
||||||
Dataset 245: Up to date, skipping
|
|
||||||
DataCite update completed. Updated: 2, Skipped: 1, Errors: 0
|
|
||||||
```
|
|
||||||
|
|
||||||
### 3. **Stats Mode** (`--stats`)
|
|
||||||
Shows detailed information for each dataset that needs updating, including why it needs updating.
|
|
||||||
|
|
||||||
**Use Case:** Debug synchronization issues, monitor dataset/DOI status, generate reports.
|
|
||||||
|
|
||||||
**Example Output:**
|
|
||||||
```
|
|
||||||
┌─ Dataset 231 ─────────────────────────────────────────────────────────
|
|
||||||
│ DOI Value: 10.21388/tethys.231
|
|
||||||
│ DOI Status (DB): findable
|
|
||||||
│ DOI State (DataCite): findable
|
|
||||||
│ Dataset Modified: 2024-09-15T10:30:00.000Z
|
|
||||||
│ DOI Modified: 2024-09-10T08:15:00.000Z
|
|
||||||
│ Needs Update: YES - Dataset newer than DOI
|
|
||||||
└───────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
┌─ Dataset 267 ─────────────────────────────────────────────────────────
|
|
||||||
│ DOI Value: 10.21388/tethys.267
|
|
||||||
│ DOI Status (DB): findable
|
|
||||||
│ DOI State (DataCite): findable
|
|
||||||
│ Dataset Modified: 2024-09-18T14:20:00.000Z
|
|
||||||
│ DOI Modified: 2024-09-16T12:45:00.000Z
|
|
||||||
│ Needs Update: YES - Dataset newer than DOI
|
|
||||||
└───────────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
DataCite Stats Summary: 2 datasets need updating, 48 are up to date
|
|
||||||
```
|
|
||||||
|
|
||||||
## Update Logic
|
|
||||||
|
|
||||||
The command uses intelligent update detection:
|
|
||||||
|
|
||||||
1. **Compares modification dates**: Dataset `server_date_modified` vs DOI last modification date from DataCite
|
|
||||||
2. **Validates data integrity**: Checks for missing or future dates
|
|
||||||
3. **Handles API failures gracefully**: Updates anyway if DataCite info can't be retrieved
|
|
||||||
4. **Uses dual API approach**: DataCite REST API (primary) with MDS API fallback
|
|
||||||
|
|
||||||
### When Updates Happen
|
|
||||||
|
|
||||||
| Condition | Action | Reason |
|
|
||||||
|-----------|--------|--------|
|
|
||||||
| Dataset modified > DOI modified | ✅ Update | Dataset has newer changes |
|
|
||||||
| Dataset modified ≤ DOI modified | ❌ Skip | DOI is up to date |
|
|
||||||
| Dataset date in future | ❌ Skip | Invalid data, needs investigation |
|
|
||||||
| Dataset date missing | ✅ Update | Can't determine staleness |
|
|
||||||
| DataCite API error | ✅ Update | Better safe than sorry |
|
|
||||||
| `--force` flag used | ✅ Update | Override all logic |
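Condensed into code, the decision in the table above looks roughly like this sketch (the 60-second tolerance mirrors the value used in the command; the real code additionally verifies that a DOI identifier exists):

```typescript
import { DateTime } from 'luxon';

// Hedged sketch of the update decision described in the table above.
function needsDataCiteUpdate(datasetModified: DateTime | null, doiModified: DateTime | null): boolean {
    if (!datasetModified) return true; // missing date: staleness can't be determined
    if (datasetModified > DateTime.now()) return false; // future date: invalid data, skip
    if (!doiModified) return true; // DataCite info unavailable: better safe than sorry
    if (datasetModified <= doiModified) return false; // DOI is up to date
    const diffInSeconds = Math.abs(datasetModified.diff(doiModified, 'seconds').seconds);
    return diffInSeconds > 60; // ignore minor timestamp differences
}
```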
|
|
||||||
|
|
||||||
## Environment Configuration
|
|
||||||
|
|
||||||
Required environment variables:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# DataCite Credentials
|
|
||||||
DATACITE_USERNAME=your_username
|
|
||||||
DATACITE_PASSWORD=your_password
|
|
||||||
|
|
||||||
# API Endpoints (environment-specific)
|
|
||||||
DATACITE_API_URL=https://api.test.datacite.org # Test environment
|
|
||||||
DATACITE_SERVICE_URL=https://mds.test.datacite.org # Test MDS
|
|
||||||
|
|
||||||
DATACITE_API_URL=https://api.datacite.org # Production
|
|
||||||
DATACITE_SERVICE_URL=https://mds.datacite.org # Production MDS
|
|
||||||
|
|
||||||
# Project Configuration
|
|
||||||
DATACITE_PREFIX=10.21388 # Your DOI prefix
|
|
||||||
BASE_DOMAIN=tethys.at # Your domain
|
|
||||||
```
|
|
||||||
|
|
||||||
## Error Handling
|
|
||||||
|
|
||||||
The command handles various error scenarios:
|
|
||||||
|
|
||||||
- **Invalid modification dates**: Logs errors but continues processing other datasets
|
|
||||||
- **DataCite API failures**: Falls back to MDS API, then to safe update
|
|
||||||
- **Missing DOI identifiers**: Skips datasets without DOI identifiers
|
|
||||||
- **Network issues**: Continues with next dataset after logging error
|
|
||||||
|
|
||||||
## Integration
|
|
||||||
|
|
||||||
The command integrates with:
|
|
||||||
|
|
||||||
- **Dataset Model**: Uses `server_date_modified` for change detection
|
|
||||||
- **DatasetIdentifier Model**: Reads DOI values and status
|
|
||||||
- **OpenSearch Index**: Updates search index after DataCite update
|
|
||||||
- **DoiClient**: Handles all DataCite API interactions
|
|
||||||
|
|
||||||
## Common Workflows
|
|
||||||
|
|
||||||
### Daily Maintenance
|
|
||||||
```bash
|
|
||||||
# Update any datasets modified today
|
|
||||||
node ace update:datacite
|
|
||||||
```
|
|
||||||
|
|
||||||
### Pre-Deployment Check
|
|
||||||
```bash
|
|
||||||
# Check what would be updated before deployment
|
|
||||||
node ace update:datacite --dry-run
|
|
||||||
```
|
|
||||||
|
|
||||||
### Debugging Sync Issues
|
|
||||||
```bash
|
|
||||||
# Investigate why specific dataset isn't syncing
|
|
||||||
node ace update:datacite --stats --publish_id 231
|
|
||||||
```
|
|
||||||
|
|
||||||
### Full Resync
|
|
||||||
```bash
|
|
||||||
# Force update all DOI records (use with caution)
|
|
||||||
node ace update:datacite --force
|
|
||||||
```
|
|
||||||
|
|
||||||
### Monitoring Report
|
|
||||||
```bash
|
|
||||||
# Generate sync status report
|
|
||||||
node ace update:datacite --stats > datacite-sync-report.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
## Best Practices
|
|
||||||
|
|
||||||
1. **Regular Updates**: Run daily or after bulk dataset modifications
|
|
||||||
2. **Test First**: Use `--dry-run` or `--stats` before bulk operations
|
|
||||||
3. **Monitor Logs**: Check for data integrity warnings
|
|
||||||
4. **Environment Separation**: Use correct API URLs for test vs production
|
|
||||||
5. **Rate Limiting**: The command handles DataCite rate limits automatically
|
|
||||||
222
freshclam.conf
|
|
@ -1,47 +1,229 @@
|
||||||
##
|
##
|
||||||
## Container-optimized freshclam configuration
|
## Example config file for freshclam
|
||||||
|
## Please read the freshclam.conf(5) manual before editing this file.
|
||||||
##
|
##
|
||||||
|
|
||||||
# Database directory
|
|
||||||
|
# Comment or remove the line below.
|
||||||
|
|
||||||
|
# Path to the database directory.
|
||||||
|
# WARNING: It must match clamd.conf's directive!
|
||||||
|
# Default: hardcoded (depends on installation options)
|
||||||
DatabaseDirectory /var/lib/clamav
|
DatabaseDirectory /var/lib/clamav
|
||||||
|
|
||||||
# Log to stdout for container logging
|
# Path to the log file (make sure it has proper permissions)
|
||||||
|
# Default: disabled
|
||||||
# UpdateLogFile /dev/stdout
|
# UpdateLogFile /dev/stdout
|
||||||
|
|
||||||
# Basic logging settings
|
# Maximum size of the log file.
|
||||||
|
# Value of 0 disables the limit.
|
||||||
|
# You may use 'M' or 'm' for megabytes (1M = 1m = 1048576 bytes)
|
||||||
|
# and 'K' or 'k' for kilobytes (1K = 1k = 1024 bytes).
|
||||||
|
# in bytes just don't use modifiers. If LogFileMaxSize is enabled,
|
||||||
|
# log rotation (the LogRotate option) will always be enabled.
|
||||||
|
# Default: 1M
|
||||||
|
#LogFileMaxSize 2M
|
||||||
|
|
||||||
|
# Log time with each message.
|
||||||
|
# Default: no
|
||||||
LogTime yes
|
LogTime yes
|
||||||
LogVerbose no
|
|
||||||
|
# Enable verbose logging.
|
||||||
|
# Default: no
|
||||||
|
LogVerbose yes
|
||||||
|
|
||||||
|
# Use system logger (can work together with UpdateLogFile).
|
||||||
|
# Default: no
|
||||||
LogSyslog no
|
LogSyslog no
|
||||||
|
|
||||||
# PID file location
|
# Specify the type of syslog messages - please refer to 'man syslog'
|
||||||
|
# for facility names.
|
||||||
|
# Default: LOG_LOCAL6
|
||||||
|
#LogFacility LOG_MAIL
|
||||||
|
|
||||||
|
# Enable log rotation. Always enabled when LogFileMaxSize is enabled.
|
||||||
|
# Default: no
|
||||||
|
#LogRotate yes
|
||||||
|
|
||||||
|
# This option allows you to save the process identifier of the daemon
|
||||||
|
# Default: disabled
|
||||||
|
#PidFile /var/run/freshclam.pid
|
||||||
PidFile /var/run/clamav/freshclam.pid
|
PidFile /var/run/clamav/freshclam.pid
|
||||||
|
|
||||||
# Database owner
|
# By default when started freshclam drops privileges and switches to the
|
||||||
DatabaseOwner clamav
|
# "clamav" user. This directive allows you to change the database owner.
|
||||||
|
# Default: clamav (may depend on installation options)
|
||||||
|
DatabaseOwner node
|
||||||
|
|
||||||
# Mirror settings for Austria
|
# Use DNS to verify virus database version. Freshclam uses DNS TXT records
|
||||||
|
# to verify database and software versions. With this directive you can change
|
||||||
|
# the database verification domain.
|
||||||
|
# WARNING: Do not touch it unless you're configuring freshclam to use your
|
||||||
|
# own database verification domain.
|
||||||
|
# Default: current.cvd.clamav.net
|
||||||
|
#DNSDatabaseInfo current.cvd.clamav.net
|
||||||
|
|
||||||
|
# Uncomment the following line and replace XY with your country
|
||||||
|
# code. See http://www.iana.org/cctld/cctld-whois.htm for the full list.
|
||||||
|
# You can use db.XY.ipv6.clamav.net for IPv6 connections.
|
||||||
DatabaseMirror db.at.clamav.net
|
DatabaseMirror db.at.clamav.net
|
||||||
|
|
||||||
|
# database.clamav.net is a round-robin record which points to our most
|
||||||
|
# reliable mirrors. It's used as a fall back in case db.XY.clamav.net is
|
||||||
|
# not working. DO NOT TOUCH the following line unless you know what you
|
||||||
|
# are doing.
|
||||||
DatabaseMirror database.clamav.net
|
DatabaseMirror database.clamav.net
|
||||||
|
|
||||||
|
# How many attempts to make before giving up.
|
||||||
|
# Default: 3 (per mirror)
|
||||||
|
#MaxAttempts 5
|
||||||
|
|
||||||
# With this option you can control scripted updates. It's highly recommended
|
# With this option you can control scripted updates. It's highly recommended
|
||||||
# to keep it enabled.
|
# to keep it enabled.
|
||||||
# Default: yes
|
# Default: yes
|
||||||
# Update settings
|
#ScriptedUpdates yes
|
||||||
ScriptedUpdates yes
|
|
||||||
|
# By default freshclam will keep the local databases (.cld) uncompressed to
|
||||||
|
# make their handling faster. With this option you can enable the compression;
|
||||||
|
# the change will take effect with the next database update.
|
||||||
|
# Default: no
|
||||||
|
#CompressLocalDatabase no
|
||||||
|
|
||||||
|
# With this option you can provide custom sources (http:// or file://) for
|
||||||
|
# database files. This option can be used multiple times.
|
||||||
|
# Default: no custom URLs
|
||||||
|
#DatabaseCustomURL http://myserver.com/mysigs.ndb
|
||||||
|
#DatabaseCustomURL file:///mnt/nfs/local.hdb
|
||||||
|
|
||||||
|
# This option allows you to easily point freshclam to private mirrors.
|
||||||
|
# If PrivateMirror is set, freshclam does not attempt to use DNS
|
||||||
|
# to determine whether its databases are out-of-date, instead it will
|
||||||
|
# use the If-Modified-Since request or directly check the headers of the
|
||||||
|
# remote database files. For each database, freshclam first attempts
|
||||||
|
# to download the CLD file. If that fails, it tries to download the
|
||||||
|
# CVD file. This option overrides DatabaseMirror, DNSDatabaseInfo
|
||||||
|
# and ScriptedUpdates. It can be used multiple times to provide
|
||||||
|
# fall-back mirrors.
|
||||||
|
# Default: disabled
|
||||||
|
#PrivateMirror mirror1.mynetwork.com
|
||||||
|
#PrivateMirror mirror2.mynetwork.com
|
||||||
|
|
||||||
# Number of database checks per day.
|
# Number of database checks per day.
|
||||||
# Default: 12 (every two hours)
|
# Default: 12 (every two hours)
|
||||||
Checks 12
|
#Checks 24
|
||||||
|
|
||||||
# Don't fork (good for containers)
|
# Proxy settings
|
||||||
|
# Default: disabled
|
||||||
|
#HTTPProxyServer myproxy.com
|
||||||
|
#HTTPProxyPort 1234
|
||||||
|
#HTTPProxyUsername myusername
|
||||||
|
#HTTPProxyPassword mypass
|
||||||
|
|
||||||
|
# If your servers are behind a firewall/proxy which applies User-Agent
|
||||||
|
# filtering you can use this option to force the use of a different
|
||||||
|
# User-Agent header.
|
||||||
|
# Default: clamav/version_number
|
||||||
|
#HTTPUserAgent SomeUserAgentIdString
|
||||||
|
|
||||||
|
# Use aaa.bbb.ccc.ddd as client address for downloading databases. Useful for
|
||||||
|
# multi-homed systems.
|
||||||
|
# Default: Use OS'es default outgoing IP address.
|
||||||
|
#LocalIPAddress aaa.bbb.ccc.ddd
|
||||||
|
|
||||||
|
# Send the RELOAD command to clamd.
|
||||||
|
# Default: no
|
||||||
|
#NotifyClamd /path/to/clamd.conf
|
||||||
|
|
||||||
|
# Run command after successful database update.
|
||||||
|
# Default: disabled
|
||||||
|
#OnUpdateExecute command
|
||||||
|
|
||||||
|
# Run command when database update process fails.
|
||||||
|
# Default: disabled
|
||||||
|
#OnErrorExecute command
|
||||||
|
|
||||||
|
# Run command when freshclam reports outdated version.
|
||||||
|
# In the command string %v will be replaced by the new version number.
|
||||||
|
# Default: disabled
|
||||||
|
#OnOutdatedExecute command
|
||||||
|
|
||||||
|
# Don't fork into background.
|
||||||
|
# Default: no
|
||||||
Foreground no
|
Foreground no
|
||||||
|
|
||||||
# Connection timeouts
|
# Enable debug messages in libclamav.
|
||||||
ConnectTimeout 60
|
# Default: no
|
||||||
ReceiveTimeout 60
|
#Debug yes
|
||||||
|
|
||||||
# Test databases before using them
|
# Timeout in seconds when connecting to database server.
|
||||||
TestDatabases yes
|
# Default: 30
|
||||||
|
#ConnectTimeout 60
|
||||||
|
|
||||||
# Enable bytecode signatures
|
# Timeout in seconds when reading from database server.
|
||||||
Bytecode yes
|
# Default: 30
|
||||||
|
#ReceiveTimeout 60
|
||||||
|
|
||||||
|
# With this option enabled, freshclam will attempt to load new
|
||||||
|
# databases into memory to make sure they are properly handled
|
||||||
|
# by libclamav before replacing the old ones.
|
||||||
|
# Default: yes
|
||||||
|
#TestDatabases yes
|
||||||
|
|
||||||
|
# When enabled freshclam will submit statistics to the ClamAV Project about
|
||||||
|
# the latest virus detections in your environment. The ClamAV maintainers
|
||||||
|
# will then use this data to determine what types of malware are the most
|
||||||
|
# detected in the field and in what geographic area they are.
|
||||||
|
# Freshclam will connect to clamd in order to get recent statistics.
|
||||||
|
# Default: no
|
||||||
|
#SubmitDetectionStats /path/to/clamd.conf
|
||||||
|
|
||||||
|
# Country of origin of malware/detection statistics (for statistical
|
||||||
|
# purposes only). The statistics collector at ClamAV.net will look up
|
||||||
|
# your IP address to determine the geographical origin of the malware
|
||||||
|
# reported by your installation. If this installation is mainly used to
|
||||||
|
# scan data which comes from a different location, please enable this
|
||||||
|
# option and enter a two-letter code (see http://www.iana.org/domains/root/db/)
|
||||||
|
# of the country of origin.
|
||||||
|
# Default: disabled
|
||||||
|
#DetectionStatsCountry country-code
|
||||||
|
|
||||||
|
# This option enables support for our "Personal Statistics" service.
|
||||||
|
# When this option is enabled, the information on malware detected by
|
||||||
|
# your clamd installation is made available to you through our website.
|
||||||
|
# To get your HostID, log on http://www.stats.clamav.net and add a new
|
||||||
|
# host to your host list. Once you have the HostID, uncomment this option
|
||||||
|
# and paste the HostID here. As soon as your freshclam starts submitting
|
||||||
|
# information to our stats collecting service, you will be able to view
|
||||||
|
# the statistics of this clamd installation by logging into
|
||||||
|
# http://www.stats.clamav.net with the same credentials you used to
|
||||||
|
# generate the HostID. For more information refer to:
|
||||||
|
# http://www.clamav.net/documentation.html#cctts
|
||||||
|
# This feature requires SubmitDetectionStats to be enabled.
|
||||||
|
# Default: disabled
|
||||||
|
#DetectionStatsHostID unique-id
|
||||||
|
|
||||||
|
# This option enables support for Google Safe Browsing. When activated for
|
||||||
|
# the first time, freshclam will download a new database file (safebrowsing.cvd)
|
||||||
|
# which will be automatically loaded by clamd and clamscan during the next
|
||||||
|
# reload, provided that the heuristic phishing detection is turned on. This
|
||||||
|
# database includes information about websites that may be phishing sites or
|
||||||
|
# possible sources of malware. When using this option, it's mandatory to run
|
||||||
|
# freshclam at least every 30 minutes.
|
||||||
|
# Freshclam uses the ClamAV's mirror infrastructure to distribute the
|
||||||
|
# database and its updates but all the contents are provided under Google's
|
||||||
|
# terms of use. See http://www.google.com/transparencyreport/safebrowsing
|
||||||
|
# and http://www.clamav.net/documentation.html#safebrowsing
|
||||||
|
# for more information.
|
||||||
|
# Default: disabled
|
||||||
|
#SafeBrowsing yes
|
||||||
|
|
||||||
|
# This option enables downloading of bytecode.cvd, which includes additional
|
||||||
|
# detection mechanisms and improvements to the ClamAV engine.
|
||||||
|
# Default: enabled
|
||||||
|
#Bytecode yes
|
||||||
|
|
||||||
|
# Download an additional 3rd party signature database distributed through
|
||||||
|
# the ClamAV mirrors.
|
||||||
|
# This option can be used multiple times.
|
||||||
|
#ExtraDatabase dbname1
|
||||||
|
#ExtraDatabase dbname2
|
||||||
|
|
|
||||||
1044
package-lock.json
generated
File diff suppressed because it is too large
@@ -59,6 +59,7 @@
     "hot-hook": "^0.4.0",
     "numeral": "^2.0.6",
     "pinia": "^3.0.2",
+    "pino-pretty": "^13.0.0",
     "postcss-loader": "^8.1.1",
     "prettier": "^3.4.2",
     "supertest": "^6.3.3",
@@ -114,9 +115,7 @@
     "node-2fa": "^2.0.3",
     "node-exceptions": "^4.0.1",
     "notiwind": "^2.0.0",
-    "p-limit": "^7.1.1",
     "pg": "^8.9.0",
-    "pino-pretty": "^13.0.0",
     "qrcode": "^1.5.3",
     "redis": "^5.0.0",
     "reflect-metadata": "^0.2.1",
@@ -6,16 +6,17 @@
 import type { ApplicationService } from '@adonisjs/core/types';
 import vine, { symbols, BaseLiteralType, Vine } from '@vinejs/vine';
 import type { FieldContext, FieldOptions } from '@vinejs/vine/types';
+// import type { MultipartFile, FileValidationOptions } from '@adonisjs/bodyparser/types';
 import type { MultipartFile } from '@adonisjs/core/bodyparser';
 import type { FileValidationOptions } from '@adonisjs/core/types/bodyparser';
 import { Request, RequestValidator } from '@adonisjs/core/http';
 import MimeType from '#models/mime_type';
 
 /**
  * Validation options accepted by the "file" rule
  */
 export type FileRuleValidationOptions = Partial<FileValidationOptions> | ((field: FieldContext) => Partial<FileValidationOptions>);
 
 /**
  * Extend VineJS
  */
@@ -24,7 +25,6 @@ declare module '@vinejs/vine' {
     myfile(options?: FileRuleValidationOptions): VineMultipartFile;
   }
 }
-
 /**
  * Extend HTTP request class
  */
@@ -36,54 +36,19 @@ declare module '@adonisjs/core/http' {
  * Checks if the value is an instance of multipart file
  * from bodyparser.
  */
-export function isBodyParserFile(file: MultipartFile | unknown): file is MultipartFile {
+export function isBodyParserFile(file: MultipartFile | unknown): boolean {
     return !!(file && typeof file === 'object' && 'isMultipartFile' in file);
 }
-
-/**
- * Cache for enabled extensions to reduce database queries
- */
-let extensionsCache: string[] | null = null;
-let cacheTimestamp = 0;
-const CACHE_DURATION = 5 * 60 * 1000; // 5 minutes
-
-/**
- * Get enabled extensions with caching
- */
-export async function getEnabledExtensions(): Promise<string[]> {
-    const now = Date.now();
-
-    if (extensionsCache && now - cacheTimestamp < CACHE_DURATION) {
-        return extensionsCache;
-    }
-
-    try {
-        const enabledExtensions = await MimeType.query().select('file_extension').where('enabled', true).exec();
-
-        const extensions = enabledExtensions
-            .map((extension) => extension.file_extension.split('|'))
-            .flat()
-            .map((ext) => ext.toLowerCase().trim())
-            .filter((ext) => ext.length > 0);
-
-        extensionsCache = [...new Set(extensions)]; // Remove duplicates
-        cacheTimestamp = now;
-
-        return extensionsCache;
-    } catch (error) {
-        console.error('Error fetching enabled extensions:', error);
-        return extensionsCache || [];
-    }
-}
-
-/**
- * Clear extensions cache
- */
-export function clearExtensionsCache(): void {
-    extensionsCache = null;
-    cacheTimestamp = 0;
-}
+export async function getEnabledExtensions() {
+    const enabledExtensions = await MimeType.query().select('file_extension').where('enabled', true).exec();
+    const extensions = enabledExtensions
+        .map((extension) => {
+            return extension.file_extension.split('|');
+        })
+        .flat();
+
+    return extensions;
+}
 
 /**
  * VineJS validation rule that validates the file to be an
  * instance of BodyParser MultipartFile class.
@@ -100,7 +65,6 @@ const isMultipartFile = vine.createRule(async (file: MultipartFile | unknown, op
     // At this point, you can use type assertion to explicitly tell TypeScript that file is of type MultipartFile
     const validatedFile = file as MultipartFile;
     const validationOptions = typeof options === 'function' ? options(field) : options;
-
     /**
      * Set size when it's defined in the options and missing
      * on the file instance
@@ -108,29 +72,30 @@ const isMultipartFile = vine.createRule(async (file: MultipartFile | unknown, op
     if (validatedFile.sizeLimit === undefined && validationOptions.size) {
         validatedFile.sizeLimit = validationOptions.size;
     }
 
     /**
      * Set extensions when it's defined in the options and missing
      * on the file instance
      */
-    if (validatedFile.allowedExtensions === undefined) {
-        if (validationOptions.extnames !== undefined) {
-            validatedFile.allowedExtensions = validationOptions.extnames;
-        } else {
-            validatedFile.allowedExtensions = await getEnabledExtensions();
-        }
+    // if (validatedFile.allowedExtensions === undefined && validationOptions.extnames) {
+    //     validatedFile.allowedExtensions = validationOptions.extnames;
+    // }
+    if (validatedFile.allowedExtensions === undefined && validationOptions.extnames !== undefined) {
+        validatedFile.allowedExtensions = validationOptions.extnames; // await getEnabledExtensions();
+    } else if (validatedFile.allowedExtensions === undefined && validationOptions.extnames === undefined) {
+        validatedFile.allowedExtensions = await getEnabledExtensions();
     }
+    /**
+     * wieder löschen
+     * Set extensions when it's defined in the options and missing
+     * on the file instance
+     */
+    // if (file.clientNameSizeLimit === undefined && validationOptions.clientNameSizeLimit) {
+    //     file.clientNameSizeLimit = validationOptions.clientNameSizeLimit;
+    // }
     /**
      * Validate file
      */
-    try {
-        validatedFile.validate();
-    } catch (error) {
-        field.report(`File validation failed: ${error.message}`, 'file.validation_error', field, validationOptions);
-        return;
-    }
+    validatedFile.validate();
 
     /**
      * Report errors
      */
@@ -142,37 +107,36 @@ const isMultipartFile = vine.createRule(async (file: MultipartFile | unknown, op
 const MULTIPART_FILE: typeof symbols.SUBTYPE = symbols.SUBTYPE;
 
 export class VineMultipartFile extends BaseLiteralType<MultipartFile, MultipartFile, MultipartFile> {
 
     [MULTIPART_FILE]: string;
-    public validationOptions?: FileRuleValidationOptions;
+    // constructor(validationOptions?: FileRuleValidationOptions, options?: FieldOptions) {
+    //     super(options, [isMultipartFile(validationOptions || {})]);
+    //     this.validationOptions = validationOptions;
+    //     this.#private = true;
+    // }
+
+    // clone(): this {
+    //     return new VineMultipartFile(this.validationOptions, this.cloneOptions()) as this;
+    // }
+    // #private;
+    // constructor(validationOptions?: FileRuleValidationOptions, options?: FieldOptions, validations?: Validation<any>[]);
+    // clone(): this;
+
+    public validationOptions;
     // extnames: (18) ['gpkg', 'htm', 'html', 'csv', 'txt', 'asc', 'c', 'cc', 'h', 'srt', 'tiff', 'pdf', 'png', 'zip', 'jpg', 'jpeg', 'jpe', 'xlsx']
     // size: '512mb'
 
+    // public constructor(validationOptions?: FileRuleValidationOptions, options?: FieldOptions, validations?: Validation<any>[]) {
     public constructor(validationOptions?: FileRuleValidationOptions, options?: FieldOptions) {
+        // super(options, validations);
         super(options, [isMultipartFile(validationOptions || {})]);
         this.validationOptions = validationOptions;
     }
 
     public clone(): any {
+        // return new VineMultipartFile(this.validationOptions, this.cloneOptions(), this.cloneValidations());
         return new VineMultipartFile(this.validationOptions, this.cloneOptions());
     }
-
-    /**
-     * Set maximum file size
-     */
-    public maxSize(size: string | number): this {
-        const newOptions = { ...this.validationOptions, size };
-        return new VineMultipartFile(newOptions, this.cloneOptions()) as this;
-    }
-
-    /**
-     * Set allowed extensions
-     */
-    public extensions(extnames: string[]): this {
-        const newOptions = { ...this.validationOptions, extnames };
-        return new VineMultipartFile(newOptions, this.cloneOptions()) as this;
-    }
 
 }
 
 export default class VinejsProvider {
@@ -191,8 +155,13 @@ export default class VinejsProvider {
     /**
      * The container bindings have booted
     */
 
    boot(): void {
-        Vine.macro('myfile', function (this: Vine, options?: FileRuleValidationOptions) {
+        // VineString.macro('translatedLanguage', function (this: VineString, options: Options) {
+        //     return this.use(translatedLanguageRule(options));
+        // });
+
+        Vine.macro('myfile', function (this: Vine, options) {
            return new VineMultipartFile(options);
        });
@@ -206,41 +175,6 @@ export default class VinejsProvider {
        }
        return new RequestValidator(this.ctx).validateUsing(...args);
    });
-
-        // Ensure MIME validation macros are loaded
-        this.loadMimeValidationMacros();
-        this.loadFileScanMacros();
-        this.loadFileLengthMacros();
-    }
-
-    /**
-     * Load MIME validation macros - called during boot to ensure they're available
-     */
-    private async loadMimeValidationMacros(): Promise<void> {
-        try {
-            // Dynamically import the MIME validation rule to ensure macros are registered
-            await import('#start/rules/allowed_extensions_mimetypes');
-        } catch (error) {
-            console.warn('Could not load MIME validation macros:', error);
-        }
-    }
-
-    private async loadFileScanMacros(): Promise<void> {
-        try {
-            // Dynamically import the MIME validation rule to ensure macros are registered
-            await import('#start/rules/file_scan');
-        } catch (error) {
-            console.warn('Could not load MIME validation macros:', error);
-        }
-    }
-
-    private async loadFileLengthMacros(): Promise<void> {
-        try {
-            // Dynamically import the MIME validation rule to ensure macros are registered
-            await import('#start/rules/file_length');
-        } catch (error) {
-            console.warn('Could not load MIME validation macros:', error);
-        }
-    }
    }
 
    /**
@@ -256,7 +190,5 @@ export default class VinejsProvider {
    /**
     * Preparing to shutdown the app
     */
-    async shutdown() {
-        clearExtensionsCache();
-    }
+    async shutdown() {}
 }
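The `myfile` macro registered in `boot()` above is what validators call to obtain a `VineMultipartFile`. A minimal usage sketch follows (not part of this diff; the validator name and controller wiring are illustrative, though the `size: '512mb'` value and the extension names echo the comments inside `VineMultipartFile`):

```ts
// Hypothetical validator consuming the Vine.macro('myfile', ...) registration above.
import vine from '@vinejs/vine';

export const uploadDatasetValidator = vine.compile(
    vine.object({
        // Options mirror FileRuleValidationOptions; values here are examples only.
        file: vine.myfile({
            size: '512mb',
            extnames: ['pdf', 'png', 'zip', 'csv'],
        }),
    }),
);

// Inside a controller, validation then runs through the request macro shown above:
// const payload = await request.validateUsing(uploadDatasetValidator);
```

When no `extnames` option is passed, the rule falls back to `getEnabledExtensions()`, i.e. the extensions enabled in the `MimeType` table.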
174  readme.md
@@ -11,8 +11,6 @@ Welcome to the Tethys Research Repository Backend System! This is the backend co
 - [Configuration](#configuration)
 - [Database](#database)
 - [API Documentation](#api-documentation)
-- [Commands](#commands)
-- [Documentation](#documentation)
 - [Contributing](#contributing)
 - [License](#license)
 
@@ -31,175 +29,5 @@ Before you begin, ensure you have met the following requirements:
 1. Clone this repository:
 
    ```bash
-   git clone git clone https://gitea.geologie.ac.at/geolba/tethys.backend.git
-   cd tethys-backend
+   git clone https://gitea.geologie.ac.at/geolba/tethys.backend.git
    ```
-
-2. Install dependencies:
-
-   ```bash
-   npm install
-   ```
-
-3. Configure environment variables (see [Configuration](#configuration))
-
-4. Run database migrations:
-
-   ```bash
-   node ace migration:run
-   ```
-
-5. Start the development server:
-
-   ```bash
-   npm run dev
-   ```
-
-## Usage
-
-The Tethys Backend provides RESTful APIs for managing research datasets, user authentication, DOI registration, and search functionality.
-
-## Configuration
-
-Copy the `.env.example` file to `.env` and configure the following variables:
-
-### Database Configuration
-```bash
-DB_CONNECTION=pg
-DB_HOST=localhost
-DB_PORT=5432
-DB_USER=your_username
-DB_PASSWORD=your_password
-DB_DATABASE=tethys_db
-```
-
-### DataCite Configuration
-```bash
-# DataCite Credentials
-DATACITE_USERNAME=your_datacite_username
-DATACITE_PASSWORD=your_datacite_password
-DATACITE_PREFIX=10.21388
-
-# Environment-specific API endpoints
-DATACITE_API_URL=https://api.test.datacite.org # Test environment
-DATACITE_SERVICE_URL=https://mds.test.datacite.org # Test MDS
-
-# For production:
-# DATACITE_API_URL=https://api.datacite.org
-# DATACITE_SERVICE_URL=https://mds.datacite.org
-```
-
-### OpenSearch Configuration
-```bash
-OPENSEARCH_HOST=localhost:9200
-```
-
-### Application Configuration
-```bash
-BASE_DOMAIN=tethys.at
-APP_KEY=your_app_key
-```
-
-## Database
-
-The system uses PostgreSQL with Lucid ORM. Key models include:
-
-- **Dataset**: Research dataset metadata
-- **DatasetIdentifier**: DOI and other identifiers for datasets
-- **User**: User management and authentication
-- **XmlCache**: Cached XML metadata
-
-Run migrations and seeders:
-
-```bash
-# Run migrations
-node ace migration:run
-
-# Run seeders (if available)
-node ace db:seed
-```
-
-## API Documentation
-
-API endpoints are available for:
-
-- Dataset management (`/api/datasets`)
-- User authentication (`/api/auth`)
-- DOI registration (`/api/doi`)
-- Search functionality (`/api/search`)
-
-*Detailed API documentation can be found in the `/docs/api` directory.*
-
-## Commands
-
-The system includes several Ace commands for maintenance and data management:
-
-### Dataset Indexing
-```bash
-# Index all published datasets to OpenSearch
-node ace index:datasets
-
-# Index a specific dataset
-node ace index:datasets --publish_id 123
-```
-
-### DataCite DOI Management
-```bash
-# Update DataCite records for modified datasets
-node ace update:datacite
-
-# Show detailed statistics for datasets needing updates
-node ace update:datacite --stats
-
-# Preview what would be updated (dry run)
-node ace update:datacite --dry-run
-
-# Force update all DOI records
-node ace update:datacite --force
-
-# Update a specific dataset
-node ace update:datacite --publish_id 123
-```
-
-*For detailed command documentation, see the [Commands Documentation](docs/commands/)*
-
-## Documentation
-
-Comprehensive documentation is available in the `/docs` directory:
-
-- **[Commands Documentation](docs/commands/)** - Detailed guides for Ace commands
-  - [DataCite Update Command](docs/commands/update-datacite.md) - DOI synchronization and management
-  - [Dataset Indexing Command](docs/commands/index-datasets.md) - Search index management
-- **[API Documentation](docs/api/)** - REST API endpoints and usage
-- **[Deployment Guide](docs/deployment/)** - Production deployment instructions
-- **[Configuration Guide](docs/configuration/)** - Environment setup and configuration options
-
-## Contributing
-
-1. Fork the repository
-2. Create a feature branch (`git checkout -b feature/amazing-feature`)
-3. Commit your changes (`git commit -m 'Add some amazing feature'`)
-4. Push to the branch (`git push origin feature/amazing-feature`)
-5. Open a Pull Request
-
-### Development Guidelines
-
-- Follow the existing code style and conventions
-- Write tests for new features
-- Update documentation for any API changes
-- Ensure all commands and migrations work properly
-
-### Testing Commands
-
-```bash
-# Run tests
-npm test
-
-# Test specific commands
-node ace update:datacite --dry-run --publish_id 123
-node ace index:datasets --publish_id 123
-```
-
-## License
-
-This project is licensed under the [MIT License](LICENSE).
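The removed Commands section documents the flags of `update:datacite` without showing its source. As a rough sketch only (not the repository's implementation; the command and flag names are taken from the readme above, everything else is assumed AdonisJS v6 Ace boilerplate), such a command could be declared like this:

```ts
// Hypothetical Ace command skeleton for the flags listed in the removed readme.
import { BaseCommand, flags } from '@adonisjs/core/ace';
import type { CommandOptions } from '@adonisjs/core/types/ace';

export default class UpdateDatacite extends BaseCommand {
    static commandName = 'update:datacite';
    static description = 'Synchronize DataCite DOI records for modified datasets';
    static options: CommandOptions = { startApp: true };

    @flags.boolean({ description: 'Preview what would be updated without writing anything' })
    declare dryRun: boolean;

    @flags.boolean({ description: 'Force update of all DOI records, not only stale ones' })
    declare force: boolean;

    @flags.number({ flagName: 'publish_id', description: 'Restrict the run to a single dataset' })
    declare publishId: number;

    async run() {
        // Placeholder body: the real command would query datasets and call the DataCite API.
        this.logger.info(`dry-run=${this.dryRun}, force=${this.force}, publish_id=${this.publishId ?? 'all'}`);
    }
}
```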
@@ -163,7 +163,7 @@
         </div>
     </FormControl>
 </FormField>
-<FormField label="Main Description Language*" help="required: main abstract language"
+<FormField label="Main Title Language*" help="required: main abstract language"
     :class="{ 'text-red-400': form.errors['descriptions.0.language'] }"
     class="w-full ml-1 flex-1">
     <FormControl required v-model="form.descriptions[0].language" type="text"
@@ -725,7 +725,7 @@ Removes a selected keyword
         </div>
     </FormControl>
 </FormField>
-<FormField label="Main Description Language*" help="required: main abstract language"
+<FormField label="Main Title Language*" help="required: main abstract language"
     :class="{ 'text-red-400': form.errors['descriptions.0.language'] }"
     class="w-full mx-2 flex-1">
     <FormControl required v-model="form.descriptions[0].language" type="text"
@@ -272,7 +272,7 @@
     </FormControl>
 </FormField>
 <FormField
-    label="Main Description Language*"
+    label="Main Title Language*"
     help="required: main abstract language"
     :class="{ 'text-red-400': form.errors['descriptions.0.language'] }"
     class="w-full ml-1 flex-1"
@@ -8,24 +8,14 @@ import AvatarController from '#controllers/Http/Api/AvatarController';
 import UserController from '#controllers/Http/Api/UserController';
 import CollectionsController from '#controllers/Http/Api/collections_controller';
 import { middleware } from '../kernel.js';
-
-// Clean DOI URL routes (no /api prefix)
-
-// API routes with /api prefix
+// API
 router
     .group(() => {
-        router.get('clients', [UserController, 'getSubmitters']).as('client.index').use(middleware.auth());
-        router.get('authors', [AuthorsController, 'index']).as('author.index').use(middleware.auth());
+        router.get('clients', [UserController, 'getSubmitters']).as('client.index').use(middleware.auth());;
+        router.get('authors', [AuthorsController, 'index']).as('author.index').use(middleware.auth());;
         router.get('datasets', [DatasetController, 'index']).as('dataset.index');
         router.get('persons', [AuthorsController, 'persons']).as('author.persons');
-
-        // This should come BEFORE any other routes that might conflict
-        router
-            .get('/dataset/:prefix/:value', [DatasetController, 'findByIdentifier'])
-            .where('prefix', /^10\.\d+$/) // Match DOI prefix pattern (10.xxxx)
-            .where('value', /^[a-zA-Z0-9._-]+\.[0-9]+(?:\.[0-9]+)*$/) // Match DOI suffix pattern
-            .as('dataset.findByIdentifier');
-
         router.get('/dataset', [DatasetController, 'findAll']).as('dataset.findAll');
         router.get('/dataset/:publish_id', [DatasetController, 'findOne']).as('dataset.findOne');
         router.get('/sitelinks/:year', [HomeController, 'findDocumentsPerYear']);
@@ -45,7 +35,7 @@ router
         .as('apps.twofactor_backupcodes.create')
         .use(middleware.auth());
 
-        router.get('collections/:id', [CollectionsController, 'show']).as('collection.show');
+        router.get('collections/:id', [CollectionsController, 'show']).as('collection.show')
     })
     // .namespace('App/Controllers/Http/Api')
     .prefix('api');
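The `findByIdentifier` route removed in the first hunk constrained `:prefix` and `:value` with regular expressions. A standalone illustration of what those patterns accept (the patterns are copied from the diff; the sample identifiers are made up, with `10.21388` taken from the `DATACITE_PREFIX` in the removed readme section):

```ts
// Route parameter constraints from the removed /dataset/:prefix/:value route.
const prefixPattern = /^10\.\d+$/;                              // DOI prefix, e.g. "10.21388"
const valuePattern = /^[a-zA-Z0-9._-]+\.[0-9]+(?:\.[0-9]+)*$/;  // DOI suffix, e.g. "tethys.99" or "tethys.99.2"

console.log(prefixPattern.test('10.21388'));    // true
console.log(valuePattern.test('tethys.99'));    // true
console.log(valuePattern.test('tethys.99.2'));  // true
console.log(valuePattern.test('not-a-doi'));    // false -> the route would not match
```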
@@ -1,7 +1,7 @@
 /*
 |--------------------------------------------------------------------------
 | Preloaded File - node ace make:preload rules/orcid
-| Do you want to register the preload file in .adonisrc.ts file? (y/N) · true
+| ❯ Do you want to register the preload file in .adonisrc.ts file? (y/N) · true
 | DONE: create start/rules/orcid.ts
 | DONE: update adonisrc.ts file
 |--------------------------------------------------------------------------