feat: Enhance ClamAV Docker entrypoint and configuration

- Updated docker-entrypoint.sh to improve ClamAV service initialization and logging.
- Added checks for ClamAV and freshclam daemon status.
- Optimized freshclam configuration for container usage, including logging to stdout and setting database directory.
- Introduced caching mechanism for enabled file extensions in vinejs_provider.ts to reduce database queries.
- Implemented a new command to list datasets needing DataCite DOI updates, with options for verbose output, count only, and IDs only.
- Updated package dependencies to include p-limit and pino-pretty.
- Finalized the ace command 'detect:missing-cross-references'.
Author: Kaimbacher
Date: 2025-09-26 12:19:35 +02:00
Commit: 6757bdb77c
10 changed files with 745 additions and 430 deletions

View file

@@ -1,55 +1,61 @@
################## First Stage - Creating base #########################
# Created a variable to hold our node base image
ARG NODE_IMAGE=node:22-bookworm-slim
ARG NODE_IMAGE=node:22-trixie-slim
FROM $NODE_IMAGE AS base
# Install dumb-init and ClamAV, and perform ClamAV database update
RUN apt update \
&& apt-get install -y dumb-init clamav clamav-daemon nano \
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
dumb-init \
clamav \
clamav-daemon \
ca-certificates \
&& rm -rf /var/lib/apt/lists/* \
# Creating folders and changing ownerships
&& mkdir -p /home/node/app && chown node:node /home/node/app \
&& mkdir -p /home/node/app \
&& mkdir -p /var/lib/clamav \
&& mkdir /usr/local/share/clamav \
&& chown -R node:clamav /var/lib/clamav /usr/local/share/clamav /etc/clamav \
# permissions
&& mkdir /var/run/clamav \
&& chown node:clamav /var/run/clamav \
&& chmod 750 /var/run/clamav
# -----------------------------------------------
# --- ClamAV & FreshClam ------------------------
# -----------------------------------------------
# RUN \
# chmod 644 /etc/clamav/freshclam.conf && \
# freshclam && \
# mkdir /var/run/clamav && \
# chown -R clamav:root /var/run/clamav
&& mkdir -p /var/log/clamav \
&& mkdir -p /tmp/clamav-logs \
# # initial update of av databases
# RUN freshclam
# Set ownership and permissions
&& chown node:node /home/node/app \
# && chown -R node:clamav /var/lib/clamav /usr/local/share/clamav /etc/clamav /var/run/clamav \
&& chown -R clamav:clamav /var/lib/clamav /usr/local/share/clamav /etc/clamav /var/run/clamav /var/log/clamav \
&& chmod 755 /tmp/clamav-logs \
&& chmod 750 /var/run/clamav \
&& chmod 755 /var/lib/clamav \
&& chmod 755 /var/log/clamav \
# Add node user to clamav group so it can manage ClamAV files
&& usermod -a -G clamav node \
&& chmod g+w /var/run/clamav /var/lib/clamav /var/log/clamav /tmp/clamav-logs
# Configure Clam AV...
COPY --chown=node:clamav ./*.conf /etc/clamav/
# # permissions
# RUN mkdir /var/run/clamav && \
# chown node:clamav /var/run/clamav && \
# chmod 750 /var/run/clamav
# Configure ClamAV - copy config files before switching user
# COPY --chown=node:clamav ./*.conf /etc/clamav/
COPY --chown=clamav:clamav ./*.conf /etc/clamav/
# Copy entrypoint script
COPY --chown=node:node docker-entrypoint.sh /home/node/app/docker-entrypoint.sh
RUN chmod +x /home/node/app/docker-entrypoint.sh
ENV TZ="Europe/Vienna"
# Setting the working directory
WORKDIR /home/node/app
# Changing the current active user to "node"
# Download initial ClamAV database as root before switching users
USER root
RUN freshclam --quiet || echo "Initial database download failed - will retry at runtime"
USER node
# initial update of av databases
RUN freshclam
# VOLUME /var/lib/clamav
COPY --chown=node:clamav docker-entrypoint.sh /home/node/app/docker-entrypoint.sh
RUN chmod +x /home/node/app/docker-entrypoint.sh
ENV TZ="Europe/Vienna"
# Initial update of AV databases (moved after USER directive)
# RUN freshclam || true
################## Second Stage - Installing dependencies ##########
@@ -70,14 +76,13 @@ ENV NODE_ENV=production
# We run "node ace build" to build the app (dist folder) for production
RUN node ace build --ignore-ts-errors
# RUN node ace build --production
# RUN node ace build --ignore-ts-errors
################## Final Stage - Production #########################
# In this final stage, we will start running the application
FROM base AS production
# Here, we include all the required environment variables
# ENV NODE_ENV=production
ENV NODE_ENV=production
# ENV PORT=$PORT
# ENV HOST=0.0.0.0
@@ -91,4 +96,4 @@ COPY --chown=node:node --from=build /home/node/app/build .
EXPOSE 3333
ENTRYPOINT ["/home/node/app/docker-entrypoint.sh"]
# Run the command to start the server using "dumb-init"
CMD [ "dumb-init", "node", "bin/server.js" ]
CMD [ "node", "bin/server.js" ]

View file

@@ -30,9 +30,9 @@ export default defineConfig({
() => import('#start/rules/unique'),
() => import('#start/rules/translated_language'),
() => import('#start/rules/unique_person'),
() => import('#start/rules/file_length'),
() => import('#start/rules/file_scan'),
() => import('#start/rules/allowed_extensions_mimetypes'),
// () => import('#start/rules/file_length'),
// () => import('#start/rules/file_scan'),
// () => import('#start/rules/allowed_extensions_mimetypes'),
() => import('#start/rules/dependent_array_min_length'),
() => import('#start/rules/referenceValidation'),
() => import('#start/rules/valid_mimetype'),

View file

@@ -6,6 +6,7 @@
*/
import { BaseCommand, flags } from '@adonisjs/core/ace';
import type { CommandOptions } from '@adonisjs/core/types/ace';
import { DateTime } from 'luxon';
import Dataset from '#models/dataset';
import DatasetReference from '#models/dataset_reference';
// import env from '#start/env';
@@ -15,6 +16,8 @@ interface MissingCrossReference {
targetDatasetId: number;
sourcePublishId: number | null;
targetPublishId: number | null;
sourceDoi: string | null;
targetDoi: string | null;
referenceType: string;
relation: string;
doi: string | null;
@@ -33,30 +36,58 @@ export default class DetectMissingCrossReferences extends BaseCommand {
@flags.boolean({ alias: 'v', description: 'Verbose output' })
public verbose: boolean = false;
@flags.number({ alias: 'p', description: 'Filter by specific publish_id (source or target dataset)' })
public publish_id?: number;
// example: node ace detect:missing-cross-references --verbose -p 227 //if you want to filter by specific publish_id with details
// example: node ace detect:missing-cross-references --verbose
// example: node ace detect:missing-cross-references --fix -p 227 //if you want to filter by specific publish_id and fix it
// example: node ace detect:missing-cross-references
public static options: CommandOptions = {
startApp: true,
staysAlive: false,
};
// Define the allowed relations that we want to process
private readonly ALLOWED_RELATIONS = ['IsNewVersionOf', 'IsPreviousVersionOf', 'IsVariantFormOf', 'IsOriginalFormOf'];
async run() {
this.logger.info('🔍 Detecting missing cross-references...');
this.logger.info(`📋 Processing only these relations: ${this.ALLOWED_RELATIONS.join(', ')}`);
if (this.publish_id) {
this.logger.info(`Filtering by publish_id: ${this.publish_id}`);
}
try {
const missingReferences = await this.findMissingCrossReferences();
if (missingReferences.length === 0) {
this.logger.success('All cross-references are properly linked!');
const filterMsg = this.publish_id ? ` for publish_id ${this.publish_id}` : '';
this.logger.success(`All cross-references are properly linked for the specified relations${filterMsg}!`);
return;
}
this.logger.warning(`Found ${missingReferences.length} missing cross-reference(s):`);
const filterMsg = this.publish_id ? ` (filtered by publish_id ${this.publish_id})` : '';
this.logger.warning(`Found ${missingReferences.length} missing cross-reference(s)${filterMsg}:`);
// Show brief list if not verbose mode
if (!this.verbose) {
for (const missing of missingReferences) {
const sourceDoi = missing.sourceDoi ? ` DOI: ${missing.sourceDoi}` : '';
const targetDoi = missing.targetDoi ? ` DOI: ${missing.targetDoi}` : '';
this.logger.info(
`Dataset ${missing.sourceDatasetId} (Publish ID: ${missing.sourcePublishId}${sourceDoi}) ${missing.relation} Dataset ${missing.targetDatasetId} (Publish ID: ${missing.targetPublishId}${targetDoi}) → missing reverse: ${missing.reverseRelation}`,
);
}
} else {
// Verbose mode - show detailed info
for (const missing of missingReferences) {
this.logger.info(
`Dataset ${missing.sourceDatasetId} references ${missing.targetDatasetId}, but reverse reference is missing`,
);
if (this.verbose) {
this.logger.info(` - Reference type: ${missing.referenceType}`);
this.logger.info(` - Relation: ${missing.relation}`);
this.logger.info(` - DOI: ${missing.doi}`);
@@ -67,20 +98,28 @@ export default class DetectMissingCrossReferences extends BaseCommand {
await this.fixMissingReferences(missingReferences);
this.logger.success('All missing cross-references have been fixed!');
} else {
if (this.verbose) {
this.printMissingReferencesList(missingReferences);
}
this.logger.info('💡 Run with --fix flag to automatically create missing cross-references');
if (this.publish_id) {
this.logger.info(`🎯 Currently filtering by publish_id: ${this.publish_id}`);
}
}
} catch (error) {
this.logger.error('Error detecting missing cross-references:', error);
process.exit(1);
}
}
private async findMissingCrossReferences(): Promise<MissingCrossReference[]> {
const missingReferences: {
sourceDatasetId: number;
targetDatasetId: number;
sourcePublishId: number | null;
targetPublishId: number | null;
sourceDoi: string | null;
targetDoi: string | null;
referenceType: string;
relation: string;
doi: string | null;
@@ -90,22 +129,32 @@ export default class DetectMissingCrossReferences extends BaseCommand {
this.logger.info('📊 Querying dataset references...');
// Find all references that point to Tethys datasets (DOI or URL containing tethys DOI)
// Only from datasets that are published
const tethysReferences = await DatasetReference.query()
// Only from datasets that are published AND only for allowed relations
const tethysReferencesQuery = DatasetReference.query()
.whereIn('type', ['DOI', 'URL'])
.whereIn('relation', this.ALLOWED_RELATIONS) // Only process allowed relations
.where((query) => {
query.where('value', 'like', '%doi.org/10.24341/tethys.%').orWhere('value', 'like', '%tethys.at/dataset/%');
})
.preload('dataset', (datasetQuery) => {
datasetQuery.where('server_state', 'published');
datasetQuery.preload('identifier');
})
.whereHas('dataset', (datasetQuery) => {
datasetQuery.where('server_state', 'published');
});
if (typeof this.publish_id === 'number') {
tethysReferencesQuery.whereHas('dataset', (datasetQuery) => {
datasetQuery.where('publish_id', this.publish_id as number);
});
}
this.logger.info(`🔗 Found ${tethysReferences.length} Tethys references from published datasets`);
const tethysReferences = await tethysReferencesQuery.exec();
this.logger.info(`🔗 Found ${tethysReferences.length} Tethys references from published datasets (allowed relations only)`);
let processedCount = 0;
let skippedCount = 0;
for (const reference of tethysReferences) {
processedCount++;
@@ -113,6 +162,15 @@ export default class DetectMissingCrossReferences extends BaseCommand {
this.logger.info(`📈 Processed ${processedCount}/${tethysReferences.length} references...`);
}
// Double-check that this relation is in our allowed list (safety check)
if (!this.ALLOWED_RELATIONS.includes(reference.relation)) {
skippedCount++;
if (this.verbose) {
this.logger.info(`⏭️ Skipping relation "${reference.relation}" - not in allowed list`);
}
continue;
}
// Extract dataset publish_id from DOI or URL
const targetDatasetPublish = this.extractDatasetPublishIdFromReference(reference.value);
@@ -127,6 +185,7 @@ export default class DetectMissingCrossReferences extends BaseCommand {
const targetDataset = await Dataset.query()
.where('publish_id', targetDatasetPublish)
.where('server_state', 'published')
.preload('identifier')
.first();
if (!targetDataset) {
@@ -145,11 +204,14 @@ export default class DetectMissingCrossReferences extends BaseCommand {
// Check if reverse reference exists
const reverseReferenceExists = await this.checkReverseReferenceExists(
targetDataset.id,
reference.document_id,
// reference.document_id,
reference.relation,
);
if (!reverseReferenceExists) {
const reverseRelation = this.getReverseRelation(reference.relation);
if (reverseRelation) {
// Only add if we have a valid reverse relation
missingReferences.push({
sourceDatasetId: reference.document_id,
targetDatasetId: targetDataset.id,
@@ -158,12 +220,15 @@ export default class DetectMissingCrossReferences extends BaseCommand {
referenceType: reference.type,
relation: reference.relation,
doi: reference.value,
reverseRelation: this.getReverseRelation(reference.relation),
reverseRelation: reverseRelation,
sourceDoi: reference.dataset.identifier ? reference.dataset.identifier.value : null,
targetDoi: targetDataset.identifier ? targetDataset.identifier.value : null,
});
}
}
}
this.logger.info(`✅ Processed all ${processedCount} references`);
this.logger.info(`✅ Processed ${processedCount} references (${skippedCount} skipped due to relation filtering)`);
return missingReferences;
}
@@ -183,64 +248,47 @@ export default class DetectMissingCrossReferences extends BaseCommand {
return null;
}
private async checkReverseReferenceExists(
sourceDatasetId: number,
targetDatasetId: number,
originalRelation: string,
): Promise<boolean> {
private async checkReverseReferenceExists(targetDatasetId: number, originalRelation: string): Promise<boolean> {
const reverseRelation = this.getReverseRelation(originalRelation);
if (!reverseRelation) {
return true; // If no reverse relation is defined, consider it as "exists" to skip processing
}
// Only check for reverse references where the source dataset is also published
const reverseReference = await DatasetReference.query()
.where('document_id', sourceDatasetId)
// We don't filter by source document_id here to find any incoming reference from any published dataset
// .where('document_id', sourceDatasetId)
.where('related_document_id', targetDatasetId)
.where('relation', reverseRelation)
.whereHas('dataset', (datasetQuery) => {
datasetQuery.where('server_state', 'published');
})
.first();
return !!reverseReference;
}
private getReverseRelation(relation: string): string {
private getReverseRelation(relation: string): string | null {
const relationMap: Record<string, string> = {
IsNewVersionOf: 'IsPreviousVersionOf',
IsPreviousVersionOf: 'IsNewVersionOf',
IsVersionOf: 'HasVersion',
HasVersion: 'IsVersionOf',
Compiles: 'IsCompiledBy',
IsCompiledBy: 'Compiles',
IsVariantFormOf: 'IsOriginalFormOf',
IsOriginalFormOf: 'IsVariantFormOf',
IsPartOf: 'HasPart',
HasPart: 'IsPartOf',
IsSupplementTo: 'IsSupplementedBy',
IsSupplementedBy: 'IsSupplementTo',
Continues: 'IsContinuedBy',
IsContinuedBy: 'Continues',
};
// to catch relation types like 'compiles' or 'IsVariantFormOf' that are not in the map mark reverse as 'HasVersion'
return relationMap[relation] || 'HasVersion'; // Default fallback
// Only return reverse relation if it exists in our map, otherwise return null
return relationMap[relation] || null;
}
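// Illustration (editor's note): with the restricted map above and the null fallback,
// only the four ALLOWED_RELATIONS can produce a reverse reference, e.g.:
//   getReverseRelation('IsNewVersionOf')  -> 'IsPreviousVersionOf' (reverse reference created)
//   getReverseRelation('IsVariantFormOf') -> 'IsOriginalFormOf'    (reverse reference created)
//   getReverseRelation('Compiles')        -> null                  (skipped; the old code fell back to 'HasVersion')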
private printMissingReferencesList(missingReferences: MissingCrossReference[]) {
console.log('┌─────────────────────────────────────────────────────────────────────────────────┐');
console.log('│ MISSING CROSS-REFERENCES REPORT │');
console.log('│ (Published Datasets Only) │');
console.log('│ (Published Datasets Only - Filtered Relations) │');
console.log('└─────────────────────────────────────────────────────────────────────────────────┘');
console.log();
missingReferences.forEach((missing, index) => {
console.log(
`${index + 1}. Dataset ${missing.sourceDatasetId} (Publish ID: ${missing.sourcePublishId}) → Dataset ${missing.targetDatasetId} (Publish ID: ${missing.targetPublishId})`,
`${index + 1}. Dataset ${missing.sourceDatasetId} (Publish ID: ${missing.sourcePublishId} Identifier: ${missing.sourceDoi})
${missing.relation} Dataset ${missing.targetDatasetId} (Publish ID: ${missing.targetPublishId} Identifier: ${missing.targetDoi})`,
);
console.log(` ├─ Current relation: "${missing.relation}"`);
console.log(` ├─ Missing reverse relation: "${missing.reverseRelation}"`);
@@ -251,6 +299,7 @@ export default class DetectMissingCrossReferences extends BaseCommand {
console.log('┌─────────────────────────────────────────────────────────────────────────────────┐');
console.log(`│ SUMMARY: ${missingReferences.length} missing reverse reference(s) detected │`);
console.log(`│ Processed relations: ${this.ALLOWED_RELATIONS.join(', ')}`);
console.log('└─────────────────────────────────────────────────────────────────────────────────┘');
}
@@ -262,27 +311,37 @@ export default class DetectMissingCrossReferences extends BaseCommand {
for (const [index, missing] of missingReferences.entries()) {
try {
// Get the source dataset to create proper reference - ensure it's published
// Get both source and target datasets
const sourceDataset = await Dataset.query()
.where('id', missing.sourceDatasetId)
.where('server_state', 'published')
.preload('identifier')
.first();
const targetDataset = await Dataset.query().where('id', missing.targetDatasetId).where('server_state', 'published').first();
if (!sourceDataset) {
this.logger.warning(`⚠️ Source dataset ${missing.sourceDatasetId} not found or not published, skipping...`);
errorCount++;
continue;
}
// Create the reverse reference
if (!targetDataset) {
this.logger.warning(`⚠️ Target dataset ${missing.targetDatasetId} not found or not published, skipping...`);
errorCount++;
continue;
}
// Create the reverse reference using the referenced_by relationship
// Example: If Dataset 297 IsNewVersionOf Dataset 144
// We create an incoming reference for Dataset 144 that shows Dataset 297 IsPreviousVersionOf it
const reverseReference = new DatasetReference();
reverseReference.document_id = missing.targetDatasetId;
reverseReference.related_document_id = missing.sourceDatasetId;
// Don't set document_id - this creates an incoming reference via related_document_id
reverseReference.related_document_id = missing.targetDatasetId; // 144 (dataset receiving the incoming reference)
reverseReference.type = 'DOI';
reverseReference.relation = missing.reverseRelation;
// Use the source dataset's DOI for the value
// Use the source dataset's DOI for the value (what's being referenced)
if (sourceDataset.identifier?.value) {
reverseReference.value = `https://doi.org/${sourceDataset.identifier.value}`;
} else {
@@ -293,12 +352,16 @@ export default class DetectMissingCrossReferences extends BaseCommand {
// Use the source dataset's main title for the label
reverseReference.label = sourceDataset.mainTitle || `Dataset ${missing.sourceDatasetId}`;
// Also save 'server_date_modified' on target dataset to trigger any downstream updates (e.g. search index)
targetDataset.server_date_modified = DateTime.now();
await targetDataset.save();
await reverseReference.save();
fixedCount++;
if (this.verbose) {
this.logger.info(
`✅ [${index + 1}/${missingReferences.length}] Created reverse reference: Dataset ${missing.targetDatasetId} -> ${missing.sourceDatasetId}`,
`✅ [${index + 1}/${missingReferences.length}] Created reverse reference: Dataset ${missing.sourceDatasetId} -> ${missing.targetDatasetId} (${missing.reverseRelation})`,
);
} else if ((index + 1) % 10 === 0) {
this.logger.info(`📈 Fixed ${fixedCount}/${missingReferences.length} references...`);

View file

@@ -0,0 +1,346 @@
/*
|--------------------------------------------------------------------------
| node ace make:command list-updateable-datacite
| DONE: create commands/list_updateable_datacite.ts
|--------------------------------------------------------------------------
*/
import { BaseCommand, flags } from '@adonisjs/core/ace';
import { CommandOptions } from '@adonisjs/core/types/ace';
import Dataset from '#models/dataset';
import { DoiClient } from '#app/Library/Doi/DoiClient';
import env from '#start/env';
import logger from '@adonisjs/core/services/logger';
import { DateTime } from 'luxon';
import pLimit from 'p-limit';
export default class ListUpdateableDatacite extends BaseCommand {
static commandName = 'list:updateable-datacite';
static description = 'List all datasets that need DataCite DOI updates';
public static needsApplication = true;
// private chunkSize = 100; // Set chunk size for pagination
@flags.boolean({ alias: 'v', description: 'Verbose output showing detailed information' })
public verbose: boolean = false;
@flags.boolean({ alias: 'c', description: 'Show only count of updatable datasets' })
public countOnly: boolean = false;
@flags.boolean({ alias: 'i', description: 'Show only publish IDs (useful for scripting)' })
public idsOnly: boolean = false;
@flags.number({ description: 'Chunk size for processing datasets (default: 50)' })
public chunkSize: number = 50;
//example: node ace list:updateable-datacite
//example: node ace list:updateable-datacite --verbose
//example: node ace list:updateable-datacite --count-only
//example: node ace list:updateable-datacite --ids-only
//example: node ace list:updateable-datacite --chunk-size 50
public static options: CommandOptions = {
startApp: true,
staysAlive: false,
};
async run() {
const prefix = env.get('DATACITE_PREFIX', '');
const base_domain = env.get('BASE_DOMAIN', '');
if (!prefix || !base_domain) {
logger.error('Missing DATACITE_PREFIX or BASE_DOMAIN environment variables');
return;
}
// Prevent conflicting flags
if ((this.verbose && this.countOnly) || (this.verbose && this.idsOnly)) {
logger.error('Flags --verbose cannot be combined with --count-only or --ids-only');
return;
}
const chunkSize = this.chunkSize || 50;
let page = 1;
let hasMoreDatasets = true;
let totalProcessed = 0;
const updatableDatasets: Dataset[] = [];
if (!this.countOnly && !this.idsOnly) {
logger.info(`Processing datasets in chunks of ${chunkSize}...`);
}
while (hasMoreDatasets) {
const datasets = await this.getDatasets(page, chunkSize);
if (datasets.length === 0) {
hasMoreDatasets = false;
break;
}
if (!this.countOnly && !this.idsOnly) {
logger.info(`Processing chunk ${page} (${datasets.length} datasets)...`);
}
const chunkUpdatableDatasets = await this.processChunk(datasets);
updatableDatasets.push(...chunkUpdatableDatasets);
totalProcessed += datasets.length;
page += 1;
if (datasets.length < chunkSize) {
hasMoreDatasets = false;
}
}
if (!this.countOnly && !this.idsOnly) {
logger.info(`Processed ${totalProcessed} datasets total, found ${updatableDatasets.length} that need updates`);
}
if (this.countOnly) {
console.log(updatableDatasets.length);
} else if (this.idsOnly) {
updatableDatasets.forEach((dataset) => console.log(dataset.publish_id));
} else if (this.verbose) {
await this.showVerboseOutput(updatableDatasets);
} else {
this.showSimpleOutput(updatableDatasets);
}
}
/**
* Processes a chunk of datasets to determine which ones need DataCite updates
*
* This method handles parallel processing of datasets within a chunk, providing
* efficient error handling and filtering of results.
*
* @param datasets - Array of Dataset objects to process
* @returns Promise<Dataset[]> - Array of datasets that need updates
*/
// private async processChunk(datasets: Dataset[]): Promise<Dataset[]> {
// // Process datasets in parallel using Promise.allSettled for better error handling
// //
// // Why Promise.allSettled vs Promise.all?
// // - Promise.all fails fast: if ANY promise rejects, the entire operation fails
// // - Promise.allSettled waits for ALL promises: some can fail, others succeed
// // - This is crucial for batch processing where we don't want one bad dataset
// // to stop processing of the entire chunk
// const results = await Promise.allSettled(
// datasets.map(async (dataset) => {
// try {
// // Check if this specific dataset needs a DataCite update
// const needsUpdate = await this.shouldUpdateDataset(dataset);
// // Return the dataset if it needs update, null if it doesn't
// // This creates a sparse array that we'll filter later
// return needsUpdate ? dataset : null;
// } catch (error) {
// // Error handling for individual dataset checks
// //
// // Log warnings only if we're not in silent modes (count-only or ids-only)
// // This prevents log spam when running automated scripts
// if (!this.countOnly && !this.idsOnly) {
// logger.warn(`Error checking dataset ${dataset.publish_id}: ${error.message}`);
// }
// // IMPORTANT DECISION: Return the dataset anyway if we can't determine status
// //
// // Why? It's safer to include a dataset that might not need updating
// // than to miss one that actually does need updating. This follows the
// // "fail-safe" principle - if we're unsure, err on the side of caution
// return dataset;
// }
// }),
// );
// // Filter and extract results from Promise.allSettled response
// //
// // Promise.allSettled returns an array of objects with this structure:
// // - { status: 'fulfilled', value: T } for successful promises
// // - { status: 'rejected', reason: Error } for failed promises
// //
// // We need to:
// // 1. Only get fulfilled results (rejected ones are already handled above)
// // 2. Filter out null values (datasets that don't need updates)
// // 3. Extract the actual Dataset objects from the wrapper
// return results
// .filter(
// (result): result is PromiseFulfilledResult<Dataset | null> =>
// // Type guard: only include fulfilled results that have actual values
// // This filters out:
// // - Rejected promises (shouldn't happen due to try/catch, but safety first)
// // - Fulfilled promises that returned null (datasets that don't need updates)
// result.status === 'fulfilled' && result.value !== null,
// )
// .map((result) => result.value!); // Extract the Dataset from the wrapper
// // The ! is safe here because we filtered out null values above
// }
private async processChunk(datasets: Dataset[]): Promise<Dataset[]> {
// Limit concurrency to avoid API flooding (e.g., max 5 at once)
const limit = pLimit(5);
const tasks = datasets.map((dataset) =>
limit(async () => {
try {
const needsUpdate = await this.shouldUpdateDataset(dataset);
return needsUpdate ? dataset : null;
} catch (error) {
if (!this.countOnly && !this.idsOnly) {
logger.warn(
`Error checking dataset ${dataset.publish_id}: ${
error instanceof Error ? error.message : JSON.stringify(error)
}`,
);
}
// Fail-safe: include dataset if uncertain
return dataset;
}
}),
);
const results = await Promise.allSettled(tasks);
return results
.filter((result): result is PromiseFulfilledResult<Dataset | null> => result.status === 'fulfilled' && result.value !== null)
.map((result) => result.value!);
}
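// Editor's sketch (illustrative, not part of the commit): the p-limit pattern used in
// processChunk, reduced to a self-contained helper. pLimit(5) mirrors the concurrency cap
// above; checkItem stands in for shouldUpdateDataset, and the fail-safe null filtering matches.
private async sketchLimitedChecks(items: number[], checkItem: (n: number) => Promise<boolean>): Promise<number[]> {
    const limit = pLimit(5); // at most 5 checks in flight at once
    const tasks = items.map((item) => limit(async () => ((await checkItem(item)) ? item : null)));
    // allSettled keeps one failing check from aborting the whole chunk
    const settled = await Promise.allSettled(tasks);
    return settled
        .filter((r): r is PromiseFulfilledResult<number | null> => r.status === 'fulfilled' && r.value !== null)
        .map((r) => r.value!);
}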
private async getDatasets(page: number, chunkSize: number): Promise<Dataset[]> {
return await Dataset.query()
.orderBy('publish_id', 'asc')
.preload('identifier')
.preload('xmlCache')
.preload('titles')
.where('server_state', 'published')
.whereHas('identifier', (identifierQuery) => {
identifierQuery.where('type', 'doi');
})
.forPage(page, chunkSize); // Fetch the current page of datasets
}
private async shouldUpdateDataset(dataset: Dataset): Promise<boolean> {
try {
let doiIdentifier = dataset.identifier;
if (!doiIdentifier) {
await dataset.load('identifier');
doiIdentifier = dataset.identifier;
}
if (!doiIdentifier || doiIdentifier.type !== 'doi') {
return false;
}
const datasetModified =
dataset.server_date_modified instanceof DateTime
? dataset.server_date_modified
: DateTime.fromJSDate(dataset.server_date_modified);
if (!datasetModified) {
return true;
}
if (datasetModified > DateTime.now()) {
return false;
}
const doiClient = new DoiClient();
const DOI_CHECK_TIMEOUT = 300; // ms
const doiLastModified = await Promise.race([
doiClient.getDoiLastModified(doiIdentifier.value),
this.createTimeoutPromise(DOI_CHECK_TIMEOUT),
]).catch(() => null);
if (!doiLastModified) {
// If uncertain, better include dataset for update
return true;
}
const doiModified = DateTime.fromJSDate(doiLastModified);
if (datasetModified > doiModified) {
const diffInSeconds = Math.abs(datasetModified.diff(doiModified, 'seconds').seconds);
const toleranceSeconds = 600;
return diffInSeconds > toleranceSeconds;
}
return false;
} catch (error) {
return true; // safer: include dataset if unsure
}
}
/**
* Create a timeout promise for API calls
*/
private createTimeoutPromise(timeoutMs: number): Promise<never> {
return new Promise((_, reject) => {
setTimeout(() => reject(new Error(`API call timeout after ${timeoutMs}ms`)), timeoutMs);
});
}
private showSimpleOutput(updatableDatasets: Dataset[]): void {
if (updatableDatasets.length === 0) {
console.log('No datasets need DataCite updates.');
return;
}
console.log(`\nFound ${updatableDatasets.length} dataset(s) that need DataCite updates:\n`);
updatableDatasets.forEach((dataset) => {
console.log(`publish_id ${dataset.publish_id} needs update - ${dataset.mainTitle || 'Untitled'}`);
});
console.log(`\nTo update these datasets, run:`);
console.log(` node ace update:datacite`);
console.log(`\nOr update specific datasets:`);
console.log(` node ace update:datacite -p <publish_id>`);
}
private async showVerboseOutput(updatableDatasets: Dataset[]): Promise<void> {
if (updatableDatasets.length === 0) {
console.log('No datasets need DataCite updates.');
return;
}
console.log(`\nFound ${updatableDatasets.length} dataset(s) that need DataCite updates:\n`);
for (const dataset of updatableDatasets) {
await this.showDatasetDetails(dataset);
}
console.log(`\nSummary: ${updatableDatasets.length} datasets need updates`);
}
private async showDatasetDetails(dataset: Dataset): Promise<void> {
try {
let doiIdentifier = dataset.identifier;
if (!doiIdentifier) {
await dataset.load('identifier');
doiIdentifier = dataset.identifier;
}
const doiValue = doiIdentifier?.value || 'N/A';
const datasetModified = dataset.server_date_modified;
// Get DOI info from DataCite
const doiClient = new DoiClient();
const doiLastModified = await doiClient.getDoiLastModified(doiValue);
const doiState = await doiClient.getDoiState(doiValue);
console.log(`┌─ Dataset ${dataset.publish_id} ───────────────────────────────────────────────────────────────`);
console.log(`│ Title: ${dataset.mainTitle || 'Untitled'}`);
console.log(`│ DOI: ${doiValue}`);
console.log(`│ DOI State: ${doiState || 'Unknown'}`);
console.log(`│ Dataset Modified: ${datasetModified ? datasetModified.toISO() : 'N/A'}`);
console.log(`│ DOI Modified: ${doiLastModified ? DateTime.fromJSDate(doiLastModified).toISO() : 'N/A'}`);
console.log(`│ Status: NEEDS UPDATE`);
console.log(`└─────────────────────────────────────────────────────────────────────────────────────────────\n`);
} catch (error) {
console.log(`┌─ Dataset ${dataset.publish_id} ───────────────────────────────────────────────────────────────`);
console.log(`│ Title: ${dataset.mainTitle || 'Untitled'}`);
console.log(`│ DOI: ${dataset.identifier?.value || 'N/A'}`);
console.log(`│ Error: ${error.message}`);
console.log(`│ Status: NEEDS UPDATE (Error checking)`);
console.log(`└─────────────────────────────────────────────────────────────────────────────────────────────\n`);
}
}
}

View file

@@ -122,58 +122,53 @@ export default class UpdateDatacite extends BaseCommand {
private async shouldUpdateDataset(dataset: Dataset): Promise<boolean> {
try {
// Check if dataset has a DOI identifier (HasOne relationship)
let doiIdentifier = dataset.identifier;
if (!doiIdentifier) {
// Try to load the relationship if not already loaded
await dataset.load('identifier');
doiIdentifier = dataset.identifier;
}
if (!doiIdentifier || doiIdentifier.type !== 'doi') {
logger.warn(`Dataset ${dataset.publish_id}: No DOI identifier found`);
return false;
}
// Validate dataset modification date
const datasetModified = dataset.server_date_modified;
const now = DateTime.now();
if (!datasetModified) {
logger.error(`Dataset ${dataset.publish_id}: server_date_modified is null or undefined`);
return true; // Update anyway if modification date is missing
return true; // Update if modification date is missing
}
if (datasetModified > now) {
logger.error(
`Dataset ${dataset.publish_id}: server_date_modified (${datasetModified.toISO()}) is in the future! ` +
`Current time: ${now.toISO()}. This indicates a data integrity issue. Skipping update.`,
);
return false; // Do not update when modification date is invalid
return false; // Skip invalid future dates
}
// Get DOI information from DataCite using DoiClient
// Check DataCite DOI modification date
const doiClient = new DoiClient();
const doiLastModified = await doiClient.getDoiLastModified(doiIdentifier.value);
if (!doiLastModified) {
logger.warn(`Dataset ${dataset.publish_id}: Could not retrieve DOI modification date from DataCite`);
return true; // Update anyway if we can't get DOI info
return false; // don't update if we can't get DOI info
}
// Compare dataset modification date with DOI modification date
const doiModified = DateTime.fromJSDate(doiLastModified);
if (datasetModified > doiModified) {
// if dataset was modified after DOI creation
// Calculate the difference in seconds
const diffInSeconds = Math.abs(datasetModified.diff(doiModified, 'seconds').seconds);
logger.debug(
`Dataset ${dataset.publish_id}: Dataset modified: ${datasetModified.toISO()}, DOI modified: ${doiModified.toISO()}`,
);
// Define tolerance threshold (60 seconds = 1 minute)
const toleranceSeconds = 60;
// Update if dataset was modified after the DOI record
return datasetModified > doiModified;
// Only update if the difference is greater than the tolerance
// This prevents unnecessary updates for minor timestamp differences
return diffInSeconds > toleranceSeconds;
} else {
return false; // No update needed
}
} catch (error) {
logger.warn(`Error checking update status for dataset ${dataset.publish_id}: ${error.message}`);
return true; // Update anyway if we can't determine status
return false; // don't update if we can't determine the status or another error occurs
}
}
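// Worked example (editor's note, illustrative timestamps) for the tolerance check above:
//   dataset modified 2025-09-26T10:00:45Z, DOI modified 2025-09-26T10:00:00Z -> diff 45s <= 60s -> no update
//   dataset modified 2025-09-26T10:10:00Z, DOI modified 2025-09-26T10:00:00Z -> diff 600s > 60s -> update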

View file

@@ -1,47 +1,61 @@
#!/bin/bash
set -e
# # Run freshclam to update virus definitions
# freshclam
echo "Starting ClamAV services..."
# # Sleep for a few seconds to give ClamAV time to start
# sleep 5
# # Start the ClamAV daemon
# /etc/init.d/clamav-daemon start
# Try to download database if missing
if [ ! "$(ls -A /var/lib/clamav 2>/dev/null)" ]; then
    echo "Downloading ClamAV database (this may take a while)..."
    # Simple freshclam run without complex config
    if sg clamav -c "freshclam --datadir=/var/lib/clamav --quiet"; then
        echo "✓ Database downloaded successfully"
    else
        echo "⚠ Database download failed - creating minimal setup"
        # Create a dummy file so clamd doesn't immediately fail
        sg clamav -c "touch /var/lib/clamav/.dummy"
    fi
fi
# bootstrap clam av service and clam av database updater
set -m
function process_file() {
    if [[ ! -z "$1" ]]; then
        local SETTING_LIST=$(echo "$1" | tr ',' '\n' | grep "^[A-Za-z][A-Za-z]*=.*$")
        local SETTING
        for SETTING in ${SETTING_LIST}; do
            # Remove any existing copies of this setting. We do this here so that
            # settings with multiple values (e.g. ExtraDatabase) can still be added
            # multiple times below
            local KEY=${SETTING%%=*}
            sed -i $2 -e "/^${KEY} /d"
        done
        for SETTING in ${SETTING_LIST}; do
            # Split on first '='
            local KEY=${SETTING%%=*}
            local VALUE=${SETTING#*=}
            echo "${KEY} ${VALUE}" >> "$2"
        done
    fi
}
# process_file "${CLAMD_SETTINGS_CSV}" /etc/clamav/clamd.conf
# process_file "${FRESHCLAM_SETTINGS_CSV}" /etc/clamav/freshclam.conf
# Start freshclam daemon for automatic updates
echo "Starting freshclam daemon for automatic updates..."
sg clamav -c "freshclam -d" &
# start in background
freshclam -d &
# /etc/init.d/clamav-freshclam start &
clamd
# Start clamd in background
# Start clamd in foreground (so dumb-init can supervise it)
# /etc/init.d/clamav-daemon start &
# change back to CMD of dockerfile
exec "$@"
# Start clamd daemon in background using sg
echo "Starting ClamAV daemon..."
# sg clamav -c "clamd" &
# Use sg to run clamd with proper group permissions
# sg clamav -c "clamd" &
sg clamav -c "clamd --config-file=/etc/clamav/clamd.conf" &
# Give services time to start
echo "Waiting for services to initialize..."
sleep 8
# simple check
if pgrep clamd > /dev/null; then
echo "✓ ClamAV daemon is running"
else
echo "⚠ ClamAV daemon status uncertain, but continuing..."
fi
# Check if freshclam daemon is running
if pgrep freshclam > /dev/null; then
echo "✓ Freshclam daemon is running"
else
echo "⚠ Freshclam daemon status uncertain, but continuing..."
fi
# # change back to CMD of dockerfile
# exec "$@"
echo "✓ ClamAV setup complete"
echo "Starting main application..."
exec dumb-init -- "$@"

View file

@@ -1,229 +1,47 @@
##
## Example config file for freshclam
## Please read the freshclam.conf(5) manual before editing this file.
## Container-optimized freshclam configuration
##
# Comment or remove the line below.
# Path to the database directory.
# WARNING: It must match clamd.conf's directive!
# Default: hardcoded (depends on installation options)
# Database directory
DatabaseDirectory /var/lib/clamav
# Path to the log file (make sure it has proper permissions)
# Default: disabled
# Log to stdout for container logging
# UpdateLogFile /dev/stdout
# Maximum size of the log file.
# Value of 0 disables the limit.
# You may use 'M' or 'm' for megabytes (1M = 1m = 1048576 bytes)
# and 'K' or 'k' for kilobytes (1K = 1k = 1024 bytes).
# in bytes just don't use modifiers. If LogFileMaxSize is enabled,
# log rotation (the LogRotate option) will always be enabled.
# Default: 1M
#LogFileMaxSize 2M
# Log time with each message.
# Default: no
# Basic logging settings
LogTime yes
# Enable verbose logging.
# Default: no
LogVerbose yes
# Use system logger (can work together with UpdateLogFile).
# Default: no
LogVerbose no
LogSyslog no
# Specify the type of syslog messages - please refer to 'man syslog'
# for facility names.
# Default: LOG_LOCAL6
#LogFacility LOG_MAIL
# Enable log rotation. Always enabled when LogFileMaxSize is enabled.
# Default: no
#LogRotate yes
# This option allows you to save the process identifier of the daemon
# Default: disabled
#PidFile /var/run/freshclam.pid
# PID file location
PidFile /var/run/clamav/freshclam.pid
# By default when started freshclam drops privileges and switches to the
# "clamav" user. This directive allows you to change the database owner.
# Default: clamav (may depend on installation options)
DatabaseOwner node
# Database owner
DatabaseOwner clamav
# Use DNS to verify virus database version. Freshclam uses DNS TXT records
# to verify database and software versions. With this directive you can change
# the database verification domain.
# WARNING: Do not touch it unless you're configuring freshclam to use your
# own database verification domain.
# Default: current.cvd.clamav.net
#DNSDatabaseInfo current.cvd.clamav.net
# Uncomment the following line and replace XY with your country
# code. See http://www.iana.org/cctld/cctld-whois.htm for the full list.
# You can use db.XY.ipv6.clamav.net for IPv6 connections.
# Mirror settings for Austria
DatabaseMirror db.at.clamav.net
# database.clamav.net is a round-robin record which points to our most
# reliable mirrors. It's used as a fall back in case db.XY.clamav.net is
# not working. DO NOT TOUCH the following line unless you know what you
# are doing.
DatabaseMirror database.clamav.net
# How many attempts to make before giving up.
# Default: 3 (per mirror)
#MaxAttempts 5
# With this option you can control scripted updates. It's highly recommended
# to keep it enabled.
# Default: yes
#ScriptedUpdates yes
# By default freshclam will keep the local databases (.cld) uncompressed to
# make their handling faster. With this option you can enable the compression;
# the change will take effect with the next database update.
# Default: no
#CompressLocalDatabase no
# With this option you can provide custom sources (http:// or file://) for
# database files. This option can be used multiple times.
# Default: no custom URLs
#DatabaseCustomURL http://myserver.com/mysigs.ndb
#DatabaseCustomURL file:///mnt/nfs/local.hdb
# This option allows you to easily point freshclam to private mirrors.
# If PrivateMirror is set, freshclam does not attempt to use DNS
# to determine whether its databases are out-of-date, instead it will
# use the If-Modified-Since request or directly check the headers of the
# remote database files. For each database, freshclam first attempts
# to download the CLD file. If that fails, it tries to download the
# CVD file. This option overrides DatabaseMirror, DNSDatabaseInfo
# and ScriptedUpdates. It can be used multiple times to provide
# fall-back mirrors.
# Default: disabled
#PrivateMirror mirror1.mynetwork.com
#PrivateMirror mirror2.mynetwork.com
# Update settings
ScriptedUpdates yes
# Number of database checks per day.
# Default: 12 (every two hours)
#Checks 24
Checks 12
# Proxy settings
# Default: disabled
#HTTPProxyServer myproxy.com
#HTTPProxyPort 1234
#HTTPProxyUsername myusername
#HTTPProxyPassword mypass
# If your servers are behind a firewall/proxy which applies User-Agent
# filtering you can use this option to force the use of a different
# User-Agent header.
# Default: clamav/version_number
#HTTPUserAgent SomeUserAgentIdString
# Use aaa.bbb.ccc.ddd as client address for downloading databases. Useful for
# multi-homed systems.
# Default: Use OS'es default outgoing IP address.
#LocalIPAddress aaa.bbb.ccc.ddd
# Send the RELOAD command to clamd.
# Default: no
#NotifyClamd /path/to/clamd.conf
# Run command after successful database update.
# Default: disabled
#OnUpdateExecute command
# Run command when database update process fails.
# Default: disabled
#OnErrorExecute command
# Run command when freshclam reports outdated version.
# In the command string %v will be replaced by the new version number.
# Default: disabled
#OnOutdatedExecute command
# Don't fork into background.
# Default: no
# Fork into the background (the entrypoint starts freshclam -d itself)
Foreground no
# Enable debug messages in libclamav.
# Default: no
#Debug yes
# Connection timeouts
ConnectTimeout 60
ReceiveTimeout 60
# Timeout in seconds when connecting to database server.
# Default: 30
#ConnectTimeout 60
# Test databases before using them
TestDatabases yes
# Timeout in seconds when reading from database server.
# Default: 30
#ReceiveTimeout 60
# With this option enabled, freshclam will attempt to load new
# databases into memory to make sure they are properly handled
# by libclamav before replacing the old ones.
# Default: yes
#TestDatabases yes
# When enabled freshclam will submit statistics to the ClamAV Project about
# the latest virus detections in your environment. The ClamAV maintainers
# will then use this data to determine what types of malware are the most
# detected in the field and in what geographic area they are.
# Freshclam will connect to clamd in order to get recent statistics.
# Default: no
#SubmitDetectionStats /path/to/clamd.conf
# Country of origin of malware/detection statistics (for statistical
# purposes only). The statistics collector at ClamAV.net will look up
# your IP address to determine the geographical origin of the malware
# reported by your installation. If this installation is mainly used to
# scan data which comes from a different location, please enable this
# option and enter a two-letter code (see http://www.iana.org/domains/root/db/)
# of the country of origin.
# Default: disabled
#DetectionStatsCountry country-code
# This option enables support for our "Personal Statistics" service.
# When this option is enabled, the information on malware detected by
# your clamd installation is made available to you through our website.
# To get your HostID, log on http://www.stats.clamav.net and add a new
# host to your host list. Once you have the HostID, uncomment this option
# and paste the HostID here. As soon as your freshclam starts submitting
# information to our stats collecting service, you will be able to view
# the statistics of this clamd installation by logging into
# http://www.stats.clamav.net with the same credentials you used to
# generate the HostID. For more information refer to:
# http://www.clamav.net/documentation.html#cctts
# This feature requires SubmitDetectionStats to be enabled.
# Default: disabled
#DetectionStatsHostID unique-id
# This option enables support for Google Safe Browsing. When activated for
# the first time, freshclam will download a new database file (safebrowsing.cvd)
# which will be automatically loaded by clamd and clamscan during the next
# reload, provided that the heuristic phishing detection is turned on. This
# database includes information about websites that may be phishing sites or
# possible sources of malware. When using this option, it's mandatory to run
# freshclam at least every 30 minutes.
# Freshclam uses the ClamAV's mirror infrastructure to distribute the
# database and its updates but all the contents are provided under Google's
# terms of use. See http://www.google.com/transparencyreport/safebrowsing
# and http://www.clamav.net/documentation.html#safebrowsing
# for more information.
# Default: disabled
#SafeBrowsing yes
# This option enables downloading of bytecode.cvd, which includes additional
# detection mechanisms and improvements to the ClamAV engine.
# Default: enabled
#Bytecode yes
# Download an additional 3rd party signature database distributed through
# the ClamAV mirrors.
# This option can be used multiple times.
#ExtraDatabase dbname1
#ExtraDatabase dbname2
# Enable bytecode signatures
Bytecode yes

package-lock.json (generated)
View file

@@ -48,7 +48,9 @@
"node-2fa": "^2.0.3",
"node-exceptions": "^4.0.1",
"notiwind": "^2.0.0",
"p-limit": "^7.1.1",
"pg": "^8.9.0",
"pino-pretty": "^13.0.0",
"qrcode": "^1.5.3",
"redis": "^5.0.0",
"reflect-metadata": "^0.2.1",
@@ -92,7 +94,6 @@
"hot-hook": "^0.4.0",
"numeral": "^2.0.6",
"pinia": "^3.0.2",
"pino-pretty": "^13.0.0",
"postcss-loader": "^8.1.1",
"prettier": "^3.4.2",
"supertest": "^6.3.3",
@@ -7398,7 +7399,6 @@
"version": "4.6.3",
"resolved": "https://registry.npmjs.org/dateformat/-/dateformat-4.6.3.tgz",
"integrity": "sha512-2P0p0pFGzHS5EMnhdxQi7aJN+iMheud0UhG4dlE1DLAlvL8JHjJJTX/CSm4JXwV0Ka5nGk3zC5mcb5bUQUxxMA==",
"dev": true,
"license": "MIT",
"engines": {
"node": "*"
@@ -7904,7 +7904,6 @@
"version": "1.4.5",
"resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz",
"integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==",
"dev": true,
"license": "MIT",
"dependencies": {
"once": "^1.4.0"
@@ -8560,7 +8559,6 @@
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/fast-copy/-/fast-copy-3.0.2.tgz",
"integrity": "sha512-dl0O9Vhju8IrcLndv2eU4ldt1ftXMqqfgN4H1cpmGV7P6jeB9FwpN9a2c8DPGE1Ys88rNUJVYDHq73CGAGOPfQ==",
"dev": true,
"license": "MIT"
},
"node_modules/fast-deep-equal": {
@@ -8633,7 +8631,6 @@
"version": "2.1.1",
"resolved": "https://registry.npmjs.org/fast-safe-stringify/-/fast-safe-stringify-2.1.1.tgz",
"integrity": "sha512-W+KJc2dmILlPplD/H4K9l9LcAHAfPtP6BY84uVLXQ6Evcz9Lcg33Y2z1IVblT6xdY54PXYVHEv+0Wpq8Io6zkA==",
"dev": true,
"license": "MIT"
},
"node_modules/fast-uri": {
@@ -9667,7 +9664,6 @@
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/help-me/-/help-me-5.0.0.tgz",
"integrity": "sha512-7xgomUX6ADmcYzFik0HzAxh/73YlKR9bmFzf51CZwR+b6YtzU2m0u49hQCqV6SvlqIqsaxovfwdvbnsw3b/zpg==",
"dev": true,
"license": "MIT"
},
"node_modules/hookable": {
@@ -10432,7 +10428,6 @@
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/joycon/-/joycon-3.1.1.tgz",
"integrity": "sha512-34wB/Y7MW7bzjKRjUKTa46I2Z7eV62Rkhva+KkopW7Qvv/OSWBqvkSY7vusOPrNuZcUG3tApvdVgNB8POj3SPw==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=10"
@@ -11159,7 +11154,6 @@
"version": "1.2.8",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
"integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
"dev": true,
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/ljharb"
@@ -11717,15 +11711,15 @@
}
},
"node_modules/p-limit": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-4.0.0.tgz",
"integrity": "sha512-5b0R4txpzjPWVw/cXXUResoD4hb6U/x9BH08L7nw+GN1sezDzPdxeRvpc9c433fZhBan/wusjbCsqwqm4EIBIQ==",
"version": "7.1.1",
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-7.1.1.tgz",
"integrity": "sha512-i8PyM2JnsNChVSYWLr2BAjNoLi0BAYC+wecOnZnVV+YSNJkzP7cWmvI34dk0WArWfH9KwBHNoZI3P3MppImlIA==",
"license": "MIT",
"dependencies": {
"yocto-queue": "^1.0.0"
"yocto-queue": "^1.2.1"
},
"engines": {
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
"node": ">=20"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
@@ -11746,6 +11740,21 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/p-locate/node_modules/p-limit": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/p-limit/-/p-limit-4.0.0.tgz",
"integrity": "sha512-5b0R4txpzjPWVw/cXXUResoD4hb6U/x9BH08L7nw+GN1sezDzPdxeRvpc9c433fZhBan/wusjbCsqwqm4EIBIQ==",
"license": "MIT",
"dependencies": {
"yocto-queue": "^1.0.0"
},
"engines": {
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/p-map": {
"version": "7.0.3",
"resolved": "https://registry.npmjs.org/p-map/-/p-map-7.0.3.tgz",
@@ -12165,7 +12174,6 @@
"version": "13.1.1",
"resolved": "https://registry.npmjs.org/pino-pretty/-/pino-pretty-13.1.1.tgz",
"integrity": "sha512-TNNEOg0eA0u+/WuqH0MH0Xui7uqVk9D74ESOpjtebSQYbNWJk/dIxCXIxFsNfeN53JmtWqYHP2OrIZjT/CBEnA==",
"dev": true,
"license": "MIT",
"dependencies": {
"colorette": "^2.0.7",
@@ -12190,7 +12198,6 @@
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/secure-json-parse/-/secure-json-parse-4.0.0.tgz",
"integrity": "sha512-dxtLJO6sc35jWidmLxo7ij+Eg48PM/kleBsxpC8QJE0qJICe+KawkDQmvCMZUr9u7WKVHgMW6vy3fQ7zMiFZMA==",
"dev": true,
"funding": [
{
"type": "github",
@@ -12207,7 +12214,6 @@
"version": "5.0.3",
"resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-5.0.3.tgz",
"integrity": "sha512-1tB5mhVo7U+ETBKNf92xT4hrQa3pm0MZ0PQvuDnWgAAGHDsfp4lPSpiS6psrSiet87wyGPh9ft6wmhOMQ0hDiw==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=14.16"
@@ -12659,7 +12665,6 @@
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/pump/-/pump-3.0.3.tgz",
"integrity": "sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA==",
"dev": true,
"license": "MIT",
"dependencies": {
"end-of-stream": "^1.1.0",

View file

@@ -59,7 +59,6 @@
"hot-hook": "^0.4.0",
"numeral": "^2.0.6",
"pinia": "^3.0.2",
"pino-pretty": "^13.0.0",
"postcss-loader": "^8.1.1",
"prettier": "^3.4.2",
"supertest": "^6.3.3",
@@ -115,7 +114,9 @@
"node-2fa": "^2.0.3",
"node-exceptions": "^4.0.1",
"notiwind": "^2.0.0",
"p-limit": "^7.1.1",
"pg": "^8.9.0",
"pino-pretty": "^13.0.0",
"qrcode": "^1.5.3",
"redis": "^5.0.0",
"reflect-metadata": "^0.2.1",

View file

@@ -6,17 +6,16 @@
import type { ApplicationService } from '@adonisjs/core/types';
import vine, { symbols, BaseLiteralType, Vine } from '@vinejs/vine';
import type { FieldContext, FieldOptions } from '@vinejs/vine/types';
// import type { MultipartFile, FileValidationOptions } from '@adonisjs/bodyparser/types';
import type { MultipartFile } from '@adonisjs/core/bodyparser';
import type { FileValidationOptions } from '@adonisjs/core/types/bodyparser';
import { Request, RequestValidator } from '@adonisjs/core/http';
import MimeType from '#models/mime_type';
/**
* Validation options accepted by the "file" rule
*/
export type FileRuleValidationOptions = Partial<FileValidationOptions> | ((field: FieldContext) => Partial<FileValidationOptions>);
/**
* Extend VineJS
*/
@@ -25,6 +24,7 @@ declare module '@vinejs/vine' {
myfile(options?: FileRuleValidationOptions): VineMultipartFile;
}
}
/**
* Extend HTTP request class
*/
@@ -36,19 +36,54 @@ declare module '@adonisjs/core/http' {
* Checks if the value is an instance of multipart file
* from bodyparser.
*/
export function isBodyParserFile(file: MultipartFile | unknown): boolean {
export function isBodyParserFile(file: MultipartFile | unknown): file is MultipartFile {
return !!(file && typeof file === 'object' && 'isMultipartFile' in file);
}
export async function getEnabledExtensions() {
const enabledExtensions = await MimeType.query().select('file_extension').where('enabled', true).exec();
const extensions = enabledExtensions
.map((extension) => {
return extension.file_extension.split('|');
})
.flat();
return extensions;
/**
* Cache for enabled extensions to reduce database queries
*/
let extensionsCache: string[] | null = null;
let cacheTimestamp = 0;
const CACHE_DURATION = 5 * 60 * 1000; // 5 minutes
/**
* Get enabled extensions with caching
*/
export async function getEnabledExtensions(): Promise<string[]> {
const now = Date.now();
if (extensionsCache && now - cacheTimestamp < CACHE_DURATION) {
return extensionsCache;
}
try {
const enabledExtensions = await MimeType.query().select('file_extension').where('enabled', true).exec();
const extensions = enabledExtensions
.map((extension) => extension.file_extension.split('|'))
.flat()
.map((ext) => ext.toLowerCase().trim())
.filter((ext) => ext.length > 0);
extensionsCache = [...new Set(extensions)]; // Remove duplicates
cacheTimestamp = now;
return extensionsCache;
} catch (error) {
console.error('Error fetching enabled extensions:', error);
return extensionsCache || [];
}
}
/**
* Clear extensions cache
*/
export function clearExtensionsCache(): void {
extensionsCache = null;
cacheTimestamp = 0;
}
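/**
 * Usage sketch (editor's example, hypothetical call site): calls within the
 * 5-minute window are served from the cache; clearExtensionsCache() forces the
 * next call back to the database, e.g. after the mime_types table changes.
 */
async function extensionsCacheDemo() {
    const first = await getEnabledExtensions(); // queries mime_types, fills the cache
    const second = await getEnabledExtensions(); // cache hit: no second DB query
    clearExtensionsCache(); // e.g. after an admin edits mime_types
    const refreshed = await getEnabledExtensions(); // queries the database again
    return { first, second, refreshed };
}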
/**
* VineJS validation rule that validates the file to be an
* instance of BodyParser MultipartFile class.
@@ -65,6 +100,7 @@ const isMultipartFile = vine.createRule(async (file: MultipartFile | unknown, op
// At this point, you can use type assertion to explicitly tell TypeScript that file is of type MultipartFile
const validatedFile = file as MultipartFile;
const validationOptions = typeof options === 'function' ? options(field) : options;
/**
* Set size when it's defined in the options and missing
* on the file instance
@@ -72,30 +108,29 @@ const isMultipartFile = vine.createRule(async (file: MultipartFile | unknown, op
if (validatedFile.sizeLimit === undefined && validationOptions.size) {
validatedFile.sizeLimit = validationOptions.size;
}
/**
* Set extensions when it's defined in the options and missing
* on the file instance
*/
// if (validatedFile.allowedExtensions === undefined && validationOptions.extnames) {
// validatedFile.allowedExtensions = validationOptions.extnames;
// }
if (validatedFile.allowedExtensions === undefined && validationOptions.extnames !== undefined) {
validatedFile.allowedExtensions = validationOptions.extnames; // await getEnabledExtensions();
} else if (validatedFile.allowedExtensions === undefined && validationOptions.extnames === undefined) {
if (validatedFile.allowedExtensions === undefined) {
if (validationOptions.extnames !== undefined) {
validatedFile.allowedExtensions = validationOptions.extnames;
} else {
validatedFile.allowedExtensions = await getEnabledExtensions();
}
/**
* remove this again later
* Set extensions when it's defined in the options and missing
* on the file instance
*/
// if (file.clientNameSizeLimit === undefined && validationOptions.clientNameSizeLimit) {
// file.clientNameSizeLimit = validationOptions.clientNameSizeLimit;
// }
}
/**
* Validate file
*/
try {
validatedFile.validate();
} catch (error) {
field.report(`File validation failed: ${error.message}`, 'file.validation_error', field, validationOptions);
return;
}
/**
* Report errors
*/
@@ -107,36 +142,37 @@ const isMultipartFile = vine.createRule(async (file: MultipartFile | unknown, op
const MULTIPART_FILE: typeof symbols.SUBTYPE = symbols.SUBTYPE;
export class VineMultipartFile extends BaseLiteralType<MultipartFile, MultipartFile, MultipartFile> {
[MULTIPART_FILE]: string;
// constructor(validationOptions?: FileRuleValidationOptions, options?: FieldOptions) {
// super(options, [isMultipartFile(validationOptions || {})]);
// this.validationOptions = validationOptions;
// this.#private = true;
// }
// clone(): this {
// return new VineMultipartFile(this.validationOptions, this.cloneOptions()) as this;
// }
// #private;
// constructor(validationOptions?: FileRuleValidationOptions, options?: FieldOptions, validations?: Validation<any>[]);
// clone(): this;
public validationOptions;
public validationOptions?: FileRuleValidationOptions;
// extnames: (18) ['gpkg', 'htm', 'html', 'csv', 'txt', 'asc', 'c', 'cc', 'h', 'srt', 'tiff', 'pdf', 'png', 'zip', 'jpg', 'jpeg', 'jpe', 'xlsx']
// size: '512mb'
// public constructor(validationOptions?: FileRuleValidationOptions, options?: FieldOptions, validations?: Validation<any>[]) {
public constructor(validationOptions?: FileRuleValidationOptions, options?: FieldOptions) {
// super(options, validations);
super(options, [isMultipartFile(validationOptions || {})]);
this.validationOptions = validationOptions;
}
public clone(): any {
// return new VineMultipartFile(this.validationOptions, this.cloneOptions(), this.cloneValidations());
return new VineMultipartFile(this.validationOptions, this.cloneOptions());
}
/**
* Set maximum file size
*/
public maxSize(size: string | number): this {
const newOptions = { ...this.validationOptions, size };
return new VineMultipartFile(newOptions, this.cloneOptions()) as this;
}
/**
* Set allowed extensions
*/
public extensions(extnames: string[]): this {
const newOptions = { ...this.validationOptions, extnames };
return new VineMultipartFile(newOptions, this.cloneOptions()) as this;
}
}
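/**
 * Usage sketch (editor's example): composing the fluent builders above into a
 * hypothetical upload schema. '512mb' and the extension list mirror the comments
 * above; omitting extensions() falls back to the cached getEnabledExtensions().
 * (Valid once the provider's boot() below has registered the 'myfile' macro.)
 */
const uploadSchemaSketch = vine.object({
    file: vine.myfile().maxSize('512mb').extensions(['pdf', 'png', 'zip']),
});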
export default class VinejsProvider {
@@ -155,13 +191,8 @@ export default class VinejsProvider {
/**
* The container bindings have booted
*/
boot(): void {
// VineString.macro('translatedLanguage', function (this: VineString, options: Options) {
// return this.use(translatedLanguageRule(options));
// });
Vine.macro('myfile', function (this: Vine, options) {
Vine.macro('myfile', function (this: Vine, options?: FileRuleValidationOptions) {
return new VineMultipartFile(options);
});
@@ -175,6 +206,41 @@ export default class VinejsProvider {
}
return new RequestValidator(this.ctx).validateUsing(...args);
});
// Ensure MIME validation macros are loaded
this.loadMimeValidationMacros();
this.loadFileScanMacros();
this.loadFileLengthMacros();
}
/**
* Load MIME validation macros - called during boot to ensure they're available
*/
private async loadMimeValidationMacros(): Promise<void> {
try {
// Dynamically import the MIME validation rule to ensure macros are registered
await import('#start/rules/allowed_extensions_mimetypes');
} catch (error) {
console.warn('Could not load MIME validation macros:', error);
}
}
private async loadFileScanMacros(): Promise<void> {
try {
// Dynamically import the file scan rule to ensure macros are registered
await import('#start/rules/file_scan');
} catch (error) {
console.warn('Could not load file scan macros:', error);
}
}
private async loadFileLengthMacros(): Promise<void> {
try {
// Dynamically import the file length rule to ensure macros are registered
await import('#start/rules/file_length');
} catch (error) {
console.warn('Could not load file length macros:', error);
}
}
/**
@@ -190,5 +256,7 @@ export default class VinejsProvider {
/**
* Preparing to shutdown the app
*/
async shutdown() {}
async shutdown() {
clearExtensionsCache();
}
}