hotfix(dataset): enhance file download with embargo validation and improve API data handling

- Add embargo date validation to file download process with date-only comparison
- Require first_name for authors/contributors only when name_type is 'Personal'
- Remove sensitive personal data from dataset API responses
- Improve dataset validation logic for better data integrity
This commit is contained in:
Kaimbacher 2025-09-03 12:48:44 +02:00
parent 89d91d5e12
commit e1ccf0ddc8
5 changed files with 114 additions and 52 deletions

View file

@ -3,52 +3,81 @@ import File from '#models/file';
import { StatusCodes } from 'http-status-codes';
import * as fs from 'fs';
import * as path from 'path';
import { DateTime } from 'luxon';
// node ace make:controller Author
export default class FileController {
// @Get("download/:id")
public async findOne({ response, params }: HttpContext) {
const id = params.id;
const file = await File.findOrFail(id);
// const file = await File.findOne({
// where: { id: id },
// });
if (file) {
const filePath = '/storage/app/data/' + file.pathName;
const ext = path.extname(filePath);
const fileName = file.label + ext;
try {
fs.accessSync(filePath, fs.constants.R_OK); //| fs.constants.W_OK);
// console.log("can read/write:", path);
response
.header('Cache-Control', 'no-cache private')
.header('Content-Description', 'File Transfer')
.header('Content-Type', file.mimeType)
.header('Content-Disposition', 'inline; filename=' + fileName)
.header('Content-Transfer-Encoding', 'binary')
.header('Access-Control-Allow-Origin', '*')
.header('Access-Control-Allow-Methods', 'GET,POST');
response.status(StatusCodes.OK).download(filePath);
} catch (err) {
// console.log("no access:", path);
response.status(StatusCodes.NOT_FOUND).send({
message: `File with id ${id} doesn't exist on file server`,
});
}
// const file = await File.findOrFail(id);
// Load file with its related dataset to check embargo
const file = await File.query()
.where('id', id)
.preload('dataset') // or 'dataset' - whatever your relationship is named
.firstOrFail();
// res.status(StatusCodes.OK).sendFile(filePath, (err) => {
// // res.setHeader("Content-Type", "application/json");
// // res.removeHeader("Content-Disposition");
// res.status(StatusCodes.NOT_FOUND).send({
// message: `File with id ${id} doesn't exist on file server`,
// });
// });
} else {
response.status(StatusCodes.NOT_FOUND).send({
if (!file) {
return response.status(StatusCodes.NOT_FOUND).send({
message: `Cannot find File with id=${id}.`,
});
}
// Check embargo date
const dataset = file.dataset; // or file.dataset
if (dataset && this.isUnderEmbargo(dataset.embargo_date)) {
return response.status(StatusCodes.FORBIDDEN).send({
message: `File is under embargo until ${dataset.embargo_date?.toFormat('yyyy-MM-dd')}`,
});
}
// Proceed with file download
const filePath = '/storage/app/data/' + file.pathName;
const ext = path.extname(filePath);
const fileName = file.label + ext;
try {
fs.accessSync(filePath, fs.constants.R_OK); //| fs.constants.W_OK);
// console.log("can read/write:", path);
response
.header('Cache-Control', 'no-cache private')
.header('Content-Description', 'File Transfer')
.header('Content-Type', file.mimeType)
.header('Content-Disposition', 'inline; filename=' + fileName)
.header('Content-Transfer-Encoding', 'binary')
.header('Access-Control-Allow-Origin', '*')
.header('Access-Control-Allow-Methods', 'GET,POST');
response.status(StatusCodes.OK).download(filePath);
} catch (err) {
// console.log("no access:", path);
response.status(StatusCodes.NOT_FOUND).send({
message: `File with id ${id} doesn't exist on file server`,
});
}
}
/**
 * Decide whether a dataset's files are still locked by an embargo.
 *
 * Both the embargo date and the current time are truncated to the start of
 * their day, so only calendar dates are compared (time of day is ignored).
 * The embargo date itself is inclusive: on that day this still returns true,
 * and downloads open up on the following day.
 *
 * @param embargoDate - The embargo date from the dataset, or null when none is set
 * @returns true while the embargo applies or the date cannot be interpreted;
 *          false when no embargo is set or the embargo date lies before today
 */
private isUnderEmbargo(embargoDate: DateTime | null): boolean {
    // No embargo date set - allow download
    if (!embargoDate) {
        return false;
    }

    // An invalid DateTime reports NaN as its timestamp, which would make the
    // comparison below false and release the file. Fail closed instead.
    if (!embargoDate.isValid) {
        return true;
    }

    // Current date at start of day (00:00:00)
    const todayStart = DateTime.now().startOf('day');
    // Embargo date at start of day (00:00:00)
    // NOTE(review): each value is truncated in its own zone - assumes both use the same zone; confirm.
    const embargoStart = embargoDate.startOf('day');

    // `>=` (not `>`): the file stays locked for the whole embargo date and is
    // released only once today is strictly later than that date.
    return embargoStart.toMillis() >= todayStart.toMillis();
}
}

View file

@ -605,11 +605,14 @@ export default class DatasetsController {
doiIdentifier.dataset_id = dataset.id;
doiIdentifier.type = 'doi';
doiIdentifier.status = 'findable';
// save modified date of datset for re-caching model in db an update the search index
dataset.server_date_modified = DateTime.now();
// save updated dataset to db and index it in OpenSearch
try {
// save modified date of dataset for re-caching model in db and update the search index
dataset.server_date_modified = DateTime.now();
// autoUpdate: true only triggers when dataset.save() is called, not when saving a related model like below
await dataset.save();
await dataset.related('identifier').save(doiIdentifier);
const index_name = 'tethys-records';
await Index.indexDocument(dataset, index_name);
@ -900,6 +903,7 @@ export default class DatasetsController {
const input = request.only(['project_id', 'embargo_date', 'language', 'type', 'creating_corporation']);
// dataset.type = request.input('type');
dataset.merge(input);
dataset.server_date_modified = DateTime.now();
// let test: boolean = dataset.$isDirty;
await dataset.useTransaction(trx).save();

View file

@ -232,8 +232,8 @@ export default class DatasetController {
.maxLength(255)
.email()
.normalizeEmail()
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
first_name: vine.string().trim().minLength(3).maxLength(255),
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
last_name: vine.string().trim().minLength(3).maxLength(255),
}),
)
@ -248,8 +248,8 @@ export default class DatasetController {
.maxLength(255)
.email()
.normalizeEmail()
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
first_name: vine.string().trim().minLength(3).maxLength(255),
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
last_name: vine.string().trim().minLength(3).maxLength(255),
pivot_contributor_type: vine.enum(Object.keys(ContributorTypes)),
}),
@ -324,7 +324,7 @@ export default class DatasetController {
.email()
.normalizeEmail()
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
first_name: vine.string().trim().minLength(3).maxLength(255),
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
last_name: vine.string().trim().minLength(3).maxLength(255),
}),
)
@ -340,7 +340,7 @@ export default class DatasetController {
.email()
.normalizeEmail()
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
first_name: vine.string().trim().minLength(3).maxLength(255),
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
last_name: vine.string().trim().minLength(3).maxLength(255),
pivot_contributor_type: vine.enum(Object.keys(ContributorTypes)),
}),

View file

@ -1,4 +1,4 @@
import { column, SnakeCaseNamingStrategy, computed, manyToMany } from '@adonisjs/lucid/orm';
import { column, SnakeCaseNamingStrategy, computed, manyToMany, afterFetch, afterFind } from '@adonisjs/lucid/orm';
import { DateTime } from 'luxon';
import dayjs from 'dayjs';
import Dataset from './dataset.js';
@ -95,4 +95,33 @@ export default class Person extends BaseModel {
pivotColumns: ['role', 'sort_order', 'allow_email_contact'],
})
public datasets: ManyToMany<typeof Dataset>;
// public toJSON() {
// const json = super.toJSON();
// // Check if this person is loaded through a pivot relationship with sensitive roles
// const pivotRole = this.$extras?.pivot_role;
// if (pivotRole === 'author' || pivotRole === 'contributor') {
// // Remove sensitive information for public-facing roles
// delete json.email;
// // delete json.identifierOrcid;
// }
// return json;
// }
/**
 * Clears `email` on a person instance whose `$extras.pivot_role` is
 * 'author' or 'contributor'. Instances with any other role, or without a
 * pivot role, are left untouched.
 *
 * Only the in-memory instance is modified; nothing is written to the database here.
 * NOTE(review): the assignment may leave `email` marked as changed on the model -
 * confirm that instances loaded this way are never saved afterwards.
 *
 * @param person - The freshly loaded person to sanitize
 */
private static clearEmailForPublicRole(person: Person): void {
    const pivotRole = person.$extras?.pivot_role;
    if (pivotRole === 'author' || pivotRole === 'contributor') {
        // The cast is needed to assign undefined to the `email` column property.
        person.email = undefined as any;
    }
}

/**
 * Hook registered with `@afterFind()`: sanitizes the single person that was loaded.
 *
 * @param person - The person instance handed to the hook
 */
@afterFind()
public static async afterFindHook(person: Person) {
    Person.clearEmailForPublicRole(person);
}

/**
 * Hook registered with `@afterFetch()`: sanitizes every person in the fetched list.
 *
 * @param persons - The person instances handed to the hook
 */
@afterFetch()
public static async afterFetchHook(persons: Person[]) {
    for (const person of persons) {
        Person.clearEmailForPublicRole(person);
    }
}
}

View file

@ -67,7 +67,7 @@ export const createDatasetValidator = vine.compile(
.email()
.normalizeEmail()
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
first_name: vine.string().trim().minLength(3).maxLength(255),
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
last_name: vine.string().trim().minLength(3).maxLength(255),
}),
)
@ -83,7 +83,7 @@ export const createDatasetValidator = vine.compile(
.email()
.normalizeEmail()
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
first_name: vine.string().trim().minLength(3).maxLength(255),
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
last_name: vine.string().trim().minLength(3).maxLength(255),
pivot_contributor_type: vine.enum(Object.keys(ContributorTypes)),
}),
@ -214,7 +214,7 @@ export const updateDatasetValidator = vine.compile(
.email()
.normalizeEmail()
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
first_name: vine.string().trim().minLength(3).maxLength(255),
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
last_name: vine.string().trim().minLength(3).maxLength(255),
}),
)
@ -230,7 +230,7 @@ export const updateDatasetValidator = vine.compile(
.email()
.normalizeEmail()
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
first_name: vine.string().trim().minLength(3).maxLength(255),
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
last_name: vine.string().trim().minLength(3).maxLength(255),
pivot_contributor_type: vine.enum(Object.keys(ContributorTypes)),
}),
@ -365,7 +365,7 @@ export const updateEditorDatasetValidator = vine.compile(
.email()
.normalizeEmail()
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
first_name: vine.string().trim().minLength(3).maxLength(255),
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
last_name: vine.string().trim().minLength(3).maxLength(255),
}),
)
@ -381,7 +381,7 @@ export const updateEditorDatasetValidator = vine.compile(
.email()
.normalizeEmail()
.isUniquePerson({ table: 'persons', column: 'email', idField: 'id' }),
first_name: vine.string().trim().minLength(3).maxLength(255),
first_name: vine.string().trim().minLength(3).maxLength(255).optional().requiredWhen('name_type', '=', 'Personal'),
last_name: vine.string().trim().minLength(3).maxLength(255),
pivot_contributor_type: vine.enum(Object.keys(ContributorTypes)),
}),