/**
* Converts a HEAL json input to a Dataverse JSON output
* @param {object} input - HEAL json object
* @return {object} Dataverse JSON object to upload to a server instance
*/
const healToDataverse = (input)=>{
var output = { datasetVersion: { metadataBlocks: {
citation: { fields: new Array,
name: "citation", displayName: "Citation Metadata" },
heal: { fields: new Array,
displayName: "HEAL metadata schema", name: "heal" }
}}};
const schema = require('../data/heal-schema.json');
var Validator = require('jsonschema').Validator;
var v = new Validator();
const valid = v.validate(input, schema)
if (!valid.valid) {
console.log(valid.errors);
throw "Invalid HEAL file";
}
var citation = output.datasetVersion.metadataBlocks.citation.fields;
var heal = output.datasetVersion.metadataBlocks.heal.fields;
// Copy heal metadata block as closely as possible
// Nesting in the HEAL JSON rarely goes beyond two levels
// so we will run two nested forEach loops, handling various data types
Object.keys(input).forEach(function(key) { // top level of each block in HEAL json
// contacts and registrants are handled separately
if (key !== "contacts_and_registrants") {
let new_field = {
typeName: key,
typeClass: "compound",
multiple: false,
value: new Object
};
// second level of each block in HEAL json
Object.keys(input[key]).forEach(function(key_2) {
//console.log(key.concat(": ", key_2)); // for debugging which field breaks
// data repositories goes top-level later below
if (key_2 !== "data_repositories") {
new_field.value[key_2] = {
typeName: key_2,
multiple: false,
};
let field_schema = schema['properties'][key]['properties'][key_2];
let field_type = field_schema.type;
new_field.value[key_2]['value'] = input[key][key_2];
// start by handling simple strings, detect controlledVocab
if (field_type == "string") {
if (typeof field_schema.enum !== "undefined") {
new_field.value[key_2].typeClass = "controlledVocabulary";
} else {
new_field.value[key_2].typeClass = "primitive";
}
} else if (field_type == "integer") {
// integers need to be strings
new_field.value[key_2].typeClass = "primitive";
new_field.value[key_2].multiple = false;
//console.log(key.concat(": ", key_2));
new_field.value[key_2]['value'] = input[key][key_2].toString();
// handling more complex objects
} else if (field_type == "array") {
if (field_schema.items.type == "string") {
new_field.value[key_2].multiple = true;
if (key_2 == "treatment_mode" || key_2 == "treatment_application_level" || key_2 == "treatment_novelty") {
new_field.value[key_2].multiple = false;
new_field.value[key_2]['value'] = new_field.value[key_2]['value'][0];
}
// Is there controlled vocabulary?
if (typeof field_schema.items.enum !== 'undefined') {
new_field.value[key_2].typeClass = "controlledVocabulary";
} else {
//new_field.value[key_2].multiple = false;
new_field.value[key_2].typeClass = "primitive";
}
}
}
// Change boolean to Yes/No strings
if (field_type == "boolean") {
new_field.value[key_2].typeClass = "controlledVocabulary";
if (input[key][key_2]) {
new_field.value[key_2].value = "Yes";
} else {
new_field.value[key_2].value = "No";
}
}
}
});
// reset "citation" to "heal_citation" for dataverse compatibility
// similar issue with study_translational_focus
if (new_field.typeName == "citation") {
new_field.typeName = "heal_citation"
} else if (new_field.typeName == "study_translational_focus") {
new_field.typeName = "study_translational_focus_group"
}
heal.push(new_field);
}
});
if (typeof input.citation.heal_funded_status == 'undefined') {
throw "Need heal funded status"
}
// move registrants to the top level, DV compatibility issue
if (typeof input.contacts_and_registrants.registrants == 'undefined') {
throw "need a registrant"
}
var registrants = {
typeName: "registrants",
typeClass: "compound",
multiple: true,
value: new Array
};
input.contacts_and_registrants.registrants.forEach(function(entry) {
Object.keys(entry).forEach(function(value) {
entry[value] = {
typeName: value,
multiple: false,
typeClass: "primitive",
value: entry[value]
}
});
registrants.value.push(entry);
});
heal.push(registrants);
if (typeof input.metadata_location.data_repositories !== 'undefined') {
// also move repositories to the top level
var repositories = {
typeName: "data_repositories",
typeClass: "compound",
multiple: true,
value: new Array
};
input.metadata_location.data_repositories.forEach(function (entry) {
Object.keys(entry).forEach(function (value) {
entry[value] = {
typeName: value,
multiple: false,
typeClass: "primitive",
value: entry[value]
}
});
repositories.value.push(entry);
});
heal.push(repositories);
}
// Add the standardr dataverse citation fields
citation.push({ value: input.minimal_info.study_name,
typeClass: "primitive", multiple: false, typeName: "title" });
if (typeof input.citation.investigators == "undefined") {
throw "Need an investigator";
}
author = { value: new Array, typeClass: "compound", multiple: true, typeName: "author"};
input.citation.investigators.forEach(function(investigator) {
// missing investigator ID
if (typeof investigator.investigator_ID == 'undefined') {
investigator.investigator_ID = [ { investigator_ID_type: "ORCID",
investigator_ID_value: "" } ];
} if (investigator.investigator_ID == []) {
investigator.investigator_ID.push( { investigator_ID_type: "ORCID",
investigator_ID_value: "" } );
} if (investigator.investigator_ID[0].investigator_ID_type !== "ORCID") {
throw "Only ORCID IDs currently supported"
}
// missing names
if (typeof investigator.investigator_last_name == 'undefined') {
investigator.investigator_last_name = "";
} if (typeof investigator.investigator_first_name == 'undefined') {
investigator.investigator_first_name = "";
} if (typeof investigator.investigator_affiliation == 'undefined') {
investigator.investigator_affiliation = "";
}
let new_investigator = {
authorName: {
value: investigator.investigator_last_name.concat(", ", investigator.investigator_first_name),
typeClass: "primitive", multiple: false, typeName: "authorName"
}, authorAffiliation: { value: investigator.investigator_affiliation,
typeClass: "primitive", multiple: false, typeName: "authorAffiliation"
}, authorIdentifierScheme: { value: investigator.investigator_ID[0].investigator_ID_type,
typeName: "authorIdentifierScheme", multiple: false, typeClass: "controlledVocabulary"
}, authorIdentifier: { value: investigator.investigator_ID[0].investigator_ID_value,
typeName: "authorIdentifier", multiple : false, typeClass: "primitive"
}
};
author.value.push(new_investigator);
});
citation.push(author);
if (typeof input.contacts_and_registrants.contacts == 'undefined') {
throw "need a contact";
}
datasetContact = { value: new Array, typeClass: "compound",
multiple: true, typeName: "datasetContact" };
input.contacts_and_registrants.contacts.forEach(function(contact) {
if (typeof contact.contact_email == 'undefined') {
throw "Contact missing an email address";
} if (typeof contact.contact_last_name == 'undefined') {
contact.contact_last_name = ""
} if (typeof contact.contact_first_name == "undefined") {
contact.contact_first_name = ""
}
let new_contact = {
datasetContactEmail: {
typeClass: "primitive",
multiple: false,
typeName: "datasetContactEmail",
value: contact.contact_email
},
datasetContactName: {
typeClass: "primitive",
multiple: false,
typeName: "datasetContactName",
value: contact.contact_last_name.concat(", ", contact.contact_first_name)
}
};
datasetContact.value.push(new_contact);
});
citation.push(datasetContact);
dsDescription = { value : new Array, typeClass: "compound",
multiple: true, typeName: "dsDescription" };
dsDescription.value.push({
dsDescriptionValue: {
value: input.minimal_info.study_description,
multiple: false,
typeClass: "primitive",
typeName: "dsDescriptionValue"
}
});
citation.push(dsDescription);
subject = { value: new Array, typeClass: "controlledVocabulary",
multiple: true, typeName: "subject" };
subject.value.push("Medicine, Health and Life Sciences");
citation.push(subject);
return output;
}
// CommonJS export: the converter function is assigned as this module's export.
module.exports = healToDataverse;