/**
* Create a HEAL json-compliant object from a Dataverse metadata exports
* @param {object} dataverse - Dataverse JSON metadata direct from server
* @return {object} HEAL-compliant javascript object
*/
const generateHEAL = (dataverse)=>{
// create template and begin extracting data
var empty = require('json-schema-empty').default;
const schema = require('../data/heal-schema.json');
var template = empty(schema);
try {
var heal = dataverse.data.latestVersion.metadataBlocks.heal.fields;
} catch (err) {
console.log("Error: No HEAL metadata block detected!");
return(err);
}
var citation = dataverse.data.latestVersion.metadataBlocks.citation.fields;
// cycle through two levels and assign values into the empty template
for (var i=0; i<heal.length; i++) {
var toplevel = heal[i]["typeName"];
// had to name the field as heal_citation due to a limitation of dataverse
// we change it back here
if (toplevel=="heal_citation") {
toplevel = "citation";
}
// need to extract into a simple name:var format
var sublevel = heal[i]["value"];
// Some cases have a third level as an array (only registrants afaik)
if (typeof sublevel.length !== 'undefined') {
sublevel.forEach(element => {
for (let key in element) {
element[key] = element[key]["value"];
}
});
} else { // in case of only two levels
for (let key in sublevel) {
if (sublevel[key]["value"])
sublevel[key] = sublevel[key]["value"];
}
}
// commit back to empty template based on "typeName"
template[toplevel] = sublevel;
}
// rename duplicate names that had to be changed in dataverse
template['study_translational_focus'] = template.study_translational_focus_group;
delete template.study_translational_focus_group;
// should be a sublevel, these aren't in the dataverse block schema for dv purposes
template['contacts_and_registrants']['contacts'] = [];
template['contacts_and_registrants']['registrants'] = template.registrants;
delete template.registrants;
template['metadata_location']['data_repositories'] = template.data_repositories;
delete template.data_repositories;
// Yes to binary values
template.citation.heal_funded_status = (template.citation.heal_funded_status == "Yes");
template.citation.study_collection_status = (template.citation.study_collection_status == "Yes");
template.data_availability.produce_data = (template.data_availability.produce_data == "Yes");
template.data_availability.produce_other = (template.data_availability.produce_data == "Yes");
// idk why this doesn't work automatically, but some fields need to be array-ified
Object.entries(template.human_treatment_applicability).forEach(([key, value]) => {
if (typeof value == "string") {
template.human_treatment_applicability[key] = [ value ];
}
});
// data that got merged into standard dataverse categories
// requires manual handling
var citation_map = new Object;
for (var i=0; i<citation.length; i++) {
citation_map[citation[i]["typeName"]] = citation[i]["value"];
}
template.minimal_info["study_name"] = citation_map.title;
template.minimal_info["study_description"] = citation_map.dsDescription[0]["dsDescriptionValue"]["value"];
for (var i=0; i<citation_map.datasetContact.length; i++) {
// In case there's no name for the contact (dataverse only requires email address)
try {
var contact_name = citation_map.datasetContact[i]['datasetContactName']['value'].split(", ");
} catch(e) {
var contact_name = ["undefined", "undefined"];
}
template['contacts_and_registrants']['contacts'].push( {
contact_first_name: contact_name[1],
contact_last_name: contact_name[0],
//contact_affiliation: citation_map.datasetContact[i]['datasetContactAffiliation']['value'],
contact_email: citation_map.datasetContact[i]['datasetContactEmail']['value']
});
}
template.citation['investigators'] = [];
for (var i=0; i<citation_map.author.length; i++) {
// investigator ID is not necessarily specified
try {
var investigator_ID = [{
investigator_ID_type: citation_map.author[0]['authorIdentifierScheme']['value'],
investigator_ID_value: citation_map.author[0]['authorIdentifier']['value']
}];
} catch(e) {
var investigator_ID = [];
}
var author_name = citation_map.author[i]['authorName']['value'].split(", ")
// author affiliation is also not necessarily specified
try {
var author_affiliation = citation_map.author[i]['authorAffiliation']['value'];
} catch(e) {
var author_affiliation = "";
}
template.citation['investigators'].push( {
investigator_first_name: author_name[1],
investigator_last_name: author_name[0],
investigator_affiliation: author_affiliation,
investigator_ID: investigator_ID
});
}
if (typeof(citation_map.dateOfCollection) !== 'undefined') {
template.data_availability.data_collection_start_date = citation_map.dateOfCollection[0]['dateOfCollectionStart']['value'];
template.data_availability.data_collection_finish_date = citation_map.dateOfCollection[0]['dateOfCollectionEnd']['value'];
}
// needs to be formatted as an array (with funder name as an another array)
if (typeof(citation_map.grantNumber) !== 'undefined') {
template.citation['funding'] = [ {
funder_name: [ citation_map.grantNumber[0]['grantNumberAgency']['value'] ],
funding_award_ID: citation_map.grantNumber[0]['grantNumberValue']['value']
} ];
}
// strings to integers as necessary
if (typeof(template.data.subject_data_unit_of_collection_expected_number) !== 'undefined') {
template.data.subject_data_unit_of_collection_expected_number = Number(template.data.subject_data_unit_of_collection_expected_number);
}
if (typeof(template.data.subject_data_unit_of_analysis_expected_number) !== 'undefined') {
template.data.subject_data_unit_of_analysis_expected_number = Number(template.data.subject_data_unit_of_analysis_expected_number)
}
// Validate against the schema again to quality check output
var Validator = require('jsonschema').Validator;
var v = new Validator();
const valid = v.validate(template, schema)
if (valid.valid) {
return template;
} else {
console.log(valid.errors);
//return template; //useful for debugging
}
}
// Expose generateHEAL as the value of this CommonJS module's exports.
module.exports = generateHEAL