Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revisit both 710 and sf5 #114

Merged
merged 6 commits into from
Nov 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 14 additions & 5 deletions src/reducers/counterpartField.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ const debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers:me
//const debugData = debug.extend('data');
const debugDev = debug.extend('dev');

const irrelevantSubfieldsInNameAndTitlePartComparison = '5689';

const counterpartRegexps = { // NB! tag is from source!
// Note that in the normal case, all source 1XX fields have been converted to 7XX fields.
'100': /^[17]00$/u, '110': /^[17]10$/u, '111': /^[17]11$/u, '130': /^[17]30$/u,
Expand Down Expand Up @@ -133,8 +135,8 @@ function optionalSubfieldComparison(originalBaseField, originalSourceField, keyS
// When everything is the string, the strings need to be (practically) identical.
// (NB! Here order matters. We should probably make it matter everywhere.)
// (However, keySubfieldsAsString === '' will always succeed. Used by 040 at least.)
// TEE: SKIPPAA INDIKAATTORIT!
return fieldToString(field1) === fieldToString(field2);
// NB! substring(6) skips "TAG II" (I=indicator. Thus we skip indicators)
return fieldToString(field1).substring(6) === fieldToString(field2).substring(6);
}
const subfieldArray = keySubfieldsAsString.split('');

Expand Down Expand Up @@ -397,7 +399,9 @@ function pairableName(baseField, sourceField) {
return true;
}

nvdebug(` name mismatch: '${fieldToString(reducedField1)}' vs '${fieldToString(reducedField2)}'`, debugDev);
nvdebug(` name mismatch:`);
nvdebug(` '${fieldToString(reducedField1)}' vs`);
nvdebug(` '${fieldToString(reducedField2)}'`, debugDev);
return false;
}

Expand Down Expand Up @@ -441,21 +445,26 @@ function namePartThreshold(field) {

/**
 * Build a reduced copy of a field that holds only its name part.
 *
 * The cut-off position comes from namePartThreshold(field): subfields located
 * before that index are kept, and when the index is -1 every subfield is kept.
 * Subfields whose code is contained in the string
 * irrelevantSubfieldsInNameAndTitlePartComparison are then dropped.
 *
 * NOTE(review): this view shows two declarations of relevantSubfields (looks
 * like the pre-change and the post-change line of the diff); the description
 * above follows the later one, which adds the code filter.
 *
 * @param {Object} field - field with tag, ind1, ind2 and a subfields array
 * @returns {Object} new object with the same tag/ind1/ind2 and the filtered
 *   subfields array (the subfield objects themselves are not copied)
 */
function fieldToNamePart(field) {
const index = namePartThreshold(field);
const relevantSubfields = field.subfields.filter((sf, i) => i < index || index === -1);
const relevantSubfields = field.subfields.filter((sf, i) => i < index || index === -1).filter(sf => !irrelevantSubfieldsInNameAndTitlePartComparison.includes(sf.code));

const subsetField = {'tag': field.tag, 'ind1': field.ind1, 'ind2': field.ind2, subfields: relevantSubfields};

/*
if (index > -1) { // eslint-disable-line functional/no-conditional-statements
debugDev(`Name subset: ${fieldToString(subsetField)}`);
}
*/

// Ummm... Sometimes $0 comes after $t but belongs to name part

return subsetField;
}

/**
 * Build a reduced copy of a field that holds only its title part.
 *
 * The title part starts at the first subfield with code 't' and runs to the
 * end of the field. Subfields whose code is contained in the string
 * irrelevantSubfieldsInNameAndTitlePartComparison are dropped. The resulting
 * field is written to the dev debug log before it is returned.
 *
 * Note: when the field has no $t, findIndex() yields -1, so the `i >= index`
 * test passes for every subfield and the whole field is treated as title part.
 *
 * NOTE(review): this view shows two declarations of subsetField (looks like
 * the pre-change and the post-change line of the diff); the description above
 * follows the later one, which uses relevantSubfields.
 *
 * @param {Object} field - field with tag, ind1, ind2 and a subfields array
 * @returns {Object} new object with the same tag/ind1/ind2 and the filtered
 *   subfields array
 */
function fieldToTitlePart(field) {
// Take everything after 1st subfield $t...
const index = field.subfields.findIndex(currSubfield => currSubfield.code === 't');
const subsetField = {'tag': field.tag, 'ind1': field.ind1, 'ind2': field.ind2, subfields: field.subfields.filter((sf, i) => i >= index)};
const relevantSubfields = field.subfields.filter((sf, i) => i >= index).filter(sf => !irrelevantSubfieldsInNameAndTitlePartComparison.includes(sf.code));
const subsetField = {'tag': field.tag, 'ind1': field.ind1, 'ind2': field.ind2, subfields: relevantSubfields};
debugDev(`Title subset: ${fieldToString(subsetField)}`);
return subsetField;
}
Expand Down
4 changes: 2 additions & 2 deletions src/reducers/mergableTag.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ const defaultNonMergableFields = [
'853',
'854',
'855',
// '856', // This is mergable but so risky, that let's just ignore these
// '856' is mergable, but a pain in the ass
'863',
'864',
'865',
Expand All @@ -64,7 +64,7 @@ const defaultNonMergableFields = [
'881',
'882',
'883',
'884',
// '884',
'885',
'886',
'887',
Expand Down
2 changes: 1 addition & 1 deletion src/reducers/mergeConstraints.js
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ const mergeConstraints = [
{'tag': '881', 'required': ''},
{'tag': '882', 'required': ''},
{'tag': '883', 'required': ''},
{'tag': '884', 'required': ''},
{'tag': '884', 'required': '', 'paired': 'agkq'},
{'tag': '885', 'required': ''},
{'tag': '886', 'required': ''},
{'tag': '887', 'required': ''},
Expand Down
40 changes: 40 additions & 0 deletions src/reducers/mergeField.js
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,41 @@ function skipMergeField(baseRecord, sourceField, config) {
return false;
}

/**
 * Decide whether the source field should be preferred over its base counterpart.
 *
 * MELINDA-8978: a field carrying an Asteri identifier in $0 wins over one
 * that does not.
 *
 * @param {Object} baseField - counterpart field from the base record
 * @param {Object} sourceField - field from the source record
 * @returns {(number|boolean|undefined)} 1 when only the source field has an
 *   Asteri $0, false otherwise; undefined when baseField has no subfields
 */
function sourceRecordIsBetter(baseField, sourceField) {
  // "(FI-ASTERI-N)" or "(FIN11)" prefix followed by exactly nine digits
  const asteriIdPattern = /^\((?:FI-ASTERI-N|FIN11)\)[0-9]{9}$/u;

  // True when some $0 subfield of the field holds an Asteri identifier
  const hasAsteriId = field => field.subfields.some(({code, value}) => code === '0' && value.match(asteriIdPattern) !== null);

  // Fields without subfields are not compared at all
  if (!baseField.subfields) {
    return;
  }

  // The source field is inspected first, as the base check is only needed when the source qualifies
  return hasAsteriId(sourceField) && !hasAsteriId(baseField) ? 1 : false;
}

/**
 * Exchange the indicators and subfields of two data fields in place.
 * Tags (and any other properties) stay where they are.
 *
 * NB! Control fields are not supported yet: when field1 has no subfields,
 * nothing is done. field2 is not checked separately; it is expected to be of
 * the same kind as field1.
 *
 * @param {Object} field1 - first field, mutated
 * @param {Object} field2 - second field, mutated
 * @returns {undefined}
 */
function swapDataBetweenFields(field1, field2) {
  if (!field1.subfields) {
    return;
  }

  ['ind1', 'ind2', 'subfields'].forEach(key => {
    const held = field1[key];
    field1[key] = field2[key]; // eslint-disable-line functional/immutable-data
    field2[key] = held; // eslint-disable-line functional/immutable-data
  });
}

function mergeField(baseRecord, sourceRecord, sourceField, config) {
nvdebug(`SELF: ${fieldToString(sourceField)}`, debugDev);

Expand All @@ -184,6 +219,11 @@ function mergeField(baseRecord, sourceRecord, sourceField, config) {
const counterpartField = getCounterpart(baseRecord, sourceRecord, sourceField, config);

if (counterpartField) {
if (sourceRecordIsBetter(counterpartField, sourceField)) { // eslint-disable-line functional/no-conditional-statements
swapDataBetweenFields(counterpartField, sourceField);
}


const candFieldPairs880 = sourceField.tag === '880' ? undefined : fieldGetSubfield6Pairs(sourceField, sourceRecord);
nvdebug(`mergeField(): Got counterpart: '${fieldToString(counterpartField)}'. Thus try merge...`, debugDev);
nvdebug(`PAIR: ${candFieldPairs880 ? fieldsToString(candFieldPairs880) : 'NADA'}`, debugDev);
Expand Down
4 changes: 4 additions & 0 deletions test-fixtures/reducers/index/field710_MET502/base.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
{ "code": "e", "value": "kustantaja." },
{ "code": "0", "value": "(FI-ASTERI-N)000555888" }
]},
{ "tag": "710", "ind1": "2", "ind2": " ", "subfields": [
{ "code": "a", "value": "Paristo" },
{ "code": "e", "value": "kustantaja." }
]},
{ "tag": "710", "ind1": "2", "ind2": " ", "subfields": [ { "code": "a", "value": "Qualifier Mismatch (foo)" } ]},
{ "tag": "710", "ind1": "2", "ind2": " ", "subfields": [ { "code": "a", "value": "WSOY" } ]},
{ "tag": "710", "ind1": "2", "ind2": " ", "subfields": [ { "code": "a", "value": "Ntamo" } ]},
Expand Down
5 changes: 5 additions & 0 deletions test-fixtures/reducers/index/field710_MET502/merged.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
{ "code": "e", "value": "kustantaja." },
{ "code": "0", "value": "(FI-ASTERI-N)000555888" }
]},
{ "tag": "710", "ind1": "2", "ind2": " ", "subfields": [
{ "code": "a", "value": "Paristo, kustannusosakeyhtiö," },
{ "code": "e", "value": "kustantaja." },
{ "code": "0", "value": "(FI-ASTERI-N)000555889" }
]},

{ "tag": "710", "ind1": "2", "ind2": " ", "subfields": [ { "code": "a", "value": "Qualifier Mismatch (foo)" } ]},
{ "tag": "710", "ind1": "2", "ind2": " ", "subfields": [ { "code": "a", "value": "Werner Söderström osakeyhtiö." } ]},
Expand Down
5 changes: 5 additions & 0 deletions test-fixtures/reducers/index/field710_MET502/source.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
{ "code": "a", "value": "Karisto" },
{ "code": "e", "value": "kustantaja." }
]},
{ "tag": "710", "ind1": "2", "ind2": " ", "subfields": [
{ "code": "a", "value": "Paristo, kustannusosakeyhtiö," },
{ "code": "e", "value": "kustantaja." },
{ "code": "0", "value": "(FI-ASTERI-N)000555889" }
]},
{ "tag": "710", "ind1": "2", "ind2": " ", "subfields": [ { "code": "a", "value": "Qualifier Mismatch (bar)" } ]},
{ "tag": "710", "ind1": "2", "ind2": " ", "subfields": [ { "code": "a", "value": "Werner Söderström osakeyhtiö" } ]},
{ "tag": "710", "ind1": "2", "ind2": " ", "subfields": [ { "code": "a", "value": "ntamo" } ]},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
{ "code": "9", "value": "FENNI<KEEP>" }
]},
{ "tag": "600", "ind1": "0", "ind2": "4", "subfields": [
{ "code": "a", "value": "Harry," },
{ "code": "a", "value": "harry," },
{ "code": "c", "value": "prinssi, Sussexin herttua." }
]},
{ "tag": "610", "ind1": "2", "ind2": "4", "subfields": [
Expand All @@ -20,7 +20,7 @@
{ "code": "a", "value": "Työväen paheistusliitto." }
]},
{ "tag": "700", "ind1": "1", "ind2": " ", "subfields": [
{ "code": "a", "value": "Manik, Dhyan," },
{ "code": "a", "value": "Islantilainen, Joku" },
{ "code": "e", "value": "kirjoittaja." }
]}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,10 @@
{ "code": "a", "value": "Työväen paheistusliitto." },
{ "code": "0", "value": "(FI-ASTERI-N)323880000" }
]},
{
"tag": "700", "ind1": "1", "ind2": " ", "subfields": [
{ "code": "a", "value": "Manik, Dhyan," },
{ "code": "e", "value": "kirjoittaja." },
{ "code": "0", "value": "(FI-ASTERI-N)000077333" }
]
}
{ "tag": "700", "ind1": "0", "ind2": " ", "subfields": [
{ "code": "a", "value": "Joku Islantilainen," },
{ "code": "e", "value": "kirjoittaja."},
{ "code": "0", "value": "(FI-ASTERI-N)000077333" }
]}
]
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"description":"00 merge X00, keep 710. Now also test MRA-331, MRA-414.",
"MRA-414": "600 and 610 fields should merge",
"NB #1": "MELINDA-506 prefers Harry and \"Joku Islantilainen\" field from source, since it has an Asteri $0 ",
"only": false
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
{ "code": "c", "value": "Sussexin herttuatar." }
]},
{ "tag": "600", "ind1": "0", "ind2": "4", "subfields": [
{ "code": "a", "value": "harry" },
{ "code": "a", "value": "Harry," },
{ "code": "c", "value": "prinssi, Sussexin herttua." },
{ "code": "0", "value": "(FI-ASTERI-N)000189324" },
{ "code": "9", "value": "FENNI<KEEP>" }
Expand All @@ -20,7 +20,7 @@
]},
{ "tag": "700", "ind1": " ", "ind2": " ", "subfields": [ { "code": "a", "value": "Tuisku, Sara." } ] },
{ "tag": "700", "ind1": "0", "ind2": " ", "subfields": [
{ "code": "a", "value": "Dhyan Manik." },
{ "code": "a", "value": "Joku Islantilainen." },
{ "code": "0", "value": "(FI-ASTERI-N)000077333" }
]},
{ "tag": "710", "ind1": "1", "ind2": " ", "subfields": [ { "code": "a", "value": "Helsingin yliopisto." } ] }
Expand Down