Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Next #126

Merged
merged 20 commits into from
Jan 17, 2024
Merged

Next #126

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
913 changes: 510 additions & 403 deletions package-lock.json

Large diffs are not rendered by default.

32 changes: 16 additions & 16 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"url": "[email protected]:natlibfi/melinda-marc-record-merge-reducers-js.git"
},
"license": "LGPL-3.0+",
"version": "2.0.23",
"version": "2.0.24-alpha.2",
"main": "./dist/index.js",
"engines": {
"node": ">=18"
Expand All @@ -36,31 +36,31 @@
"watch:test": "cross-env DEBUG=1 NODE_ENV=test nodemon -w src -w test-fixtures --exec 'npm run test:dev'"
},
"dependencies": {
"@natlibfi/marc-record": "^8.0.0",
"@natlibfi/marc-record-merge": "^7.0.0",
"@natlibfi/marc-record-validate": "^8.0.3",
"@natlibfi/marc-record-validators-melinda": "^10.15.1",
"@natlibfi/melinda-commons": "^13.0.8",
"@natlibfi/marc-record": "^8.1.0",
"@natlibfi/marc-record-merge": "^7.0.1",
"@natlibfi/marc-record-validate": "^8.0.5",
"@natlibfi/marc-record-validators-melinda": "^10.15.4",
"@natlibfi/melinda-commons": "^13.0.11",
"debug": "^4.3.4",
"isbn3": "^1.1.43",
"isbn3": "^1.1.44",
"normalize-diacritics": "^4.0.3"
},
"devDependencies": {
"@babel/cli": "^7.23.0",
"@babel/core": "^7.23.2",
"@babel/cli": "^7.23.4",
"@babel/core": "^7.23.7",
"@babel/node": "^7.22.19",
"@babel/preset-env": "^7.23.2",
"@babel/register": "^7.22.15",
"@babel/preset-env": "^7.23.8",
"@babel/register": "^7.23.7",
"@natlibfi/eslint-config-melinda-backend": "^3.0.3",
"@natlibfi/fixugen": "^2.0.2",
"@natlibfi/fixura": "^3.0.2",
"@natlibfi/fixugen": "^2.0.4",
"@natlibfi/fixura": "^3.0.4",
"babel-plugin-istanbul": "^6.1.1",
"babel-plugin-rewire": "^1.2.0",
"chai": "^4.3.10",
"chai": "^4.4.1",
"cross-env": "^7.0.3",
"eslint": "^8.53.0",
"eslint": "^8.56.0",
"mocha": "^10.2.0",
"nodemon": "^3.0.1",
"nodemon": "^3.0.3",
"nyc": "^15.1.0"
},
"eslintConfig": {
Expand Down
4 changes: 2 additions & 2 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,6 @@
*
*/

import {localReducers as MelindaReducers, localCopyReducerConfigs as MelindaCopyReducerConfigs} from './reducers';
import {localReducers as MelindaReducers, muuntajaReducers as MuuntajaReducers} from './reducers';

export {MelindaReducers, MelindaCopyReducerConfigs};
export {MelindaReducers, MuuntajaReducers};
7 changes: 4 additions & 3 deletions src/index.spec.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import merger, {Reducers} from '@natlibfi/marc-record-merge';
import merger from '@natlibfi/marc-record-merge';
import {inspect} from 'util';
import {MelindaReducers, MelindaCopyReducerConfigs} from './index';
import {MelindaReducers} from './index';
import createDebugLogger from 'debug';
import {expect} from 'chai';
import {MarcRecord} from '@natlibfi/marc-record';
Expand Down Expand Up @@ -28,7 +28,8 @@ function callback({getFixture}) {

// Run first copy-reducers with Melinda-configs and then the specific MelindaReducers

const reducers = [...MelindaCopyReducerConfigs.map(conf => Reducers.copy(conf)), ...MelindaReducers];
//const reducers = [...MelindaCopyReducerConfigs.map(conf => Reducers.copy(conf)), ...MelindaReducers];
const reducers = MelindaReducers;

debugData(`Reducers: ${inspect(reducers, {colors: true, maxArrayLength: 10, depth: 8})})}`);

Expand Down
43 changes: 42 additions & 1 deletion src/reducers/counterpartField.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,40 @@ export function splitToNameAndQualifier(name) {
}
}


/**
 * Tell whether two subfield values are the same name, where one (or both)
 * may carry a qualifier, e.g. 300$a "whatever" vs "whatever (123 sivua)".
 *
 * @param {string} value1 - First value to compare.
 * @param {string} value2 - Second value to compare.
 * @param {string} tag - Tag of the field the values come from.
 * @param {string} subfieldCode - Code of the subfield the values come from.
 * @returns {boolean} true when the names are equal and the qualifiers do not conflict.
 */
function withAndWithoutQualifierAgree(value1, value2, tag, subfieldCode) {
  // 300$a needs to be explicitly listed as our mergeConstraints.js use this as (part of) field 300 key.
  // Note that 776$i is not needed here, as it is not part of field 776 key.
  const qualifierAwareTags = subfieldCode === 'a' ? ['300'] : [];
  if (!qualifierAwareTags.includes(tag)) {
    return false;
  }

  const [firstName, firstQualifier] = splitToNameAndQualifier(value1);
  const [secondName, secondQualifier] = splitToNameAndQualifier(value2);

  if (firstName !== secondName) {
    return false;
  }

  // A missing qualifier on either side never conflicts; otherwise the qualifiers must be identical.
  return firstQualifier === undefined || secondQualifier === undefined || firstQualifier === secondQualifier;
}


function corporateNamesAgree(value1, value2, tag, subfieldCode) {
if (subfieldCode !== 'a' || !['110', '610', '710', '810'].includes(tag)) {
return false;
Expand Down Expand Up @@ -92,7 +126,14 @@ function corporateNamesAgree(value1, value2, tag, subfieldCode) {
}
}


function pairableValue(tag, subfieldCode, value1, value2) {
// This function could just return true or false.
// I thought of preference when I wrote this, but preference is not currently implemented *here*.
if (withAndWithoutQualifierAgree(value1, value2, tag, subfieldCode)) {
// 300$a "whatever" and "whatever (123 sivua)"
return value1;
}
if (partsAgree(value1, value2, tag, subfieldCode) || corporateNamesAgree(value1, value2, tag, subfieldCode)) {
// Pure baseness: here we assume that base's value1 is better than source's value2.
return value1;
Expand Down Expand Up @@ -158,7 +199,7 @@ function optionalSubfieldComparison(originalBaseField, originalSourceField, keyS


function hasCommonNominator(subfieldCode) {
nvdebug(`hasCommonNominator(${subfieldCode}): '${fieldToString(originalBaseField)}' vs '${fieldToString(originalSourceField)}'`, debugDev);
//nvdebug(`hasCommonNominator(${subfieldCode})? '${fieldToString(originalBaseField)}' vs '${fieldToString(originalSourceField)}'`, debugDev);

// If base has $a and source has $b, there's no common nominator, thus fail...
const subfields1 = field1.subfields.filter(subfield => subfield.code === subfieldCode && valueCarriesMeaning(field1.tag, subfield.code, subfield.value));
Expand Down
14 changes: 0 additions & 14 deletions src/reducers/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,6 @@ import {default as manufacturer260To264} from './transferManufacturerDataFrom260
import {default as removeDuplicatesFromSource} from './removeIdenticalDataFields';
// const debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers');

export const localCopyReducerConfigs = [

/*

{tagPattern: copyIfDifferent},
{tagPattern: copyIfMissing, compareTagsOnly: true}

{tagPattern: copySpecial1, excludeSubfields: ['b', '6', '8']},
{tagPattern: copySpecial2, excludeSubfields: ['9']},
{tagPattern: copySpecial3, dropSubfields: ['4']},
{tagPattern: copySpecial4, excludeSubfields: ['9'], dropSubfields: ['4']}
*/
];

export const localReducers = [
//// PREPROCESSOR STUFF:
// UTF-8 normalization: if wanted, see mergeField.js for an example
Expand Down
2 changes: 1 addition & 1 deletion src/reducers/mergeOrAddSubfield.js
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ export function mergeOrAddSubfield(targetField, candSubfieldData, candFieldPairs
setPunctuationFlag(targetField, candSubfield);
return;
}
nvdebug(` A: No. Field ${original} already had the same or a better merge candidate than our subfield '${candSubfieldAsString}'.`, debugDev);
nvdebug(` A: No. Field ${original} already had the same or a synonymous or a better merge candidate than our subfield '${candSubfieldAsString}'.`, debugDev);
return;
}

Expand Down
59 changes: 56 additions & 3 deletions src/reducers/mergeSubfield.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import createDebugLogger from 'debug';
import {partsAgree, subfieldContainsPartData} from '@natlibfi/marc-record-validators-melinda/dist/normalizeSubfieldValueForComparison';
import {valueCarriesMeaning} from './worldKnowledge';
import {nvdebug} from './utils';
import {nvdebug, subfieldToString} from './utils';
import {tagAndSubfieldCodeReferToIsbn} from '@natlibfi/marc-record-validators-melinda/dist/normalizeFieldForComparison.js';
import {splitToNameAndQualifier} from './counterpartField';

Expand Down Expand Up @@ -146,9 +146,12 @@ function isSynonym(field, candSubfield, relevantSubfields) {
return coverTypesMatch(candSubfield, relevantSubfields);
}

if (candSubfield.code === 'i') {
return relationInformationMatches(candSubfield, relevantSubfields);
nvdebug(`Looking for synonyms for '${subfieldToString(candSubfield)}'...`, debugDev);

if (relationInformationMatches(candSubfield, relevantSubfields)) {
return true;
}

if (pairHttpAndHttps(candSubfield, relevantSubfields)) {
return true;
}
Expand Down Expand Up @@ -190,6 +193,55 @@ function preferHttpsOverHttp(candSubfield, relevantSubfields) {
return true;
}

/**
 * Handle values that differ only by a trailing parenthesised qualifier,
 * e.g. 300$a "1 nide" vs "1 nide (373 sivua)" or 776$i "Verkkoaineisto" vs "Verkkoaineisto (PDF)".
 *
 * When the candidate (source) subfield has a counterpart among relevantSubfields
 * with the same name and a non-conflicting qualifier, the counterpart's value is
 * replaced in place by the candidate's value. The exception is 300$a values
 * mentioning "online"/"verkko", where the existing value is kept.
 *
 * @param {{tag: string}} field - Target field; only its tag is inspected.
 * @param {{code: string, value: string}} candSubfield - Candidate subfield.
 * @param {Array<{value: string}>} relevantSubfields - Existing subfields the candidate may pair with.
 *   A matching element is MUTATED (its value is overwritten).
 * @returns {boolean} true if the candidate was handled here (paired, with or without
 *   replacing the existing value), false otherwise.
 */
function preferQualifierVersion(field, candSubfield, relevantSubfields) {
  if (!fieldAllowsQualifierInOneOfTheSubfields(field, candSubfield) || !candSubfield.value.includes('(')) {
    return false;
  }

  const [candName, candQualifier] = genericSplitToNameAndQualifier(candSubfield.value);
  const pair = relevantSubfields.find(sf => subfieldQualifierCheck(sf, candName, candQualifier));
  if (!pair) {
    return false;
  }
  // SN: "Kuvailuohjeiden näkökulmasta epubille ei pitäisi koskaan merkitä sivumäärää"
  // (From the cataloguing rules' point of view, an epub should never be given a page count.)
  if (field.tag === '300' && candSubfield.code === 'a' && (/(?:online|verkko)/iu).test(candSubfield.value)) {
    return true; // True, but don't prefer the source value
  }

  pair.value = candSubfield.value; // eslint-disable-line functional/immutable-data
  return true;

  // Does the existing subfield carry the same name as the candidate, with a compatible qualifier?
  function subfieldQualifierCheck(subfield, name, qualifier) {
    // Compare against the EXISTING subfield's value. Splitting the candidate's own value here
    // would compare the candidate with itself and match every subfield.
    const [name2, qualifier2] = genericSplitToNameAndQualifier(subfield.value);
    if (name !== name2) {
      return false;
    }
    // A missing qualifier on either side never conflicts.
    return !qualifier || !qualifier2 || qualifier === qualifier2;
  }

  // Split "name (qualifier)" into ['name', '(qualifier)']; values without such a tail give [value, undefined].
  function genericSplitToNameAndQualifier(value) {
    const match = value.match(/^(?<name>.*) (?<qualifier>\([^()]+\))$/u);
    if (!match) {
      return [value, undefined];
    }
    return [match.groups.name, match.groups.qualifier];
  }

  // Only 300$a and 776$i are handled here.
  function fieldAllowsQualifierInOneOfTheSubfields(field, subfield) {
    if (field.tag === '300' && subfield.code === 'a') {
      return true;
    }
    return field.tag === '776' && subfield.code === 'i';
  }
}

function preferSourceCorporateName(field, candSubfield, pair) {
if (candSubfield.code !== 'a' || !['110', '610', '710', '810'].includes(field.tag)) {
return false;
Expand Down Expand Up @@ -245,6 +297,7 @@ export function mergeSubfield(targetField, candSubfield) {
preferHyphenatedISBN(targetField, candSubfield, relevantSubfields) ||
preferHttpsOverHttp(candSubfield, relevantSubfields) ||
preferSourceCorporateName(targetField, candSubfield, relevantSubfields[0]) || // SF is non-repeat
preferQualifierVersion(targetField, candSubfield, relevantSubfields) ||
isSynonym(targetField, candSubfield, relevantSubfields)) {
return true;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"fields" : [
{ "tag": "300", "ind1": " ", "ind2": " ", "subfields" : [
{ "code": "a", "value": "1 verkkoaineisto :" },
{ "code": "b", "value": "kuvitettu" }
]},
{ "tag": "300", "ind1": " ", "ind2": " ", "subfields" : [
{ "code": "a", "value": "1 nide :" },
{ "code": "b", "value": "kuvitettu" }
]}
],
"leader": "01331cam a22003494i 4500"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"leader": "01331cam a22003494i 4500",
"fields" : [
{ "tag": "300", "ind1": " ", "ind2": " ", "subfields" : [
{ "code": "a", "value": "1 verkkoaineisto :" },
{ "code": "b", "value": "kuvitettu" }
]},
{ "tag": "300", "ind1": " ", "ind2": " ", "subfields" : [
{ "code": "a", "value": "1 nide (373 sivua) :" },
{ "code": "b", "value": "kuvitettu" }
]}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"only": false,
"description": "MET-515: use the source (having qualifier) version of 300$a",
"comment": "SN: Kuvailuohjeen näkökulmasta epubille ei pitäisi koskaan merkitä sivumäärää"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"leader": "01331cam a22003494i 4500",
"fields" : [
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"fields" : [
{ "tag": "300", "ind1": " ", "ind2": " ", "subfields" : [
{ "code": "a", "value": "1 verkkoaineisto (373 sivua) :" },
{ "code": "b", "value": "kuvitettu" }
]},
{ "tag": "300", "ind1": " ", "ind2": " ", "subfields" : [
{ "code": "a", "value": "1 nide (373 sivua) :" },
{ "code": "b", "value": "kuvitettu" }
]}
],
"leader": "01331cam a22003494i 4500"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"leader": "01331cam a22003494i 4500",
"fields": [
{ "tag": "776", "ind1": " ", "ind2": " ", "subfields": [
{ "code": "i", "value": "Verkkoaineisto:" },
{ "code": "z", "value": "1234567890" }
]}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"leader": "01331cam a22003494i 4500",
"fields": [
{ "tag": "776", "ind1": " ", "ind2": " ", "subfields": [
{ "code": "i", "value": "Verkkoaineisto (PDF):" },
{ "code": "z", "value": "1234567890" }
]}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"description":"776: $i qualifier behaviour",
"comment": "NV: I changed this to prefer the qualifier version (here: source version), as this loses no information and is more compatible with, say, 300$a or 65X fields",
"only": false
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"leader": "01331cam a22003494i 4500",
"fields": [

]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"leader": "01331cam a22003494i 4500",
"fields": [
{ "tag": "776", "ind1": " ", "ind2": " ", "subfields": [
{ "code": "i", "value": "Verkkoaineisto (PDF):" },
{ "code": "z", "value": "1234567890" }
]}
]
}