Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Met502 kustantajat #111

Merged
merged 11 commits into from
Nov 8, 2023
114 changes: 57 additions & 57 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

55 changes: 51 additions & 4 deletions src/reducers/counterpartField.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,55 @@ function differentPublisherSubfields(field1, field2) {
}
*/


/**
 * Split a corporate name into a comparable base name and a trailing qualifier.
 *
 * A qualifier is one of the following suffixes at the very end of the string
 * (matched case-insensitively): a parenthesised part such as " (whatever)",
 * " ab", " abp", ", kustannusosakeyhtiö", " oy", " oyj" or " ry".
 *
 * @param {string} name - Corporate name to split.
 * @returns {Array} Two-element array `[baseName, qualifier]`. `baseName` is
 *   lower-cased, with the known abbreviation "WSOY" expanded. `qualifier` is
 *   the removed suffix exactly as it appeared in `name` (original case and
 *   leading separator kept), or `undefined` when no suffix was found.
 */
export function splitToNameAndQualifier(name) {
  // Preferred forms for names that are matched as a whole, case-insensitively.
  const preferredForms = new Map([
    ['WSOY', 'Werner Söderström osakeyhtiö'],
    ['NTAMO', 'ntamo']
  ]);

  const pickBestName = (candidate) => {
    const lookupKey = candidate.toUpperCase();
    return preferredForms.has(lookupKey) ? preferredForms.get(lookupKey) : candidate;
  };

  const stem = name.replace(/(?: \([^)]+\)| abp?|, kustannusosakeyhtiö| oyj?| ry)$/ugi, '');

  // NB! The qualifier retains its initial separator, e.g. the space in " (whatever)".
  const qualifier = stem === name ? undefined : name.substring(stem.length);

  return [pickBestName(stem).toLowerCase(), qualifier];
}

/**
 * Decide whether two corporate-name values can be treated as the same name.
 *
 * Only subfield 'a' of tags 110, 610, 710 and 810 is considered; anything
 * else yields false. Both values are split with splitToNameAndQualifier():
 * the name parts must match, and if both values carry a qualifier, the
 * qualifiers must be identical strings.
 *
 * @param {string} value1 - First subfield value.
 * @param {string} value2 - Second subfield value.
 * @param {string} tag - Field tag of the subfield.
 * @param {string} subfieldCode - Subfield code.
 * @returns {boolean} true when the two values agree as corporate names.
 */
function corporateNamesAgree(value1, value2, tag, subfieldCode) {
  const corporateNameTags = ['110', '610', '710', '810'];
  const isCorporateNameSubfield = subfieldCode === 'a' && corporateNameTags.includes(tag);
  if (!isCorporateNameSubfield) {
    return false;
  }

  const [name1, qualifier1] = splitToNameAndQualifier(value1);
  const [name2, qualifier2] = splitToNameAndQualifier(value2);

  nvdebug(`CN1: '${name1}', '${qualifier1}'`, debugDev);
  nvdebug(`CN2: '${name2}', '${qualifier2}'`, debugDev);

  const namesMatch = name1.toUpperCase() === name2.toUpperCase();
  if (!namesMatch) {
    return false;
  }

  // A qualifier that is present on only one side is tolerated. When both
  // sides have one, they must be equal: this rejects the pair
  // ", kustannusosakeyhtiö" vs "(yhtiö)" as well as "(foo)" vs "(bar)".
  // Should "Yhtiö ab" equal "Yhtiö oy"? If so, this is the place to add it.
  // Pretty marginal though.
  const bothQualified = qualifier1 !== undefined && qualifier2 !== undefined;
  return !(bothQualified && qualifier1 !== qualifier2);
}

function pairableValue(tag, subfieldCode, value1, value2) {
if (partsAgree(value1, value2, tag, subfieldCode)) {
if (partsAgree(value1, value2, tag, subfieldCode) || corporateNamesAgree(value1, value2, tag, subfieldCode)) {
// Pure baseness: here we assume that base's value1 is better than source's value2.
return value1;
}
Expand All @@ -52,8 +99,8 @@ function counterpartExtraNormalize(tag, subfieldCode, value) {
// Remove trailing punctuation:
value = value.replace(/(\S)(?:,|\.|\?|!|\. -| *:| *;| =| \/)$/u, '$1');
// Remove brackets:
value = value.replace(/^\(([^()]+)\)$/u, '$1'); // Remove starting-'(' and ending-')'
value = value.replace(/^\[([^[\]]+)\]$/u, '$1'); // Remove starting-'[' and ending-']'
value = value.replace(/^\(([^()]+)\)$/u, '$1'); // Remove initial '(' and final ')' if both exist.
value = value.replace(/^\[([^[\]]+)\]$/u, '$1'); // Remove initial '[' and final ']' if both exist.
// Mainly for field 260$c:
value = removeCopyright(value);

Expand Down Expand Up @@ -523,7 +570,7 @@ export function getCounterpart(baseRecord, sourceRecord, field, config) {

nvdebug(`Compare incoming '${fieldToString(field)}' with (up to) ${counterpartCands.length} existing field(s)`, debugDev);

const normalizedField = cloneAndNormalizeFieldForComparison(field);
const normalizedField = cloneAndNormalizeFieldForComparison(field); // mainly strip punctuation here

nvdebug(`Norm to: '${fieldToString(normalizedField)}'`, debugDev);

Expand Down
9 changes: 5 additions & 4 deletions src/reducers/mergeField.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ export default (tagPattern = undefined, config = defaultConfig.mergeConfiguratio
//debugData(JSON.stringify(baseRecord));
//debugData(JSON.stringify(sourceRecord));

//sourceRecord.fields.forEach(f => nvdebug(`SRC1: ${fieldToString(f)}`));
//sourceRecord.fields.forEach(f => nvdebug(`SRC1: ${fieldToString(f)}`, debugDev));

//nvdebug(`MERGE CONFIG: ${JSON.stringify(config)}`, debugDev);

Expand Down Expand Up @@ -102,6 +102,7 @@ function copyrightYearHack(baseRecord, baseField, sourceField) {
});
}

// eslint-disable-next-line max-params
function mergeField2(baseRecord, baseField, sourceField, config, candFieldPairs880 = []) {
//// Identical fields
// No need to check every subfield separately.
Expand Down Expand Up @@ -137,14 +138,14 @@ function mergeField2(baseRecord, baseField, sourceField, config, candFieldPairs8
//strippedSourceField.subfields.forEach((subfieldForMergeOrAdd, index) => {
const normalizedSubfield = normalizedSourceField.subfields[index];
const punctlessSubfield = strippedSourceField.subfields[index];
const originalValue = fieldToString(baseField);
nvdebug(` TRYING TO MERGE SUBFIELD '${subfieldToString(originalSubfield)}' TO '${originalValue}'`, debugDev);
const originalBaseValue = fieldToString(baseField);
nvdebug(` TRYING TO MERGE SUBFIELD '${subfieldToString(originalSubfield)}' TO '${originalBaseValue}'`, debugDev);

const subfieldData = {'code': originalSubfield.code, 'originalValue': originalSubfield.value, 'normalizedValue': normalizedSubfield.value, 'punctuationlessValue': punctlessSubfield.value};

mergeOrAddSubfield(baseField, subfieldData, candFieldPairs880); // candSubfield);
const newValue = fieldToString(baseField);
if (originalValue !== newValue) { // eslint-disable-line functional/no-conditional-statements
if (originalBaseValue !== newValue) { // eslint-disable-line functional/no-conditional-statements
nvdebug(` SUBFIELD MERGE RESULT: '${newValue}'`, debugDev);
//debug(` TODO: sort subfields, handle punctuation...`);
}
Expand Down
4 changes: 2 additions & 2 deletions src/reducers/mergeIndicator.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ export function mergeIndicators(toField, fromField, config) {
// For other indicators the situation is trickier, as we don't know which one is the good value.
//
// NB! We could add fixes for various other indicator types as well. However, it gets quickly pretty ad hoc.
// nvdebug(fieldToString(toField));
// nvdebug(fieldToString(fromField));
// nvdebug(fieldToString(toField), debugDev);
// nvdebug(fieldToString(fromField), debugDev);

mergeIndicator1(toField, fromField, config);
mergeIndicator2(toField, fromField, config);
Expand Down
Loading