Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pair http and https #106

Merged
merged 6 commits into from
Oct 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"url": "git@github.com:natlibfi/melinda-marc-record-merge-reducers-js.git"
},
"license": "LGPL-3.0+",
"version": "2.0.19-alpha.2",
"version": "2.0.19-alpha.3",
"main": "./dist/index.js",
"engines": {
"node": ">=18"
Expand Down
2 changes: 1 addition & 1 deletion src/reducers/counterpartField.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ function counterpartExtraNormalize(tag, subfieldCode, value) {
// Mainly for field 260$c:
value = removeCopyright(value);


value = value.replace(/http:\/\//ug, 'https://'); // MET-501: http vs https
value = normalizeForSamenessCheck(tag, subfieldCode, value);

/* eslint-enable */
Expand Down
2 changes: 1 addition & 1 deletion src/reducers/mergeConstraints.js
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ const mergeConstraints = [
{'tag': '853', 'required': 'a'},
{'tag': '854', 'required': 'a'},
{'tag': '855', 'required': 'a'},
{'tag': '856', 'required': 'u', 'paired': 'auw3', 'key': 'abqusv'},
{'tag': '856', 'required': 'u', 'paired': 'u', 'key': 'opuw23'}, // 856 is built around $u...
{'tag': '863', 'required': 'a'},
{'tag': '864', 'required': 'a'},
{'tag': '865', 'required': 'a'},
Expand Down
34 changes: 32 additions & 2 deletions src/reducers/mergeSubfield.js
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,16 @@ function coverTypesMatch(candSubfield, relevantSubfields) {
return false;
}

/**
 * Rewrite every literal "http://" found in a string to "https://".
 *
 * All occurrences are replaced, not only one at the start of the string.
 * The match is case-sensitive, so e.g. "HTTP://" is left untouched.
 *
 * @param {string} val - Text that may contain one or more "http://" prefixes.
 * @returns {string} Copy of `val` with each "http://" turned into "https://".
 */
function httpToHttps(val) {
  return val.replaceAll('http://', 'https://');
}

/**
 * Check whether the candidate subfield matches one of the given subfields
 * when differences between "http://" and "https://" are ignored.
 *
 * Both the candidate value and every compared value are passed through
 * httpToHttps() before the comparison, so the comparison itself is exact.
 *
 * @param {{value: string}} candSubfield - Subfield being considered.
 * @param {Array<{value: string}>} relevantSubfields - Subfields to compare against.
 * @returns {boolean} True when some normalized value equals the normalized candidate value.
 */
function pairHttpAndHttps(candSubfield, relevantSubfields) {
  const wanted = httpToHttps(candSubfield.value);
  // Normalize every existing value up front, then do a plain membership lookup.
  const normalizedValues = new Set(relevantSubfields.map(({value}) => httpToHttps(value)));
  return normalizedValues.has(wanted);
}

function isSynonym(field, candSubfield, relevantSubfields) {
if (candSubfield.code === 'q' && ['015', '020', '024', '028'].includes(field.tag)) {
return coverTypesMatch(candSubfield, relevantSubfields);
Expand All @@ -138,6 +148,9 @@ function isSynonym(field, candSubfield, relevantSubfields) {
if (candSubfield.code === 'i') {
return relationInformationMatches(candSubfield, relevantSubfields);
}
if (pairHttpAndHttps(candSubfield, relevantSubfields)) {
return true;
}

return false;
}
Expand All @@ -161,6 +174,22 @@ function preferHyphenatedISBN(field, candSubfield, relevantSubfields) {
return true;
}

/**
 * Upgrade an existing "http://" value to the candidate's "https://" value.
 *
 * When the candidate value starts with "https://", the first subfield in
 * `relevantSubfields` whose value is the same address with an "http://"
 * prefix is overwritten IN PLACE with the candidate value.
 *
 * @param {{value: string}} candSubfield - Incoming subfield.
 * @param {Array<{value: string}>} relevantSubfields - Existing subfields; one of them may be mutated.
 * @returns {boolean} True when an existing subfield was upgraded, false otherwise.
 */
function preferHttpsOverHttp(candSubfield, relevantSubfields) {
  const secureScheme = 'https://';
  const {value: candidateValue} = candSubfield;

  if (candidateValue.startsWith(secureScheme)) {
    // Build the plain-http twin of the candidate and look for it among the existing subfields.
    const insecureTwin = `http://${candidateValue.slice(secureScheme.length)}`;
    const match = relevantSubfields.find(({value}) => value === insecureTwin);

    if (match) {
      // Deliberate mutation: the existing subfield takes over the https value.
      match.value = candidateValue; // eslint-disable-line functional/immutable-data
      return true;
    }
  }

  return false;
}


export function mergeSubfield(targetField, candSubfield) {
// Replace existing subfield with the incoming field. These replacements are by name rather hacky...
// Currently we only select the better X00$d.
Expand All @@ -183,8 +212,9 @@ export function mergeSubfield(targetField, candSubfield) {


if (replaceDatesAssociatedWithName(targetField, candSubfield, relevantSubfields) ||
isSynonym(targetField, candSubfield, relevantSubfields) ||
preferHyphenatedISBN(targetField, candSubfield, relevantSubfields)) {
preferHyphenatedISBN(targetField, candSubfield, relevantSubfields) ||
preferHttpsOverHttp(candSubfield, relevantSubfields) ||
isSynonym(targetField, candSubfield, relevantSubfields)) {
return true;
}

Expand Down
8 changes: 4 additions & 4 deletions src/reducers/preprocessPrepublication.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ const debugDev = debug.extend('dev');
//const NA = 4; // Non-Applicable; used by Fennica-specific encoding level only

export default () => (base, source) => {
nvdebug('BASE', debugDev);
nvdebug(JSON.stringify(base), debugDev);
nvdebug('SOURCE', debugDev);
nvdebug(JSON.stringify(source), debugDev);
//nvdebug('BASE', debugDev);
//nvdebug(JSON.stringify(base), debugDev);
//nvdebug('SOURCE', debugDev);
//nvdebug(JSON.stringify(source), debugDev);


handlePrepublicationNameEntries(base, source);
Expand Down
15 changes: 15 additions & 0 deletions test-fixtures/reducers/index/856u_http_vs_https/base.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"leader": "01331cam a22003494i 4500",
"fields": [
{ "tag": "001", "value": "12345"},

{ "tag": "856", "ind1": "4", "ind2": "0", "subfields": [
{ "code": "u", "value": "http://urn.fi/polku/123" },
{ "code": "q", "value": "Wanha"}
] },
{ "tag": "856", "ind1": "4", "ind2": "0", "subfields": [
{ "code": "u", "value": "https://urn.fi/polku/321" },
{ "code": "q", "value": "Wanha 2"}
] }
]
}
20 changes: 20 additions & 0 deletions test-fixtures/reducers/index/856u_http_vs_https/merged.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"leader": "01331cam a22003494i 4500",
"fields": [
{ "tag": "001", "value": "12345"},

{ "tag": "856", "ind1": "4", "ind2": "0", "subfields": [
{ "code": "u", "value": "https://urn.fi/polku/123" },
{ "code": "q", "value": "Wanha"}
] },

{ "tag": "856", "ind1": "4", "ind2": "0", "subfields": [
{ "code": "u", "value": "https://urn.fi/polku/321" },
{ "code": "q", "value": "Wanha 2"}
] },
{ "tag": "856", "ind1": "4", "ind2": "0", "subfields": [
{ "code": "u", "value": "http://urn.fi/polku/432" },
{ "code": "q", "value": "Uusi 3" }
] }
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"description":"MET-501: pairability of http vs https",
"skip": false,
"only": false
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"leader": "01331cam a22003494i 4500",
"fields": [
{ "tag": "001", "value": "123456"}
]
}
20 changes: 20 additions & 0 deletions test-fixtures/reducers/index/856u_http_vs_https/source.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"leader": "01331cam a22003494i 4500",
"fields": [
{ "tag": "001", "value": "123456"},
{ "tag": "856", "ind1": "4", "ind2": "0", "subfields": [
{ "code": "u", "value": "https://urn.fi/polku/123" }
] },

{ "tag": "856", "ind1": "4", "ind2": "0", "subfields": [
{ "code": "u", "value": "http://urn.fi/polku/321" },
{ "code": "q", "value": "Uusi 2" }
] },

{ "tag": "856", "ind1": "4", "ind2": "0", "subfields": [
{ "code": "u", "value": "http://urn.fi/polku/432" },
{ "code": "q", "value": "Uusi 3" }
] }

]
}