Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvement to greedy-flag #967

Merged
merged 1 commit into from
Jun 11, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 31 additions & 27 deletions components/prism-core.js
Original file line number Diff line number Diff line change
Expand Up @@ -280,9 +280,15 @@ var _ = _self.Prism = {
lookbehindLength = 0,
alias = pattern.alias;

if (greedy && !pattern.pattern.global) {
// Without the global flag, lastIndex won't work
pattern.pattern = RegExp(pattern.pattern.source, pattern.pattern.flags + "g");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@zeitgeist87: How recent is the flags property? It looks like it's not supported very well... I just ran into errors running npm test and I think they are due to this.

Maybe we could refactor like so:

var flags = pattern.pattern.toString().match(/[imuy]*$/)[0];
pattern.pattern = RegExp(pattern.pattern.source, flags + "g");

What do you think?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Golmote Yes, you are right: it is not well supported at all. Good catch. Your solution is perfect and we should add it.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Golmote Shall I add this fix, or will you?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@zeitgeist87 I just did, thanks. ;)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rather we added a separate polyfill for flags instead of refactoring our code not to use it. That way, once support is more broad, we can just ditch the polyfill.

}

pattern = pattern.pattern || pattern;

for (var i=0; i<strarr.length; i++) { // Don’t cache length as it changes during the loop
// Don’t cache length as it changes during the loop
for (var i=0, pos = 0; i<strarr.length; pos += (strarr[i].matchedStr || strarr[i]).length, ++i) {

var str = strarr[i];

Expand All @@ -302,40 +308,38 @@ var _ = _self.Prism = {

// Greedy patterns can override/remove up to two previously matched tokens
if (!match && greedy && i != strarr.length - 1) {
// Reconstruct the original text using the next two tokens
var nextToken = strarr[i + 1].matchedStr || strarr[i + 1],
combStr = str + nextToken;

if (i < strarr.length - 2) {
combStr += strarr[i + 2].matchedStr || strarr[i + 2];
pattern.lastIndex = pos;
match = pattern.exec(text);
if (!match) {
break;
}

// Try the pattern again on the reconstructed text
pattern.lastIndex = 0;
match = pattern.exec(combStr);
if (!match) {
continue;
var from = match.index + (lookbehind ? match[1].length : 0),
to = match.index + match[0].length,
k = i,
p = pos;

for (var len = strarr.length; k < len && p < to; ++k) {
p += (strarr[k].matchedStr || strarr[k]).length;
// Move the index i to the element in strarr that is closest to from
if (from >= p) {
++i;
pos = p;
}
}

var from = match.index + (lookbehind ? match[1].length : 0);
// To be a valid candidate, the new match has to start inside of str
if (from >= str.length) {
/*
* If strarr[i] is a Token, then the match starts inside another Token, which is invalid
* If strarr[k - 1] is greedy we are in conflict with another greedy pattern
*/
if (strarr[i] instanceof Token || strarr[k - 1].greedy) {
continue;
}
var to = match.index + match[0].length,
len = str.length + nextToken.length;

// Number of tokens to delete and replace with the new match
delNum = 3;

if (to <= len) {
if (strarr[i + 1].greedy) {
continue;
}
delNum = 2;
combStr = combStr.slice(0, len);
}
str = combStr;
delNum = k - i;
str = text.slice(pos, p);
match.index -= pos;
}

if (!match) {
Expand Down
2 changes: 1 addition & 1 deletion components/prism-core.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

58 changes: 31 additions & 27 deletions prism.js
Original file line number Diff line number Diff line change
Expand Up @@ -285,9 +285,15 @@ var _ = _self.Prism = {
lookbehindLength = 0,
alias = pattern.alias;

if (greedy && !pattern.pattern.global) {
// Without the global flag, lastIndex won't work
pattern.pattern = RegExp(pattern.pattern.source, pattern.pattern.flags + "g");
}

pattern = pattern.pattern || pattern;

for (var i=0; i<strarr.length; i++) { // Don’t cache length as it changes during the loop
// Don’t cache length as it changes during the loop
for (var i=0, pos = 0; i<strarr.length; pos += (strarr[i].matchedStr || strarr[i]).length, ++i) {

var str = strarr[i];

Expand All @@ -307,40 +313,38 @@ var _ = _self.Prism = {

// Greedy patterns can override/remove up to two previously matched tokens
if (!match && greedy && i != strarr.length - 1) {
// Reconstruct the original text using the next two tokens
var nextToken = strarr[i + 1].matchedStr || strarr[i + 1],
combStr = str + nextToken;

if (i < strarr.length - 2) {
combStr += strarr[i + 2].matchedStr || strarr[i + 2];
pattern.lastIndex = pos;
match = pattern.exec(text);
if (!match) {
break;
}

// Try the pattern again on the reconstructed text
pattern.lastIndex = 0;
match = pattern.exec(combStr);
if (!match) {
continue;
var from = match.index + (lookbehind ? match[1].length : 0),
to = match.index + match[0].length,
k = i,
p = pos;

for (var len = strarr.length; k < len && p < to; ++k) {
p += (strarr[k].matchedStr || strarr[k]).length;
// Move the index i to the element in strarr that is closest to from
if (from >= p) {
++i;
pos = p;
}
}

var from = match.index + (lookbehind ? match[1].length : 0);
// To be a valid candidate, the new match has to start inside of str
if (from >= str.length) {
/*
* If strarr[i] is a Token, then the match starts inside another Token, which is invalid
* If strarr[k - 1] is greedy we are in conflict with another greedy pattern
*/
if (strarr[i] instanceof Token || strarr[k - 1].greedy) {
continue;
}
var to = match.index + match[0].length,
len = str.length + nextToken.length;

// Number of tokens to delete and replace with the new match
delNum = 3;

if (to <= len) {
if (strarr[i + 1].greedy) {
continue;
}
delNum = 2;
combStr = combStr.slice(0, len);
}
str = combStr;
delNum = k - i;
str = text.slice(pos, p);
match.index -= pos;
}

if (!match) {
Expand Down
4 changes: 3 additions & 1 deletion tests/languages/javascript/regex_feature.test
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
/foo"test"bar/
/foo\//
1 / 4 + "/, not a regex";
/ '1' '2' '3' '4' '5' /

----------------------------------------------------

Expand All @@ -13,7 +14,8 @@
["regex", "/[\\[\\]]{2,4}(?:foo)*/"], ["punctuation", ";"],
["regex", "/foo\"test\"bar/"],
["regex", "/foo\\//"],
["number", "1"], ["operator", "/"], ["number", "4"], ["operator", "+"], ["string", "\"/, not a regex\""], ["punctuation", ";"]
["number", "1"], ["operator", "/"], ["number", "4"], ["operator", "+"], ["string", "\"/, not a regex\""], ["punctuation", ";"],
["regex", "/ '1' '2' '3' '4' '5' /"]
]

----------------------------------------------------
Expand Down
4 changes: 3 additions & 1 deletion tests/languages/javascript/template-string_feature.test
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
bar`
`40+2=${40+2}`
`${foo()}`
"foo `a` `b` `c` `d` bar"

----------------------------------------------------

Expand Down Expand Up @@ -34,7 +35,8 @@ bar`
["interpolation-punctuation", "}"]
]],
["string", "`"]
]]
]],
["string", "\"foo `a` `b` `c` `d` bar\""]
]

----------------------------------------------------
Expand Down