javascript - Document.createNodeIterator() - 防止无限迭代
问题描述
我想在 DOM 中以不区分大小写的方式标记某个单词。例如,我想把 stackoverflow 标记为 <mark>stackoverflow</mark>,把 Google 标记为 <mark>Google</mark>。
为此,我使用 Document.createNodeIterator(),它会过滤掉所有非文本节点。
// Run the marking pass over the whole <body> once the page has loaded.
window.onload = () => {
  getChildren(document.body);
};
// Original attempt from the question: visits the text nodes under `mytag` and
// rewrites each parent's innerHTML so that "stackoverflow" (any letter case)
// ends up wrapped in <mark>.
// NOTE(review): the surrounding text reports that this version never terminates.
function getChildren(mytag) {
// Iterator restricted to text nodes; the filter callback accepts every one of them.
const nodeIter = document.createNodeIterator(
mytag,
NodeFilter.SHOW_TEXT,
(node) => {
return NodeFilter.FILTER_ACCEPT
}
);
// NOTE(review): `mark` is created here but never used below.
const mark = document.createElement("mark")
let node = nodeIter.nextNode();
while (node) {
const parent = node.parentElement;
// Markup of the parent, captured before the current text node is removed.
const innerHTML = parent.innerHTML;
const word = "stackoverflow"
// Case-insensitive, global pattern; the capture group supplies $1 in the replacement.
const regex = new RegExp(`(${word})`, 'ig');
parent.removeChild(node)
// Assigning innerHTML re-parses the string, so the parent receives new child nodes.
// Presumably the iterator then reaches those new text nodes and matches them again,
// which would explain the endless loop - TODO confirm in a browser.
parent.innerHTML = innerHTML.replace(regex, "<mark>$1</mark>");
node = nodeIter.nextNode()
}
}
<h1>Iterating DOM in JavaScript</h1>
<p>
A paragraph.
</p>
<div>
<a href="https://stackoverflow.com/">Stackoverflow</a> is QA website.
</div>
<ul>
<li>Stackoverflow</li>
<li>Google</li>
<li>Apple</li>
</ul>
上面的代码不起作用,它会无限迭代。但是,如果我把匹配到的词替换成另一个词,例如把 stackoverflow 替换为 <mark>duckduckgo</mark>,它就不会无限迭代。
如何解决这个问题?
解决方案
问题似乎是,当您替换节点时,节点迭代器会永远一遍又一遍地遍历相同的内容。
你可以改进两点:
- 过滤逻辑可以作为过滤回调的一部分来完成。因此,您可以在那里拒绝任何您不想要的节点,而不是获取文本节点并检查它们。
- 使用 Node#replaceWith() 来更改节点。我用 Node#cloneNode() 来构建替换节点,但你也可以用其他方式完成。如果你愿意,replaceWith() 也接受 DOMString。
// Run the marking pass over the whole <body> once the page has loaded.
window.onload = () => {
  getChildren(document.body);
};
/**
 * Wraps every text node under `mytag` whose text contains the word
 * "stackoverflow" (case-insensitive) in a <mark> element.
 *
 * Text nodes that sit directly inside <script>, <style> or <mark> are skipped,
 * so scripts/styles stay intact and already-marked text is not wrapped again.
 *
 * @param {Node} mytag - root of the subtree to scan.
 * @returns {void}
 */
function getChildren(mytag) {
  const word = "stackoverflow";
  // No `g` flag: the pattern is only used with test(), and a global regex keeps
  // `lastIndex` between calls, which can make a later matching node fail.
  const regex = new RegExp(`(${word})`, "i");
  const skippedTags = new Set(["SCRIPT", "STYLE", "MARK"]);

  const nodeIter = document.createNodeIterator(
    mytag,
    NodeFilter.SHOW_TEXT,
    (node) => {
      // DOM nodes expose their parent as `parentElement`; there is no `parent`
      // property, so checking it would never reject anything.
      if (skippedTags.has(node.parentElement?.tagName)) {
        return NodeFilter.FILTER_REJECT;
      }
      return regex.test(node.data)
        ? NodeFilter.FILTER_ACCEPT
        : NodeFilter.FILTER_REJECT;
    }
  );

  let node = nodeIter.nextNode();
  while (node) {
    const mark = document.createElement("mark");
    mark.append(node.cloneNode());
    // The clone now lives inside <mark>, so the filter rejects it when the
    // iterator reaches it after the replacement.
    node.replaceWith(mark);
    node = nodeIter.nextNode();
  }
}
<h1>Iterating DOM in JavaScript</h1>
<p>
A paragraph.
</p>
<div>
<a href="https://stackoverflow.com/">Stackoverflow</a> is QA website.
</div>
<ul>
<li>Stackoverflow</li>
<li>Google</li>
<li>Apple</li>
</ul>
这是整理的代码,可能有助于提高可读性:
// Run the marking pass over the whole <body> once the page has loaded.
window.onload = () => {
  getChildren(document.body);
};
/**
 * Replaces each text node under `mytag` that the iterator yields for the
 * "stackoverflow" pattern with a <mark>-wrapped copy of itself.
 *
 * @param {Node} mytag - root of the subtree to scan.
 */
function getChildren(mytag) {
  const pattern = /stackoverflow/ig;
  const matchingTextNodes = unmarkedTextIterator(mytag, pattern);
  for (const textNode of iterate(matchingTextNodes)) {
    const wrapped = mark(textNode);
    textNode.replaceWith(wrapped);
  }
}
//helper functions to break up the logic into logical parts:
/**
 * Create a DOM NodeIterator over text nodes only.
 *
 * A text node is accepted when its parent element is not <script>, <style> or
 * <mark> and its text passes the regex filter.
 *
 * @param {Node} root - where to start.
 * @param {RegExp} [regex] - optional filter for what text to accept.
 *   Defaults to accepting everything.
 * @returns {NodeIterator} iterator yielding the accepted text nodes.
 */
const unmarkedTextIterator = (root, regex = /.*/) => {
  const skippedTags = new Set(["SCRIPT", "STYLE", "MARK"]);
  return document.createNodeIterator(
    root,
    NodeFilter.SHOW_TEXT,
    (node) => {
      // DOM nodes expose their parent as `parentElement`; there is no `parent`
      // property, so checking it would never reject anything.
      if (skippedTags.has(node.parentElement?.tagName)) {
        return NodeFilter.FILTER_REJECT;
      }
      // A global/sticky regex remembers `lastIndex` between test() calls;
      // reset it so every node is matched from its first character.
      regex.lastIndex = 0;
      return regex.test(node.data)
        ? NodeFilter.FILTER_ACCEPT
        : NodeFilter.FILTER_REJECT;
    }
  );
};
/**
 * Convenience generator function to easily work with NodeIterators,
 * e.g. in a `for...of` loop.
 *
 * @generator
 * @param {NodeIterator} nodeIterator
 * @yields {Node} each node that nodeIterator.nextNode() returns, until it
 *   returns a falsy value.
 */
function* iterate(nodeIterator) {
  // Declared locally: assigning to an undeclared `node` creates an implicit
  // global in sloppy mode and throws a ReferenceError in strict/module code.
  let node = nodeIterator.nextNode();
  while (node) {
    yield node;
    node = nodeIterator.nextNode();
  }
}
/**
 * Builds a new <mark> element that contains a clone of the given node.
 * The node passed in is not modified.
 *
 * @param {Node} node - node to copy into the wrapper.
 * @returns {Node} the newly created <mark> element.
 */
const mark = (node) => {
  const wrapper = document.createElement("mark");
  const copy = node.cloneNode();
  wrapper.append(copy);
  return wrapper;
};
<h1>Iterating DOM in JavaScript</h1>
<p>
A paragraph.
</p>
<div>
<a href="https://stackoverflow.com/">Stackoverflow</a> is QA website.
</div>
<ul>
<li>Stackoverflow</li>
<li>Google</li>
<li>Apple</li>
</ul>
推荐阅读
- android - 电话身份验证谷歌火力基地失败
- node.js - node.js 中的多线程请求处理(部署在 Kubernetes 和 Nginx 后面)
- python-3.x - Scrapy重复行
- python - Django:如何在基于类和基于函数的自定义验证器之间做出决定?
- javascript - 如何使用 node.js 将我的 postgresql 数据库表传输到 heroku?
- octobercms - 如何将变量从页面传递到部分
- r - RStudio,按列类应用函数
- qt - 为什么我抓不到OOM异常?
- sql - SQL - 错误:没有这样的列 - 加入表
- c++ - 使用 fmtlib,当值为负时,零填充数值更短,我可以调整这种行为吗?