node.js - nodejs列出巨大的目录挂起
问题描述
我正在使用这种方法列出包含大量文件(10K 和更多......)的目录:
scanDirStream = (needle, params) => {
var options = {
// find -type
type: 'f',
// find -name
name: '*',
limit: 100
};
for (var attrname in params) { options[attrname] = params[attrname]; }
return new Promise((resolve, reject) => {
var opt = [needle];
for (var k in options) {
var v = options[k];
if (!Util.empty(v) && k != 'limit') {
opt.push('-' + k);
opt.push(v);
}
};
var res = '';
var find = spawn('find', opt);
var head = spawn('head',[ '-n', options.limit]);
find.stdout.pipe(head.stdin);
head.stdout.on('data', _data => {
var buff = Buffer.from(_data, 'utf-8').toString();
if (buff != '') res += buff;
})
find.stderr.on('data', error => {
return reject(Buffer.from(error, 'utf-8').toString());
});
head.stderr.on('data', error => {
return reject(Buffer.from(error, 'utf-8').toString());
});
head.on('close', (_) => {
data = res.split('\n');
data = data.slice(0, data.length - 1);
return resolve(data);
});
});
}//scanDirStream
所以我称它为传递目录路径和要列出的文件的限制将传递给head -n
命令,而find
命令将列出具有指定名称的文件,将其称为
// 内容test.js
scanDirStream(mediaRoot, {
name: "*.mp3",
limit: 100
})
.then(res => {
console.debug("files %d", res.length);
})
.catch(err => console.error(err))
假设当前有 10000 个文件mediaRoot
,会发生什么
- 使用
limit
< 10000 node.js 主循环的值挂起:我得到控制台输出,但脚本运行node test.js
不会终止。 - 使用
limit
>= 10000 的值,它会正确列出文件和输出并终止运行。
要试用它,请指向包含 about~8000-100000
文件的目录并运行test.js
脚本,将limit
值更改为100
then 10000
:
// this will hang node
scanDirStream(mediaRoot, {
name: "*.mp3",
limit: 100
})
和
// this will not hang node
scanDirStream(mediaRoot, {
name: "*.mp3",
limit: 10000
})
您有时也会收到此错误:
events.js:174
throw er; // Unhandled 'error' event
^
Error: write EPIPE
at WriteWrap.afterWrite [as oncomplete] (net.js:779:14)
Emitted 'error' event at:
at Socket.onerror (_stream_readable.js:713:12)
at Socket.emit (events.js:189:13)
at Socket.EventEmitter.emit (domain.js:441:20)
at onwriteError (_stream_writable.js:431:12)
at onwrite (_stream_writable.js:456:5)
at _destroy (internal/streams/destroy.js:40:7)
at Socket._destroy (net.js:604:3)
at Socket.destroy (internal/streams/destroy.js:32:8)
at WriteWrap.afterWrite [as oncomplete] (net.js:781:10)
解决方案
我猜你从head
进程中得到了一个 EPIPE(断管),因为一旦它读取了必要数量的文件,它就会终止。
我没有看到使用的理由head
;更好地跟踪 Node 代码中的文件数。
这是我对事情的看法——注意它不会跟踪它自己找到的文件名;这是要做的fileCallback
事情。如果该回调将文件视为要计入limit
.
const { spawn } = require("child_process");
function findFiles(
rootPath,
findParams,
limit = 0,
fileCallback = () => true,
) {
return new Promise((resolve, reject) => {
// Files found so far.
let nFound = 0;
// Whether we killed `find` on our own (limit reached).
let killed = false;
// Buffer to hold our results so far.
let buffer = Buffer.alloc(0);
// Build args for `find`. Note `-print0` to delimit the files
// with NUL bytes for extra safety (i.e. this can handle filenames
// with newlines).
const args = [rootPath].concat(findParams).concat("-print0");
// Spawn the find process.
const findProc = spawn("/usr/bin/find", args, { stdio: "pipe" });
// Figure out what to do when the process exits; this may be
// because it's done, or because we've closed things down upon
// finding `limit` files.
findProc.on("close", (code, signal) => {
if (code && code !== 0) {
return reject("find died with error " + code);
}
if (!killed && signal) {
return reject("find died from signal " + signal);
}
resolve(nFound);
});
findProc.stdout.on("data", chunk => {
// Concatenate the new chunk of data into the buffer.
buffer = Buffer.concat([buffer, chunk]);
// Split the buffer; see below.
let searchOffset = 0;
for (;;) {
// Find the next zero byte (which delimits files).
const nextOffset = buffer.indexOf(0, searchOffset);
// If there is none, we're done -- the rest of the buffer
// is the start of a new filename, if any.
if (nextOffset === -1) {
break;
}
// Stringify the segment between `searchOffset` and `nextOffset`...
const filename = buffer.toString("utf8", searchOffset, nextOffset);
// ... pass it to the callback to see if it's valid...
if (fileCallback(filename)) {
nFound++;
// ... see if we need to stop iterating over files...
if (limit > 0 && nFound >= limit) {
killed = true;
findProc.kill();
break;
}
}
// and move on to the byte after the NUL.
searchOffset = nextOffset + 1;
}
// Cut off the already-walked start of the buffer, if any.
if (searchOffset > 0) {
buffer = buffer.slice(searchOffset);
}
});
});
}
findFiles("/Users/akx", ["-type", "f"], 1000, path => {
console.log(path);
return true;
}).then(
res => {
console.log(res);
},
err => {
throw new Error(err);
},
);
推荐阅读
- forms - 颤振形式保存问题
- s4sdk - Problem running project from s4sdk archtypes with artifact id: scp-cf-spring
- excel - 如何从单元格中提取范围(其中单元格包含范围值)
- rest - swagger with list of elements in an array
- json - 如何在 Azure Policy 中准备 JSON
- haskell - 使用 Haskell 进行 https 身份验证
- python - 条件表达式/三元运算符
- sqlite - 节点红色多个 msg.payloads
- java - storm redis spout 元组丢失无一例外
- python - 由于某种原因,它只显示一个 if 语句