首页 > 解决方案 > nodejs列出巨大的目录挂起

问题描述

我正在使用这种方法列出包含大量文件(10K 和更多......)的目录:

scanDirStream = (needle, params) => {
            var options = {
                // find -type
                type: 'f',
                // find -name
                name: '*',
                limit: 100
            };
            for (var attrname in params) { options[attrname] = params[attrname]; }
            return new Promise((resolve, reject) => {
                var opt = [needle];
                for (var k in options) {
                    var v = options[k];
                    if (!Util.empty(v) && k != 'limit') {
                        opt.push('-' + k);
                        opt.push(v);
                    }
                };
                var res = '';
                var find = spawn('find', opt);
                var head = spawn('head',[ '-n', options.limit]);
                find.stdout.pipe(head.stdin);

                head.stdout.on('data', _data => {
                    var buff = Buffer.from(_data, 'utf-8').toString();
                    if (buff != '') res += buff;
                })
                find.stderr.on('data', error => {
                    return reject(Buffer.from(error, 'utf-8').toString());
                });
                head.stderr.on('data', error => {
                    return reject(Buffer.from(error, 'utf-8').toString());
                });
                head.on('close', (_) => {
                    data = res.split('\n');
                    data = data.slice(0, data.length - 1);
                    return resolve(data);
                });
            });
        }//scanDirStream

所以我称它为传递目录路径和要列出的文件的限制将传递给head -n命令,而find命令将列出具有指定名称的文件,将其称为

// 内容test.js

scanDirStream(mediaRoot, {
        name: "*.mp3",
        limit: 100
    })
        .then(res => {
            console.debug("files %d", res.length);
        })
        .catch(err => console.error(err))

假设当前有 10000 个文件mediaRoot,会发生什么

要试用它,请指向包含 about~8000-100000文件的目录并运行test.js脚本,将limit值更改为100then 10000

// this will hang node
scanDirStream(mediaRoot, {
        name: "*.mp3",
        limit: 100
    })

// this will not hang node
scanDirStream(mediaRoot, {
        name: "*.mp3",
        limit: 10000
    })

您有时也会收到此错误:

events.js:174
      throw er; // Unhandled 'error' event
      ^

Error: write EPIPE
    at WriteWrap.afterWrite [as oncomplete] (net.js:779:14)
Emitted 'error' event at:
    at Socket.onerror (_stream_readable.js:713:12)
    at Socket.emit (events.js:189:13)
    at Socket.EventEmitter.emit (domain.js:441:20)
    at onwriteError (_stream_writable.js:431:12)
    at onwrite (_stream_writable.js:456:5)
    at _destroy (internal/streams/destroy.js:40:7)
    at Socket._destroy (net.js:604:3)
    at Socket.destroy (internal/streams/destroy.js:32:8)
    at WriteWrap.afterWrite [as oncomplete] (net.js:781:10)

标签: node.js

解决方案


我猜你从head进程中得到了一个 EPIPE(断管),因为一旦它读取了必要数量的文件,它就会终止。

我没有看到使用的理由head;更好地跟踪 Node 代码中的文件数。

这是我对事情的看法——注意它不会跟踪它自己找到的文件名;这是要做的fileCallback事情。如果该回调将文件视为要计入limit.

const { spawn } = require("child_process");

function findFiles(
  rootPath,
  findParams,
  limit = 0,
  fileCallback = () => true,
) {
  return new Promise((resolve, reject) => {
    // Files found so far.
    let nFound = 0;

    // Whether we killed `find` on our own (limit reached).
    let killed = false;

    // Buffer to hold our results so far.
    let buffer = Buffer.alloc(0);

    // Build args for `find`. Note `-print0` to delimit the files
    // with NUL bytes for extra safety (i.e. this can handle filenames
    // with newlines).
    const args = [rootPath].concat(findParams).concat("-print0");

    // Spawn the find process.
    const findProc = spawn("/usr/bin/find", args, { stdio: "pipe" });

    // Figure out what to do when the process exits; this may be
    // because it's done, or because we've closed things down upon
    // finding `limit` files.
    findProc.on("close", (code, signal) => {
      if (code && code !== 0) {
        return reject("find died with error " + code);
      }
      if (!killed && signal) {
        return reject("find died from signal " + signal);
      }
      resolve(nFound);
    });

    findProc.stdout.on("data", chunk => {
      // Concatenate the new chunk of data into the buffer.
      buffer = Buffer.concat([buffer, chunk]);

      // Split the buffer; see below.
      let searchOffset = 0;
      for (;;) {
        // Find the next zero byte (which delimits files).
        const nextOffset = buffer.indexOf(0, searchOffset);
        // If there is none, we're done -- the rest of the buffer
        // is the start of a new filename, if any.
        if (nextOffset === -1) {
          break;
        }
        // Stringify the segment between `searchOffset` and `nextOffset`...
        const filename = buffer.toString("utf8", searchOffset, nextOffset);

        // ... pass it to the callback to see if it's valid...
        if (fileCallback(filename)) {
          nFound++;
          // ... see if we need to stop iterating over files...
          if (limit > 0 && nFound >= limit) {
            killed = true;
            findProc.kill();
            break;
          }
        }
        // and move on to the byte after the NUL.
        searchOffset = nextOffset + 1;
      }

      // Cut off the already-walked start of the buffer, if any.
      if (searchOffset > 0) {
        buffer = buffer.slice(searchOffset);
      }
    });
  });
}

findFiles("/Users/akx", ["-type", "f"], 1000, path => {
  console.log(path);
  return true;
}).then(
  res => {
    console.log(res);
  },
  err => {
    throw new Error(err);
  },
);

推荐阅读