首页 > 解决方案 > nodejs puppeteer无法下载URL中带有特殊字符的图像文件

问题描述

我有两张图片要下载到本地计算机上(两个 URL 见下方代码中的 broken_imgUrl 和 good_imgUrl):

如您所见,第一张图片的 URL 中包含特殊字符 ö,但它仍然是一个可以在线访问的有效 URL。

我正在尝试用自己编写的 Node.js puppeteer(版本“5.5.0”)脚本下载这两张图片,脚本中使用以下代码进行下载:

/**
 * Download `url` over HTTPS and write the response body to `destination`.
 *
 * @param {string} url - Passed unchanged to `https.get`.
 * @param {string} destination - Path handed to `fs.createWriteStream`.
 * @param {Function} cb - Forwarded to `file.close()` once the write stream
 *   emits 'finish'.
 * @returns {Promise<boolean>} Resolves with `true` from the 'finish' handler;
 *   rejects (with no reason) only if the `try` body throws synchronously.
 */
function download(url, destination, cb) {
  return new Promise(async function (resolve, reject) {
    try {
      var file = fs.createWriteStream(destination);
      var request = https.get(url, function (response) {
        // The body is piped to disk without looking at the response status,
        // so whatever the server sends back ends up in `destination`.
        response.pipe(file);
        file.on('finish', function () {
          file.close(cb);  // close() is async, call cb after close completes.
          // Resolves right after close() is requested, not after cb has run.
          resolve(true)
        });
      });
    } catch (err) {
      // Only synchronous throws reach this handler; no 'error' listener is
      // attached to `request`, `response` or `file` in this function.
      console.log('err saving file:', err)
      reject()
    }
  });
}

/**
 * Reproduction driver: starts one download for a URL containing a non-ASCII
 * character and one for a plain ASCII URL, each saved to a hard-coded path.
 */
async function debugDownload(){
  let broken_imgUrl='https://45football.com/assets/resources/1134/medium/1035-nöggi-subliga-cf.jpg';
  let broken_imgSavePath='/home/martin/Documents/projects/broken.jpg';
  // Not awaited: the returned promise is dropped, so this function neither
  // waits for this download nor observes its rejection.
  download(broken_imgUrl, broken_imgSavePath, function (x) { console.log('img download done'); });

  let good_imgUrl='https://45football.com/assets/resources/675/medium/580-maritimo-cf.jpg'
  let good_imgSavePath='/home/martin/Documents/projects/good.jpg';
  // Only this second download is awaited before debugDownload() settles.
  await download(good_imgUrl, good_imgSavePath, function (x) { console.log('img download done'); });
}

运行 debugDownload() 后,文件 good.jpg 下载正常,但 broken.jpg 只有 707 字节,而且无法打开。这让我觉得我的下载函数有问题:它是不是不适用于包含特殊字符的 URL?

在此处输入图像描述

标签: javascript node.js https puppeteer

解决方案


问题在于早期版本的 Node.js 在发起请求时默认不会对 URL 做 encodeURI(百分号)编码,含有 ö 这类非 ASCII 字符的路径会原样发送给服务器,因此我们得到 404 Not Found。只需在请求前调用 encodeURI(url) 即可。参见:https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI

/**
 * Download `url` over HTTPS and save the response body to `destination`.
 *
 * The URL is percent-encoded with `encodeURI` before the request so that
 * non-ASCII path characters (e.g. "ö") are sent in encoded form.
 *
 * @param {string} url - Absolute https URL; may contain unescaped non-ASCII
 *   characters. NOTE: a URL that is already percent-encoded will have its "%"
 *   signs encoded again by `encodeURI`.
 * @param {string} destination - File path the body is written to.
 * @param {Function} [cb] - Passed to `file.close()` after the file has been
 *   fully written. It is not called when the download fails.
 * @returns {Promise<boolean>} Resolves with `true` once the body has been
 *   flushed to disk. Rejects with an `Error` when the URL is invalid, the
 *   request or the file stream fails, or the server answers with a non-2xx
 *   status.
 */
function download(url, destination, cb) {
  return new Promise(function (resolve, reject) {
    let file = null;
    let settled = false;

    // Single failure path: reject once and discard any partially written file.
    function fail(err) {
      if (settled) {
        return;
      }
      settled = true;
      console.log('err saving file:', err);
      if (file) {
        // Unlink only after the descriptor is released; best-effort cleanup.
        file.once('close', function () {
          fs.unlink(destination, function () {});
        });
        file.destroy();
      }
      reject(err);
    }

    let request;
    try {
      request = https.get(encodeURI(url), function (response) { // modified
        const status = response.statusCode;
        if (status < 200 || status >= 300) {
          // Drain the body so the socket is freed; never save an error page
          // as if it were the requested file.
          response.resume();
          fail(new Error(`Download failed with HTTP status ${status}: ${url}`));
          return;
        }

        // Open the destination only once there is a real body to store.
        file = fs.createWriteStream(destination);
        file.on('error', function (err) {
          request.destroy();
          fail(err);
        });
        file.on('finish', function () {
          if (settled) {
            return;
          }
          settled = true;
          file.close(cb);  // close() is async, call cb after close completes.
          resolve(true);
        });
        response.on('error', fail);
        response.pipe(file);
      });
    } catch (err) {
      // https.get throws synchronously for malformed URLs.
      fail(err);
      return;
    }

    // DNS, TLS and socket failures are emitted as events, not thrown.
    request.on('error', fail);
  });
}
  
  /**
   * Download both sample images concurrently and wait for both to finish.
   *
   * One URL contains a non-ASCII character ("ö"), the other is plain ASCII.
   * Awaiting both promises means a failure of either download rejects this
   * function instead of being lost as an unhandled rejection.
   *
   * @returns {Promise<void>} Settles after both downloads have settled
   *   successfully, or rejects with the first failure.
   */
  async function debugDownload(){
    const broken_imgUrl='https://45football.com/assets/resources/1134/medium/1035-nöggi-subliga-cf.jpg';
    const broken_imgSavePath='/home/martin/Documents/projects/broken.jpg';

    const good_imgUrl='https://45football.com/assets/resources/675/medium/580-maritimo-cf.jpg';
    const good_imgSavePath='/home/martin/Documents/projects/good.jpg';

    // Both requests still start back-to-back; neither promise is left floating.
    await Promise.all([
      download(broken_imgUrl, broken_imgSavePath, function (x) { console.log('img download done'); }),
      download(good_imgUrl, good_imgSavePath, function (x) { console.log('img download done'); }),
    ]);
  }

我不知道您使用的是哪个版本的 Node.js。


推荐阅读