javascript - nodejs puppeteer无法下载URL中带有特殊字符的图像文件
问题描述
我有两张图片要下载到本地计算机上:
- https://45football.com/assets/resources/1134/medium/1035-nöggi-subliga-cf.jpg
- https://45football.com/assets/resources/675/medium/580-maritimo-cf.jpg
如您所见,第一张图片的 URL 中包含一个特殊字符 ö,但它仍然是一个可以在线访问的有效 URL。
我正在尝试使用我制作的 node.js puppeteer(版本“5.5.0”)脚本下载这两个图像,它尝试使用以下代码下载:
/**
 * Fetch `url` with `https.get` and stream the response body into the file at
 * `destination`.
 *
 * The URL is handed to `https.get` exactly as given, and the response is
 * piped to disk regardless of its HTTP status code.
 *
 * @param {string} url - HTTPS URL to fetch.
 * @param {string} destination - Path of the file to write.
 * @param {Function} cb - Passed to `close()` on the write stream once it
 *   has finished.
 * @returns {Promise<boolean>} Resolves with `true` when the write stream
 *   emits `finish`; rejects (with no value) if creating the stream or the
 *   request throws synchronously.
 */
function download(url, destination, cb) {
  return new Promise((resolve, reject) => {
    try {
      const out = fs.createWriteStream(destination);
      https.get(url, (res) => {
        res.pipe(out);
        out.on('finish', () => {
          // close() is asynchronous; cb is invoked once it completes.
          out.close(cb);
          resolve(true);
        });
      });
    } catch (e) {
      console.log('err saving file:', e);
      reject();
    }
  });
}
/**
 * Debug helper that downloads two sample images: one whose URL contains the
 * non-ASCII character "ö" and one plain-ASCII URL.
 *
 * Only the second download is awaited; the first one is started and left
 * running in the background.
 *
 * @returns {Promise<void>} Settles together with the second download.
 */
async function debugDownload() {
  const logDone = () => {
    console.log('img download done');
  };

  // URL containing "ö". Not awaited: it runs while the next download starts.
  const brokenUrl = 'https://45football.com/assets/resources/1134/medium/1035-nöggi-subliga-cf.jpg';
  const brokenPath = '/home/martin/Documents/projects/broken.jpg';
  download(brokenUrl, brokenPath, logDone);

  // Plain-ASCII URL.
  const goodUrl = 'https://45football.com/assets/resources/675/medium/580-maritimo-cf.jpg';
  const goodPath = '/home/martin/Documents/projects/good.jpg';
  await download(goodUrl, goodPath, logDone);
}
运行 debugDownload() 后,文件 good.jpg 下载正常,但 broken.jpg 只有 707 字节,而且无法打开。这让我怀疑我的下载函数有问题:它是否不适用于包含特殊字符的 URL?
解决方案
问题在于早期版本的 Node.js 默认不会对 URL 进行 encodeURI 编码,因此请求得到的是 404 Not Found(保存下来的只是错误页面)。只需在发起请求前用 encodeURI(url) 对 URL 进行编码即可。参考:https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
/**
 * Download `url` over HTTPS and save the response body to `destination`.
 *
 * The URL is percent-encoded with `encodeURI` before the request is made, so
 * URLs containing non-ASCII characters (e.g. "ö") are requested correctly.
 * Pass the URL in its unencoded form: an already percent-encoded URL would
 * be encoded a second time ("%" becomes "%25").
 *
 * @param {string} url - HTTPS URL to fetch (unencoded).
 * @param {string} destination - Path of the file to write.
 * @param {Function} [cb] - Optional callback invoked after the file has been
 *   closed; receives the close error, if any.
 * @returns {Promise<boolean>} Resolves with `true` once the file has been
 *   written and closed. Rejects with the underlying `Error` when the request,
 *   the response stream or the file stream fails, or when the server answers
 *   with a non-2xx status (redirects are not followed).
 */
function download(url, destination, cb) {
  return new Promise((resolve, reject) => {
    let file;
    let settled = false;

    // Single failure path: log, release the file handle, reject exactly once.
    const fail = (err) => {
      if (settled) {
        return;
      }
      settled = true;
      console.log('err saving file:', err);
      if (file) {
        file.destroy();
      }
      reject(err);
    };

    try {
      file = fs.createWriteStream(destination);
      file.on('error', fail);

      // encodeURI is the fix: the raw URL with "ö" yielded a 404 page.
      const request = https.get(encodeURI(url), (response) => {
        const { statusCode } = response;
        if (statusCode < 200 || statusCode >= 300) {
          // Drain the body so the socket is released, and do not save an
          // error page under the image's file name.
          response.resume();
          fail(new Error(`Request for ${url} failed with status ${statusCode}`));
          return;
        }

        // pipe() does not forward source errors to the destination stream.
        response.on('error', fail);
        response.pipe(file);

        file.on('finish', () => {
          // close() is async; notify cb and settle only after it completes.
          file.close((closeErr) => {
            if (typeof cb === 'function') {
              cb(closeErr);
            }
            if (closeErr) {
              fail(closeErr);
              return;
            }
            settled = true;
            resolve(true);
          });
        });
      });

      // Network-level failures (DNS, refused connection, TLS, ...).
      request.on('error', fail);
    } catch (err) {
      // Synchronous failures, e.g. an invalid URL or unsupported protocol.
      fail(err);
    }
  });
}
/**
 * Debug helper that downloads two sample images: one whose URL contains the
 * non-ASCII character "ö" and one plain-ASCII URL.
 *
 * Both downloads are started concurrently (in the order listed) and both are
 * awaited, so the function only returns once both have finished, and a failure
 * of either one is reported through the returned promise rather than as an
 * unhandled rejection.
 *
 * @returns {Promise<void>} Resolves when both downloads have finished;
 *   rejects with the first download error.
 */
async function debugDownload() {
  const logDone = () => {
    console.log('img download done');
  };

  const targets = [
    {
      // URL containing the special character "ö".
      url: 'https://45football.com/assets/resources/1134/medium/1035-nöggi-subliga-cf.jpg',
      savePath: '/home/martin/Documents/projects/broken.jpg',
    },
    {
      url: 'https://45football.com/assets/resources/675/medium/580-maritimo-cf.jpg',
      savePath: '/home/martin/Documents/projects/good.jpg',
    },
  ];

  await Promise.all(
    targets.map(({ url, savePath }) => download(url, savePath, logDone)),
  );
}
我不知道您使用的是哪个版本的 Node.js。
推荐阅读
- node.js - 填充 mongoose.Schema.Types.ObjectId 数组
- django - Django-Rest-Framework CreateAPIView 不起作用
- android - 无法构建 React Native 0.59.8 android.support.v4.net.ConnectivityManagerCompat
- c++ - 仅将纪元字符串保存到mysql datetime 年/月/日,但没有小时分秒
- r - Shinyproxy - 如何将主机传递给 docker 映像
- c# - 序列号和精度问题
- android - 如何使用列表视图改进应用程序的设置选项界面?
- c++ - 向量中的最大值
- java - 如何修复线程“main”java.lang.StringIndexOutOfBoundsException中的异常:字符串索引超出范围:0
- javascript - 无法使用 getContext 将图像渲染到画布上进行游戏