首页 > 解决方案 > 在使用 pdf.js 上传之前访问文件

问题描述

我想在上传 PDF 文档之前获取它的页数。我认为使用 pdf.js 库应该可以做到,但我无法让它正常工作 :(

请在下面查看我的代码。

<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1" />
    <title>PDF.js file input test</title>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.2.2/pdf.min.js"></script>
</head>
<body>
    <div id="outerwrap"><input type="file" accept=".pdf" id="thisfile" /></div>
<script>
    // First attempt: when a file is chosen in #thisfile, create a blob: URL for
    // it, hand that URL to pdf.js and log the document's page count.

    // Tell pdf.js where to load its worker script from (a relative path).
    pdfjsLib.GlobalWorkerOptions.workerSrc = "build/pdf.worker.min.js";
(function() {
    var inputElement = document.getElementById("thisfile");
    inputElement.addEventListener("change", function (e){
    // Only the first selected file is inspected.
    var file = e.target.files[0];
    var url = URL.createObjectURL(file);
    // var jsonfile = JSON.parse(JSON.stringify(file)), jsonurl = JSON.parse(JSON.stringify(url));
    var pdf = pdfjsLib.getDocument(url);
    // NOTE(review): the reported deprecation warning says to use the loading
    // task's `promise` getter instead of calling .then on the task directly.
    pdf.then(function(pdf){
    // NOTE(review): pdfInfo may not be a public property in this pdf.js
    // version — confirm against the API docs before relying on it.
    var pages = pdf.pdfInfo.numPages;
    console.log("Number of pages: " + pages);
    });
    // NOTE(review): this line runs synchronously, immediately after
    // getDocument() was called and before the .then callback above can have
    // fired, so the blob URL is revoked while loading is presumably still in
    // progress — consistent with the ERR_FILE_NOT_FOUND / "Failed to fetch"
    // errors reported for this attempt.
    window.URL.revokeObjectURL(url);
    });
})();
</script>
</body>
</html>

当我将“file”或“jsonfile”变量传递给 pdfjsLib.getDocument 时,我收到以下错误:

Uncaught Error: Invalid parameter object: need either .data, .range or .url
    at Object.t.getDocument (pdf.min.js:1)

当我传递“url”或“jsonurl”时,我收到一条警告和三条错误消息:

Deprecated API usage: PDFDocumentLoadingTask.then method, use the `promise` getter instead.

GET blob:http://localhost/8eca90d0-4b10-4f6a-802a-614d98999b76 net::ERR_FILE_NOT_FOUND

Uncaught (in promise) DOMException: Failed to execute 'postMessage' on 'Worker': TypeError: Failed to fetch could not be cloned.

Uncaught (in promise) e {name: "UnknownErrorException", message: "Failed to fetch", details: "UnknownErrorException: Failed to fetch"}details: "UnknownErrorException: Failed to fetch"message: "Failed to fetch"name: "UnknownErrorException"__proto__: Error

有人可以告诉我我做错了什么吗?

编辑: 我试过这个。现在没有错误,但在选择 pdf 文件后没有任何反应:

<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1" />
    <title>PDF.js file input test</title>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.2.2/pdf.min.js"></script>
</head>
<body>
    <div id="outerwrap"><input type="file" accept=".pdf" id="thisfile" /></div>
<script>
    // Tell pdf.js where to load its worker script from (a relative path).
    // NOTE(review): presumably this must point at a worker build matching the
    // pdf.js version loaded in <head> — verify the file actually exists there.
    pdfjsLib.GlobalWorkerOptions.workerSrc = "build/pdf.worker.min.js";

    /**
     * Decodes the base64 payload of a data URI into raw bytes.
     *
     * The payload is located by searching for the ";base64," marker instead of
     * assuming a fixed prefix length: a fixed offset of 28 is only correct for
     * the exact prefix "data:application/pdf;base64," and silently decodes
     * garbage for any other MIME type.
     *
     * @param {string} dataURI - Data URI that contains a ";base64," marker.
     * @returns {Uint8Array} The decoded bytes.
     * @throws {TypeError} If dataURI is not a string.
     * @throws {Error} If dataURI does not contain the ";base64," marker.
     */
    function convertDataURIToBinary(dataURI){
        var BASE64_MARKER = ";base64,";
        if (typeof dataURI !== "string") {
            throw new TypeError("convertDataURIToBinary expects a data URI string");
        }
        var markerIndex = dataURI.indexOf(BASE64_MARKER);
        if (markerIndex === -1) {
            throw new Error("Data URI does not contain a base64 payload");
        }
        var base64 = dataURI.substring(markerIndex + BASE64_MARKER.length);
        // atob yields a "binary string": one character per decoded byte.
        var raw = window.atob(base64);
        var rawLength = raw.length;
        var array = new Uint8Array(rawLength);
        for (var i = 0; i < rawLength; i++) {
            array[i] = raw.charCodeAt(i);
        }
        return array;
    }

// Second attempt: read the chosen file as a data URI, convert it to bytes with
// convertDataURIToBinary and pass those bytes to pdf.js to log the page count.
(function(){
    var inputElement = document.getElementById("thisfile");
    inputElement.addEventListener("change", function (e){
    // Only the first selected file is inspected.
    var file = e.target.files[0];
    // NOTE(review): the only call that starts a read (readAsDataURL below) sits
    // inside reader.onload itself, and nothing in this handler starts a read
    // before that, so the onload callback is never triggered from here — which
    // matches the reported "no errors, but nothing happens".
    var reader = new FileReader(); reader.onload = function(ez){
    reader.readAsDataURL(file);
    var dataURI = reader.result;
    var binaris = convertDataURIToBinary(dataURI);
    var pdf = pdfjsLib.getDocument(binaris);
    // NOTE(review): the first attempt reported a deprecation warning for calling
    // .then directly on the value returned by getDocument; the same call is
    // made here.
    pdf.then(function(pdf){
    // NOTE(review): pdfInfo may not be a public property in this pdf.js
    // version — confirm against the API docs before relying on it.
    var pages = pdf.pdfInfo.numPages;
    console.log("Number of pages: " + pages);
    }); } });
})();
</script>
</body>
</html>

标签: javascript, file, pdf, input, pdf.js

解决方案


我在这里找到了解决方案:https://gist.github.com/frontenddeveloping/ab58f46150823f4c0aa6

在下面找到我的新工作代码。

<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1" />
<title>PDF.js file input test</title>
<script src="https://cdn.jsdelivr.net/npm/pdfjs-dist@1.10.100/build/pdf.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/pdfjs-dist@1.10.100/build/pdf.worker.min.js"></script>
</head>
<body>
<div id="outerwrap"><input type="file" accept=".pdf" id="thisfile" /></div>
<script>
/**
 * Loads a PDF from raw bytes with pdf.js and logs its page count.
 *
 * @param {Uint8Array} pdf - Raw bytes of the PDF file; passed to pdf.js as `data`.
 * @returns {Promise<void>} Settles after the page count has been logged, or
 *     after a loading failure has been reported with console.error.
 */
function readPDFFile(pdf){
    return PDFJS.getDocument({data: pdf}).then(function(pdfDocument){
        // Read the count from the document proxy's own `numPages` property
        // rather than reaching into its internal `pdfInfo` field.
        var pdfPages = pdfDocument.numPages;
        console.log(pdfPages); // <--- get number of pages
    }).catch(function(err){
        // Without this, a corrupt or unreadable file only produces an
        // unhandled promise rejection.
        console.error("Failed to read PDF:", err);
    });
}
// Once the page has loaded, hook the file input so that each newly chosen file
// is read into memory as an ArrayBuffer and its bytes are passed to readPDFFile.
window.onload = function(){
    var fileInput = document.getElementById('thisfile');

    // Wraps the finished read's ArrayBuffer in a Uint8Array for readPDFFile.
    function handleLoaded(loadEvent){
        var bytes = new Uint8Array(loadEvent.target.result);
        readPDFFile(bytes);
    }

    // `this` is the input element the listener is attached to.
    function handleChange(){
        var selected = this.files[0];
        if (!selected){
            // Nothing chosen (e.g. the dialog was cancelled): nothing to read.
            return;
        }
        var reader = new FileReader();
        reader.onload = handleLoaded;
        reader.readAsArrayBuffer(selected);
    }

    fileInput.addEventListener('change', handleChange);
};
</script>
</body>
</html>

推荐阅读