首页 > 解决方案 > PDFJS Firebase Cloud Functions:证书已过期

问题描述

我正在使用 PDFJS 从 pdf 文档中提取纯文本,并使用 Firebase Cloud Functions 定时执行提取。一切都运行良好,直到有一天,在处理部分 pdf 时出现了 `certificate has expired` 错误,这些 pdf 主要来自两个域。

我检查过,受影响的域的 SSL 证书仍然有效,并且在本地机器上运行纯文本提取代码没有任何问题。但一旦将其部署到 Firebase Cloud Functions,它就会抛出 `certificate has expired` 错误。

Error
    at BaseExceptionClosure (/srv/node_modules/pdfjs-dist/build/pdf.js:666:29)
    at Object.<anonymous> (/srv/node_modules/pdfjs-dist/build/pdf.js:669:2)
    at __w_pdfjs_require__ (/srv/node_modules/pdfjs-dist/build/pdf.js:52:30)
    at Object.defineProperty.value (/srv/node_modules/pdfjs-dist/build/pdf.js:129:23)
    at __w_pdfjs_require__ (/srv/node_modules/pdfjs-dist/build/pdf.js:52:30)
    at pdfjsVersion (/srv/node_modules/pdfjs-dist/build/pdf.js:116:18)
    at /srv/node_modules/pdfjs-dist/build/pdf.js:119:10
    at webpackUniversalModuleDefinition (/srv/node_modules/pdfjs-dist/build/pdf.js:25:20)
    at Object.<anonymous> (/srv/node_modules/pdfjs-dist/build/pdf.js:32:3)
    at Module._compile (module.js:653:30)
    at Object.Module._extensions..js (module.js:664:10)
    at Module.load (module.js:566:32)
    at tryModuleLoad (module.js:506:12)
    at Function.Module._load (module.js:498:3)
    at Module.require (module.js:597:17)
    at require (internal/module.js:11:18)
    at Object.<anonymous> (/srv/pdf/pdf.js:7:18)
    at Module._compile (module.js:653:30)
    at Object.Module._extensions..js (module.js:664:10)
    at Module.load (module.js:566:32)
    at tryModuleLoad (module.js:506:12)
    at Function.Module._load (module.js:498:3)
  message: 'certificate has expired',
  name: 'UnknownErrorException',
  details: 'UnknownErrorException: certificate has expired' }" 

代码:

const pdfjslib = require('pdfjs-dist');
const functions = require('firebase-functions');

module.exports = functions.https.onRequest((req, res) => {
    let url = req.query.url

    return extractPlainTextFromPdf(url)
    .then(pb => {
        return res.send(pb)
    })
    .catch(err => {
        console.log(err)
        return res.send("Err occured")
    })
});

/**
 * Extracts the plain text of the PDF found at `pdfUrl`.
 *
 * @param {string} pdfUrl - location of the PDF document.
 * @returns {Promise<*>} resolves with whatever `getPlainBody` produces for
 *     the options built from `pdfUrl`.
 * @throws rejects with the original error when loading or extraction fails.
 */
function extractPlainTextFromPdf(pdfUrl) {
    const options = setupPdfOptions(pdfUrl);
    return getPlainBody(options)
    .catch((err) => {
        console.log("Err plainBody", err);
        // Re-throw after logging: swallowing the error here would resolve the
        // promise with `undefined` and hide the failure from the caller.
        throw err;
    });
}

/**
 * Loads the document described by `options` and passes it, together with
 * its `numPages` value, to `extractTexts`.
 *
 * @param {object} options - document options forwarded to `getDocument`.
 * @returns {Promise<*>} resolves with the result of `extractTexts`.
 */
async function getPlainBody(options) {
    const pdfDocument = await getDocument(options);
    return extractTexts(pdfDocument, pdfDocument.numPages);
}

/**
 * Starts loading a PDF through `pdfjslib.getDocument` and exposes the
 * loading task's promise.
 *
 * @param {object} options - parameters passed unchanged to `pdfjslib.getDocument`.
 * @returns {Promise<*>} resolves with the value the loading task's promise yields.
 */
function getDocument(options) {
    const { promise } = pdfjslib.getDocument(options);
    return promise.then((pdfDocument) => pdfDocument);
}

/**
 * Builds the options object for a PDF located at `url`: the URL itself plus
 * a fixed `User-Agent` request header.
 *
 * @param {string} url - location of the PDF document.
 * @returns {{url: string, httpHeaders: Object<string, string>}} options object.
 */
function setupPdfOptions(url) {
    const httpHeaders = { "User-Agent": "MY-USER-AGENT" };
    return { url, httpHeaders };
}

这是面临上述问题的两个示例pdf。

https://www.nea.gov.sg/docs/default-source/our-services/building-planning/notification-of-new-edition-of-code-of-practice-on-environment-health-(2020-edition).pdf

https://www.nparks.gov.sg/-/media/nparks-real-content/partner-us/developers-architects-and-engineers/circular_2020_0106_nparks.pdf?la=en&hash=F25A74CC8667D5D98EDF3A9C186E235330D228A8

编辑:

//package.json
{
  "name": "functions",
  "description": "Cloud Functions for Firebase",
  "scripts": {
    "serve": "firebase serve --only functions",
    "shell": "firebase functions:shell",
    "start": "npm run shell",
    "deploy": "firebase deploy --only functions",
    "logs": "firebase functions:log"
  },
  "engines": {
    "node": "8"
  },
  "dependencies": {
    "@google-cloud/functions-framework": "^1.5.1",
    "@google-cloud/vision": "^1.11.0",
    "aws-sdk": "^2.667.0",
    "axios": "^0.19.2",
    "cheerio": "^1.0.0-rc.3",
    "diff-match-patch": "^1.0.4",
    "firebase-admin": "^8.11.0",
    "firebase-functions": "^3.6.1",
    "moment": "^2.25.0",
    "nodemailer": "^6.4.6",
    "pdfjs-dist": "^2.3.200",
    "request": "^2.88.2",
    "request-promise": "^4.2.5"
  },
  "devDependencies": {
    "firebase-functions-test": "^0.1.6"
  },
  "private": true
}

标签: node.js, google-cloud-functions, pdfjs, pdfjs-dist

解决方案


Cloud Functions 上的 Node.js 8 运行时已被弃用。我认为该运行时中的某些软件包(例如 openssl)已经过时,从而导致了奇怪的 SSL 问题;我在一些老旧的 Linux 发行版(Ubuntu 10.04)上也遇到过同样的现象。

“Node.js 8 运行时将于 2020 年 6 月 5 日弃用。为确保您的函数在受支持的 Node.js 版本上,请将它们迁移到 Node.js 10。”


推荐阅读