首页 > 解决方案 > 附加 Blob 未正确连接 pdf 页面

问题描述

我正在使用 Azure 存储将一些 PDF 存储为 blob。我想使用 Azure Storage 的 Node SDK 中提供的 AppendBlob 类,动态地将它们连接起来。我的问题是,即使合并后的 PDF 大小正确,打开它时也只显示最后一页,而不是我连接的所有页面。我的代码中有错误吗?如果能对这个问题提供任何见解,我将不胜感激。

我不想把它们下载下来再在代码中进行连接,因为在某些情况下,生成的 PDF 大小会超过 2 GB,Node 无法处理。

    // Create an empty append blob that receives the bytes of every requested page blob.
    // NOTE: appending raw bytes only concatenates the files; the result is not a
    // structurally valid merged PDF, which is why viewers show a single page.
    const bundleUuid = uuidv4();
    const connString = process.env.ST_ENV_01_APS_03_CONNECTION_STRING;
    const bundleBlobName = `${userId}/bundles/${bundleUuid}.pdf`;
    const appendBlobClient = new AppendBlobClient(connString, container, bundleBlobName);
    await appendBlobClient.create();
    context.log(bundleBlobName);

    for (const basketDocumentUri of basketDocumentUris) {
        // Build the page blob name once; page_index is zero-based while blob names start at 1.
        const pageBlobName = `${basketDocumentUri.document_uuid}/${basketDocumentUri.page_index + 1}.pdf`;
        context.log("Fetching: " + pageBlobName);

        const bbc = await containerClient.getBlockBlobClient(pageBlobName);
        const details = await bbc.getProperties();
        const url = self.generateSasTokenPDF(pageBlobName, "r");

        try {
            // Size of each server-side copy request (4 MiB).
            const mbblk = 4194304;
            let offset = 0;
            while (offset < details.contentLength) {
                // Clamp the final chunk so the requested range never runs past the end of the source blob.
                const count = Math.min(mbblk, details.contentLength - offset);
                const response = await appendBlobClient.appendBlockFromURL(url.uri, offset, count);
                context.log(response);

                offset += count;

                context.log(offset);
            }
        } catch (error) {
            // NOTE(review): a failed page is only logged; the loop continues and the callback
            // below still fires, so the bundle may be incomplete — confirm this is intended.
            context.log(error);
        }
    }
    callback(bundleUuid)

标签: node.js, azure, pdf, azure-blob-storage

解决方案


正如 Gaurav 在评论中所说,简单地将字节附加到现有 blob 会使 PDF 文件的格式无效。如果不将它们下载到某个地方,就不可能与这些文件进行交互。

以下是将 blob 下载到缓冲区:

  const containerClient = blobServiceClient.getContainerClient(containerName);
  const blobClient = containerClient.getBlobClient(blobName);

  // Get blob content from position 0 to the end
  // In Node.js, get downloaded data by accessing downloadBlockBlobResponse.readableStreamBody
  const downloadBlockBlobResponse = await blobClient.download();
  // Keep the result as a Buffer: decoding binary PDF bytes into a string would corrupt them.
  const downloaded = await streamToBuffer(
    downloadBlockBlobResponse.readableStreamBody
  );
  // Log the size only; dumping a whole binary file to the console is not useful.
  console.log("Downloaded blob content length:", downloaded.length);

  // [Node.js only] Gathers every chunk emitted by a Node.js readable stream into one Buffer.
  // Resolves with the concatenated Buffer on "end"; rejects with the stream's error on "error".
  async function streamToBuffer(readableStream) {
    const pieces = [];
    // Non-Buffer chunks (e.g. strings) are converted so they can be concatenated.
    const collect = (piece) => {
      pieces.push(Buffer.isBuffer(piece) ? piece : Buffer.from(piece));
    };

    return new Promise((resolve, reject) => {
      readableStream.on("data", collect);
      readableStream.on("end", () => resolve(Buffer.concat(pieces)));
      readableStream.on("error", reject);
    });
  }

使用 HummusJS 合并 PDF:

const hummus = require('hummus');
const memoryStreams = require('memory-streams');

/**
 * Concatenate two PDFs held in Buffers.
 * @param {Buffer} firstBuffer - PDF opened for modification; the other PDF's pages are appended to it
 * @param {Buffer} secondBuffer - PDF whose pages are appended
 * @returns {Buffer} - a Buffer containing the concatenated PDFs
 * @throws {Error} when combining fails; the original error is attached as `cause`
 */
const combinePDFBuffers = (firstBuffer, secondBuffer) => {
    const outStream = new memoryStreams.WritableStream();

    try {
        const firstPDFStream = new hummus.PDFRStreamForBuffer(firstBuffer);
        const secondPDFStream = new hummus.PDFRStreamForBuffer(secondBuffer);

        const pdfWriter = hummus.createWriterToModify(firstPDFStream, new hummus.PDFStreamForResponse(outStream));
        pdfWriter.appendPDFPagesFromPDF(secondPDFStream);
        pdfWriter.end();

        // Read the buffer before the stream is ended in the finally block.
        return outStream.toBuffer();
    } catch (e) {
        // Keep the original error (and its stack) reachable through `cause`.
        throw new Error('Error during PDF combination: ' + e.message, { cause: e });
    } finally {
        // End the output stream on both the success and the failure path.
        outStream.end();
    }
};

// Example call. PDFBuffer1 and PDFBuffer2 are not defined in this snippet — presumably Buffers
// holding the two source PDFs (TODO confirm). The returned Buffer is discarded here.
combinePDFBuffers(PDFBuffer1, PDFBuffer2);

推荐阅读