java - 将html文本转换为pdf而不丢失格式
问题描述
我使用了 xmlworker-5.5.9.jar 和 itextpdf-5.5.13.jar,
在我的 Web 应用程序中,我使用 CKEditor。在提交按钮上,我想将 CK 内容转换为 .pdf 格式,
我使用了这段代码,它工作正常:
/**
 * Renders an XHTML string into a PDF file written to {@code f:\test.pdf}.
 *
 * <p>The markup is encoded to bytes and handed to the parser with an explicit
 * UTF-8 charset, so the result does not depend on the platform default
 * encoding. NOTE(review): correct charset handling alone may not be enough for
 * Arabic output - a font that contains Arabic glyphs and RTL handling are
 * presumably still required; confirm against the iText documentation.
 *
 * @param text XHTML markup to render; must not be {@code null}
 * @throws DocumentException if iText cannot build the document
 * @throws IOException if the output file cannot be opened or written
 */
public void createPDF(String text) throws DocumentException, IOException
{
    final String fileName = "f:\\test.pdf";
    // Fully qualified so that no additional import is needed in the file.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;

    // try-with-resources releases the file handle even when parsing fails
    // before document.close() is reached.
    try (FileOutputStream out = new FileOutputStream(fileName)) {
        Document document = new Document();
        PdfWriter pdfWriter = PdfWriter.getInstance(document, out);
        document.open();

        // Encode and decode with the same charset.
        InputStream is = new ByteArrayInputStream(text.getBytes(utf8));
        XMLWorkerHelper.getInstance().parseXHtml(pdfWriter, document, is, utf8);

        document.close();
    }
}
但此代码不适用于阿拉伯语文本
我尝试使用此解决方案而没有成功:
/**
 * Converts Arabic HTML content into a PDF file written to {@code f:\test.pdf}.
 *
 * <p>The HTML is parsed through an XMLWorker pipeline whose elements are
 * gathered by {@code ElementsCollector}; the collected paragraph is then placed
 * in a single-cell, borderless, full-width table that is added to the document.
 * The table and its cell use a right-to-left run direction, matching the RTL
 * handling {@code ElementsCollector} applies to nested table cells.
 *
 * <p>Failures are reported with {@code printStackTrace()} and not rethrown.
 *
 * @param htmlContentAr HTML markup (expected to contain Arabic text)
 */
public void createPdf(String htmlContentAr)
{
    final Charset utf8 = Charset.forName("UTF-8");
    // try-with-resources releases the file handle even if parsing or layout fails.
    try (FileOutputStream out = new FileOutputStream("f:\\test.pdf")) {
        Document pdfDoc = new Document();
        PdfWriter writer = PdfWriter.getInstance(pdfDoc, out);
        writer.setRgbTransparencyBlending(true);
        pdfDoc.open();

        // HTML pipeline: CSS resolution -> HTML tag processing -> element collection.
        StyleAttrCSSResolver cssResolver = new StyleAttrCSSResolver();
        ElementsCollector elementsHandler = new ElementsCollector();
        HtmlPipelineContext htmlContext = new HtmlPipelineContext(
                new CssAppliersImpl(new UnicodeFontProvider()));
        htmlContext.charSet(utf8);
        htmlContext.setAcceptUnknown(true).autoBookmark(true)
                .setTagFactory(Tags.getHtmlTagProcessorFactory());
        CssResolverPipeline pipeline = new CssResolverPipeline(cssResolver,
                new HtmlPipeline(htmlContext, new ElementHandlerPipeline(elementsHandler, null)));

        XMLWorker worker = new XMLWorker(pipeline, true);
        XMLParser parser = new XMLParser();
        parser.addListener(worker);
        parser.parse(new StringReader(htmlContentAr));

        // Wrap the collected content in a one-column table so that a run
        // direction can be applied to it.
        PdfPTable mainTable = new PdfPTable(1);
        mainTable.setWidthPercentage(100);
        mainTable.setRunDirection(PdfWriter.RUN_DIRECTION_RTL);

        PdfPCell cell = new PdfPCell();
        cell.setBorder(0);
        cell.setRunDirection(PdfWriter.RUN_DIRECTION_RTL);
        cell.setHorizontalAlignment(Element.ALIGN_LEFT);
        cell.addElement(elementsHandler.getParagraph());
        mainTable.addCell(cell);

        pdfDoc.add(mainTable);
        pdfDoc.close();
    } catch (Exception e) {
        // Best-effort conversion: keep the original reporting behaviour.
        e.printStackTrace();
    }
}
这是 ElementsCollector.java 的代码:
import java.util.Iterator;
import java.util.List;
import com.itextpdf.text.Chunk;
import com.itextpdf.text.Element;
import com.itextpdf.text.Font;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.pdf.PdfPCell;
import com.itextpdf.text.pdf.PdfPRow;
import com.itextpdf.text.pdf.PdfPTable;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.ElementHandler;
import com.itextpdf.tool.xml.Writable;
import com.itextpdf.tool.xml.html.pdfelement.NoNewLineParagraph;
import com.itextpdf.tool.xml.pipeline.WritableElement;
/**
 * XMLWorker {@link ElementHandler} that accumulates every parsed element into a
 * single left-aligned {@link Paragraph}.
 *
 * <p>Children of a {@link NoNewLineParagraph} are added individually; all other
 * elements are added as they are. Before an element is added, table cells whose
 * content uses a font with "arabic" in its family name are switched to
 * right-to-left run direction.
 */
public class ElementsCollector implements ElementHandler {

    /** Marker looked for (case-insensitively) in a font family name. */
    private static final String ARABIC_MARKER = "arabic";

    private final Paragraph paragraph;

    /** Creates a collector backed by an empty, left-aligned paragraph. */
    public ElementsCollector() {
        paragraph = new Paragraph();
        paragraph.setAlignment(Element.ALIGN_LEFT);
    }

    /**
     * @return the paragraph holding everything collected so far
     */
    public Paragraph getParagraph() {
        return paragraph;
    }

    /**
     * Collects the elements carried by {@code htmlElement}.
     *
     * @param htmlElement output of the pipeline; {@code null} and anything that
     *                    is not a {@link WritableElement} is ignored
     */
    @Override
    public void add(Writable htmlElement) {
        // instanceof also rejects null, and avoids a ClassCastException for
        // Writable implementations that do not carry elements.
        if (!(htmlElement instanceof WritableElement)) {
            return;
        }
        for (Element element : ((WritableElement) htmlElement).elements()) {
            if (element instanceof NoNewLineParagraph) {
                // Flatten: add the children instead of the wrapper itself.
                Iterator<Element> children = ((NoNewLineParagraph) element).iterator();
                while (children.hasNext()) {
                    collect(children.next());
                }
            } else {
                collect(element);
            }
        }
    }

    /** Applies the RTL fix to {@code element} and appends it to the paragraph. */
    private void collect(Element element) {
        fixNestedTablesRunDirection(element);
        paragraph.add(element);
    }

    /**
     * If {@code element} is a table, switches every cell containing Arabic-font
     * content to RTL run direction. Right-aligned paragraphs in such cells are
     * reset to left alignment.
     */
    private void fixNestedTablesRunDirection(Element element) {
        if (!(element instanceof PdfPTable)) {
            return;
        }
        for (PdfPRow row : ((PdfPTable) element).getRows()) {
            if (row == null) {
                continue;
            }
            for (PdfPCell cell : row.getCells()) {
                // Guard against null slots in the cell array.
                if (cell == null || cell.getCompositeElements() == null) {
                    continue;
                }
                for (Element item : cell.getCompositeElements()) {
                    if (!usesArabicFont(item)) {
                        continue;
                    }
                    cell.setRunDirection(PdfWriter.RUN_DIRECTION_RTL);
                    if (item instanceof Paragraph
                            && ((Paragraph) item).getAlignment() == Element.ALIGN_RIGHT) {
                        ((Paragraph) item).setAlignment(Element.ALIGN_LEFT);
                    }
                }
            }
        }
    }

    /**
     * @return {@code true} if any chunk of {@code item} uses a font whose family
     *         name contains "arabic" (case-insensitive)
     */
    private static boolean usesArabicFont(Element item) {
        List<Chunk> chunks = item.getChunks();
        if (chunks == null) {
            return false;
        }
        for (Chunk chunk : chunks) {
            Font font = chunk.getFont();
            if (font == null) {
                continue;
            }
            String family = font.getFamilyname();
            // Locale.ROOT keeps the comparison stable under locales with special
            // casing rules (e.g. Turkish dotless i).
            if (family != null
                    && family.toLowerCase(java.util.Locale.ROOT).contains(ARABIC_MARKER)) {
                return true;
            }
        }
        return false;
    }
}
这是 UnicodeFontProvider.java 的代码
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.nio.file.Paths;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Font;
import com.itextpdf.text.FontFactory;
import com.itextpdf.text.FontFactoryImp;
import com.itextpdf.text.pdf.BaseFont;
public class UnicodeFontProvider extends FontFactoryImp {
public UnicodeFontProvider() {
String root = System.getenv("SystemRoot");
FileSystems.getDefault();
Path path = Paths.get(root, "fonts");
FontFactory.getFontImp().registerDirectory(path.toString());
// TODO test, works on windows so far
}
public Font getFont(String fontname, String encoding, boolean embedded, float size, int style,
BaseColor color, boolean cached) {
if (fontname!= null && !fontname.isEmpty()) {
return new Font(Font.FontFamily.UNDEFINED, size, style, color);
}
return FontFactory.getFont(fontname, BaseFont.IDENTITY_H, BaseFont.EMBEDDED, size, style, color);
}
}
但是pdf文件中没有显示任何内容
我认为问题出在这一行:
parser.parse(new StringReader(htmlContentAr));
更新 :
我尝试使用此代码:
import java.io.File;
import java.io.IOException;
import com.itextpdf.text.FontProvider;
import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.html2pdf.resolver.font.DefaultFontProvider;
import com.itextpdf.io.font.FontProgram;
import com.itextpdf.io.font.FontProgramFactory;
import com.itextpdf.kernel.font.PdfFont;
import com.itextpdf.kernel.font.PdfFontFactory;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfWriter;
/**
 * Converts an HTML file to PDF with pdfHTML, using an explicit list of font
 * files instead of the converter's default fonts.
 */
public class TestHTML {

    /** Font files made available to the converter (Latin, Arabic and Hebrew Noto fonts). */
    public static final String[] FONTS = {
        "src/main/resources/fonts/noto/NotoSans-Regular.ttf",
        "src/main/resources/fonts/noto/NotoNaskhArabic-Regular.ttf",
        "src/main/resources/fonts/noto/NotoSansHebrew-Regular.ttf"
    };

    /**
     * Entry point; currently does nothing.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // TODO Auto-generated method stub
    }

    /**
     * Converts the HTML file {@code src} to the PDF file {@code dest}.
     *
     * @param src   path of the HTML input file
     * @param fonts paths of the font files to register with the converter
     * @param dest  path of the PDF output file
     * @throws IOException if a font or the input/output file cannot be read or written
     */
    public void createPdf(String src, String[] fonts, String dest) throws IOException {
        ConverterProperties properties = new ConverterProperties();
        // Declared with its concrete type: the FontProvider imported in this file is
        // the iText 5 interface (com.itextpdf.text.FontProvider), which has no
        // addFont(FontProgram) and is not what setFontProvider expects.
        // The three flags are false, false, false: presumably this disables the
        // built-in font sources so only the fonts added below are used - TODO confirm.
        DefaultFontProvider fontProvider = new DefaultFontProvider(false, false, false);
        for (String font : fonts) {
            FontProgram fontProgram = FontProgramFactory.createFont(font);
            fontProvider.addFont(fontProgram);
        }
        properties.setFontProvider(fontProvider);
        HtmlConverter.convertToPdf(new File(src), new File(dest), properties);
    }
}
但我有与 jar 相关的错误:
此行中的错误:
fontProvider.addFont(fontProgram);
FontProvider 类型未定义方法 addFont(FontProgram)，另一个错误出现在:
properties.setFontProvider(fontProvider);
未为类型 FontProvider 定义方法 addFont(FontProgram)
也有错误:
此行有多个标记 - com.itextpdf.layout.font.FontProvider 类型无法解析。它是从所需的 .class 文件中间接引用的 - com.itextpdf.layout.font.FontProvider 类型无法解析。它是从所需的 .class 文件中间接引用的
我使用了这些 jar 包:
kernel-7.0.0.jar ,io-7.0.0.jar ,html2pdf-1.0.2.jar ,itextpdf-5.5.13.jar,xmlworker-5.5.9.jar
解决方案
推荐阅读
- python - 在 Osmnx 中使用未简化网络时的绘图错误
- python - 如何授予将消息发送到不同 AWS 账户中的 SQS 队列的权限?
- angular7-router - Angular 7路由忽略路径
- angular - 如何测试角度材料日期选择器?
- python - Python日期顺序错误
- linux - Grep 功能不会随着头管停止
- xamarin.forms - 首次加载时 CarouselView 中未显示图像
- wpf - Wpf:在应用程序生命周期中使用单独的 dbcontext
- r - ggplot geom_col 绘图缩放(不可读)
- android - Android:RecyclerView 和 SQLite 数据库 OutOfMemeory 错误?