vb.net - iTextSharp 获取对图形标记的引用
问题描述
我已经花了几个小时研究如何做到这一点,但遇到了障碍。我有一个 PDF 文件,其中一个对象是指北针。这是一个简单的线条图形(我相信它们在 Acrobat 中被称为图形标记),用来指示哪个方向是“向上”。我想读取这个线条图形并确定它的旋转角度。我采取的第一步是用以下代码查看是否可以枚举 PDF 的内容:
Imports it = iTextSharp.text
Imports ip = iTextSharp.text.pdf
' Dump every token of page 1's content to the debug output.
' Using ensures the reader (and the underlying file handle) is released.
Using pdfRdr As New ip.PdfReader("C:\city.pdf")
    ' GetPageContent is used instead of casting /Contents to a single
    ' PdfIndirectReference: /Contents may also be an array of streams, in
    ' which case that cast throws. GetPageContent returns the decoded bytes
    ' of the whole page content in either case.
    Dim streamBytes() As Byte = pdfRdr.GetPageContent(1)
    Dim tokenizer As New ip.PRTokeniser(New ip.RandomAccessFileOrArray(streamBytes))
    ' Loop through each PDF token
    While tokenizer.NextToken
        Debug.Print("token of type={0} and value={1}", tokenizer.TokenType.ToString, tokenizer.StringValue)
    End While
End Using
我确实得到了一些数据,但恐怕我只是不明白如何破译它。
token of type=OTHER and value=q
token of type=NUMBER and value=0.86275
token of type=NUMBER and value=0
token of type=NUMBER and value=0
token of type=NUMBER and value=0.86275
token of type=NUMBER and value=54
token of type=NUMBER and value=30
token of type=OTHER and value=cm
token of type=NAME and value=Fm0
token of type=OTHER and value=Do
token of type=OTHER and value=Q
token of type=OTHER and value=q
token of type=NUMBER and value=1
token of type=NUMBER and value=0
token of type=NUMBER and value=0
token of type=NUMBER and value=1
token of type=NUMBER and value=54
token of type=NUMBER and value=18
token of type=OTHER and value=cm
token of type=NAME and value=Fm1
token of type=OTHER and value=Do
token of type=OTHER and value=Q
测试文件在这里https://drive.google.com/file/d/1dYFkvLMvznsx6sN-1GsNZVIBtDpgzwCU/view?usp=sharing
我走的是正确的道路还是有不同的方法来获取对图形标记的引用?
解决方案
与最初的印象相反,指北针不在 PDF 的注释中,而是在常规页面内容中。(@Jon 在最初的印象下创建了他的答案。)
在 OP 共享的 PDF 中,箭头是直接页面内容的一部分。另一方面,在 OP 共享的 Adobe Acrobat 屏幕截图中,箭头似乎位于一个表单 XObject(form XObject)中(而该 XObject 又会被直接页面内容引用)。
以下方法应检索任何一种情况的矢量图形指令。
您可以使用 iText 解析器框架检索绘制箭头的矢量图形指令。
例如,使用当前的 iText 5.5.x,您需要实现 IExtRenderListener 接口,
并将该实现与 PdfReaderContentParser 一起使用,
例如:
''' <summary>
''' Render listener that collects path construction operators and, whenever a
''' path is painted, writes a one-line description of it to the console:
''' whether it is filled and/or stroked, the colours used, and every segment
''' with its coordinates transformed by the current transformation matrix.
''' Text and image events are ignored.
''' </summary>
Public Class VectorParser
    Implements IExtRenderListener

    ' Path construction operators received since the last painting operator.
    Private ReadOnly pathInfos As List(Of PathConstructionRenderInfo) = New List(Of PathConstructionRenderInfo)

    ''' <summary>Remembers a path construction operator until the path is painted.</summary>
    Public Sub ModifyPath(renderInfo As PathConstructionRenderInfo) Implements IExtRenderListener.ModifyPath
        pathInfos.Add(renderInfo)
    End Sub

    ''' <summary>
    ''' Prints the paint operation, the colours and all collected segments of
    ''' the current path, then clears the collected segments.
    ''' </summary>
    ''' <returns>Always Nothing.</returns>
    Public Function RenderPath(renderInfo As PathPaintingRenderInfo) As parser.Path Implements IExtRenderListener.RenderPath
        Dim gs As GraphicsState = getGraphicsState(renderInfo)
        Dim ctm As Matrix = gs.GetCtm()
        Dim fills As Boolean = (renderInfo.Operation And PathPaintingRenderInfo.FILL) <> 0
        Dim strokes As Boolean = (renderInfo.Operation And PathPaintingRenderInfo.STROKE) <> 0

        If fills Then
            Console.Write("FILL ({0}) ", ToString(gs.FillColor))
            If strokes Then
                Console.Write("and ")
            End If
        End If
        If strokes Then
            Console.Write("STROKE ({0}) ", ToString(gs.StrokeColor))
        End If

        Console.Write("the path ")
        For Each segment As PathConstructionRenderInfo In pathInfos
            Dim label As String = Nothing
            Dim points As IList(Of Single) = segment.SegmentData
            Select Case segment.Operation
                Case PathConstructionRenderInfo.MOVETO
                    label = "move to"
                Case PathConstructionRenderInfo.CLOSE
                    label = "close"
                Case PathConstructionRenderInfo.CURVE_123
                    label = "curve123"
                Case PathConstructionRenderInfo.CURVE_13
                    label = "curve13"
                Case PathConstructionRenderInfo.CURVE_23
                    label = "curve23"
                Case PathConstructionRenderInfo.LINETO
                    label = "line to"
                Case PathConstructionRenderInfo.RECT
                    label = "rectangle"
                    ' A rectangle is given as x, y, width, height; turn it into
                    ' its four corners so it can be transformed like any path.
                    points = expandRectangleCoordinates(points)
            End Select
            ' Operations not listed above are skipped without output.
            If label IsNot Nothing Then
                Console.Write("{0} {1} ", label, ToString(transform(ctm, points)))
            End If
        Next
        Console.WriteLine()

        pathInfos.Clear()
        Return Nothing
    End Function

    ''' <summary>Clip path events are ignored.</summary>
    Public Sub ClipPath(rule As Integer) Implements IExtRenderListener.ClipPath
    End Sub

    ''' <summary>Text events are ignored.</summary>
    Public Sub BeginTextBlock() Implements IRenderListener.BeginTextBlock
    End Sub

    ''' <summary>Text events are ignored.</summary>
    Public Sub RenderText(renderInfo As TextRenderInfo) Implements IRenderListener.RenderText
    End Sub

    ''' <summary>Text events are ignored.</summary>
    Public Sub EndTextBlock() Implements IRenderListener.EndTextBlock
    End Sub

    ''' <summary>Image events are ignored.</summary>
    Public Sub RenderImage(renderInfo As ImageRenderInfo) Implements IRenderListener.RenderImage
    End Sub

    ''' <summary>
    ''' Converts rectangle data (x, y, width, height) into the coordinates of
    ''' its four corners.
    ''' </summary>
    ''' <returns>
    ''' Eight values (four x/y pairs), or an empty list when fewer than four
    ''' values (or Nothing) are supplied.
    ''' </returns>
    Function expandRectangleCoordinates(rectangle As IList(Of Single)) As List(Of Single)
        If rectangle Is Nothing OrElse rectangle.Count < 4 Then
            Return New List(Of Single)
        End If

        Dim x As Single = rectangle(0)
        Dim y As Single = rectangle(1)
        Dim farX As Single = rectangle(0) + rectangle(2)
        Dim farY As Single = rectangle(1) + rectangle(3)
        Return New List(Of Single)() From {x, y, farX, y, farX, farY, x, farY}
    End Function

    ''' <summary>
    ''' Applies the matrix to each x/y pair of the coordinate list.
    ''' </summary>
    ''' <returns>
    ''' The transformed pairs in the same order; an empty list when
    ''' <paramref name="coordinates"/> is Nothing.
    ''' </returns>
    Function transform(ctm As Matrix, coordinates As IList(Of Single)) As List(Of Single)
        Dim result As New List(Of Single)
        If coordinates Is Nothing Then
            Return result
        End If

        ' Stop at Count - 2 so an unpaired trailing value can never cause an
        ' out-of-range read of coordinates(i + 1).
        For i As Integer = 0 To coordinates.Count - 2 Step 2
            Dim point As Vector = New Vector(coordinates(i), coordinates(i + 1), 1).Cross(ctm)
            result.Add(point(Vector.I1))
            result.Add(point(Vector.I2))
        Next
        Return result
    End Function

    ''' <summary>
    ''' Formats a coordinate list as "[ v1 v2 ... ]" (values are formatted
    ''' with the current culture). Nothing is formatted like an empty list.
    ''' </summary>
    ''' <remarks>
    ''' Declared Overloads so it does not shadow Object.ToString().
    ''' </remarks>
    Public Overloads Function ToString(coordinates As IList(Of Single)) As String
        Dim result As New StringBuilder("[ ")
        If coordinates IsNot Nothing Then
            For Each value As Single In coordinates
                result.Append(value)
                result.Append(" ")
            Next
        End If
        result.Append("]")
        Return result.ToString()
    End Function

    ''' <summary>
    ''' Formats a colour as "R,G,B", or "DEFAULT" when no colour is set.
    ''' </summary>
    ''' <remarks>
    ''' Declared Overloads so it does not shadow Object.ToString().
    ''' </remarks>
    Public Overloads Function ToString(baseColor As BaseColor) As String
        If baseColor Is Nothing Then
            Return "DEFAULT"
        End If
        Return String.Format("{0},{1},{2}", baseColor.R, baseColor.G, baseColor.B)
    End Function

    ''' <summary>
    ''' Reads the graphics state from the non-public instance field "gs" of
    ''' the render info via reflection.
    ''' </summary>
    ''' <exception cref="InvalidOperationException">
    ''' The field does not exist on PathPaintingRenderInfo.
    ''' </exception>
    Function getGraphicsState(renderInfo As PathPaintingRenderInfo) As GraphicsState
        Dim gsField As Reflection.FieldInfo = GetType(PathPaintingRenderInfo).GetField("gs", Reflection.BindingFlags.NonPublic Or Reflection.BindingFlags.Instance)
        If gsField Is Nothing Then
            ' Fail with a clear message instead of a NullReferenceException.
            Throw New InvalidOperationException("PathPaintingRenderInfo has no non-public instance field 'gs'.")
        End If
        Return CType(gsField.GetValue(renderInfo), GraphicsState)
    End Function
End Class
像这样使用
' Run a VectorParser over every page of the document and print what it finds.
Using pdfReader As New PdfReader("test.pdf")
    Dim extRenderListener As IExtRenderListener = New VectorParser
    ' The content parser only depends on the reader, so it is created once
    ' instead of once per page. It is not named "parser" to avoid clashing
    ' with the iTextSharp "parser" namespace.
    Dim contentParser As PdfReaderContentParser = New PdfReaderContentParser(pdfReader)
    For page As Integer = 1 To pdfReader.NumberOfPages
        Console.Write(vbCrLf + "Page {0}" + vbCrLf + "====" + vbCrLf, page)
        contentParser.ProcessContent(page, extRenderListener)
    Next
End Using
对于您共享的文档,它会输出:
Page 1
====
STROKE (0,0,255) the path move to [ 277,359 434,2797 ] line to [ 311,5242 434,2797 ]
STROKE (0,0,255) the path move to [ 277,3591 434,2797 ] line to [ 315,0443 424,1336 ]
STROKE (0,0,255) the path move to [ 304,2772 425,376 ] line to [ 304,4842 426,6183 ]
STROKE (0,0,255) the path move to [ 304,6913 426,2042 ] line to [ 310,075 425,376 ]
STROKE (0,0,255) the path move to [ 304,6913 426,8254 ] line to [ 307,5902 425,9972 ]
FILL (0,0,255) the path move to [ 303,656 425,3759 ] line to [ 303,656 425,3759 ] line to [ 306,1407 425,1689 ] line to [ 306,1407 425,1689 ]
STROKE (0,0,255) the path move to [ 303,656 425,376 ] line to [ 303,656 425,376 ] line to [ 306,1407 425,1689 ] line to [ 306,1407 425,1689 ] close [ ]
FILL (0,0,255) the path move to [ 306,969 424,9618 ] line to [ 306,969 424,9618 ] line to [ 309,4538 424,7548 ] line to [ 309,4538 424,7548 ]
STROKE (0,0,255) the path move to [ 306,969 424,9619 ] line to [ 306,969 424,9619 ] line to [ 309,4538 424,7548 ] line to [ 309,4538 424,7548 ] close [ ]
FILL (0,0,255) the path move to [ 309,8679 424,9618 ] line to [ 309,8679 424,9618 ] line to [ 312,3527 424,5477 ] line to [ 312,3527 424,5477 ]
STROKE (0,0,255) the path move to [ 309,868 424,9619 ] line to [ 309,868 424,9619 ] line to [ 312,3527 424,5477 ] line to [ 312,3527 424,5477 ] close [ ]
STROKE (0,0,255) the path move to [ 313,1809 424,3407 ] line to [ 314,8374 424,1336 ]
STROKE (0,0,255) the path move to [ 304,2772 425,7901 ] line to [ 309,8679 424,9619 ] line to [ 312,9738 424,7548 ]
STROKE (0,0,255) the path move to [ 304,2772 425,9972 ] line to [ 309,8679 425,1689 ] line to [ 311,5244 424,9619 ]
STROKE (0,0,255) the path move to [ 304,6914 426,8254 ] line to [ 315,0445 424,1336 ]
STROKE (0,0,255) the path move to [ 311,7315 435,7292 ] line to [ 311,7315 432,8303 ]
STROKE (0,0,255) the path move to [ 321,2564 434,2797 ] line to [ 315,4587 434,2797 ]
STROKE (0,0,255) the path move to [ 315,4586 434,2797 ] line to [ 311,7315 434,2797 ]
STROKE (0,0,255) the path move to [ 311,7315 434,6938 ] line to [ 317,7363 434,0727 ] line to [ 311,7315 433,6585 ]
STROKE (0,0,255) the path move to [ 311,7315 434,4868 ] line to [ 314,8374 434,2797 ] line to [ 311,7315 434,2797 ]
STROKE (0,0,255) the path move to [ 310,6963 436,1433 ] line to [ 317,3222 434,9009 ] line to [ 322,2917 434,2797 ] line to [ 317,3222 433,6585 ] line to [ 310,6963 432,6232 ]
STROKE (0,0,255) the path move to [ 311,7315 435,5221 ] line to [ 317,3222 434,6938 ] line to [ 321,0493 434,2797 ] line to [ 317,3222 433,8656 ] line to [ 311,7315 433,0374 ]
STROKE (0,0,255) the path move to [ 311,7315 435,108 ] line to [ 317,3222 434,4868 ] line to [ 319,3928 434,2797 ] line to [ 317,3222 434,2797 ] line to [ 311,7315 433,4515 ]
对于一个简单的箭头来说,这看起来指令很多,但放大 PDF 会发现箭头确实是由许多小线段构成的:
特别是箭头看起来像是有人使用不同长度和宽度的线段手工创建的。
上面的代码本质上是另一个答案中针对 Java 版 iText 5.5.x 的匿名 ExtRenderListener 实现
的移植版本。
使用 iText 7 实现这一点同样简单。
顺便说一句:不幸的是,绘制箭头的指令没有被特别标记出来;如果同一页面上还有其他矢量图形,则必须按某些特定条件过滤解析器返回的结果,例如颜色(本例中为纯 RGB 蓝色)或大致的坐标范围(即只保留 x 和 y 坐标落在给定范围内的路径)。