首页 > 解决方案 > 处理数据集中的大型 XML 文件(内存不足)

问题描述

我正在尝试通过 URL 读取包含近 100 万条记录的 1GB XML 文件,并将其存储<file>在数据集中并将它们保存到数据库中,但 10 分钟后,应用程序将崩溃并抛出Out of Memory Exception.

XML 如下所示:

<ICECAT-interface xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://data.icecat.biz/xsd/files.index.xsd">
  <files.index Generated="20200220011050">
    <file path="export/freexml.int/INT/1399.xml" Product_ID="1399" Updated="20200122151223" Quality="ICECAT" Supplier_id="1" Prod_ID="C4872A" Catid="377" On_Market="1" Model_Name="80" Product_View="333140" HighPic="http://images.icecat.biz/img/gallery/1399_4084669833.jpg" HighPicSize="2734733" HighPicWidth="4200" HighPicHeight="2772" Date_Added="20051027000000" Limited="No"></file>
    <file path="export/freexml.int/INT/1400.xml" Product_ID="1400" Updated="20200122151313" Quality="ICECAT" Supplier_id="1" Prod_ID="C4874A" Catid="377" On_Market="1" Model_Name="80" Product_View="283127" HighPic="http://images.icecat.biz/img/gallery/1400_6603855084.jpg" HighPicSize="2734913" HighPicWidth="4200" HighPicHeight="2952" Date_Added="20051027000000" Limited="No"></file>
  </files.index>
</ICECAT-interface>

子主():

Class Module1
    Public Shared Sub Main()
        Dim url As String = "http://data.Icecat.biz/export/freexml/EN/daily.index.xml"
        ProcessXMLFeedURL(url)
    End Sub
End Class

功能:

Public Shared Function ProcessXMLFeedURL(MyURL As String) As Boolean
    Dim OK As Boolean = False
    Try
        Dim rssReq As WebRequest = WebRequest.Create(MyURL)
        Dim username As String = ""
        Dim password As String = ""
        Dim encoded As String = System.Convert.ToBase64String(System.Text.Encoding.UTF8.GetBytes(username + ":" + password))
        rssReq.Headers.Add("Authorization", "Basic " + encoded)
        '//Get the WebResponse
        Dim rep As WebResponse = rssReq.GetResponse()
        '//Read the Response in a XMLTextReader
        Dim xtr As XmlTextReader = New XmlTextReader(rep.GetResponseStream())
        '// Set up the connection to the SQL server
        Dim MyConnectionString As String = "Data Source=...."
        Dim Connection As SqlConnection = New SqlConnection(MyConnectionString)
        Dim MyServer As Server = New Server(New ServerConnection(Connection))
        Dim db As Database = New Database(MyServer, "xxxxxx")
        db.Create()
        '//Create a new DataSet
        Dim ds As DataSet = New DataSet()
        ds.ReadXml(xtr)
        '// Parse tables
        Dim Mytable As Table
        Dim MyTableName As String = ds.Tables(1).TableName
        If Not HaveTable(MyConnectionString, MyTableName) Then
                '// Create the table
                Try
                    Mytable = New Table(db, MyTableName)
                Catch ex As Exception
                    Dim ii As Integer = 0
                End Try
                '// create the columns
                Dim Mycolumn As Column = New Column()
            For Each dc As DataColumn In ds.Tables(1).Columns
                Mycolumn = New Column(Mytable, dc.ColumnName)
                Mycolumn.DataType = getdatatype(dc.DataType.ToString)
                Mytable.Columns.Add(Mycolumn)
            Next
            Mytable.Create()
            Dim PrimaryKeys() As DataColumn = ds.Tables(1).PrimaryKey
            Dim PrimaryKey As DataColumn
                For Each PrimaryKey In PrimaryKeys
                    Dim Myindex As Index = New Index(Mytable, PrimaryKey.ColumnName)
                    Myindex.IndexKeyType = IndexKeyType.DriPrimaryKey
                    Myindex.IndexedColumns.Add(New IndexedColumn(Myindex, PrimaryKey.ColumnName))
                    Mytable.Indexes.Add(Myindex)
                Next
            End If
            Using MyConnection As SqlConnection = New SqlConnection(MyConnectionString)
                MyConnection.Open()
                Using bulkcopy As SqlBulkCopy = New SqlBulkCopy(MyConnection)
                bulkcopy.DestinationTableName = "[databasename].[dbo].[" + MyTableName + "]"
                Try
                    bulkcopy.WriteToServer(ds.Tables(1))
                Catch ex As Exception
                        Dim iw As Integer = 0
                    End Try
                End Using
                MyConnection.Close()
            End Using
    Catch ex As Exception
        Throw ex '// Do errorhanddling here
    End Try
    Return OK
End Function

标签: vb.netdatasetout-of-memory

解决方案


推荐阅读