首页 > 解决方案 > 从本地html文件导入数据后调整结果

问题描述

在了不起的成员@QHarr 的帮助下,我得到了以下代码,使我能够从 html 本地文件中抓取数据,这非常好

Sub Test()
Dim html As HTMLDocument, tables As Object, ws As Worksheet, fStream As ADODB.Stream
Dim headers(), mappings(), arr(13), newarr(13), cnt As Long, i As Long, j As Long, n As Long
Dim xFd As FileDialog, sFile As Variant, sSchool As String, sFolder As String, x As Long

Set ws = ThisWorkbook.Worksheets("Results")
Set html = New HTMLDocument
Set fStream = New ADODB.Stream
Set xFd = Application.FileDialog(msoFileDialogFolderPicker)
xFd.Title = "Please Select The Original Folder:"
If xFd.Show = -1 Then sFolder = xFd.SelectedItems(1) & "\" Else Exit Sub
sSchool = Split(sFolder, "\")(UBound(Split(sFolder, "\")) - 1)
sFile = Dir(sFolder)

cnt = ws.Cells(Rows.Count, 1).End(xlUp).Row: x = cnt
headers = Array("م", "كود الطالب", "الرقم القومي", "اسم الطالب", "الجنسية", "الديانة", "تاريخ الميلاد", "يوم", "شهر", "سنة", "محافظة الميلاد", "حالة القيد", "النوع", "ملاحظات")
mappings = Array(3, 8, 9, 12, 11, 10, 2, 7, 1, 6, 5, 4, 13)
If IsEmpty(ws.Cells(1, 1).Value) Then ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers

Application.ScreenUpdating = False
    While sFile <> ""
        With fStream
            .Charset = "UTF-8"
            .Open
            .LoadFromFile sFolder & sFile
            html.body.innerHTML = .ReadText
            .Close
        End With

        Set tables = html.querySelectorAll("table[width='100%'] table:first-child")

        For i = 89 To tables.Length - 17 Step 26
            Erase arr
            arr(0) = vbNullString

            For j = 0 To 12
                arr(mappings(j)) = Application.Trim(tables.Item(i + (2 * (j))).innerText)
                If j = 4 And arr(3) = "غير مصرى‏" Then arr(mappings(j)) = 0
            Next j

            For j = UBound(arr) To LBound(arr) Step -1
                newarr(n) = arr(j)
                If n = 6 Then
                    If IsDate(newarr) Then newarr(n) = CDate(Day(newarr(n)) & "/" & Month(newarr(n)) & "/" & Year(newarr(n)))
                End If
                n = n + 1
            Next j

            ws.Cells(cnt + 1, 1).Resize(1, UBound(arr) + 1) = newarr
            cnt = cnt + 1: n = 0
        Next i

        sFile = Dir
    Wend

    ws.Cells(x + 1, 14).Resize(cnt - x).Value = sSchool
    ws.Activate
Application.ScreenUpdating = True
End Sub

当没有国籍ID(html表中的第三列)为空时,结果的唯一问题是الرقــمالقومــي我没有得到正确的名称以及以下名称的结果如果您运行代码,注意从 11 到 17 的行...附件是在此LINK上有文件的 FolderToTest

我已经尝试解决并且在某种程度上调整了结果(但仍然不正确,因为缺少具有空国籍 ID 的名称并且以下名称包含他的一些数据)这是我的最后一次尝试

Sub Test()
Dim html As HTMLDocument, tables As Object, ws As Worksheet, fStream As ADODB.Stream
Dim headers(), mappings(), arr(13), newarr(13), cnt As Long, i As Long, j As Long, n As Long
Dim xFd As FileDialog, sFile As Variant, sSchool As String, sFolder As String, x As Long

Set ws = ThisWorkbook.Worksheets("Results")
Set html = New HTMLDocument
Set fStream = New ADODB.Stream
Set xFd = Application.FileDialog(msoFileDialogFolderPicker)
xFd.Title = "Please Select The Original Folder:"
If xFd.Show = -1 Then sFolder = xFd.SelectedItems(1) & "\" Else Exit Sub
sSchool = Split(sFolder, "\")(UBound(Split(sFolder, "\")) - 1)
sFile = Dir(sFolder)

cnt = ws.Cells(Rows.Count, 1).End(xlUp).Row: x = cnt
headers = Array("م", "كود الطالب", "الرقم القومي", "اسم الطالب", "الجنسية", "الديانة", "تاريخ الميلاد", "يوم", "شهر", "سنة", "محافظة الميلاد", "حالة القيد", "النوع", "ملاحظات")
mappings = Array(3, 8, 9, 12, 11, 10, 2, 7, 1, 6, 5, 4, 13)
If IsEmpty(ws.Cells(1, 1).Value) Then ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers

Application.ScreenUpdating = False
While sFile <> ""
    With fStream
        .Charset = "UTF-8"
        .Open
        .LoadFromFile sFolder & sFile
        html.body.innerHTML = .ReadText
        .Close
    End With

    Set tables = html.querySelectorAll("table[width='100%'] table:first-child")

    For i = 89 To tables.Length - 17 Step 26
        Erase arr
        arr(0) = vbNullString

        For j = 0 To 12
            arr(mappings(j)) = Application.Trim(tables.Item(i + (2 * (j))).innerText)
            'If j = 4 And arr(3) = "غير مصرى‏" Then arr(mappings(j)) = 0
            If j = 3 And Not IsNumeric(Application.Trim(tables.Item(i + (2 * (j)) + 2).innerText)) Then
                i = i + 24
            End If
        Next j

        For j = UBound(arr) To LBound(arr) Step -1
            newarr(n) = arr(j)
            If n = 6 Then
                newarr(n) = CDate(Day(newarr(n)) & "/" & Month(newarr(n)) & "/" & Year(newarr(n)))
            End If
            n = n + 1
        Next j

        ws.Cells(cnt + 1, 1).Resize(1, UBound(arr) + 1) = newarr
        cnt = cnt + 1: n = 0
    Next i

    sFile = Dir
Wend

ws.Cells(x + 1, 14).Resize(cnt - x).Value = sSchool
ws.Activate
Application.ScreenUpdating = True
End Sub

标签: excelvbahtml-table

解决方案



推荐阅读