excel - 从本地html文件导入数据后调整结果
问题描述
在了不起的成员@QHarr 的帮助下,我得到了以下代码,使我能够从 html 本地文件中抓取数据,这非常好
Sub Test()
Dim html As HTMLDocument, tables As Object, ws As Worksheet, fStream As ADODB.Stream
Dim headers(), mappings(), arr(13), newarr(13), cnt As Long, i As Long, j As Long, n As Long
Dim xFd As FileDialog, sFile As Variant, sSchool As String, sFolder As String, x As Long
Set ws = ThisWorkbook.Worksheets("Results")
Set html = New HTMLDocument
Set fStream = New ADODB.Stream
Set xFd = Application.FileDialog(msoFileDialogFolderPicker)
xFd.Title = "Please Select The Original Folder:"
If xFd.Show = -1 Then sFolder = xFd.SelectedItems(1) & "\" Else Exit Sub
sSchool = Split(sFolder, "\")(UBound(Split(sFolder, "\")) - 1)
sFile = Dir(sFolder)
cnt = ws.Cells(Rows.Count, 1).End(xlUp).Row: x = cnt
headers = Array("م", "كود الطالب", "الرقم القومي", "اسم الطالب", "الجنسية", "الديانة", "تاريخ الميلاد", "يوم", "شهر", "سنة", "محافظة الميلاد", "حالة القيد", "النوع", "ملاحظات")
mappings = Array(3, 8, 9, 12, 11, 10, 2, 7, 1, 6, 5, 4, 13)
If IsEmpty(ws.Cells(1, 1).Value) Then ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
Application.ScreenUpdating = False
While sFile <> ""
With fStream
.Charset = "UTF-8"
.Open
.LoadFromFile sFolder & sFile
html.body.innerHTML = .ReadText
.Close
End With
Set tables = html.querySelectorAll("table[width='100%'] table:first-child")
For i = 89 To tables.Length - 17 Step 26
Erase arr
arr(0) = vbNullString
For j = 0 To 12
arr(mappings(j)) = Application.Trim(tables.Item(i + (2 * (j))).innerText)
If j = 4 And arr(3) = "غير مصرى" Then arr(mappings(j)) = 0
Next j
For j = UBound(arr) To LBound(arr) Step -1
newarr(n) = arr(j)
If n = 6 Then
If IsDate(newarr) Then newarr(n) = CDate(Day(newarr(n)) & "/" & Month(newarr(n)) & "/" & Year(newarr(n)))
End If
n = n + 1
Next j
ws.Cells(cnt + 1, 1).Resize(1, UBound(arr) + 1) = newarr
cnt = cnt + 1: n = 0
Next i
sFile = Dir
Wend
ws.Cells(x + 1, 14).Resize(cnt - x).Value = sSchool
ws.Activate
Application.ScreenUpdating = True
End Sub
当没有国籍ID(html表中的第三列)为空时,结果的唯一问题是الرقــمالقومــي我没有得到正确的名称以及以下名称的结果如果您运行代码,注意从 11 到 17 的行...附件是在此LINK上有文件的 FolderToTest
我已经尝试解决并且在某种程度上调整了结果(但仍然不正确,因为缺少具有空国籍 ID 的名称并且以下名称包含他的一些数据)这是我的最后一次尝试
Sub Test()
Dim html As HTMLDocument, tables As Object, ws As Worksheet, fStream As ADODB.Stream
Dim headers(), mappings(), arr(13), newarr(13), cnt As Long, i As Long, j As Long, n As Long
Dim xFd As FileDialog, sFile As Variant, sSchool As String, sFolder As String, x As Long
Set ws = ThisWorkbook.Worksheets("Results")
Set html = New HTMLDocument
Set fStream = New ADODB.Stream
Set xFd = Application.FileDialog(msoFileDialogFolderPicker)
xFd.Title = "Please Select The Original Folder:"
If xFd.Show = -1 Then sFolder = xFd.SelectedItems(1) & "\" Else Exit Sub
sSchool = Split(sFolder, "\")(UBound(Split(sFolder, "\")) - 1)
sFile = Dir(sFolder)
cnt = ws.Cells(Rows.Count, 1).End(xlUp).Row: x = cnt
headers = Array("م", "كود الطالب", "الرقم القومي", "اسم الطالب", "الجنسية", "الديانة", "تاريخ الميلاد", "يوم", "شهر", "سنة", "محافظة الميلاد", "حالة القيد", "النوع", "ملاحظات")
mappings = Array(3, 8, 9, 12, 11, 10, 2, 7, 1, 6, 5, 4, 13)
If IsEmpty(ws.Cells(1, 1).Value) Then ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
Application.ScreenUpdating = False
While sFile <> ""
With fStream
.Charset = "UTF-8"
.Open
.LoadFromFile sFolder & sFile
html.body.innerHTML = .ReadText
.Close
End With
Set tables = html.querySelectorAll("table[width='100%'] table:first-child")
For i = 89 To tables.Length - 17 Step 26
Erase arr
arr(0) = vbNullString
For j = 0 To 12
arr(mappings(j)) = Application.Trim(tables.Item(i + (2 * (j))).innerText)
'If j = 4 And arr(3) = "غير مصرى" Then arr(mappings(j)) = 0
If j = 3 And Not IsNumeric(Application.Trim(tables.Item(i + (2 * (j)) + 2).innerText)) Then
i = i + 24
End If
Next j
For j = UBound(arr) To LBound(arr) Step -1
newarr(n) = arr(j)
If n = 6 Then
newarr(n) = CDate(Day(newarr(n)) & "/" & Month(newarr(n)) & "/" & Year(newarr(n)))
End If
n = n + 1
Next j
ws.Cells(cnt + 1, 1).Resize(1, UBound(arr) + 1) = newarr
cnt = cnt + 1: n = 0
Next i
sFile = Dir
Wend
ws.Cells(x + 1, 14).Resize(cnt - x).Value = sSchool
ws.Activate
Application.ScreenUpdating = True
End Sub
解决方案
推荐阅读
- actions-on-google - 日期期间系统实体解析到下一个日历年以获取非特定年份的句子
- python - Python访问错误类的属性
- vmware-clarity - 完全编程隐藏/显示路标
- amazon-web-services - AWS Lambda,使用 python 在 s3 存储桶之间复制
- scala - 在 Zeppelin 导入 scala 包(无 jar)
- docker - 使用 livemedia-creator 构建 CentOS 6 Docker 失败
- sql - 将同一表上的 BOM 数量与 SQL Server 2012 进行比较
- python - 使用 Pandas 从另一列中减去一列的值
- python - Django - 如何创建一个包含自己类型集合的模型?
- javascript - 从 Javascript 将原始 ZPL 和 EPL 发送到打印机