regex - VBA RegEx 第二场比赛从第一场比赛开始
问题描述
美好的一天——我有一个例程,搜索数以万计的文本文件以捕获相关的文件名,结果只剩下不到 10,000 个。这些文件名为下一个例程打开每个剩余的文本文件以搜索特定数据提供了起点。每个文本文件可能有 50 到 50,000 行数据。循环遍历每个文件,例程首先找到一个序列号,然后找到所有出现的 FAILED,然后捕获该文本行开头附近的日期/时间戳并填充另一个工作表。这一切都适用于各种故障发生。除了一个。
(请参阅下面代码示例中关注领域的开始)我现在来到了我希望得到一些指导的领域。有一个 FAILED 实例,我需要验证 PASSED 是否出现在包含 FAILED 的行之后的三行。如果 PASSED 存在,它将始终是 FAILED 下面的第三行。我不能使用在 FAILED 后一定秒数内发生的日期/时间戳,例如 PASSED,因为它变化太大,可能会产生错误的结果。我认为在 FAILED 之后捕获第一个 PASSED 的最佳方法是捕获 FAILED 的 FirstIndex 位置并从那里开始搜索 PASSED。但是,我不知道该怎么做,或者是否有可能。老实说,我不知道这是否可以使用 RegEx 或我没有想到的 VBA 中的东西来完成。我只是在学习 RegEx,所以我在这方面非常薄弱,即使在我阅读完所有内容之后也是如此。一个带有答案的解释将不胜感激。VBA 我很满意。我正在使用引用 Microsoft VBScript 正则表达式 5.5 的 Excel 2010 Professional。
我认为这个答案可能会有所帮助,但如果可以的话,我不明白。 如何在 VBA 中获取子匹配的位置?任何帮助或指导将不胜感激。先感谢您。
亲切的问候,马克
样本净化的搜索数据
日志:00::01:11:03.129 [XXX_##] XXX:3390、3412、3401、3400、3401、3398、3402、3409 0090123101000172
日志:00::01:11:15.576 [XXX_###] XXX:3393、3399、3393、3395、3394、3396、3397、3395 0090123101000200
日志:00::01:11:23.568 [XXX_##] XXX:3390、3411、3401、3400、3401、3398、3402、3409 0090123101000173
日志:00::01:11:37.049 [XXX_###] XXX:3393、3400、3393、3394、3394、3396、3396、3395 0090123101000201
日志:00::01:11:53.265 [XXX_##] XXX:3388、3409、3399、3397、3399、3396、3400、3406 0090123101000129
日志:00::01:11:56.361 [XXX_###] XXX:3393、3399、3392、3394、3394、3396、3396、3395 0090123101000202
日志:00::01:12:14.596 [XXX_##] XXXX Xxxxxxxxxxxxx Xxxxxxxxxxx FAILED , Xxxxxxxxxxx: A:1, 0090123101000130
日志:00::01:12:16.432 [XXX_##] XXXX ADC 3401、3402、3401、3399、3399、3401、3399、3401、
日志:00::01:12:16.502 [XXX_##] XXXX DAC 1477、1301、1405、1229、1406、1473、1770、1543、
日志:00::01:12:16.581 [XXX_##] XXXX Xxxxxxxxxxxxx Xxxxxxxxxx已通过,Xxxxxxxxxxx:1
日志:00::01:12:16.846 [XXX_##] XXX:3407、3408、3406、3405、3405、3406、3404、3405 0090123101000130
日志:00::01:12:17.406 [XXX_###] XXX:3398、3403、3397、3400、3399、3401、3402、3399 0090123101000203
日志:00::01:12:37.508 [XXX_##] XXX:3402、3402、3400、3398、3400、3401、3400、3401 0090123101000131
日志:00::01:12:38.511 [XXX_###] XXX:3386、3393、3386、3386、3387、3389、3389、3387 0090123101000204
日志:00::01:13:02.619 [XXX_##] XXX:3403、3402、3400、3397、3400、3401、3399、3401 0090123101000132
Dim bFound As Boolean 'Used to identify if sFile <> "".
Dim dHr As Double 'Test 'Number of hours in dEndTime
Dim dMin As Double 'Test 'Number of minutes in dEndTime.
Dim dSec As Double 'Test 'Number of seconds in dEndTime.
Dim dStartTime As Double 'Test 'Time routine starts.
Dim dEndTime As Double 'Test 'Time routine completes.
Dim i As Integer 'Array variable for rows.
Dim iCurrentRow As Integer 'Variable used in centering filename cells.
Dim iNextRow As Integer 'Used to find last row in column to add new data.
Dim j As Integer 'Array variable for columns.
Dim LastRow As Integer 'Last row used by any column in current range.
Dim NextRow As Integer 'Last row of current column.
Dim z As Integer 'Counter for files > 200 bytes.
Dim lFileLen As Long 'Length of text file.
Dim oM As Object 'Single match.
Dim oMtch As Object 'Match collection.
Dim oS As Object 'Number of matches found.
Dim LastCol As String 'Identify last column used.
Dim LastColLetter As String 'Last Column letter.
Dim s1LastCol As String 'Identify last column in Row 1 used.
Dim s1LastColLetter As String 'Last column in Row 1 letter.
Dim sCurrCol As String 'Numerical value of current column.
Dim sCurrColLetter As String 'Alphabetical value of current column.
Dim sFile As String 'File name to search in.
Dim sFn As String 'Combined path and file to search in.
Dim sPath As String 'Path of file to search in.
Dim sTxt As String 'Variable to hold scripting.filesystemobject.
Dim vArr As Variant 'Array containing all finlenames.
' Turn the following activity off to increase program speed.
With Application
.StatusBar = True
.EnableEvents = False
.DisplayAlerts = False
.ScreenUpdating = False
End With
dStartTime = Now() 'For testing purposes ONLY.
Sheets("Failures").Activate
LastCol = ActiveSheet.UsedRange.SpecialCells(xlLastCell).Column
If LastCol > 26 Then
LastColLetter = Chr(Int((LastCol - 1) / 26) + 64) & Chr(((LastCol - 1) Mod 26) + 65)
Else
LastColLetter = Chr(LastCol + 64)
End If
' Get last row used by any column in current range.
LastRow = ActiveSheet.UsedRange.Rows.Count
' Set range values.
vArr = Range("B1:" & LastColLetter & LastRow).Value
Columns("B:" & LastColLetter).Delete Shift:=xlToLeft 'Delete previous data.
sPath = "U:\Serial_Server_Data\" 'Get path name.
Sheets("Log Files").Activate
' Will provide the last used column letter.
LastCol = ActiveSheet.UsedRange.SpecialCells(xlLastCell).Column
If LastCol > 26 Then
LastColLetter = Chr(Int((LastCol - 1) / 26) + 64) & Chr(((LastCol - 1) Mod 26) + 65)
Else
LastColLetter = Chr(LastCol + 64)
End If
' Get last row used by any column in current range.
LastRow = ActiveSheet.UsedRange.Rows.Count
' Set range values.
vArr = Range("C2:" & LastColLetter & LastRow).Value
' Initialize variables.
z = 1
bFound = False
' Step through files to apply Pattern to.
For i = LBound(vArr, 1) To UBound(vArr, 1) 'Step through rows to apply Pattern to.
For j = LBound(vArr, 2) To UBound(vArr, 2) 'Step through columns to apply Pattern to.
If vArr(i, j) = "" Then GoTo SkipAll 'Skip cell if empty.
sFile = vArr(i, j) 'Get file name.
lFileLen = GetDirOrFileSize(sPath, sFile) 'Get the file size for later use.
If lFileLen > 200 Then 'Only search files that are over 200 bytes in length.
Application.StatusBar = "Processing file " & z & " - " & sFile
' Create full path with filename.
sFn = sPath & sFile
' Determine the next file number available for use by the FileOpen function
sTxt = FreeFile
sTxt = CreateObject("scripting.filesystemobject").OpenTextFile(sFn).ReadAll
' i = 0
With CreateObject("vbscript.regexp") 'Search for serial number.
.Global = False 'Search for first instance.
.IgnoreCase = True 'Select either upper or lowercase.
.Pattern = "Serial\sNo.\s\d{4}"
Set oMtch = .Execute(sTxt)
For Each oM In oMtch
For Each oS In .Execute(oM.Value)
' Debug.Print oS.Value
If oS <> vbNull Then 'Continue on only if serial number found.
Sheets("Failures").Activate
Range("A1").Activate
Do While ActiveCell.Value <> ""
ActiveCell.Offset(0, 1).Activate
' sFile already exists.
If ActiveCell.Value = Right(oS.Value, 4) Then
sCurrCol = ActiveCell.Column
Do While ActiveCell.Value <> ""
ActiveCell.Offset(1, 0).Activate
Loop
ActiveCell.Value = sFile
' Get column letter from column number.
If sCurrCol > 26 Then
sCurrColLetter = Chr(Int((sCurrCol - 1) / 26) + 64) _
& Chr(((sCurrCol - 1) Mod 26) + 65)
Else
sCurrColLetter = Chr(sCurrCol + 64)
End If
' Center cell.
iCurrentRow = Application.WorksheetFunction.CountA(Range _
(sCurrColLetter & ":" & sCurrColLetter))
Range(sCurrColLetter & iCurrentRow).HorizontalAlignment _
= xlCenter
' Adjust the column to fit file name.
Columns(sCurrColLetter & ":" & _
sCurrColLetter).ColumnWidth = 35
bFound = True
z = z + 1
Exit Do
End If
Loop
' sFile doesn't exist.
If ActiveCell.Value = "" And bFound = False Then
ActiveCell.Value = Right(oS.Value, 4)
ActiveCell.Offset(1, 0).Value = sFile
sCurrCol = ActiveCell.Column
' Get column letter from column number.
If sCurrCol > 26 Then
sCurrColLetter = Chr(Int((sCurrCol - 1) / 26) + 64) _
& Chr(((sCurrCol - 1) Mod 26) + 65)
Else
sCurrColLetter = Chr(sCurrCol + 64)
End If
' Center cell.
iCurrentRow = Application.WorksheetFunction.CountA(Range _
(sCurrColLetter & ":" & sCurrColLetter))
Range(sCurrColLetter & iCurrentRow).HorizontalAlignment _
= xlCenter
' Adjust the column to fit file name.
Columns(sCurrColLetter & ":" & sCurrColLetter).ColumnWidth _
= 35
z = z + 1
End If
End If
Next
Next
' >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
' >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
' >>>>>>>>>> Beginning of area of concern.
' >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
' >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
.Global = True 'Search for instance.
.IgnoreCase = True 'Select either upper or lowercase.
' Search for 'failed' with any amount of text on either side.
.Pattern = ".*failed.*"
Set oMtch = .Execute(sTxt)
For Each oM In oMtch
For Each oS In .Execute(oM.Value)
iNextRow = Application.WorksheetFunction.CountA(Range _
(sCurrColLetter & ":" & sCurrColLetter)) + 1
If Left(oS.Value, 4) = "LOG:" Then
' Ignore FLR-x PeakDetector Dash failure.
If UCase(Mid(oS.Value, 32, 3)) <> "FLR" Then
' Print all other "Failed" occurances.
Range(sCurrColLetter & iNextRow).Activate
ActiveCell.Value = Mid(oS.Value, 6, 16)
End If
End If
If Mid(oS.Value, 4, 4) = "LOG:" Then
Range(sCurrColLetter & iNextRow).Activate
ActiveCell.Value = Mid(oS.Value, 9, 16)
End If
Next
Next
End With
End If
' >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
' >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
' >>>>>>>>>> End of area of concern.
' >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
' >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
SkipAll:
bFound = False
Sheets("Log Files").Activate
Next j
Next i
' Cleanup: Add borders, heading background fill, remove gridlines.
解决方案
这是对您的问题的间接回答:冒着引发激烈战争的风险,我不喜欢regex
. 我以前用过它,主要是在 Perl 的 bash 脚本中,但在开发文本解析代码时(几乎)总是能够解决它。以下是如何解决您的特定问题的示例。显然,调整我的方法会导致代码重构。请考虑将此作为替代方案。
我的方法将日志文件摄取到 VBACollection
中,其中集合中的每个Item
都是单独的行。我使用的是集合而不是String()
数组,因为集合很容易扩展为未知数量的行,而数组必须ReDim
事先知道有多少行(可能导致双循环,双读同一个文件)。
Private Function GetFileByLines(ByVal filePath As String) As Collection
Dim fso As FileSystemObject
Set fso = New FileSystemObject
Dim txtStream As Object
Set txtStream = fso.OpenTextFile(filePath, ForReading, False)
Dim lines As Collection
Set lines = New Collection
Do While Not txtStream.AtEndOfStream
Dim line As String
lines.Add txtStream.ReadLine
Loop
txtStream.Close
Set GetFileByLines = lines
End Function
一旦你从文本文件中收集了行,它就是一个简单的循环,带有索引来检查“PASSED”是否存在于“FAILED”行之后的三行。
Private Sub ScanInputFile(ByVal filename As String)
Dim fileLines As Collection
Set fileLines = GetFileByLines(filename)
Dim i As Long
For i = 1 To fileLines.Count
If LCase(fileLines(i)) Like "*failed*" Then
'--- check to make sure we're not near the end of the file
If i + 3 < fileLines.Count Then
If LCase(fileLines(i + 3)) Like "*passed*" Then
Debug.Print "found a PASSED line three lines after FAILED, " & _
"lines " & i & " and " & i + 3
End If
End If
End If
Next i
End Sub
显然,这不会直接集成到您的整个解决方案中,因为我没有解决您已经编码和调试过的部分。不过,作为快速代码审查,请阅读有关避免使用Activate
andSelect
的内容。
这是我使用的整个测试模块。“testlog.txt”文件是您上面数据的直接copypasta。
Option Explicit
Public Sub test()
ScanInputFile "C:\Temp\testlog.txt"
End Sub
Private Sub ScanInputFile(ByVal filename As String)
Dim fileLines As Collection
Set fileLines = GetFileByLines(filename)
Dim i As Long
For i = 1 To fileLines.Count
If LCase(fileLines(i)) Like "*failed*" Then
'--- check to make sure we're not near the end of the file
If i + 3 < fileLines.Count Then
If LCase(fileLines(i + 3)) Like "*passed*" Then
Debug.Print "found a PASSED line three lines after FAILED, " & _
"lines " & i & " and " & i + 3
End If
End If
End If
Next i
End Sub
Private Function GetFileByLines(ByVal filePath As String) As Collection
Dim fso As FileSystemObject
Set fso = New FileSystemObject
Dim txtStream As Object
Set txtStream = fso.OpenTextFile(filePath, ForReading, False)
Dim lines As Collection
Set lines = New Collection
Do While Not txtStream.AtEndOfStream
Dim line As String
lines.Add txtStream.ReadLine
Loop
txtStream.Close
Set GetFileByLines = lines
End Function
推荐阅读
- java - 通过使用 maven 构建指定模块的依赖树来构建 classpath.properties
- amazon-web-services - AWS RedShift:并发扩展在峰值期间不添加集群
- laravel - 如何在延迟作业到达 available_at 之前处理它们?
- c++ - 如何使用 C/C++ 通过 gRPC 发送字节数据?
- r - 如何在二项式 GLMM(lme4)中确定正确的混合效应结构?
- javascript - 将pdf转换为doc并保存在文件夹中
- php - 如何使用散列密码添加密码验证系统?
- c - 在 C 中完成包装器的不完整类型
- html - Firefox中的自定义css切换开关对齐问题
- python - 删除空格、轴和所有数字。只想看彼此相邻的图像