首页 > 解决方案 > 我有 55,000 行租赁数据要清理,数据不一致并且包含文本和数字

问题描述

我有 55,000 行租金数据,其中一列是每周的租金价格。

此列未经过清理,导致这些字段中包含数字、文本和电话号码的噩梦般的组合。

我正在尝试仅提取每周租金数字,或者如果没有提供则返回一个空白单元格。

下面使用的代码当前,它将列复制到另一列并针对该数据运行一系列查找和替换。这导致大约 75%/80% 是完美的,其余的几乎完全没用(数据要么像下图那样被屠杀,要么包含像 $400$500 这样仍需要清理的项目)

过滤列表

上述数据包含以下文本作为其屠宰方式的示例:

列表

Sub A_Core_Clean_Level_1()

'Application.ScreenUpdating = False
Rows("1:1").AutoFilter

Sheets("Rental Data").Range("I1").EntireColumn.Copy Range("R1").EntireColumn

Columns("R:R").Replace What:="1oo", Replacement:="100", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="2oo", Replacement:="200", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="3oo", Replacement:="300", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="4oo", Replacement:="400", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="5oo", Replacement:="500", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="6oo", Replacement:="600", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="7oo", Replacement:="700", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="9oo", Replacement:="800", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="9oo", Replacement:="900", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False

Columns("R:R").Replace What:="1 day", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="2 days", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="3 days", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="4 days", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="5 days", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="6 days", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="7 days", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="8 days", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="9 days", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="10 days", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False

Columns("R:R").Replace What:=".00*", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="-", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="$ ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" $", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False

Columns("R:R").Replace What:=" 1 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 2 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 3 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 4 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 5 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 6 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 7 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 8 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 9 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 10 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 11 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 12 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 13 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 14 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 15 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 16 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 17 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 18 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 19 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 20 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 21 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 22 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 23 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 24 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" 25 ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False

Columns("R:R").Replace What:="a", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="b", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="c", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="d", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="e", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="f", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="g", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="h", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="i", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="j", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="k", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="l", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="m", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="n", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="o", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="p", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="q", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="r", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="s", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="t", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="u", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="v", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="w", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="x", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="y", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="z", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False

Columns("R:R").Replace What:="..", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=".", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False

Columns("R:R").Replace What:=" ", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="&", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="+", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="/", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="!", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="|", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:="(", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=")", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
'Columns("R:R").Replace What:="'", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
'Columns("R:R").Replace What:=",", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False
Columns("R:R").Replace What:=" *", Replacement:="", LookAt:=xlPart, SearchOrder:=xlByRows, MatchCase:=False

Rows("1:1").AutoFilter

Application.ScreenUpdating = True

End Sub

我相信理想的结果是提取找到 3 个或 4 个连续数字的情况,然后使用查找和替换来处理代码以清理其余部分。虽然这意味着存在手机/电话号码的位置;如果它们是第一个,它们仍将包含在捕获的数据中,但它更接近于清理。

有没有人有任何建议使用公式或代码在 excel 中清理这些数据。

标签: excelvbaexcel-formula

解决方案


这是用户定义函数 (UDF) 的工作。这是一个原型。

Function Rental(Cell As Range) As Variant

    Dim CellVal As String           ' Cell's contents
    Dim p       As Integer          ' position of $ in CellVal
    
    CellVal = Cell.Value
    p = InStr(CellVal, "$") + 1
    If p = 1 Then
        Rental = "No $$$"
    Else
        Rental = Val(Mid(CellVal, p))
    End If
End Function

在工作簿的标准代码模块中安装函数。这是一个你必须自己插入的模块,默认是Module1。现有的代码表都无法完成这项工作。

使用如下所示的语法从工作表中调用函数,其中 A2 是您保存屠宰数据的单元格。您可以使用与内置 Excel 公式相同的方式复制公式。

=Rental(A2)

在其当前形状中,该函数将查找 $ 符号并返回它后面的任何数字,直到下一个非数字字符或字符串的结尾。您系统的小数分隔符将被视为数字,而不是千位分隔符。

如果未找到 $ 符号,则函数返回“No $$$”。在这里,UDF 的优势就体现出来了。取而代之的是,您可能会搭上另一种方法来寻找您想要的值。


推荐阅读