python - itertools.product 的 repeat 为 1 时报错:TypeError: '>=' not supported between instances of 'int' and 'tuple'
问题描述
我是新手,如果这个问题太基础,先说声抱歉。不知为何,当我使用两个阈值(repeat=2)时代码运行正常,但一旦改成只用一个阈值(repeat=1),就会报错:'>=' not supported between instances of 'int' and 'tuple'。我想使用 1,因为 2 或更多会产生太多的组合,占用过多内存。
没有错误的代码:
import pandas as pd
# Sample data: one list per column, nine rows each (raw names, the expected
# clean name, and the candidate name + score from each matching method).
data = {
    'Name_Raw': [
        'AECOM TECHNICAL SERVICES',
        'AECOM_*',
        'AECOM- Amentum',
        'AECOM GOVERNMENT SERVICES (Inactive)',
        'ADT LLC dba ADT Security Services',
        'ADT',
        'AAA Call Center',
        'AAA of Northern California, Nevada',
        'ANHEUSER BUSCH InBev',
    ],
    'Name_CleanCorrect': [
        'AECOM',
        'AECOM',
        'AECOM',
        'AECOM',
        'ADT SECURITY CORPORATION',
        'ADT SECURITY CORPORATION',
        'AAA',
        'AAA',
        'AB InBev',
    ],
    'Name_ngram': [
        'AECOM',
        'AECOM',
        'AECOM',
        'AECOM',
        'ADT SECURITY CORPORATION',
        'ADT SECURITY CORPORATION',
        'AAA',
        'State Bar of California',
        'Ivanhoe Cambridge USA',
    ],
    'Score_ngrams': [38, 100, 51, 33, 52, 41, 36, 30, 16],
    'Name_Fuzz': [
        'AECOM',
        'AECOM',
        'AECOM',
        'AECOM',
        'ADT SECURITY CORPORATION',
        'ADT SECURITY CORPORATION',
        'AAA',
        'State Bar of California',
        'AB InBev',
    ],
    'Score_fuzz': [100, 100, 100, 100, 65, 85, 85, 37, 65],
    'Name_jw': [
        'Chicago Title Insuranc',
        'Invesco',
        'Heitman',
        'Patheon/Thermo Fisher Scientific',
        'Securitas Security Service',
        'Michael Baker International, LLC',
        'Bank of America',
        'Ascension Health',
        'Frontier Communication',
    ],
    'Score_jw': [66, 66, 63, 61, 62, 64, 67, 32, 100],
}
df2 = pd.DataFrame(data)
from itertools import product
def f(x, ngram_thresh, leven_thresh):
    """Choose a cleaned name for one row using two score thresholds.

    Args:
        x: A mapping-like row (e.g. a DataFrame row passed by
            ``df.apply(..., axis=1)``) with the keys ``'Score_ngrams'``,
            ``'Name_ngram'``, ``'Score_fuzz'`` and ``'Name_Fuzz'``.
        ngram_thresh: Minimum ``'Score_ngrams'`` (inclusive) for the
            n-gram name to be accepted.
        leven_thresh: Minimum ``'Score_fuzz'`` (inclusive) for the fuzzy
            name to be accepted when the n-gram score is too low.

    Returns:
        ``x['Name_ngram']`` if the n-gram score passes, otherwise
        ``x['Name_Fuzz']`` if the fuzzy score passes, otherwise ``0``.
    """
    # The n-gram match takes priority; the fuzzy match is only a fallback.
    if x['Score_ngrams'] >= ngram_thresh:
        return x['Name_ngram']
    if x['Score_fuzz'] >= leven_thresh:
        return x['Name_Fuzz']
    return 0
# Sweep every (ngram_t, leven_t) threshold pair drawn from 40, 45, ..., 105
# and store the row-wise result of f in its own column. With repeat=2,
# product yields 2-tuples, which the loop header unpacks into two integers.
for ngram_t, leven_t in product(range(40,110,5), repeat=2):
    df2[f'Name_Clean_{ngram_t}_{leven_t}'] = df2.apply(f, ngram_thresh=ngram_t, leven_thresh=leven_t, axis=1)
print(df2)
改为只使用一个阈值(repeat=1)后出错的代码:
def f(x, ngram_thresh):
    """Choose a cleaned name for one row using a single n-gram threshold.

    Args:
        x: A mapping-like row (e.g. a DataFrame row passed by
            ``df.apply(..., axis=1)``) with the keys ``'Score_ngrams'``
            and ``'Name_ngram'``.
        ngram_thresh: Minimum ``'Score_ngrams'`` (inclusive) for the
            n-gram name to be accepted. It must be comparable with the
            score; comparing an int score with a tuple raises TypeError.

    Returns:
        ``x['Name_ngram']`` if the score passes the threshold, else ``0``.
    """
    if x['Score_ngrams'] >= ngram_thresh:
        return x['Name_ngram']
    return 0
# Failing reproduction for the traceback that follows: product(..., repeat=1)
# yields 1-tuples such as (40,), so ngram_t is a tuple here and the
# comparison inside f raises
# TypeError: '>=' not supported between instances of 'int' and 'tuple'.
# The tuple is kept on purpose so the snippet matches that traceback;
# unpacking with `for ngram_t, in product(...)` avoids the error.
for ngram_t in product(range(40,110,5), repeat=1):
    df2[f'Name_Clean_{ngram_t}'] = df2.apply(f, ngram_thresh=ngram_t, axis=1)
错误:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-96-9f9d8793169c> in <module>
5 for ngram_t in product(range(40,110,5), repeat=1):
6 print(ngram_t)
----> 7 df2[f'Name_Clean_{ngram_t}'] = df2.apply(f, ngram_thresh=ngram_t, axis=1)
e:\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, raw, result_type, args, **kwds)
7539 kwds=kwds,
7540 )
-> 7541 return op.get_result()
7542
7543 def applymap(self, func) -> "DataFrame":
e:\Anaconda3\lib\site-packages\pandas\core\apply.py in get_result(self)
178 return self.apply_raw()
179
--> 180 return self.apply_standard()
181
182 def apply_empty_result(self):
e:\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
253
254 def apply_standard(self):
--> 255 results, res_index = self.apply_series_generator()
256
257 # wrap results
e:\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
282 for i, v in enumerate(series_gen):
283 # ignore SettingWithCopy here in case the user mutates
--> 284 results[i] = self.f(v)
285 if isinstance(results[i], ABCSeries):
286 # If we have a view on v, we need to make a copy because
e:\Anaconda3\lib\site-packages\pandas\core\apply.py in f(x)
107
108 def f(x):
--> 109 return func(x, *args, **kwds)
110
111 else:
<ipython-input-96-9f9d8793169c> in f(x, ngram_thresh)
1 def f(x, ngram_thresh):
----> 2 if x['Score_ngrams'] >= ngram_thresh : return x['Name_ngram']
3 else: return 0
4
5 for ngram_t in product(range(40,110,5), repeat=1):
TypeError: '>=' not supported between instances of 'int' and 'tuple'
解决方案
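你漏了一个逗号。itertools.product 产生的每个元素始终是元组,即使 repeat=1 也是如此(例如 (40,)),所以循环变量 ngram_t 是元组而不是整数,与整数分数比较时就会报错。加上逗号把这个单元素元组解包即可:for ngram_t, in product(range(40,110,5), repeat=1): 。只有一个阈值时,也可以直接遍历 range(40,110,5)。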
推荐阅读
- java - MySQL JDBC参数(utf8mb4_general_ci)是否设置正确
- c# - 计算两个向量之间的角度
- notepad++ - 使用记事本++搜索Dxxxx并替换前后的所有字符串
- java - 包 com.sun.net.httpserver 是标准的吗?
- php - 使用数据库提供程序在 Laravel 项目上重置密码:“用户必须实现 canresetpassword 接口”
- javascript - 获取ajax请求的响应文本
- javascript - 无法将 SocketIO 中的 JSON 对象填充到前端 React 表中
- mysql - 在单个查询中获取数据
- python - Python - 你如何绘制数据直方图?
- node.js - 使用 Passport.js,密码正在本地策略中验证,但用户未经过身份验证