python - Python:导入 csv 忽略单个逗号
问题描述
我在下面有一个 csv 文件,它工作正常:
Test Case ID,summary
TC-16610,“verify that user is able to u_pdate 'active' attribute 'false ' on adding “new category records” using 'v3/definition/categories' PUT API on specifying the 'active' attribute 'true'”
TC-16609,“verify that user is able to u_pdate 'active' attribute 'true ' on adding “new category records” using 'v3/definition/categories' PUT API on specifying the 'active' attribute 'false'”
但是,如果我在文件中添加只含单个逗号的一行,则无法解析:
Test Case ID,summary
,
TC-16610,“verify that user is able to u_pdate 'active' attribute 'false ' on adding “new category records” using 'v3/definition/categories' PUT API on specifying the 'active' attribute 'true'”
TC-16609,“verify that user is able to u_pdate 'active' attribute 'true ' on adding “new category records” using 'v3/definition/categories' PUT API on specifying the 'active' attribute 'false'”
我希望即使文件中有一行只含单个逗号,也能解析这个 csv 文件:要么跳过该行继续解析,要么在解析时对其进行验证。有人能帮帮我吗?
我的代码...我在 django 中使用它:
class CsvUpload(forms.Form):
    """Form with a single file field whose cleaner parses the upload as CSV.

    The cleaned value of ``csv_file`` is the pandas DataFrame returned by
    ``clean_csv_file``, not the uploaded file object.
    """
    csv_file = forms.FileField()
    def clean_csv_file(self):
        """Validate the uploaded file and return it parsed as a DataFrame.

        The file name must end with ``.csv``. Column names are stripped and
        lower-cased, ``'test case id'`` is renamed to ``'Test Case ID'``, and
        every value of the ``'summary'`` column is passed through a regex
        substitution that deletes characters outside an allow-list.

        Returns:
            The DataFrame read from the upload, with the cleaned ``'summary'``
            column.

        Raises:
            forms.ValidationError: if the file name does not end with
                ``.csv``, if a ``KeyError`` is raised while processing the
                columns, or if any other exception occurs while
                reading/transforming the data.
        """
        # Probably worth doing this check first anyway
        value = self.cleaned_data['csv_file']
        if not value.name.endswith('.csv'):
            raise forms.ValidationError('Invalid file type')
        try:
            data = pd.read_csv(value.file, encoding = 'ISO-8859-1', engine='python')
            # Normalise header names so the lookups below are case- and
            # whitespace-insensitive.
            data.columns= data.columns.str.strip().str.lower()
            data=data.rename(columns = {'test case id':'Test Case ID'})
            # Called once per value of the 'summary' column via Series.apply,
            # so ``df`` is a single cell value, not a DataFrame.
            def transform(df):
                # NOTE(review): the two adjacent string literals are
                # concatenated by Python, so the character class contains the
                # range '"' through '_' rather than literal quote characters —
                # confirm this is intended.
                # NOTE(review): re.sub raises TypeError when the value is not a
                # string (e.g. NaN from an empty field); that error is caught
                # by the generic ``except Exception`` below.
                my_new_string = re.sub('[^a-zA-Z0-9"''-_“” \n\.]', '', df)
                return my_new_string
            data['summary'] = data['summary'].apply(transform)
        except KeyError:
            # NOTE(review): the message mentions an "Issue Key" column, while
            # the code above works with 'test case id' / 'Test Case ID' —
            # verify which name is meant.
            raise forms.ValidationError(
                'CSV file must have "summary" column and "Issue Key" column')
        except Exception as e:
            # NOTE(review): print() receives the format string and ``e`` as two
            # separate arguments, so the '%s' placeholder is printed literally.
            print('Error while parsing CSV file=> %s', e)
            raise forms.ValidationError('Failed to parse the CSV file')
        return data
编辑:解析该文件时会抛出异常“Failed to parse the CSV file”(无法解析 CSV 文件);如果我去掉最后一个 except 分支,则会得到如下回溯:
Traceback (most recent call last):
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\django\core\handlers\exception.py", line 34, in inner
response = get_response(request)
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\django\core\handlers\base.py", line 115, in _get_response
response = self.process_exception_by_middleware(e, request)
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\django\core\handlers\base.py", line 113, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\django\utils\decorators.py", line 130, in _wrapped_view
response = view_func(request, *args, **kwargs)
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\django\views\decorators\cache.py", line 44, in _wrapped_view_func
response = view_func(request, *args, **kwargs)
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\django\contrib\admin\sites.py", line 231, in inner
return view(request, *args, **kwargs)
File "C:\Users\ssuri\myproject\elixirdev\test_suite_optimizer\admin.py", line 282, in process_csv
if form.is_valid():
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\django\forms\forms.py", line 180, in is_valid
return self.is_bound and not self.errors
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\django\forms\forms.py", line 175, in errors
self.full_clean()
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\django\forms\forms.py", line 376, in full_clean
self._clean_fields()
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\django\forms\forms.py", line 397, in _clean_fields
value = getattr(self, 'clean_%s' % name)()
File "C:\Users\ssuri\myproject\elixirdev\test_suite_optimizer\forms.py", line 78, in clean_csv_file
data['summary'] = data['summary'].apply(transform)
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\pandas\core\series.py", line 4045, in apply
mapped = lib.map_infer(values, f, convert=convert_dtype)
File "pandas/_libs/lib.pyx", line 2228, in pandas._libs.lib.map_infer
File "C:\Users\ssuri\myproject\elixirdev\test_suite_optimizer\forms.py", line 74, in transform
my_new_string = re.sub('[^a-zA-Z0-9"''-_“” \n\.]', '', df)
File "c:\users\ssuri\appdata\local\programs\python\python38\lib\re.py", line 210, in sub
return _compile(pattern, flags).sub(repl, string, count)
TypeError: expected string or bytes-like object
加上下面这行代码之后:
data = data.dropna()
结果变成了下面的错误:
Traceback (most recent call last):
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\pandas\core\indexes\base.py", line 2897, in get_loc
return self._engine.get_loc(key)
File "pandas/_libs/index.pyx", line 107, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 131, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 992, in pandas._libs.hashtable.Int64HashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 998, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: 0
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\django\core\handlers\exception.py", line 34, in inner
response = get_response(request)
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\django\core\handlers\base.py", line 115, in _get_response
response = self.process_exception_by_middleware(e, request)
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\django\core\handlers\base.py", line 113, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\django\utils\decorators.py", line 130, in _wrapped_view
response = view_func(request, *args, **kwargs)
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\django\views\decorators\cache.py", line 44, in _wrapped_view_func
response = view_func(request, *args, **kwargs)
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\django\contrib\admin\sites.py", line 231, in inner
return view(request, *args, **kwargs)
File "C:\Users\ssuri\myproject\elixirdev\test_suite_optimizer\admin.py", line 287, in process_csv
data = handle_demo_ai(csv_data)
File "C:\Users\ssuri\myproject\elixirdev\test_suite_optimizer\ai\web_demo_test_suite_optimization.py", line 108, in handle_json_ai
X = df_tcs.loc[X_row, "Summary_lemmatized"]
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\pandas\core\indexing.py", line 1418, in __getitem__
return self._getitem_tuple(key)
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\pandas\core\indexing.py", line 805, in _getitem_tuple
return self._getitem_lowerdim(tup)
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\pandas\core\indexing.py", line 929, in _getitem_lowerdim
section = self._getitem_axis(key, axis=i)
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\pandas\core\indexing.py", line 1850, in _getitem_axis
return self._get_label(key, axis=axis)
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\pandas\core\indexing.py", line 160, in _get_label
return self.obj._xs(label, axis=axis)
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\pandas\core\generic.py", line 3737, in xs
loc = self.index.get_loc(key)
File "C:\Users\ssuri\Envs\elixir\lib\site-packages\pandas\core\indexes\base.py", line 2899, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas/_libs/index.pyx", line 107, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 131, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 992, in pandas._libs.hashtable.Int64HashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 998, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: 0
解决方案
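当您读取含有单个逗号行的文件时,该行会被解析为第一行数据,其中的值都是 NaN。在对该列应用 transform() 之前,您需要先删除这些 NaN 值:
data = data.dropna()
注意:dropna() 会保留原有的索引标签,因此删除第一行后标签 0 不再存在;如果后续代码按标签访问(例如 df.loc[0, ...]),就会出现 KeyError: 0,此时可以改用 data = data.dropna().reset_index(drop=True)。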
推荐阅读
- swift - 创建背景精灵节点
- html - HTML 登录页面
- python - Selenium Webscraping 出于某种原因,数据只带回部分而不是全部。不确定是否有任何动态数据在后台
- python - 激活环境时,Conda 无法设置路径
- angular - 在 Angular 2 中使用 Freemarker 模板标签
- module - 无法在 hassio 上的 Node-RED 中安装任何模块(节点)
- c - 如果用户输入错误,则使一段代码循环回代码中的某个点
- python - 数据框:尝试修复不可散列的类型:“列表”错误
- java - 存储字符串的整数的通用数组列表?
- excel - Excel 工作簿外部链接在网络驱动器上保存时未更新。