python - 针对 Pandas 模型优化 PyQt5 QAbstractView
问题描述
我目前正在通过 Python 编写 SQL 查询接口。在将数据写入 QTableView 时,我使用 QAbstractTableModel 来编写查询结果。
这适用于较小的查询,但在尝试呈现许多行和列时会变得非常慢。有没有办法提高将数据帧加载到 QTableView 中的速度?
这是我的 QAbtractTableModel 类的代码:
class SQLConnection_PandasModel(QtCore.QAbstractTableModel):
def __init__(self, reason, df, parent=None):
QtCore.QAbstractTableModel.__init__(self, parent)
self._df = df.copy()
self.original_df = df
self.reason = reason
# PyQt5 Slots and Signals
if self.reason == 'Read':
self.conSig = sqlWindow()
self.conSig.dataChanged.connect(self.conSig.sql_table_updated)
# set the shortcut ctrl+F for find in menu
self.find_list_row = []
# setup menu options
def toDataFrame(self):
return self._df.copy()
def headerData(self, section, orientation, role=QtCore.Qt.DisplayRole):
if role != QtCore.Qt.DisplayRole:
return QtCore.QVariant()
if orientation == QtCore.Qt.Horizontal:
try:
return self._df.columns.tolist()[section]
except (IndexError,):
return QtCore.QVariant()
elif orientation == QtCore.Qt.Vertical:
try:
# return self.df.index.tolist()
return self._df.index.tolist()[section]
except (IndexError,):
return QtCore.QVariant()
def data(self, index, role=QtCore.Qt.DisplayRole):
if not index.isValid():
return QtCore.QVariant()
if role == QtCore.Qt.DisplayRole or role == QtCore.Qt.EditRole:
#print(type(self._df.iloc[index.row(), index.column()]))
if isinstance(self._df.iloc[index.row(), index.column()], bytes):
return QtCore.QVariant('(BLOB)')
else:
return QtCore.QVariant(str(self._df.iloc[index.row(), index.column()]))
def setData(self, index, value, role):
row = self._df.index[index.row()]
col = self._df.columns[index.column()]
if hasattr(value, 'toPyObject'):
# PyQt4 gets a QVariant
value = value.toPyObject()
else:
# PySide gets an unicode
dtype = self._df[col].dtype
if dtype != object:
if ((np.issubdtype(dtype, np.integer)) or isinstance(dtype, int)) and value.isnumeric():
value = None if value == '' else dtype.type(value)
self._df.loc[row, col] = value
# This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
if role == QtCore.Qt.EditRole and self.reason == 'Read':
#print('Emitting 1',row , col)
self.conSig.dataChanged.emit(row, col, value)
return True
elif ((np.issubdtype(dtype, np.integer)) or isinstance(dtype, int)) and not(value.isnumeric()):
return False
else:
value = None if value == '' else dtype.type(value)
self._df.loc[row, col] = value
# This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
if role == QtCore.Qt.EditRole and self.reason == 'Read':
#print('Emitting 2', row, col)
self.conSig.dataChanged.emit(row, col, value)
return True
else:
self._df.loc[row, col] = value
# This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
if role == QtCore.Qt.EditRole and self.reason == 'Read':
#print('Emitting 3', row, col)
self.original_df.at[row, col] = value
#print(self.original_df)
self.conSig.dataChanged.emit(row, col, self.original_df)
return True
def rowCount(self, parent=QtCore.QModelIndex()):
return len(self._df.index)
def columnCount(self, parent=QtCore.QModelIndex()):
return len(self._df.columns)
def sort(self, column, order):
colname = self._df.columns.tolist()[column]
self.layoutAboutToBeChanged.emit()
self._df.sort_values(colname, ascending=order == QtCore.Qt.AscendingOrder, inplace=True)
self._df.reset_index(inplace=True, drop=True)
self.layoutChanged.emit()
def flags(self, index):
return Qt.ItemIsEnabled | Qt.ItemIsSelectable | Qt.ItemIsEditable
我将不胜感激在提高数据帧加载速度方面可以获得的任何帮助。谢谢你。
解决方案
好吧,我回答了我自己的问题。对于那些将来遇到问题的人,您正在尝试使用带有大型数据集的 QAbstractTableModel 填充 QTableViews ......这里你去:
class SQLConnection_PandasModel(QtCore.QAbstractTableModel):
def __init__(self, reason, df, parent=None):
QtCore.QAbstractTableModel.__init__(self, parent)
self._df = np.array(df.values)
self.original_df = df.copy()
self._cols = df.columns
self.r, self.c = np.shape(self._df)
self.reason = reason
# PyQt5 Slots and Signals
if self.reason == 'Read':
self.conSig = sqlWindow()
self.conSig.dataChanged.connect(self.conSig.sql_table_updated)
# set the shortcut ctrl+F for find in menu
self.find_list_row = []
def rowCount(self, parent=None):
return self.r
def columnCount(self, parent=None):
return self.c
def data(self, index, role=QtCore.Qt.DisplayRole):
if index.isValid():
if role == QtCore.Qt.DisplayRole:
if isinstance(self._df[index.row(), index.column()], bytes):
row = self._df[index.row()][0] - 1
col = self._df[index.column()][0] -1
self._df[row, col] = '(BLOB)'
return str('(BLOB)')
return str(self._df[index.row(),index.column()])
return None
def setData(self, index, value, role):
row = self._df[index.row()]
col = self._df[index.column()]
if hasattr(value, 'toPyObject'):
# PyQt4 gets a QVariant
value = value.toPyObject()
else:
# PySide gets an unicode
dtype = self._df.dtype
if dtype != object:
value = None if value == '' else dtype.type(value)
table_row = row[0]-1
table_col = col[0]-1
self._df[table_row, table_col] = value
# This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
if role == QtCore.Qt.EditRole and self.reason == 'Read':
column_name = self.original_df.columns[table_col]
self.original_df.loc[table_row ,column_name] = value
my_df = pd.DataFrame(self._df)
my_df.columns = self.original_df.columns
self.conSig.dataChanged.emit(table_row, table_col, my_df)
return True
def headerData(self, p_int, orientation, role):
if role == QtCore.Qt.DisplayRole:
if orientation == QtCore.Qt.Horizontal:
return self._cols[p_int]
elif orientation == QtCore.Qt.Vertical:
return p_int
return None
def sort(self, column, order):
colname = self._df.columns.tolist()[column]
self.layoutAboutToBeChanged.emit()
self._df.sort_values(colname, ascending=order == QtCore.Qt.AscendingOrder, inplace=True)
self._df.reset_index(inplace=True, drop=True)
self.layoutChanged.emit()
def flags(self, index):
return Qt.ItemIsEnabled | Qt.ItemIsSelectable | Qt.ItemIsEditable
我将 pandas df 转换为一个 numpy 数组,这确实加快了它的速度。最后,当我需要打印时,我将 numpy 转换回 pandas。这大大提高了速度。
120k 行和 10 列的 Pandas 加载时间:~280 秒。120k 行和 10 列的 Numpy 加载时间:6.634 秒。
推荐阅读
- java - macOS 升级后 Grails 应用程序未运行
- sql - 在 AWS Athena 中添加月份日期列
- angular - 如何在没有任何动画的情况下打开角度材质弹出窗口(matDialog)
- java - 在检查指定位置是否设置位时,右移数字和左移位掩码哪个更好?
- php - 注册后自动记住我
- python - SQLAlchemy:尝试从列中提取不同的值
- sql - SQL 中使用查询的百分比
- java - 使用环境变量的 Spring jndi.xml 放置架
- asp.net-mvc - 如何在选择表格行上的复选框时获取行的 ID 并将 Id 值绑定到 MVC 剃刀视图中的模型
- python - 带有数组输入参数的 Python OPC-UA 调用方法