首页 > 解决方案 > 针对 Pandas 模型优化 PyQt5 QAbstractView

问题描述

我目前正在通过 Python 编写 SQL 查询接口。在将数据写入 QTableView 时,我使用 QAbstractTableModel 来编写查询结果。

这适用于较小的查询,但在尝试呈现许多行和列时会变得非常慢。有没有办法提高将数据帧加载到 QTableView 中的速度?

这是我的 QAbtractTableModel 类的代码:

class SQLConnection_PandasModel(QtCore.QAbstractTableModel):

    def __init__(self, reason, df, parent=None):
        QtCore.QAbstractTableModel.__init__(self, parent)
        self._df = df.copy()
        self.original_df = df

        self.reason = reason

        # PyQt5 Slots and Signals
        if self.reason == 'Read':
            self.conSig = sqlWindow()
            self.conSig.dataChanged.connect(self.conSig.sql_table_updated)
        # set the shortcut ctrl+F for find in menu
        self.find_list_row = []
        # setup menu options

    def toDataFrame(self):
        return self._df.copy()

    def headerData(self, section, orientation, role=QtCore.Qt.DisplayRole):
        if role != QtCore.Qt.DisplayRole:
            return QtCore.QVariant()

        if orientation == QtCore.Qt.Horizontal:
            try:
                return self._df.columns.tolist()[section]
            except (IndexError,):
                return QtCore.QVariant()
        elif orientation == QtCore.Qt.Vertical:
            try:
                # return self.df.index.tolist()
                return self._df.index.tolist()[section]
            except (IndexError,):
                return QtCore.QVariant()

    def data(self, index, role=QtCore.Qt.DisplayRole):

        if not index.isValid():
            return QtCore.QVariant()
        if role == QtCore.Qt.DisplayRole or role == QtCore.Qt.EditRole:
            #print(type(self._df.iloc[index.row(), index.column()]))
            if isinstance(self._df.iloc[index.row(), index.column()], bytes):

                return QtCore.QVariant('(BLOB)')
            else:

                return QtCore.QVariant(str(self._df.iloc[index.row(), index.column()]))


    def setData(self, index, value, role):
        row = self._df.index[index.row()]
        col = self._df.columns[index.column()]

        if hasattr(value, 'toPyObject'):
            # PyQt4 gets a QVariant
            value = value.toPyObject()
        else:
            # PySide gets an unicode

            dtype = self._df[col].dtype

            if dtype != object:
                if ((np.issubdtype(dtype, np.integer)) or isinstance(dtype, int)) and value.isnumeric():
                    value = None if value == '' else dtype.type(value)
                    self._df.loc[row, col] = value

                    # This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
                    if role == QtCore.Qt.EditRole and self.reason == 'Read':
                        #print('Emitting 1',row , col)
                        self.conSig.dataChanged.emit(row, col, value)
                    return True
                elif ((np.issubdtype(dtype, np.integer)) or isinstance(dtype, int)) and not(value.isnumeric()):
                    return False
                else:

                    value = None if value == '' else dtype.type(value)
                    self._df.loc[row, col] = value

                    # This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
                    if role == QtCore.Qt.EditRole and self.reason == 'Read':
                        #print('Emitting 2', row, col)
                        self.conSig.dataChanged.emit(row, col, value)
                    return True
            else:
                self._df.loc[row, col] = value

                # This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
                if role == QtCore.Qt.EditRole and self.reason == 'Read':
                    #print('Emitting 3', row, col)
                    self.original_df.at[row, col] = value
                    #print(self.original_df)
                    self.conSig.dataChanged.emit(row, col, self.original_df)
                return True



    def rowCount(self, parent=QtCore.QModelIndex()):
        return len(self._df.index)

    def columnCount(self, parent=QtCore.QModelIndex()):
        return len(self._df.columns)

    def sort(self, column, order):
        colname = self._df.columns.tolist()[column]
        self.layoutAboutToBeChanged.emit()
        self._df.sort_values(colname, ascending=order == QtCore.Qt.AscendingOrder, inplace=True)
        self._df.reset_index(inplace=True, drop=True)
        self.layoutChanged.emit()

    def flags(self, index):
        return Qt.ItemIsEnabled | Qt.ItemIsSelectable | Qt.ItemIsEditable

我将不胜感激在提高数据帧加载速度方面可以获得的任何帮助。谢谢你。

标签: pythonpandaspyqt5qabstracttablemodel

解决方案


好吧,我回答了我自己的问题。对于那些将来遇到问题的人,您正在尝试使用带有大型数据集的 QAbstractTableModel 填充 QTableViews ......这里你去:

class SQLConnection_PandasModel(QtCore.QAbstractTableModel):

    def __init__(self, reason, df, parent=None):
        QtCore.QAbstractTableModel.__init__(self, parent)
        self._df = np.array(df.values)
        self.original_df = df.copy()

        self._cols = df.columns
        self.r, self.c = np.shape(self._df)

        self.reason = reason

        # PyQt5 Slots and Signals
        if self.reason == 'Read':
            self.conSig = sqlWindow()
            self.conSig.dataChanged.connect(self.conSig.sql_table_updated)
        # set the shortcut ctrl+F for find in menu
        self.find_list_row = []

    def rowCount(self, parent=None):
        return self.r

    def columnCount(self, parent=None):
        return self.c

    def data(self, index, role=QtCore.Qt.DisplayRole):
        if index.isValid():
            if role == QtCore.Qt.DisplayRole:
                if isinstance(self._df[index.row(), index.column()], bytes):
                    row = self._df[index.row()][0] - 1
                    col = self._df[index.column()][0] -1
                    self._df[row, col] = '(BLOB)'
                    return str('(BLOB)')

                return str(self._df[index.row(),index.column()])
        return None

    def setData(self, index, value, role):
        row = self._df[index.row()]
        col = self._df[index.column()]

        if hasattr(value, 'toPyObject'):
            # PyQt4 gets a QVariant
            value = value.toPyObject()
        else:
            # PySide gets an unicode
            dtype = self._df.dtype
            if dtype != object:
                value = None if value == '' else dtype.type(value)
        table_row = row[0]-1
        table_col = col[0]-1
        self._df[table_row, table_col] = value
        # This is the signal to my RDB class that a cell has changed and to update the rdb_DF.
        if role == QtCore.Qt.EditRole and self.reason == 'Read':
            column_name = self.original_df.columns[table_col]
            self.original_df.loc[table_row ,column_name] = value

            my_df = pd.DataFrame(self._df)
            my_df.columns = self.original_df.columns
            self.conSig.dataChanged.emit(table_row, table_col, my_df)
        return True

    def headerData(self, p_int, orientation, role):
        if role == QtCore.Qt.DisplayRole:
            if orientation == QtCore.Qt.Horizontal:
                return self._cols[p_int]
            elif orientation == QtCore.Qt.Vertical:
                return p_int
        return None

    def sort(self, column, order):
        colname = self._df.columns.tolist()[column]
        self.layoutAboutToBeChanged.emit()
        self._df.sort_values(colname, ascending=order == QtCore.Qt.AscendingOrder, inplace=True)
        self._df.reset_index(inplace=True, drop=True)
        self.layoutChanged.emit()

    def flags(self, index):
        return Qt.ItemIsEnabled | Qt.ItemIsSelectable | Qt.ItemIsEditable

我将 pandas df 转换为一个 numpy 数组,这确实加快了它的速度。最后,当我需要打印时,我将 numpy 转换回 pandas。这大大提高了速度。

120k 行和 10 列的 Pandas 加载时间:~280 秒。120k 行和 10 列的 Numpy 加载时间:6.634 秒。


推荐阅读