首页 > 解决方案 > 如何加速链表中的归并排序?

问题描述

我已经有一个迭代版本的合并排序算法来对链表进行排序。

以下版本在一般情况下运行良好。

大致描述一下我的算法:

我使用按钮向上,首先迭代我将 2 个节点和 2 个节点合并为 4 个已排序的节点,依此类推...

typedef struct ELE {
    char *value;
    struct ELE *next;
} list_ele_t;

/* Queue structure */
typedef struct {
    list_ele_t *head; /* Linked list of elements */
    list_ele_t *tail; /* Linked list of elements */
    int size;
} queue_t;

void q_sort(queue_t *q)
{
    if (!q || !q->head || q->size == 1)
        return;

    int block_size = 1, n = q->size, i, alen, blen;
    list_ele_t virtual_head;
    list_ele_t *last = NULL, *it = NULL, *l1 = NULL, *l2 = NULL, *tmp = NULL;
    // list_ele_t *l1head = NULL, *l1tail = NULL, *l2tail = NULL;
    virtual_head.next = q->head;
    while (block_size < n) {
        int iter = 0;
        last = &virtual_head;
        it = virtual_head.next;
        while (iter < n) {
            // decide each iterate time's block size a and b
            alen = min(n - iter, block_size);
            // avoid odd block
            blen = min(n - iter - alen, block_size);

            l1 = it;
            l1head = l1;
            // if left block is odd, just skip
            if (blen != 0) {
                // seperate one list in to l1 and l2
                for (i = 0; i < alen - 1; ++i)
                    it = it->next;
                l1tail = it;
                l2 = it->next;
                it->next = NULL;
                it = l2;
                for (i = 0; i < blen - 1; ++i)
                    it = it->next;
                l2tail = it;
                tmp = it->next;
                it->next = NULL;
                it = tmp;
            }


            while (l1 || l2) {
                if (l2 == NULL ||
                    (l1 != NULL && strcmp(l1->value, l2->value) < 0)) {
                    // if l2 doesn't contain any node, just append l1 to
                    // merge list or if value of l1 is smaller
                    last->next = l1;
                    last = last->next;
                    l1 = l1->next;
                } else {
                    // if l1 doesn't contain any node, just append l2 to
                    // merge list or if value of l2 is smaller
                    last->next = l2;
                    last = last->next;
                    l2 = l2->next;
                }
            }
            last->next = NULL;
            iter += alen + blen;
        }
        block_size <<= 1;
    }

    q->head = virtual_head.next;
    list_ele_t *nd = q->head;
    while (nd->next) {
        nd = nd->next;
    }
    q->tail = nd;
} 

当这种算法遇到很长的链表时会很慢,但是这种测试台有一些特定的格式,比如有几个重复的部分。例如 AAAAABBBBBBB

结果,我在原始版本中添加了一些代码。我检查列表 1 ( l1) 和列表 2 ( l2) 的成员是否相同。如果是这样,我跳过合并步骤,只需附加l2l1

以下是新版本:

void q_sort(queue_t *q)
{
    long long cnt = 0;
    if (!q || !q->head || q->size == 1)
        return;
    int block_size = 1, n = q->size, i, alen, blen;
    list_ele_t virtual_head;
    list_ele_t *last = NULL, *it = NULL, *l1 = NULL, *l2 = NULL, *tmp = NULL;
    list_ele_t *l1head = NULL, *l1tail = NULL, *l2tail = NULL;
    virtual_head.next = q->head;
    while (block_size < n) {
        int iter = 0;
        last = &virtual_head;
        it = virtual_head.next;
        while (iter < n) {
            // decide each iterate time's block size a and b
            alen = min(n - iter, block_size);
            // avoid odd block
            blen = min(n - iter - alen, block_size);
            // printf("%d %d block size: %d\n",alen,blen,block_size);

            l1 = it;
            l1head = l1;
            // if left block is odd, just skip
            if (blen != 0) {
                // seperate one list in to l1 and l2
                for (i = 0; i < alen - 1; ++i)
                    it = it->next;
                l1tail = it;
                l2 = it->next;
                it->next = NULL;
                it = l2;
                for (i = 0; i < blen - 1; ++i)
                    it = it->next;
                l2tail = it;
                tmp = it->next;
                it->next = NULL;
                it = tmp;
            }

            if (l1head && l2tail && strcmp(l1head->value, l2tail->value) == 0
                && (l1tail && l2 && strcmp(l1tail->value,l2->value) == 0)) {
                iter += alen + blen;
                last->next = l1;
                l1tail->next = l2;
                last = l2tail;
                l2tail->next = NULL;
            } else {
                while (l1 || l2) {
                    if (l2 == NULL ||
                        (l1 != NULL && strcmp(l1->value, l2->value) < 0)) {
                        // if l2 doesn't contain any node, just append l1 to
                        // merge list or if value of l1 is smaller
                        last->next = l1;
                        last = last->next;
                        l1 = l1->next;
                    } else {
                        // if l1 doesn't contain any node, just append l2 to
                        // merge list or if value of l2 is smaller
                        last->next = l2;
                        last = last->next;
                        l2 = l2->next;
                    }
                }
                last->next = NULL;
                iter += alen + blen;
            }
        }
        block_size <<= 1;
    }

    q->head = virtual_head.next;
    list_ele_t *nd = q->head;
    while (nd->next) {
        nd = nd->next;
    }
    q->tail = nd;
}

概念很简单,但我的添加似乎是错误的。错误消息告诉我可能存在无限循环。

首先,我想知道我在添加此部分时是否犯了任何错误?如何解决?

if (l1head && l2tail && strcmp(l1head->value, l2tail->value) == 0
            && (l1tail && l2 && strcmp(l1tail->value,l2->value) == 0)) {
            iter += alen + blen;
            last->next = l1;
            l1tail->next = l2;
            last = l2tail;
            l2tail->next = NULL;
        }

其次,有没有更好的方法来加速这个特定格式的链表?(例如C->C->C->C->B->B.....B

谢谢大家的建议!

标签: clinked-listmergesort

解决方案


使用“列表”的小 (32) 数组的示例代码,其中 array[i] 是具有 2^i 个节点的列表(最后一个条目是无限大小的除外)。这可能是使用列表对列表进行排序的最快方法。将列表复制到数组、对数组进行排序并从数组创建新的排序列表通常更快。输入参数是指向列表第一个节点的指针,返回值是指向排序列表第一个节点的指针。对列表进行排序后,调用代码将需要扫描排序后的列表以设置列表结构中的尾指针。合并中的比较使用“src2 < src1”来遵循 C++ 模板排序中使用的约定。

typedef struct NODE_{
struct NODE_ * next;
uint64_t data;
}NODE;

NODE * MergeLists(NODE *, NODE *);      /* prototype */

#define NUMLISTS 32                     /* number of lists */

NODE * SortList(NODE *pList)
{
NODE * aList[NUMLISTS];                 /* array of lists */
NODE * pNode;
NODE * pNext;
size_t i;
    if(pList == NULL)                   /* check for empty list */
        return NULL;
    for(i = 0; i < NUMLISTS; i++)       /* zero array */
        aList[i] = NULL;
    pNode = pList;                      /* merge nodes into array */
    while(pNode != NULL){
        pNext = pNode->next;
        pNode->next = NULL;
        for(i = 0; (i < NUMLISTS) && (aList[i] != NULL); i++){
            pNode = MergeLists(aList[i], pNode);
            aList[i] = NULL;
        }
        if(i == NUMLISTS)
            i--;
        aList[i] = pNode;
        pNode = pNext;
    }
    pNode = NULL;                       /* merge array into one list */
    for(i = 0; i < NUMLISTS; i++)
        pNode = MergeLists(aList[i], pNode);
    return pNode;
}

NODE * MergeLists(NODE *pSrc1, NODE *pSrc2)
{
NODE *pDst = NULL;                      /* destination head ptr */
NODE **ppDst = &pDst;                   /* ptr to head or prev->next */
    if(pSrc1 == NULL)
        return pSrc2;
    if(pSrc2 == NULL)
        return pSrc1;
    while(1){
        if(pSrc2->data < pSrc1->data){  /* if src2 < src1 */
            *ppDst = pSrc2;
            pSrc2 = *(ppDst = &pSrc2->next);
            if(pSrc2 == NULL){
                *ppDst = pSrc1;
                break;
            }
        } else {                        /* src1 <= src2 */
            *ppDst = pSrc1;
            pSrc1 = *(ppDst = &pSrc1->next);
            if(pSrc1 == NULL){
                *ppDst = pSrc2;
                break;
            }
        }
    }
    return pDst;
}

推荐阅读