首页 > 解决方案 > 根据嵌套键值对对象数组进行排序的最快方法

问题描述

我试图根据深深嵌套在对象中的键值对包含大约 100 个大型实体(具有近 30 个键)的对象数组进行排序,为此我使用了 lodash 的 orderBy 方法:

// Sort-key accessors: case-insensitive account name first, then date of birth.
const name = ({ accountDetails }) => accountDetails.name.toLowerCase();
const dob = ({ personalProfile }) => personalProfile.dob;

// Sort a deep copy so the original `data` array is left untouched.
orderBy(cloneDeep(data), [name, dob], [sortOrder1, sortOrder2]);

*考虑 sortOrder 为desc 或 asc

但是排序过程所花费的时间是相当大的。我们可以使用哪种更快的方法来对对象数组进行排序,并将键埋在对象深处?

示例数据(考虑像这样的 50 个条目至少有 40 个键)

{
    "list": "bugs42",
    "start-date": "2015-08-27",
    "accountDetails": {
        "name": "diamond",
        "text": "8 months",
        "milliseconds": 19936427304
    },
    "personalProfile": {
        "name": "stark",
        "dob": "2003-03-12T09:26:39.980Z"
    }
},
{
    
    "list": "bugs50",
    "start-date": "2015-08-27",
    "accountDetails": {
        "name": "ruby",
        "text": "8 months",
        "milliseconds": 19936427305
    },
    "personalProfile": {
        "name": "warmachine",
        "dob": "2007-03-31T09:26:39.980Z"
    }
}

标签: javascript arrays reactjs sorting time-complexity

解决方案


1.使用JavaScript的内置sort()函数

我们可以使用 JavaScript 的内置数组 sort() 方法,它可以非常快速且良好地对所有内容进行排序。如果您希望原始数组保持不变,那么在数组的副本而不是数组本身上运行 sort() 方法很重要。我们可以通过几个非常简单的方法来做到这一点:

  • array.slice().sort(…)
  • [...array].sort(…)

在下面的示例中,我选择使用扩展语法,即后一种选择:

// Two sample records; the sort key (date of birth) lives one level down,
// under `personalProfile.dob`, as an ISO-8601 timestamp string.
const data = [
  {
    list: "bugs42",
    startdate: "2015-08-27",
    accountDetails: {
      name: "diamond",
      text: "8 months",
      milliseconds: 19936427304,
    },
    personalProfile: {
      name: "stark",
      dob: "2003-03-12T09:26:39.980Z",
    },
  },
  {
    list: "bugs50",
    startdate: "2015-08-27",
    accountDetails: {
      name: "ruby",
      text: "8 months",
      milliseconds: 19936427305,
    },
    personalProfile: {
      name: "warmachine",
      dob: "2007-03-31T09:26:39.980Z",
    },
  },
];

// Numeric timestamp (ms since the epoch) of a record's date of birth.
const dobTime = (entry) => new Date(entry.personalProfile.dob).getTime();

// Returns a new array ordered from earliest to latest date of birth;
// the input array is copied first, so it is never mutated.
const sortByDobAsc = (data) =>
  Array.from(data).sort((first, second) => dobTime(first) - dobTime(second));

// Returns a new array ordered from latest to earliest date of birth.
const sortByDobDes = (data) =>
  Array.from(data).sort((first, second) => dobTime(second) - dobTime(first));

console.log(sortByDobAsc(data), sortByDobDes(data));

有关 JavaScript 内置sort()方法的更多信息,请在此处查看 MDN 文档:Array.prototype.sort()

2.使用第三方排序功能

Hariyanto Lim 的这篇文章探讨了替代排序方法,似乎有几种著名的自定义排序算法可供您选择,甚至可以在此基础上进行构建。

从他的比较来看,在 Chrome 和 Safari 中最快的似乎是 QuickInsertionSort;而在 Firefox 中最快的则是其他任意一个 quickSort 函数,因为在 Firefox 中 QuickInsertionSort 在某些情况下会奇怪地变得与原生 JS 的 sort() 方法一样慢。

以下是探索的所有三个替代函数的源代码:

1.QuickInsertionSort()

/**
 * Sorts `arr` in place into ascending order, as defined by the `>` operator.
 *
 * The algorithm is a non-recursive quicksort using a Hoare-style two-pointer
 * partition; once a range spans 32 index positions or fewer it is finished
 * with insertion sort instead of being partitioned further.
 *
 * @param {Array} arr - Array to sort. It is mutated.
 * @returns {Array|null} The same `arr` instance once sorted, or `null` when
 *   `arr` is falsy or empty.
 */
function QuickInsertionSort(arr) {
  'use strict';

  if (!arr || 1 > arr.length) {
    return null;
  }

  // Ranges with (endIndex - startIndex) at or below this are insertion-sorted.
  // The original author reported 32 as a good choice for random data.
  const INSERTION_SORT_CUTOFF = 32;

  let startIndex = 0;
  let endIndex = arr.length - 1;

  // Explicit stack of pending [start, end] ranges, replacing recursion.
  // Two parallel arrays with a manual length counter are used instead of
  // push()/pop(), following the original author's performance notes.
  const startIndexes = [];
  const endIndexes = [];
  let stackLength = 0;

  do {
    if (INSERTION_SORT_CUTOFF >= endIndex - startIndex) {
      // ---- insertion sort of arr[startIndex..endIndex] ----
      for (let i = startIndex + 1; endIndex >= i; i++) {
        const key = arr[i];

        // Shift every element greater than `key` one slot to the right.
        let j = i - 1;
        while (j >= startIndex && arr[j] > key) {
          arr[j + 1] = arr[j];
          j--;
        }

        arr[j + 1] = key;
      }

      // This range is done; continue with the next pending range, if any.
      if (stackLength === 0) {
        break;
      }
      stackLength--;
      startIndex = startIndexes[stackLength];
      endIndex = endIndexes[stackLength];
    } else {
      // ---- pivot selection ----
      // Start from the first element, then look from the right for a value
      // that differs from it and keep the lower of the two as the pivot.
      let pivot = arr[startIndex];
      let j = endIndex;
      // The bounds check comes first so arr[startIndex - 1] is never read
      // when every element in the range is equal.
      while (j >= startIndex && pivot === arr[j]) {
        j--;
      }
      if (j > startIndex && pivot > arr[j]) {
        pivot = arr[j];
      }

      // ---- Hoare-style partition ----
      let left = startIndex;
      let right = endIndex;
      let partitionIndex;

      for (;;) {
        while (pivot > arr[left]) {
          left++;
        }

        while (arr[right] > pivot) {
          right--;
        }

        if (left >= right) {
          partitionIndex = right;
          break;
        }

        // Swap is inlined here because it runs very often.
        const swapTemp = arr[left];
        arr[left] = arr[right];
        arr[right] = swapTemp;

        left++;
        right--;
      }

      if (partitionIndex > startIndex) {
        // There is a lower partition: process it next, and remember the
        // higher partition (if it has at least two elements) for later.
        if (endIndex > partitionIndex + 1) {
          startIndexes[stackLength] = partitionIndex + 1;
          endIndexes[stackLength] = endIndex;
          stackLength++;
        }

        endIndex = partitionIndex;
      } else if (endIndex > partitionIndex + 1) {
        // No lower partition, only a higher one: move on to it directly.
        startIndex = partitionIndex + 1;
      }
    }
  } while (endIndex > startIndex);

  // Return the sorted array, matching the other sort functions in this file.
  return arr;
}

2.quickSort_by_Tony_Hoare_non_recursive()

/**
 * Sorts `arr` in place with a non-recursive quicksort and returns it.
 *
 * Partitioning is delegated to `partition_by_Tony_Hoare(arr, start, end)`,
 * which is not defined in this section and must be provided elsewhere.
 * NOTE(review): from the way its result is used here, it is expected to
 * return an index `p` such that arr[start..p] and arr[p+1..end] can be sorted
 * independently - confirm against its implementation.
 *
 * @param {Array} arr - Array to sort. It is mutated.
 * @returns {Array|null} The same `arr` instance, or `null` when `arr` is
 *   falsy or empty.
 */
function quickSort_by_Tony_Hoare_non_recursive(arr) {
  'use strict';

  if (!arr || 1 > arr.length) {
    return null;
  }

  let startIndex = 0;
  let endIndex = arr.length - 1;

  // Explicit stack of pending ranges, kept as two parallel arrays with a
  // manual length counter instead of push()/pop() (original author's choice
  // for speed).
  const stackStartIndex = [];
  const stackEndIndex = [];
  let stackLength = 0;

  do {
    const partitionIndex = partition_by_Tony_Hoare(arr, startIndex, endIndex);

    if (partitionIndex > startIndex) {
      // There are lower values to partition. If there is also a higher
      // partition with at least two elements, defer it on the stack.
      if (endIndex > partitionIndex + 1) {
        stackStartIndex[stackLength] = partitionIndex + 1;
        stackEndIndex[stackLength] = endIndex;
        stackLength++;
      }

      // Continue with the lower partition.
      endIndex = partitionIndex;
    } else if (endIndex > partitionIndex + 1) {
      // No lower partition, only a higher one (the worst case): continue
      // with it directly.
      startIndex = partitionIndex + 1;
    } else {
      // Nothing left in this range; resume a deferred range, if any.
      if (stackLength > 0) {
        stackLength--;
        startIndex = stackStartIndex[stackLength];
        endIndex = stackEndIndex[stackLength];
      } else {
        break; // finished
      }
    }
  } while (endIndex > startIndex);

  return arr;
}

3.quickSort_by_Nico_Lomuto()

/**
 * Recursive quicksort using the Lomuto partition scheme; sorts
 * arr[startIndex..endIndex] in place into ascending order.
 *
 * The bounds are optional and default to the whole array, so the function
 * can be called as `quickSort_by_Nico_Lomuto(arr)` like the other sort
 * functions in this file. Explicit bounds behave exactly as before.
 *
 * @param {Array} arr - Array to sort. It is mutated.
 * @param {number} [startIndex=0] - First index of the range (inclusive).
 * @param {number} [endIndex=arr.length - 1] - Last index of the range (inclusive).
 * @returns {Array} The same `arr` that was passed in.
 */
function quickSort_by_Nico_Lomuto(arr, startIndex = 0, endIndex = arr ? arr.length - 1 : -1) {
  // A range with fewer than two elements is already sorted.
  if (!(endIndex > startIndex)) {
    return arr;
  }

  const partitionIndex = partition_by_Nico_Lomuto(arr, startIndex, endIndex);

  // The element at partitionIndex is already in its final position
  // (everything before it is <= it, everything after it is >= it), so it is
  // excluded from both recursive calls.
  quickSort_by_Nico_Lomuto(arr, startIndex, partitionIndex - 1);
  quickSort_by_Nico_Lomuto(arr, partitionIndex + 1, endIndex);

  return arr;
}

/**
 * Lomuto partition of arr[startIndex..endIndex], performed in place.
 *
 * The pivot is chosen by `selectPivot(..., true)`, which places the pivot
 * value at `endIndex`. Lomuto partitioning degrades badly when the pivot is
 * the largest value in the range, which is why the pivot is selected rather
 * than simply taken from a fixed position.
 *
 * @param {Array} arr - Array being sorted. It is mutated.
 * @param {number} startIndex - First index of the range (inclusive).
 * @param {number} endIndex - Last index of the range (inclusive).
 * @returns {number} Final index of the pivot: elements before it are
 *   <= pivot, elements after it are > pivot.
 */
function partition_by_Nico_Lomuto(arr, startIndex, endIndex) {
  const pivot = selectPivot(arr, startIndex, endIndex, true); // true = MUST do swapping

  // Next slot for a value that is <= pivot.
  let i = startIndex;

  // Single pass over every element except the last one, which holds the
  // pivot. `j` is declared locally: it used to be an undeclared variable,
  // which leaks a global in sloppy mode and throws a ReferenceError in
  // strict mode / ES modules.
  for (let j = startIndex; endIndex > j; j++) {
    if (pivot >= arr[j]) {
      swap(arr, i, j);
      i++;
    }
  }

  // Move the pivot into its final position.
  swap(arr, i, endIndex);

  return i;
}

/**
 * Picks a pivot value for arr[startIndex..endIndex].
 *
 * The first element is compared with the right-most element whose value
 * differs from it, and the lower of the two becomes the pivot. When `doSwap`
 * is truthy, the element supplying the pivot is swapped into `endIndex`.
 * If every element in the range equals the first one, that value is returned
 * and nothing is moved.
 *
 * @param {Array} arr - Array being sorted. Mutated only when `doSwap` is truthy.
 * @param {number} startIndex - First index of the range (inclusive).
 * @param {number} endIndex - Last index of the range (inclusive).
 * @param {boolean} doSwap - Whether to move the pivot element to `endIndex`.
 * @returns {*} The chosen pivot value.
 */
function selectPivot(arr, startIndex, endIndex, doSwap) {
  const first = arr[startIndex];

  // Walk leftwards from the end until a value different from `first` shows up.
  let probe = endIndex;
  while (probe >= startIndex && first == arr[probe]) {
    probe--;
  }

  // Ran off the left edge: all elements are equal, nothing to choose between.
  if (probe < startIndex) {
    return first;
  }

  const candidate = arr[probe];
  const candidateIsLower = first > candidate;

  if (doSwap) {
    // Whichever element provides the pivot ends up in the last position.
    swap(arr, candidateIsLower ? probe : startIndex, endIndex);
  }

  return candidateIsLower ? candidate : first;
}

/**
 * Exchanges the elements at indices `a` and `b` of `arr`, in place.
 *
 * A temporary variable is used rather than the ES6 destructuring swap
 * (`[arr[a], arr[b]] = [arr[b], arr[a]]`), which the original author
 * measured as almost 3 times slower.
 *
 * @param {Array} arr - Array to modify.
 * @param {number} a - Index of the first element.
 * @param {number} b - Index of the second element.
 */
function swap(arr, a, b) {
  const displaced = arr[b];
  arr[b] = arr[a];
  arr[a] = displaced;
}


推荐阅读