排序算法之堆排序

关于二叉树的一些概念

一些基本概念

堆的定义

堆(heap)，是数据结构中的堆（不是内存模型中的堆）。堆通常是一个可以被看做一棵树，它满足下列性质：

堆中任意节点的值总是不大于(不小于)其子节点的值；
堆总是一棵完全树；

二叉堆的定义

二叉堆是一颗完全树，它分为两种：最大堆和最小堆。

最大堆：父结点的键值总是大于或等于任何一个子节点的键值
最小堆：父结点的键值总是小于或等于任何一个子节点的键值

二叉堆的性质

二叉堆一般都通过”数组”来实现。数组实现的二叉堆，父节点和子节点的位置存在一定的关系。

设二叉堆的顶点索引为 1：

性质一：索引为 i 的左孩子的索引是 2*i；
性质二：索引为 i 的右孩子的索引是 2*i + 1；
性质三：索引为 i 的父节点的索引是 floor(i/2)；

设二叉堆的顶点索引为 0：

性质一：索引为 i 的左孩子的索引是 2*i + 1；
性质二：索引为 i 的右孩子的索引是 2*i + 2；
性质三：索引为 i 的父节点的索引是 floor((i-1)/2)；

二叉堆的基本实现（最大堆）

用数组存储二叉堆

shiftUp

向二叉堆中添加一个元素，我们就要为它找到一个合适的位置，例如添加的是 95 这个元素

显然此时这个二叉堆已经不满足我们对最大堆的定义，我们要维护的它（最大堆的定义），这里需要几步。我们要不断的把它(95)和它的父节点相比较，大于父节点就交换，直至小于它的父节点。

第一步和父节点 50 比较

95 大于 50，所以 95 和父节点 50 交换位置

第二步和父节点 58 比较

95 大于 58，所以 95 和父节点 58 交换位置

第三步和父节点 86 比较

95 大于 86，所以 95 和父节点 86 交换位置

至此，就完成了整个 shiftUp 的过程

shiftUp 程序的编写

我们首先建立一个结构体：

typedef struct Heap_sort_max_s Heap_sort_max;

typedef struct Heap_sort_max_s {
    int count;                              //-> 当前堆内数据的大小
    int capacity;                           //-> 当前堆的容量
    void (*insert)(Heap_sort_max *,int);    //-> 插入函数
    int (*extract_max)(Heap_sort_max *);    //-> 输出一个最大值
    int *item;                              //-> 数组指针
    void (*print_to_dot)(Heap_sort_max *);  //-> 生成 dot 语言 函数
} Heap_sort_max;

insert 函数：

static void insert(Heap_sort_max* heap_sort_max,int item) {
    if (heap_sort_max->count > heap_sort_max->capacity) {
        return;
    }
    heap_sort_max->count += 1;
    heap_sort_max->item[heap_sort_max->count] = item;
    heap_sort_max_shift_up(heap_sort_max, heap_sort_max->count);
}

shiftUp 函数：

static void heap_sort_max_shift_up(Heap_sort_max *heap_sort_max,int k) {
    while (k > 1 && heap_sort_max->item[k/2] < heap_sort_max->item[k]) {
        swap(&heap_sort_max->item[k/2], &heap_sort_max->item[k]);
        k /= 2;
    }
}

其它：

static void print_to_dot(Heap_sort_max *heap_sort_max) {
    printf("\n<---------- dot start ---------->\n\n");
    printf("digraph heap {\nnode[shape=circle];\nnodesep=0.3;\n");
    int count = heap_sort_max->count;
    for (int i = 1; i <= count; i++) {
        int leftIndex = 2 * i;
        int rightIndex = 2 * i + 1;
        if (leftIndex <= count) {
            printf("\t%d -> %d;\n",heap_sort_max->item[i],heap_sort_max->item[leftIndex]);
        }
        if (rightIndex <= count) {
            printf("\t%d -> %d;\n",heap_sort_max->item[i],heap_sort_max->item[rightIndex]);
        }
    }
    printf("}\n");
    printf("\n\n<---------- dot end ---------->\n");
}

static Heap_sort_max create_heap_sort_max(int capacity) {
    
    int *p = (int *)malloc(sizeof(int) * capacity);
    for (int i = 0; i < capacity; i++) {
        p[i] = 0;
    }
    Heap_sort_max heap_sort_max = {
        .count = 0,
        .capacity = capacity,
        .insert = insert,
        .extract_max = extract_max,
        .item = p,
        .print_to_dot = print_to_dot
    };
    return heap_sort_max;
}

static void free_heap_sort_max(Heap_sort_max *heap_sort_max) {
    free(heap_sort_max->item);
}

测试 insert

void test_heap_sort_max() {
    
    int *a = generateNoRepeatRandomArray(20,0,100);
    
    Heap_sort_max heap_sort_max = create_heap_sort_max(50);
    
    for (int i = 0; i < 20; i++) {
        heap_sort_max.insert(&heap_sort_max,a[i]);
    }
    
    void (*print_to_dot)(Heap_sort_max *heap_sort_max) = heap_sort_max.print_to_dot;
    print_to_dot(&heap_sort_max);
    
    free_heap_sort_max(&heap_sort_max);
}

我们创建一个空间为 50 大小的堆，向其中插入 20 个随机的元素。最终由 dot 生成的图片：

shiftDown

下面我们要从堆中取出一个最大的元素 95，为了使这个堆仍然是一个完全二叉树，那么我们将最后一个元素放到堆顶，此时的 count 应将减去一。

现在这个二叉堆已经不满足我们对最大堆的定义，我们要维护的它，对它进行 shiftDown 操作。

第一步和子节点 86 、60 比较

50 小于它的两个子节点 86 、60，但 86 比 60 要大，所以 86 和 50 交换：

第二步和子节点 46 、58 比较

50 小于它的左节点 58 ，大于右结点 46，所以 58 和 50 交换：

最后

由于 50 大于 35，所以不必交换。

至此，就完成了整个 shiftDown 的过程

shiftDown 程序的编写

shiftDown:

static void heap_sort_max_shift_down(Heap_sort_max *heap_sort_max,int k) {
    while (2*k <= heap_sort_max->count) {
        int index = 2*k;
        if (index + 1 <= heap_sort_max->count && heap_sort_max->item[index + 1] > heap_sort_max->item[index]) {
            index += 1;
        }
        if (heap_sort_max->item[k] > heap_sort_max->item[index]) {
            break;
        }
        
        swap(&heap_sort_max->item[k], &heap_sort_max->item[index]);
        k = index;
    }
}

输出最大值：

static int extract_max(Heap_sort_max* heap_sort_max) {
    assert(heap_sort_max->count > 0);
    int ret = heap_sort_max->item[1];
    
    swap(&heap_sort_max->item[1], &heap_sort_max->item[heap_sort_max->count]);
    heap_sort_max->count -= 1;
    
    heap_sort_max_shift_down(heap_sort_max, 1);
    
    return ret;
}

堆排序

我们实现了 shiftUp 和 shiftDown 两个操作，那堆排序也就可以实现了。

实现的基本思路是：先把元素一个一个插入，然后再一个一个去取堆顶元素。代码如下：

void heap_sort(int arr[],int n) {
    Heap_sort_max heap_sort_max = create_heap_sort_max(n + 1);
    
    for (int i = 0; i < n; i++) {
        heap_sort_max.insert(&heap_sort_max,arr[i]);
    }
    
    for (int i = n-1; i >=0; i--) {
        arr[i] = ((int (*)(Heap_sort_max *heap_sort_max))heap_sort_max.extract_max)(&heap_sort_max);
    }
}

测试

我们用 50w 个元素测试：

**************有序*************
sortname:插入排序优化	 sorttime: 0.003090s
sortname:希尔排序　　	 sorttime: 0.025961s
sortname:快速排序随机	 sorttime: 0.067989s
sortname:快速排序双路	 sorttime: 0.059503s
sortname:快速排序三路	 sorttime: 0.109031s
sortname:堆排序　　　	 sorttime: 0.283506s
**************近乎有序*************
sortname:插入排序优化	 sorttime: 0.011907s
sortname:希尔排序　　	 sorttime: 0.031257s
sortname:快速排序随机	 sorttime: 0.069627s
sortname:快速排序双路	 sorttime: 0.047605s
sortname:快速排序三路	 sorttime: 0.090930s
sortname:堆排序　　　	 sorttime: 0.249643s
**************有大量的重复值*************
sortname:希尔排序　　	 sorttime: 0.055228s
sortname:快速排序随机	 sorttime: 27.275784s
sortname:快速排序双路	 sorttime: 0.055961s
sortname:快速排序三路	 sorttime: 0.015928s
sortname:堆排序　　　	 sorttime: 0.151020s
**************随机*************
sortname:希尔排序　　	 sorttime: 0.197971s
sortname:快速排序随机	 sorttime: 0.099134s
sortname:快速排序双路	 sorttime: 0.103923s
sortname:快速排序三路	 sorttime: 0.120655s
sortname:堆排序　　　	 sorttime: 0.210723s
**************倒叙*************
sortname:希尔排序　　	 sorttime: 0.057273s
sortname:快速排序随机	 sorttime: 0.077192s
sortname:快速排序双路	 sorttime: 0.049288s
sortname:快速排序三路	 sorttime: 0.092265s
sortname:堆排序　　　	 sorttime: 0.130887s

可以看到是，堆排序要比其它的排序稍稍慢一些，不过还是可以接受的。

堆排序优化 heapify

将排序的数组直接赋值给堆，然后进行 shiftDown 操作，省去 insert 操作。代码如下：

void heapify_sort(int arr[],int n) {
    Heap_sort_max heap_sort_max = create_heap_sort_max(n + 1);
    for (int i = 0; i < n; i ++) {
        heap_sort_max.item[i+1] = arr[i];
    }
    heap_sort_max.count = n;
    
    for (int i = n/2; i >= 1; i--) {
        heap_sort_max_shift_down(&heap_sort_max, i);/// O(n)
    }
    
    for (int i = n-1; i >=0; i--) {
        arr[i] = ((int (*)(Heap_sort_max *heap_sort_max))heap_sort_max.extract_max)(&heap_sort_max);
    }
}

测试

我们用 50w 个元素测试：

**************有序*************
sortname:插入排序优化	 sorttime: 0.003090s
sortname:希尔排序　　	 sorttime: 0.025961s
sortname:快速排序随机	 sorttime: 0.067989s
sortname:快速排序双路	 sorttime: 0.059503s
sortname:快速排序三路	 sorttime: 0.109031s
sortname:堆排序　　　	 sorttime: 0.283506s
sortname:堆排序优化　	 sorttime: 0.130963s
**************近乎有序*************
sortname:插入排序优化	 sorttime: 0.011907s
sortname:希尔排序　　	 sorttime: 0.031257s
sortname:快速排序随机	 sorttime: 0.069627s
sortname:快速排序双路	 sorttime: 0.047605s
sortname:快速排序三路	 sorttime: 0.090930s
sortname:堆排序　　　	 sorttime: 0.249643s
sortname:堆排序优化　	 sorttime: 0.127976s
**************有大量的重复值*************
sortname:希尔排序　　	 sorttime: 0.055228s
sortname:快速排序随机	 sorttime: 27.275784s
sortname:快速排序双路	 sorttime: 0.055961s
sortname:快速排序三路	 sorttime: 0.015928s
sortname:堆排序　　　	 sorttime: 0.151020s
sortname:堆排序优化　	 sorttime: 0.132577s
**************随机*************
sortname:希尔排序　　	 sorttime: 0.197971s
sortname:快速排序随机	 sorttime: 0.099134s
sortname:快速排序双路	 sorttime: 0.103923s
sortname:快速排序三路	 sorttime: 0.120655s
sortname:堆排序　　　	 sorttime: 0.210723s
sortname:堆排序优化　	 sorttime: 0.212289s
**************倒叙*************
sortname:希尔排序　　	 sorttime: 0.057273s
sortname:快速排序随机	 sorttime: 0.077192s
sortname:快速排序双路	 sorttime: 0.049288s
sortname:快速排序三路	 sorttime: 0.092265s
sortname:堆排序　　　	 sorttime: 0.130887s
sortname:堆排序优化　	 sorttime: 0.130506s

可以看到比之前稍稍快一点点。

原地堆排序

之前的堆排序都是用了额外的空，原地堆排序的空间复杂度为 O(1)。

ps:原地堆排序的顶元素是从 0 开始的。

原地堆排序分析

首先二叉堆数组是这样的：

然后不断的将堆顶元素和堆尾的元素交换，并且每次交换后将堆元素的大小减一，当剩下最后一个的时候，数组已经从小到大排好序了。

由于原地堆排序的顶元素是从 0 开始的，我们要重新实现 shiftDown 操作：

// 顶结点的索引从 0 开始
static void heap_autochthonous_sort_shift_down(int arr[],int n,int k) {
    while ((2*k + 1) < n) {
        int index = 2*k + 1; // 取左孩子索引
        if (index + 1 < n && arr[index + 1] > arr[index]) { index++; }
        if (arr[k] > arr[index]) { break; }
        swap(&arr[k], &arr[index]);
        k = index;
    }
}

/// 原地堆排序 空间O(1)
void heap_autochthonous_sort(int arr[],int n) {
    /// heapify
    for (int i = (n-1)/2; i >= 0; i--) {
        heap_autochthonous_sort_shift_down(arr, n, i);
    }
    
    for (int i = n-1; i > 0; i--) {
        swap(&arr[0], &arr[i]);
        heap_autochthonous_sort_shift_down(arr, i, 0);
    }
}

测试

我们用 50w 个元素测试：

**************有序*************
sortname:插入排序优化	 sorttime: 0.003090s
sortname:希尔排序　　	 sorttime: 0.025961s
sortname:快速排序随机	 sorttime: 0.067989s
sortname:快速排序双路	 sorttime: 0.059503s
sortname:快速排序三路	 sorttime: 0.109031s
sortname:堆排序　　　	 sorttime: 0.283506s
sortname:堆排序优化　	 sorttime: 0.130963s
sortname:堆排序原地　	 sorttime: 0.113156s
**************近乎有序*************
sortname:插入排序优化	 sorttime: 0.011907s
sortname:希尔排序　　	 sorttime: 0.031257s
sortname:快速排序随机	 sorttime: 0.069627s
sortname:快速排序双路	 sorttime: 0.047605s
sortname:快速排序三路	 sorttime: 0.090930s
sortname:堆排序　　　	 sorttime: 0.249643s
sortname:堆排序优化　	 sorttime: 0.127976s
sortname:堆排序原地　	 sorttime: 0.107415s

**************有大量的重复值*************
sortname:希尔排序　　	 sorttime: 0.055228s
sortname:快速排序随机	 sorttime: 27.275784s
sortname:快速排序双路	 sorttime: 0.055961s
sortname:快速排序三路	 sorttime: 0.015928s
sortname:堆排序　　　	 sorttime: 0.151020s
sortname:堆排序优化　	 sorttime: 0.132577s
sortname:堆排序原地　	 sorttime: 0.104522s

**************随机*************
sortname:希尔排序　　	 sorttime: 0.197971s
sortname:快速排序随机	 sorttime: 0.099134s
sortname:快速排序双路	 sorttime: 0.103923s
sortname:快速排序三路	 sorttime: 0.120655s
sortname:堆排序　　　	 sorttime: 0.210723s
sortname:堆排序优化　	 sorttime: 0.212289s
sortname:堆排序原地　	 sorttime: 0.180132s

**************逆序*************
sortname:希尔排序　　	 sorttime: 0.057273s
sortname:快速排序随机	 sorttime: 0.077192s
sortname:快速排序双路	 sorttime: 0.049288s
sortname:快速排序三路	 sorttime: 0.092265s
sortname:堆排序　　　	 sorttime: 0.130887s
sortname:堆排序优化　	 sorttime: 0.130506s
sortname:堆排序原地　	 sorttime: 0.104213s

可以看到又稍稍快了一点。

最后附上Demo

结束

至此，整个排序算法便介绍完了。

下面是各个排序的时间复杂度和空间复杂度的对比：

–	平均时间复杂度	原地排序	额外空间	稳定排序
插入排序	O(n^2 )	✅	O(1)	✅
归并排序	O(nlogn)	❌	O(n)	✅
快速排序	O(nlogn)	✅	O(logn)	❌
堆排序	O(nlogn)	✅	O(1)	❌

关于二叉树的一些概念

堆的定义

二叉堆的定义

二叉堆的基本实现 （最大堆）

shiftUp

shiftUp 程序的编写

测试 insert

shiftDown

shiftDown 程序的编写

堆排序

测试

堆排序优化 heapify

测试

原地堆排序

原地堆排序分析

测试

结束

二叉堆的基本实现（最大堆）