Recursive merge sort pays extra time and stack space for its recursive calls; the non-recursive (bottom-up) version avoids that overhead and needs only O(N) additional space for one temporary array.

Chen Yue's MOOC explains the algorithm very clearly (click here).

Here is the commented code:

```
#include <cstdio>
#include <iostream>
#include <new>
#include <queue>
using namespace std;
typedef long long ll; // element type of the values being sorted
const int maxn = 100005; // capacity of the global input buffer a[]
const int inf = 0x3f3f3f; // presumably an "infinity" sentinel; not referenced in the visible code
int N; // number of elements; read from stdin in main()
ll a[maxn]; // input buffer: main() fills a[0..N-1], sorts it, then prints it
void swap(ll &a, ll &b){
    // Exchange the values of a and b.
    // The temporary must be ll: an int temporary would truncate any value
    // that does not fit in int and silently corrupt b.
    ll tmp = a;
    a = b;
    b = tmp;
}
void Merge(ll a[], ll tmp[], int s, int m, int e) {
    // Merge the two adjacent sorted runs a[s..m-1] and a[m..e] into tmp[s..e],
    // so that tmp[s..e] is sorted.
    //   a   : source array (read only by this function)
    //   tmp : destination array; only indices s..e are written
    //   s   : first index of the left run
    //   m   : first index of the right run (left run ends at m-1)
    //   e   : last index of the right run
    int out = s;          // next write position in tmp
    int p1 = s, p2 = m;   // p1 walks the left run, p2 walks the right run
    while (p1 <= m - 1 && p2 <= e) {
        // "<=" takes from the left run on ties, which keeps the merge stable
        if (a[p1] <= a[p2])
            tmp[out++] = a[p1++];
        else
            tmp[out++] = a[p2++];
    }
    // Exactly one of the runs may still have elements; append them.
    while (p1 <= m - 1)
        tmp[out++] = a[p1++];
    while (p2 <= e)
        tmp[out++] = a[p2++];
    // Intentionally no copy back into a: the merged data lives in tmp[s..e],
    // and the bottom-up passes alternate source and destination buffers.
    // Copying tmp[0..e-s] into a[s..e] would overwrite the source with
    // unrelated data whenever s > 0.
}
void Merge_Pass(ll a[], ll b[], int N, int len) {
    // One bottom-up pass: treat a[0..N-1] as consecutive sorted runs of
    // length len and merge neighbouring runs pairwise, with b as the
    // destination array handed to Merge.
    const int pairWidth = 2 * len;  // span covered by one pair of runs
    int start = 0;

    // Handle every complete pair of full-length runs.
    while (start <= N - pairWidth) {
        Merge(a, b, start, start + len, start + pairWidth - 1);
        start += pairWidth;
    }

    if (start + len >= N) {
        // At most one run is left over: carry it across unchanged.
        for (int k = start; k < N; ++k)
            b[k] = a[k];
    } else {
        // A full run followed by a shorter one remains: merge them.
        Merge(a, b, start, start + len, N - 1);
    }
}
void Merge_Sort(ll a[], int N) {
    // Sort a[0..N-1] in ascending order with non-recursive (bottom-up)
    // merge sort, using one temporary buffer of N elements.
    //   a : array to sort; the sorted result ends up back in a
    //   N : number of elements; values below 2 are a no-op
    // If the temporary buffer cannot be allocated, prints
    // "Insufficient space" and leaves a untouched.
    if (N < 2)
        return;  // nothing to sort; also keeps a negative N away from new[]

    // Plain new throws instead of returning NULL, which would make the
    // failure branch below unreachable; the nothrow form reports failure
    // through a null pointer.
    ll *tmp = new (std::nothrow) ll[N];
    if (tmp == NULL) {
        printf("Insufficient space");
        return;
    }

    // Each iteration runs two passes (a -> tmp, then tmp -> a), so the
    // result is always back in a when the loop ends. The second pass may
    // run with len >= N, in which case Merge_Pass just copies tmp into a.
    for (int len = 1; len < N; ) {
        Merge_Pass(a, tmp, N, len);
        len *= 2;
        Merge_Pass(tmp, a, N, len);
        len *= 2;
    }
    delete [] tmp;
}
int main(){
scanf("%d", &N);
for(int i = 0; i < N; ++i)
scanf("%lld", &a[i]);
Merge_Sort(a, N);
for(int i = 0; i < N; ++i) {
printf(" %lld"+!i, a[i]);
}
return 0;
}
```