Natural merge: mergesort that uses already sorted subarrays
The code bellow is my implementation for the natural merge exercise in Robert Sedgwick's Algorithms book:
Write a version of bottom-up mergesort that takes advantage of order
in the array by proceeding as follows each time it needs to find two
arrays to merge: find a sorted subarray (by incrementing a pointer
until finding an entry that is smaller than its predecessor in the
array), then find the next, then merge them.
def merge(a, lo, mi, hi):
aux_lo = deque(a[lo:mi])
aux_hi = deque(a[mi:hi])
for i in range(lo, hi):
if len(aux_lo) and len(aux_hi):
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif len(aux_lo) or len(aux_hi):
a[i] = aux_lo.popleft() if aux_lo else aux_hi.popleft()
def find_next_stop(a, start):
if start >= len(a)-1:
return start
stop = start + 1
if a[start] < a[stop]:
while(stop<len(a)-1 and a[stop] <= a[stop+1]):
stop += 1
else:
while(stop<len(a)-1 and a[stop] >= a[stop+1]):
stop += 1
_stop = stop
while(start<_stop):
a[_stop], a[start] = a[start], a[_stop]
start += 1
_stop -= 1
return stop
def natural_merge(a):
lo = hi = 0
while(True):
lo = hi
mi = find_next_stop(a, lo)
if lo == 0 and mi == len(a) - 1:
return
hi = find_next_stop(a, mi)
if mi == hi == len(a)-1:
lo = hi = 0
continue
merge(a, lo, mi, hi)
I referenced this answer.
python algorithm python-3.x sorting mergesort
|
show 1 more comment
The code bellow is my implementation for the natural merge exercise in Robert Sedgwick's Algorithms book:
Write a version of bottom-up mergesort that takes advantage of order
in the array by proceeding as follows each time it needs to find two
arrays to merge: find a sorted subarray (by incrementing a pointer
until finding an entry that is smaller than its predecessor in the
array), then find the next, then merge them.
def merge(a, lo, mi, hi):
aux_lo = deque(a[lo:mi])
aux_hi = deque(a[mi:hi])
for i in range(lo, hi):
if len(aux_lo) and len(aux_hi):
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif len(aux_lo) or len(aux_hi):
a[i] = aux_lo.popleft() if aux_lo else aux_hi.popleft()
def find_next_stop(a, start):
if start >= len(a)-1:
return start
stop = start + 1
if a[start] < a[stop]:
while(stop<len(a)-1 and a[stop] <= a[stop+1]):
stop += 1
else:
while(stop<len(a)-1 and a[stop] >= a[stop+1]):
stop += 1
_stop = stop
while(start<_stop):
a[_stop], a[start] = a[start], a[_stop]
start += 1
_stop -= 1
return stop
def natural_merge(a):
lo = hi = 0
while(True):
lo = hi
mi = find_next_stop(a, lo)
if lo == 0 and mi == len(a) - 1:
return
hi = find_next_stop(a, mi)
if mi == hi == len(a)-1:
lo = hi = 0
continue
merge(a, lo, mi, hi)
I referenced this answer.
python algorithm python-3.x sorting mergesort
Add docstrings and typed arguments. Otherwise it's not bad
– Reinderien
Dec 25 '18 at 17:23
1
A pity the code presented is uncommented but for a crypticthis takes more space than allowed
- extra constraint(s)? (There's more than one error inI take this anwser as a referenced.
.)
– greybeard
Dec 25 '18 at 17:32
@greybeard Thanks for helping me point out the errors.
– lerner
Dec 26 '18 at 10:05
3
I've reverted your change. The code in the question should not be updated to ensure that answers stay relevant.
– Josay
Dec 26 '18 at 11:59
@Josay But if you could update your answer according to my change reverted by you, I'd appreciate it very much.
– lerner
Dec 26 '18 at 16:09
|
show 1 more comment
The code bellow is my implementation for the natural merge exercise in Robert Sedgwick's Algorithms book:
Write a version of bottom-up mergesort that takes advantage of order
in the array by proceeding as follows each time it needs to find two
arrays to merge: find a sorted subarray (by incrementing a pointer
until finding an entry that is smaller than its predecessor in the
array), then find the next, then merge them.
def merge(a, lo, mi, hi):
aux_lo = deque(a[lo:mi])
aux_hi = deque(a[mi:hi])
for i in range(lo, hi):
if len(aux_lo) and len(aux_hi):
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif len(aux_lo) or len(aux_hi):
a[i] = aux_lo.popleft() if aux_lo else aux_hi.popleft()
def find_next_stop(a, start):
if start >= len(a)-1:
return start
stop = start + 1
if a[start] < a[stop]:
while(stop<len(a)-1 and a[stop] <= a[stop+1]):
stop += 1
else:
while(stop<len(a)-1 and a[stop] >= a[stop+1]):
stop += 1
_stop = stop
while(start<_stop):
a[_stop], a[start] = a[start], a[_stop]
start += 1
_stop -= 1
return stop
def natural_merge(a):
lo = hi = 0
while(True):
lo = hi
mi = find_next_stop(a, lo)
if lo == 0 and mi == len(a) - 1:
return
hi = find_next_stop(a, mi)
if mi == hi == len(a)-1:
lo = hi = 0
continue
merge(a, lo, mi, hi)
I referenced this answer.
python algorithm python-3.x sorting mergesort
The code bellow is my implementation for the natural merge exercise in Robert Sedgwick's Algorithms book:
Write a version of bottom-up mergesort that takes advantage of order
in the array by proceeding as follows each time it needs to find two
arrays to merge: find a sorted subarray (by incrementing a pointer
until finding an entry that is smaller than its predecessor in the
array), then find the next, then merge them.
def merge(a, lo, mi, hi):
aux_lo = deque(a[lo:mi])
aux_hi = deque(a[mi:hi])
for i in range(lo, hi):
if len(aux_lo) and len(aux_hi):
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif len(aux_lo) or len(aux_hi):
a[i] = aux_lo.popleft() if aux_lo else aux_hi.popleft()
def find_next_stop(a, start):
if start >= len(a)-1:
return start
stop = start + 1
if a[start] < a[stop]:
while(stop<len(a)-1 and a[stop] <= a[stop+1]):
stop += 1
else:
while(stop<len(a)-1 and a[stop] >= a[stop+1]):
stop += 1
_stop = stop
while(start<_stop):
a[_stop], a[start] = a[start], a[_stop]
start += 1
_stop -= 1
return stop
def natural_merge(a):
lo = hi = 0
while(True):
lo = hi
mi = find_next_stop(a, lo)
if lo == 0 and mi == len(a) - 1:
return
hi = find_next_stop(a, mi)
if mi == hi == len(a)-1:
lo = hi = 0
continue
merge(a, lo, mi, hi)
I referenced this answer.
python algorithm python-3.x sorting mergesort
python algorithm python-3.x sorting mergesort
edited Dec 26 '18 at 11:58
Josay
25.6k14087
25.6k14087
asked Dec 25 '18 at 17:14
lernerlerner
1737
1737
Add docstrings and typed arguments. Otherwise it's not bad
– Reinderien
Dec 25 '18 at 17:23
1
A pity the code presented is uncommented but for a crypticthis takes more space than allowed
- extra constraint(s)? (There's more than one error inI take this anwser as a referenced.
.)
– greybeard
Dec 25 '18 at 17:32
@greybeard Thanks for helping me point out the errors.
– lerner
Dec 26 '18 at 10:05
3
I've reverted your change. The code in the question should not be updated to ensure that answers stay relevant.
– Josay
Dec 26 '18 at 11:59
@Josay But if you could update your answer according to my change reverted by you, I'd appreciate it very much.
– lerner
Dec 26 '18 at 16:09
|
show 1 more comment
Add docstrings and typed arguments. Otherwise it's not bad
– Reinderien
Dec 25 '18 at 17:23
1
A pity the code presented is uncommented but for a crypticthis takes more space than allowed
- extra constraint(s)? (There's more than one error inI take this anwser as a referenced.
.)
– greybeard
Dec 25 '18 at 17:32
@greybeard Thanks for helping me point out the errors.
– lerner
Dec 26 '18 at 10:05
3
I've reverted your change. The code in the question should not be updated to ensure that answers stay relevant.
– Josay
Dec 26 '18 at 11:59
@Josay But if you could update your answer according to my change reverted by you, I'd appreciate it very much.
– lerner
Dec 26 '18 at 16:09
Add docstrings and typed arguments. Otherwise it's not bad
– Reinderien
Dec 25 '18 at 17:23
Add docstrings and typed arguments. Otherwise it's not bad
– Reinderien
Dec 25 '18 at 17:23
1
1
A pity the code presented is uncommented but for a cryptic
this takes more space than allowed
- extra constraint(s)? (There's more than one error in I take this anwser as a referenced.
.)– greybeard
Dec 25 '18 at 17:32
A pity the code presented is uncommented but for a cryptic
this takes more space than allowed
- extra constraint(s)? (There's more than one error in I take this anwser as a referenced.
.)– greybeard
Dec 25 '18 at 17:32
@greybeard Thanks for helping me point out the errors.
– lerner
Dec 26 '18 at 10:05
@greybeard Thanks for helping me point out the errors.
– lerner
Dec 26 '18 at 10:05
3
3
I've reverted your change. The code in the question should not be updated to ensure that answers stay relevant.
– Josay
Dec 26 '18 at 11:59
I've reverted your change. The code in the question should not be updated to ensure that answers stay relevant.
– Josay
Dec 26 '18 at 11:59
@Josay But if you could update your answer according to my change reverted by you, I'd appreciate it very much.
– lerner
Dec 26 '18 at 16:09
@Josay But if you could update your answer according to my change reverted by you, I'd appreciate it very much.
– lerner
Dec 26 '18 at 16:09
|
show 1 more comment
1 Answer
1
active
oldest
votes
Tests and bugs
Your code looks mostly good. Before trying to go and change the code to see what can be improved. I usually try to write a few simple tests. This is even easier when you have a reference implementation that can be compared to your function. In your case, I wrote:
TESTS = [
,
[0],
[0, 0, 0],
[0, 1, 2],
[0, 1, 2, 3, 4, 5],
[5, 4, 3, 2, 1, 0],
[5, 2, 3, 1, 0, 4],
[5, 2, 5, 3, 1, 3, 0, 4, 5],
]
for t in TESTS:
print(t)
ref_lst, tst_lst = list(t), list(t)
ref_lst.sort()
natural_merge(tst_lst)
print(ref_lst, tst_lst)
assert ref_lst == tst_lst
which leads to a first comment: the empty list is not handled properly and the function never returns.
Improving merge
The case elif len(aux_lo) or len(aux_hi)
seems complicated as we check if aux_lo
just after. Things would be clearer if we were to split in two different cases:
if len(aux_lo) and len(aux_hi):
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif len(aux_lo):
a[i] = aux_lo.popleft()
elif len(aux_hi):
a[i] = aux_hi.popleft()
Also, we could reuse the fact that in a boolean context, list are considered to be true if and only if they are not empty to write:
for i in range(lo, hi):
if aux_lo and aux_hi:
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif aux_lo:
a[i] = aux_lo.popleft()
elif aux_hi:
a[i] = aux_hi.popleft()
Improving find_next_stop
You don't need so many parenthesis.
You could store len(a) - 1
in a variable in order not to re-compute it every time.
The name _stop
is pretty ugly. I do not have any great suggestion for an alternative but end
seems okay-ish.
More tests... and more bugs
I wanted to add a few tests to verify an assumption... and I stumbled upon another issue.
Here is the corresponding test suite:
TESTS = [
,
[0],
[0, 0, 0],
[0, 1, 2],
[0, 1, 2, 3, 4, 5],
[5, 4, 3, 2, 1, 0],
[0, 1, 2, 2, 1, 0],
[0, 1, 2, 3, 2, 1, 0],
[5, 2, 3, 1, 0, 4],
[5, 2, 5, 3, 1, 3, 0, 4, 5],
[5, 2, 5, 3, 1, 3, 0, 4, 5, 3, 1, 0, 1, 5, 2, 5, 3, 1, 3, 0, 4, 5],
]
Taking into account my comments and the fix you tried to add in the question, we now have:
from collections import deque
def merge(a, lo, mi, hi):
aux_lo = deque(a[lo:mi])
aux_hi = deque(a[mi:hi])
for i in range(lo, hi):
if aux_lo and aux_hi:
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif aux_lo:
a[i] = aux_lo.popleft()
elif aux_hi:
a[i] = aux_hi.popleft()
def find_next_stop(a, start):
upper = len(a) - 1
if start >= upper:
return start
stop = start + 1
if a[start] <= a[stop]:
while stop < upper and a[stop] <= a[stop+1]:
stop += 1
else:
while stop < upper and a[stop] >= a[stop+1]:
stop += 1
end = stop
while start < end:
a[end], a[start] = a[start], a[end]
start += 1
end -= 1
return stop
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
lo = hi = 0
while True:
lo = hi
mi = find_next_stop(a, lo)
if lo == 0 and mi == upper:
return
hi = find_next_stop(a, mi)
if mi == hi == upper:
lo = hi = 0
else:
merge(a, lo, mi, hi)
TESTS = [
,
[0],
[0, 0, 0],
[0, 1, 2],
[0, 1, 2, 3, 4, 5],
[5, 4, 3, 2, 1, 0],
[0, 1, 2, 2, 1, 0],
[0, 1, 2, 3, 2, 1, 0],
[5, 2, 3, 1, 0, 4],
[5, 2, 5, 3, 1, 3, 0, 4, 5],
[5, 2, 5, 3, 1, 3, 0, 4, 5, 3, 1, 0, 1, 5, 2, 5, 3, 1, 3, 0, 4, 5],
]
for t in TESTS:
print(t)
ref_lst, tst_lst = list(t), list(t)
ref_lst.sort()
natural_merge(tst_lst)
print(ref_lst, tst_lst)
assert ref_lst == tst_lst
More improvements in find_next_stop
We have a while
loop but we can compute the number of iterations we'll need: it corresponds to have the distance between start
and stop
. We could use a for _ in range
loop to perform this. It has pros and cons but one of the key aspect is that we do not need to change start
and stop
, thus we don't need to copy the value in a variable.
for k in range((1 + stop - start) // 2):
i, j = start + k, stop - k
a[i], a[j] = a[j], a[i]
More improvements in natural_merge
A few steps can be used to re-organise the function:
- see that we can move the assignment
lo = hi
from the beginning of the loop to the end of the loop with no impact - realise that it is already done in the first branch of the test already so move it to the
else
block exclusively - see that the initialisation of
hi
is not required anymore - notice that the condition
mi == upper
is checked in 2 places (with the same value ofmi
andupper
and that iflo != 0
, we see thatmi == upper
directly leads tofind_next(a, mi)
returningupper
and thus ending withmi == hi == upper
and thus to the assignmentlo = hi = 0
.
At this stage, we have:
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
lo = 0
while True:
mi = find_next_stop(a, lo)
if mi == upper:
if lo == 0:
return
lo = hi = 0
else:
hi = find_next_stop(a, mi)
merge(a, lo, mi, hi)
lo = hi
We can go further:
- the assignment
hi = 0
has no effect - we can reorganise conditions
We'd get
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
lo = 0
while True:
mi = find_next_stop(a, lo)
if mi != upper:
hi = find_next_stop(a, mi)
merge(a, lo, mi, hi)
lo = hi
elif lo == 0:
return
else:
lo = 0
Interestingly, removing lo = hi
leads a much more efficient code on my benchmark: the function returns much more quickly (because we always have lo == 0
, we get out of the loop as soon as mi == upper
) and the list is still fully sorted.
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
while True:
mi = find_next_stop(a, 0)
if mi == upper:
return
hi = find_next_stop(a, mi)
merge(a, 0, mi, hi)
This looked surprising at first but thinking about it, it looks like this may be the way this algorithm is supposed to be.
Oh, I get to know the strategy of test. Thanks.
– lerner
Dec 26 '18 at 9:47
add a comment |
Your Answer
StackExchange.ifUsing("editor", function () {
return StackExchange.using("mathjaxEditing", function () {
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
});
});
}, "mathjax-editing");
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "196"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: false,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f210315%2fnatural-merge-mergesort-that-uses-already-sorted-subarrays%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
Tests and bugs
Your code looks mostly good. Before trying to go and change the code to see what can be improved. I usually try to write a few simple tests. This is even easier when you have a reference implementation that can be compared to your function. In your case, I wrote:
TESTS = [
,
[0],
[0, 0, 0],
[0, 1, 2],
[0, 1, 2, 3, 4, 5],
[5, 4, 3, 2, 1, 0],
[5, 2, 3, 1, 0, 4],
[5, 2, 5, 3, 1, 3, 0, 4, 5],
]
for t in TESTS:
print(t)
ref_lst, tst_lst = list(t), list(t)
ref_lst.sort()
natural_merge(tst_lst)
print(ref_lst, tst_lst)
assert ref_lst == tst_lst
which leads to a first comment: the empty list is not handled properly and the function never returns.
Improving merge
The case elif len(aux_lo) or len(aux_hi)
seems complicated as we check if aux_lo
just after. Things would be clearer if we were to split in two different cases:
if len(aux_lo) and len(aux_hi):
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif len(aux_lo):
a[i] = aux_lo.popleft()
elif len(aux_hi):
a[i] = aux_hi.popleft()
Also, we could reuse the fact that in a boolean context, list are considered to be true if and only if they are not empty to write:
for i in range(lo, hi):
if aux_lo and aux_hi:
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif aux_lo:
a[i] = aux_lo.popleft()
elif aux_hi:
a[i] = aux_hi.popleft()
Improving find_next_stop
You don't need so many parenthesis.
You could store len(a) - 1
in a variable in order not to re-compute it every time.
The name _stop
is pretty ugly. I do not have any great suggestion for an alternative but end
seems okay-ish.
More tests... and more bugs
I wanted to add a few tests to verify an assumption... and I stumbled upon another issue.
Here is the corresponding test suite:
TESTS = [
,
[0],
[0, 0, 0],
[0, 1, 2],
[0, 1, 2, 3, 4, 5],
[5, 4, 3, 2, 1, 0],
[0, 1, 2, 2, 1, 0],
[0, 1, 2, 3, 2, 1, 0],
[5, 2, 3, 1, 0, 4],
[5, 2, 5, 3, 1, 3, 0, 4, 5],
[5, 2, 5, 3, 1, 3, 0, 4, 5, 3, 1, 0, 1, 5, 2, 5, 3, 1, 3, 0, 4, 5],
]
Taking into account my comments and the fix you tried to add in the question, we now have:
from collections import deque
def merge(a, lo, mi, hi):
aux_lo = deque(a[lo:mi])
aux_hi = deque(a[mi:hi])
for i in range(lo, hi):
if aux_lo and aux_hi:
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif aux_lo:
a[i] = aux_lo.popleft()
elif aux_hi:
a[i] = aux_hi.popleft()
def find_next_stop(a, start):
upper = len(a) - 1
if start >= upper:
return start
stop = start + 1
if a[start] <= a[stop]:
while stop < upper and a[stop] <= a[stop+1]:
stop += 1
else:
while stop < upper and a[stop] >= a[stop+1]:
stop += 1
end = stop
while start < end:
a[end], a[start] = a[start], a[end]
start += 1
end -= 1
return stop
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
lo = hi = 0
while True:
lo = hi
mi = find_next_stop(a, lo)
if lo == 0 and mi == upper:
return
hi = find_next_stop(a, mi)
if mi == hi == upper:
lo = hi = 0
else:
merge(a, lo, mi, hi)
TESTS = [
,
[0],
[0, 0, 0],
[0, 1, 2],
[0, 1, 2, 3, 4, 5],
[5, 4, 3, 2, 1, 0],
[0, 1, 2, 2, 1, 0],
[0, 1, 2, 3, 2, 1, 0],
[5, 2, 3, 1, 0, 4],
[5, 2, 5, 3, 1, 3, 0, 4, 5],
[5, 2, 5, 3, 1, 3, 0, 4, 5, 3, 1, 0, 1, 5, 2, 5, 3, 1, 3, 0, 4, 5],
]
for t in TESTS:
print(t)
ref_lst, tst_lst = list(t), list(t)
ref_lst.sort()
natural_merge(tst_lst)
print(ref_lst, tst_lst)
assert ref_lst == tst_lst
More improvements in find_next_stop
We have a while
loop but we can compute the number of iterations we'll need: it corresponds to have the distance between start
and stop
. We could use a for _ in range
loop to perform this. It has pros and cons but one of the key aspect is that we do not need to change start
and stop
, thus we don't need to copy the value in a variable.
for k in range((1 + stop - start) // 2):
i, j = start + k, stop - k
a[i], a[j] = a[j], a[i]
More improvements in natural_merge
A few steps can be used to re-organise the function:
- see that we can move the assignment
lo = hi
from the beginning of the loop to the end of the loop with no impact - realise that it is already done in the first branch of the test already so move it to the
else
block exclusively - see that the initialisation of
hi
is not required anymore - notice that the condition
mi == upper
is checked in 2 places (with the same value ofmi
andupper
and that iflo != 0
, we see thatmi == upper
directly leads tofind_next(a, mi)
returningupper
and thus ending withmi == hi == upper
and thus to the assignmentlo = hi = 0
.
At this stage, we have:
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
lo = 0
while True:
mi = find_next_stop(a, lo)
if mi == upper:
if lo == 0:
return
lo = hi = 0
else:
hi = find_next_stop(a, mi)
merge(a, lo, mi, hi)
lo = hi
We can go further:
- the assignment
hi = 0
has no effect - we can reorganise conditions
We'd get
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
lo = 0
while True:
mi = find_next_stop(a, lo)
if mi != upper:
hi = find_next_stop(a, mi)
merge(a, lo, mi, hi)
lo = hi
elif lo == 0:
return
else:
lo = 0
Interestingly, removing lo = hi
leads a much more efficient code on my benchmark: the function returns much more quickly (because we always have lo == 0
, we get out of the loop as soon as mi == upper
) and the list is still fully sorted.
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
while True:
mi = find_next_stop(a, 0)
if mi == upper:
return
hi = find_next_stop(a, mi)
merge(a, 0, mi, hi)
This looked surprising at first but thinking about it, it looks like this may be the way this algorithm is supposed to be.
Oh, I get to know the strategy of test. Thanks.
– lerner
Dec 26 '18 at 9:47
add a comment |
Tests and bugs
Your code looks mostly good. Before trying to go and change the code to see what can be improved. I usually try to write a few simple tests. This is even easier when you have a reference implementation that can be compared to your function. In your case, I wrote:
TESTS = [
,
[0],
[0, 0, 0],
[0, 1, 2],
[0, 1, 2, 3, 4, 5],
[5, 4, 3, 2, 1, 0],
[5, 2, 3, 1, 0, 4],
[5, 2, 5, 3, 1, 3, 0, 4, 5],
]
for t in TESTS:
print(t)
ref_lst, tst_lst = list(t), list(t)
ref_lst.sort()
natural_merge(tst_lst)
print(ref_lst, tst_lst)
assert ref_lst == tst_lst
which leads to a first comment: the empty list is not handled properly and the function never returns.
Improving merge
The case elif len(aux_lo) or len(aux_hi)
seems complicated as we check if aux_lo
just after. Things would be clearer if we were to split in two different cases:
if len(aux_lo) and len(aux_hi):
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif len(aux_lo):
a[i] = aux_lo.popleft()
elif len(aux_hi):
a[i] = aux_hi.popleft()
Also, we could reuse the fact that in a boolean context, list are considered to be true if and only if they are not empty to write:
for i in range(lo, hi):
if aux_lo and aux_hi:
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif aux_lo:
a[i] = aux_lo.popleft()
elif aux_hi:
a[i] = aux_hi.popleft()
Improving find_next_stop
You don't need so many parenthesis.
You could store len(a) - 1
in a variable in order not to re-compute it every time.
The name _stop
is pretty ugly. I do not have any great suggestion for an alternative but end
seems okay-ish.
More tests... and more bugs
I wanted to add a few tests to verify an assumption... and I stumbled upon another issue.
Here is the corresponding test suite:
TESTS = [
,
[0],
[0, 0, 0],
[0, 1, 2],
[0, 1, 2, 3, 4, 5],
[5, 4, 3, 2, 1, 0],
[0, 1, 2, 2, 1, 0],
[0, 1, 2, 3, 2, 1, 0],
[5, 2, 3, 1, 0, 4],
[5, 2, 5, 3, 1, 3, 0, 4, 5],
[5, 2, 5, 3, 1, 3, 0, 4, 5, 3, 1, 0, 1, 5, 2, 5, 3, 1, 3, 0, 4, 5],
]
Taking into account my comments and the fix you tried to add in the question, we now have:
from collections import deque
def merge(a, lo, mi, hi):
aux_lo = deque(a[lo:mi])
aux_hi = deque(a[mi:hi])
for i in range(lo, hi):
if aux_lo and aux_hi:
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif aux_lo:
a[i] = aux_lo.popleft()
elif aux_hi:
a[i] = aux_hi.popleft()
def find_next_stop(a, start):
upper = len(a) - 1
if start >= upper:
return start
stop = start + 1
if a[start] <= a[stop]:
while stop < upper and a[stop] <= a[stop+1]:
stop += 1
else:
while stop < upper and a[stop] >= a[stop+1]:
stop += 1
end = stop
while start < end:
a[end], a[start] = a[start], a[end]
start += 1
end -= 1
return stop
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
lo = hi = 0
while True:
lo = hi
mi = find_next_stop(a, lo)
if lo == 0 and mi == upper:
return
hi = find_next_stop(a, mi)
if mi == hi == upper:
lo = hi = 0
else:
merge(a, lo, mi, hi)
TESTS = [
,
[0],
[0, 0, 0],
[0, 1, 2],
[0, 1, 2, 3, 4, 5],
[5, 4, 3, 2, 1, 0],
[0, 1, 2, 2, 1, 0],
[0, 1, 2, 3, 2, 1, 0],
[5, 2, 3, 1, 0, 4],
[5, 2, 5, 3, 1, 3, 0, 4, 5],
[5, 2, 5, 3, 1, 3, 0, 4, 5, 3, 1, 0, 1, 5, 2, 5, 3, 1, 3, 0, 4, 5],
]
for t in TESTS:
print(t)
ref_lst, tst_lst = list(t), list(t)
ref_lst.sort()
natural_merge(tst_lst)
print(ref_lst, tst_lst)
assert ref_lst == tst_lst
More improvements in find_next_stop
We have a while
loop but we can compute the number of iterations we'll need: it corresponds to have the distance between start
and stop
. We could use a for _ in range
loop to perform this. It has pros and cons but one of the key aspect is that we do not need to change start
and stop
, thus we don't need to copy the value in a variable.
for k in range((1 + stop - start) // 2):
i, j = start + k, stop - k
a[i], a[j] = a[j], a[i]
More improvements in natural_merge
A few steps can be used to re-organise the function:
- see that we can move the assignment
lo = hi
from the beginning of the loop to the end of the loop with no impact - realise that it is already done in the first branch of the test already so move it to the
else
block exclusively - see that the initialisation of
hi
is not required anymore - notice that the condition
mi == upper
is checked in 2 places (with the same value ofmi
andupper
and that iflo != 0
, we see thatmi == upper
directly leads tofind_next(a, mi)
returningupper
and thus ending withmi == hi == upper
and thus to the assignmentlo = hi = 0
.
At this stage, we have:
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
lo = 0
while True:
mi = find_next_stop(a, lo)
if mi == upper:
if lo == 0:
return
lo = hi = 0
else:
hi = find_next_stop(a, mi)
merge(a, lo, mi, hi)
lo = hi
We can go further:
- the assignment
hi = 0
has no effect - we can reorganise conditions
We'd get
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
lo = 0
while True:
mi = find_next_stop(a, lo)
if mi != upper:
hi = find_next_stop(a, mi)
merge(a, lo, mi, hi)
lo = hi
elif lo == 0:
return
else:
lo = 0
Interestingly, removing lo = hi
leads a much more efficient code on my benchmark: the function returns much more quickly (because we always have lo == 0
, we get out of the loop as soon as mi == upper
) and the list is still fully sorted.
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
while True:
mi = find_next_stop(a, 0)
if mi == upper:
return
hi = find_next_stop(a, mi)
merge(a, 0, mi, hi)
This looked surprising at first but thinking about it, it looks like this may be the way this algorithm is supposed to be.
Oh, I get to know the strategy of test. Thanks.
– lerner
Dec 26 '18 at 9:47
add a comment |
Tests and bugs
Your code looks mostly good. Before trying to go and change the code to see what can be improved. I usually try to write a few simple tests. This is even easier when you have a reference implementation that can be compared to your function. In your case, I wrote:
TESTS = [
,
[0],
[0, 0, 0],
[0, 1, 2],
[0, 1, 2, 3, 4, 5],
[5, 4, 3, 2, 1, 0],
[5, 2, 3, 1, 0, 4],
[5, 2, 5, 3, 1, 3, 0, 4, 5],
]
for t in TESTS:
print(t)
ref_lst, tst_lst = list(t), list(t)
ref_lst.sort()
natural_merge(tst_lst)
print(ref_lst, tst_lst)
assert ref_lst == tst_lst
which leads to a first comment: the empty list is not handled properly and the function never returns.
Improving merge
The case elif len(aux_lo) or len(aux_hi)
seems complicated as we check if aux_lo
just after. Things would be clearer if we were to split in two different cases:
if len(aux_lo) and len(aux_hi):
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif len(aux_lo):
a[i] = aux_lo.popleft()
elif len(aux_hi):
a[i] = aux_hi.popleft()
Also, we could reuse the fact that in a boolean context, list are considered to be true if and only if they are not empty to write:
for i in range(lo, hi):
if aux_lo and aux_hi:
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif aux_lo:
a[i] = aux_lo.popleft()
elif aux_hi:
a[i] = aux_hi.popleft()
Improving find_next_stop
You don't need so many parenthesis.
You could store len(a) - 1
in a variable in order not to re-compute it every time.
The name _stop
is pretty ugly. I do not have any great suggestion for an alternative but end
seems okay-ish.
More tests... and more bugs
I wanted to add a few tests to verify an assumption... and I stumbled upon another issue.
Here is the corresponding test suite:
TESTS = [
,
[0],
[0, 0, 0],
[0, 1, 2],
[0, 1, 2, 3, 4, 5],
[5, 4, 3, 2, 1, 0],
[0, 1, 2, 2, 1, 0],
[0, 1, 2, 3, 2, 1, 0],
[5, 2, 3, 1, 0, 4],
[5, 2, 5, 3, 1, 3, 0, 4, 5],
[5, 2, 5, 3, 1, 3, 0, 4, 5, 3, 1, 0, 1, 5, 2, 5, 3, 1, 3, 0, 4, 5],
]
Taking into account my comments and the fix you tried to add in the question, we now have:
from collections import deque
def merge(a, lo, mi, hi):
aux_lo = deque(a[lo:mi])
aux_hi = deque(a[mi:hi])
for i in range(lo, hi):
if aux_lo and aux_hi:
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif aux_lo:
a[i] = aux_lo.popleft()
elif aux_hi:
a[i] = aux_hi.popleft()
def find_next_stop(a, start):
upper = len(a) - 1
if start >= upper:
return start
stop = start + 1
if a[start] <= a[stop]:
while stop < upper and a[stop] <= a[stop+1]:
stop += 1
else:
while stop < upper and a[stop] >= a[stop+1]:
stop += 1
end = stop
while start < end:
a[end], a[start] = a[start], a[end]
start += 1
end -= 1
return stop
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
lo = hi = 0
while True:
lo = hi
mi = find_next_stop(a, lo)
if lo == 0 and mi == upper:
return
hi = find_next_stop(a, mi)
if mi == hi == upper:
lo = hi = 0
else:
merge(a, lo, mi, hi)
TESTS = [
,
[0],
[0, 0, 0],
[0, 1, 2],
[0, 1, 2, 3, 4, 5],
[5, 4, 3, 2, 1, 0],
[0, 1, 2, 2, 1, 0],
[0, 1, 2, 3, 2, 1, 0],
[5, 2, 3, 1, 0, 4],
[5, 2, 5, 3, 1, 3, 0, 4, 5],
[5, 2, 5, 3, 1, 3, 0, 4, 5, 3, 1, 0, 1, 5, 2, 5, 3, 1, 3, 0, 4, 5],
]
for t in TESTS:
print(t)
ref_lst, tst_lst = list(t), list(t)
ref_lst.sort()
natural_merge(tst_lst)
print(ref_lst, tst_lst)
assert ref_lst == tst_lst
More improvements in find_next_stop
We have a while
loop but we can compute the number of iterations we'll need: it corresponds to have the distance between start
and stop
. We could use a for _ in range
loop to perform this. It has pros and cons but one of the key aspect is that we do not need to change start
and stop
, thus we don't need to copy the value in a variable.
for k in range((1 + stop - start) // 2):
i, j = start + k, stop - k
a[i], a[j] = a[j], a[i]
More improvements in natural_merge
A few steps can be used to re-organise the function:
- see that we can move the assignment
lo = hi
from the beginning of the loop to the end of the loop with no impact - realise that it is already done in the first branch of the test already so move it to the
else
block exclusively - see that the initialisation of
hi
is not required anymore - notice that the condition
mi == upper
is checked in 2 places (with the same value ofmi
andupper
and that iflo != 0
, we see thatmi == upper
directly leads tofind_next(a, mi)
returningupper
and thus ending withmi == hi == upper
and thus to the assignmentlo = hi = 0
.
At this stage, we have:
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
lo = 0
while True:
mi = find_next_stop(a, lo)
if mi == upper:
if lo == 0:
return
lo = hi = 0
else:
hi = find_next_stop(a, mi)
merge(a, lo, mi, hi)
lo = hi
We can go further:
- the assignment
hi = 0
has no effect - we can reorganise conditions
We'd get
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
lo = 0
while True:
mi = find_next_stop(a, lo)
if mi != upper:
hi = find_next_stop(a, mi)
merge(a, lo, mi, hi)
lo = hi
elif lo == 0:
return
else:
lo = 0
Interestingly, removing lo = hi
leads a much more efficient code on my benchmark: the function returns much more quickly (because we always have lo == 0
, we get out of the loop as soon as mi == upper
) and the list is still fully sorted.
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
while True:
mi = find_next_stop(a, 0)
if mi == upper:
return
hi = find_next_stop(a, mi)
merge(a, 0, mi, hi)
This looked surprising at first but thinking about it, it looks like this may be the way this algorithm is supposed to be.
Tests and bugs
Your code looks mostly good. Before trying to go and change the code to see what can be improved. I usually try to write a few simple tests. This is even easier when you have a reference implementation that can be compared to your function. In your case, I wrote:
TESTS = [
,
[0],
[0, 0, 0],
[0, 1, 2],
[0, 1, 2, 3, 4, 5],
[5, 4, 3, 2, 1, 0],
[5, 2, 3, 1, 0, 4],
[5, 2, 5, 3, 1, 3, 0, 4, 5],
]
for t in TESTS:
print(t)
ref_lst, tst_lst = list(t), list(t)
ref_lst.sort()
natural_merge(tst_lst)
print(ref_lst, tst_lst)
assert ref_lst == tst_lst
which leads to a first comment: the empty list is not handled properly and the function never returns.
Improving merge
The case elif len(aux_lo) or len(aux_hi)
seems complicated as we check if aux_lo
just after. Things would be clearer if we were to split in two different cases:
if len(aux_lo) and len(aux_hi):
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif len(aux_lo):
a[i] = aux_lo.popleft()
elif len(aux_hi):
a[i] = aux_hi.popleft()
Also, we could reuse the fact that in a boolean context, list are considered to be true if and only if they are not empty to write:
for i in range(lo, hi):
if aux_lo and aux_hi:
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif aux_lo:
a[i] = aux_lo.popleft()
elif aux_hi:
a[i] = aux_hi.popleft()
Improving find_next_stop
You don't need so many parenthesis.
You could store len(a) - 1
in a variable in order not to re-compute it every time.
The name _stop
is pretty ugly. I do not have any great suggestion for an alternative but end
seems okay-ish.
More tests... and more bugs
I wanted to add a few tests to verify an assumption... and I stumbled upon another issue.
Here is the corresponding test suite:
TESTS = [
,
[0],
[0, 0, 0],
[0, 1, 2],
[0, 1, 2, 3, 4, 5],
[5, 4, 3, 2, 1, 0],
[0, 1, 2, 2, 1, 0],
[0, 1, 2, 3, 2, 1, 0],
[5, 2, 3, 1, 0, 4],
[5, 2, 5, 3, 1, 3, 0, 4, 5],
[5, 2, 5, 3, 1, 3, 0, 4, 5, 3, 1, 0, 1, 5, 2, 5, 3, 1, 3, 0, 4, 5],
]
Taking into account my comments and the fix you tried to add in the question, we now have:
from collections import deque
def merge(a, lo, mi, hi):
aux_lo = deque(a[lo:mi])
aux_hi = deque(a[mi:hi])
for i in range(lo, hi):
if aux_lo and aux_hi:
a[i] = aux_lo.popleft() if aux_lo[0] < aux_hi[0] else aux_hi.popleft()
elif aux_lo:
a[i] = aux_lo.popleft()
elif aux_hi:
a[i] = aux_hi.popleft()
def find_next_stop(a, start):
upper = len(a) - 1
if start >= upper:
return start
stop = start + 1
if a[start] <= a[stop]:
while stop < upper and a[stop] <= a[stop+1]:
stop += 1
else:
while stop < upper and a[stop] >= a[stop+1]:
stop += 1
end = stop
while start < end:
a[end], a[start] = a[start], a[end]
start += 1
end -= 1
return stop
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
lo = hi = 0
while True:
lo = hi
mi = find_next_stop(a, lo)
if lo == 0 and mi == upper:
return
hi = find_next_stop(a, mi)
if mi == hi == upper:
lo = hi = 0
else:
merge(a, lo, mi, hi)
TESTS = [
,
[0],
[0, 0, 0],
[0, 1, 2],
[0, 1, 2, 3, 4, 5],
[5, 4, 3, 2, 1, 0],
[0, 1, 2, 2, 1, 0],
[0, 1, 2, 3, 2, 1, 0],
[5, 2, 3, 1, 0, 4],
[5, 2, 5, 3, 1, 3, 0, 4, 5],
[5, 2, 5, 3, 1, 3, 0, 4, 5, 3, 1, 0, 1, 5, 2, 5, 3, 1, 3, 0, 4, 5],
]
for t in TESTS:
print(t)
ref_lst, tst_lst = list(t), list(t)
ref_lst.sort()
natural_merge(tst_lst)
print(ref_lst, tst_lst)
assert ref_lst == tst_lst
More improvements in find_next_stop
We have a while
loop but we can compute the number of iterations we'll need: it corresponds to have the distance between start
and stop
. We could use a for _ in range
loop to perform this. It has pros and cons but one of the key aspect is that we do not need to change start
and stop
, thus we don't need to copy the value in a variable.
for k in range((1 + stop - start) // 2):
i, j = start + k, stop - k
a[i], a[j] = a[j], a[i]
More improvements in natural_merge
A few steps can be used to re-organise the function:
- see that we can move the assignment
lo = hi
from the beginning of the loop to the end of the loop with no impact - realise that it is already done in the first branch of the test already so move it to the
else
block exclusively - see that the initialisation of
hi
is not required anymore - notice that the condition
mi == upper
is checked in 2 places (with the same value ofmi
andupper
and that iflo != 0
, we see thatmi == upper
directly leads tofind_next(a, mi)
returningupper
and thus ending withmi == hi == upper
and thus to the assignmentlo = hi = 0
.
At this stage, we have:
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
lo = 0
while True:
mi = find_next_stop(a, lo)
if mi == upper:
if lo == 0:
return
lo = hi = 0
else:
hi = find_next_stop(a, mi)
merge(a, lo, mi, hi)
lo = hi
We can go further:
- the assignment
hi = 0
has no effect - we can reorganise conditions
We'd get
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
lo = 0
while True:
mi = find_next_stop(a, lo)
if mi != upper:
hi = find_next_stop(a, mi)
merge(a, lo, mi, hi)
lo = hi
elif lo == 0:
return
else:
lo = 0
Interestingly, removing lo = hi
leads a much more efficient code on my benchmark: the function returns much more quickly (because we always have lo == 0
, we get out of the loop as soon as mi == upper
) and the list is still fully sorted.
def natural_merge(a):
upper = len(a) - 1
if upper <= 0:
return
while True:
mi = find_next_stop(a, 0)
if mi == upper:
return
hi = find_next_stop(a, mi)
merge(a, 0, mi, hi)
This looked surprising at first but thinking about it, it looks like this may be the way this algorithm is supposed to be.
edited Dec 27 '18 at 2:10
answered Dec 25 '18 at 23:25
JosayJosay
25.6k14087
25.6k14087
Oh, I get to know the strategy of test. Thanks.
– lerner
Dec 26 '18 at 9:47
add a comment |
Oh, I get to know the strategy of test. Thanks.
– lerner
Dec 26 '18 at 9:47
Oh, I get to know the strategy of test. Thanks.
– lerner
Dec 26 '18 at 9:47
Oh, I get to know the strategy of test. Thanks.
– lerner
Dec 26 '18 at 9:47
add a comment |
Thanks for contributing an answer to Code Review Stack Exchange!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
Use MathJax to format equations. MathJax reference.
To learn more, see our tips on writing great answers.
Some of your past answers have not been well-received, and you're in danger of being blocked from answering.
Please pay close attention to the following guidance:
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f210315%2fnatural-merge-mergesort-that-uses-already-sorted-subarrays%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Add docstrings and typed arguments. Otherwise it's not bad
– Reinderien
Dec 25 '18 at 17:23
1
A pity the code presented is uncommented but for a cryptic
this takes more space than allowed
- extra constraint(s)? (There's more than one error inI take this anwser as a referenced.
.)– greybeard
Dec 25 '18 at 17:32
@greybeard Thanks for helping me point out the errors.
– lerner
Dec 26 '18 at 10:05
3
I've reverted your change. The code in the question should not be updated to ensure that answers stay relevant.
– Josay
Dec 26 '18 at 11:59
@Josay But if you could update your answer according to my change reverted by you, I'd appreciate it very much.
– lerner
Dec 26 '18 at 16:09