It took a long time to track down the optimizer problem, mainly because the bug disappears when the code is compiled in debug mode.
Then I disassembled the routine and analyzed the assembler source.
Here the problem is evident: because of an overly aggressive optimization, the loop is entered at the instruction that increments the string pointer, so the first character of the string is skipped:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; strrpl_asm_bad@16 -- FAULTY version, kept on purpose.
; This is an exact copy of the code obtained by disassembling the
; compiled C source. Do not "fix" it: it exists to reproduce the bug.
;
; What the code does:
;   - allocates n+1 bytes with calloc (element size 1),
;   - copies at most n bytes from the string addressed by pstr into
;     that buffer, storing newchar wherever the source byte equals
;     oldchar, and stopping early at a zero byte,
;   - no terminator is written explicitly; the code relies on the
;     zero-filled buffer returned by calloc.
;
; In:    n, oldchar, newchar, pstr are symbolic operands that are not
;        defined in this listing -- presumably the stack arguments;
;        confirm against the module that defines them.
; Out:   eax = pointer returned by calloc, or 0 if calloc returned 0.
; Stack: 'ret 10h' removes 16 bytes of arguments (callee cleanup),
;        which matches the @16 suffix of the name.
; Saved: ebx, esi, edi and ebp are pushed on entry and popped on exit.
;
; Register roles inside the loop:
;   edi = n              bl  = oldchar
;   eax = write pointer  esi = number of bytes stored so far
;
; THE BUG: the loop is entered by falling through into lbl_2C, whose
; first instruction advances pstr. The byte at the original pstr is
; therefore never read, and copying starts at the 2nd character.
strrpl_asm_bad@16:
push ebp
mov ebp,esp
; reserve 4 bytes of local storage (presumably the loc_var slot)
sub esp,4
push ebx
push esi
push edi
mov edi,dword ptr [n]
mov bl,byte ptr [oldchar]
; calloc call: 1 is pushed first, then n+1, so with right-to-left
; argument pushing this is calloc(n + 1, 1)
push 1
lea esi,[edi+1]
push esi
call calloc
; the caller removes the two calloc arguments
add esp,8
; keep the buffer start in loc_var; eax is used as the write pointer
mov esi,eax
mov dword ptr [loc_var],esi
; allocation failed -> return 0
test eax,eax
je lbl_52
; 'test' clears CF, so this jbe is taken only when n == 0
test edi,edi
jbe lbl_4D
; esi is reused as the count of bytes stored
xor esi,esi
; Here is the optimizer bug.
; Execution falls straight into the loop at the pointer increment,
; so pstr is advanced before the first byte is ever read.
; The result is that the loop starts on the 2nd character.
lbl_2C:
add dword ptr [pstr],1
; cl = current source byte
mov ecx,dword ptr [pstr]
mov cl,byte ptr [ecx]
; stop at the terminating zero byte (pstr is loaded a second time)
mov edx,dword ptr [pstr]
cmp byte ptr [edx],0
je lbl_4D
; dl = newchar; if the source byte equals oldchar, take edx instead.
; cmove copies all of edx, but only cl is stored below.
mov dl,byte ptr [newchar]
cmp cl,bl
cmove ecx,edx
mov byte ptr [eax],cl
inc eax
inc esi
; continue while fewer than n bytes have been stored (unsigned)
cmp esi,edi
jb lbl_2C
; normal exit: return the buffer start saved in loc_var
lbl_4D:
mov eax,dword ptr [loc_var]
jmp lbl_54
; allocation failure exit: return 0
lbl_52:
xor eax,eax
; common epilogue
lbl_54:
pop edi
pop esi
pop ebx
; discard the local storage and restore the caller's frame
mov esp,ebp
pop ebp
; return and remove 16 bytes of arguments
ret 10h
The correct code should be:
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; strrpl_asm_good@16 -- the correct version.
; Same instruction sequence as strrpl_asm_bad@16, with one added
; 'jmp' and one added label so that the first loop iteration skips
; the pointer increment and processes the 1st character.
;
; What the code does:
;   - allocates n+1 bytes with calloc (element size 1),
;   - copies at most n bytes from the string addressed by pstr into
;     that buffer, storing newchar wherever the source byte equals
;     oldchar, and stopping early at a zero byte,
;   - no terminator is written explicitly; the code relies on the
;     zero-filled buffer returned by calloc.
;
; In:    n, oldchar, newchar, pstr are symbolic operands that are not
;        defined in this listing -- presumably the stack arguments;
;        confirm against the module that defines them.
; Out:   eax = pointer returned by calloc, or 0 if calloc returned 0.
; Stack: 'ret 10h' removes 16 bytes of arguments (callee cleanup),
;        which matches the @16 suffix of the name.
; Saved: ebx, esi, edi and ebp are pushed on entry and popped on exit.
;
; Register roles inside the loop:
;   edi = n              bl  = oldchar
;   eax = write pointer  esi = number of bytes stored so far
strrpl_asm_good@16:
push ebp
mov ebp,esp
; reserve 4 bytes of local storage (presumably the loc_var slot)
sub esp,4
push ebx
push esi
push edi
mov edi,dword ptr [n]
mov bl,byte ptr [oldchar]
; calloc call: 1 is pushed first, then n+1, so with right-to-left
; argument pushing this is calloc(n + 1, 1)
push 1
lea esi,[edi+1]
push esi
call calloc
; the caller removes the two calloc arguments
add esp,8
; keep the buffer start in loc_var; eax is used as the write pointer
mov esi,eax
mov dword ptr [loc_var],esi
; allocation failed -> return 0
test eax,eax
je lbl_g_52
; 'test' clears CF, so this jbe is taken only when n == 0
test edi,edi
jbe lbl_g_4D
; esi is reused as the count of bytes stored
xor esi,esi
; The correct version enters the loop after the increment, so the
; first iteration reads the byte at the original pstr.
jmp lbl_should_jump_here
; loop back-edge target: advance pstr for the 2nd and later iterations
lbl_g_2C:
add dword ptr [pstr],1
lbl_should_jump_here:
; cl = current source byte
mov ecx,dword ptr [pstr]
mov cl,byte ptr [ecx]
; stop at the terminating zero byte (pstr is loaded a second time)
mov edx,dword ptr [pstr]
cmp byte ptr [edx],0
je lbl_g_4D
; dl = newchar; if the source byte equals oldchar, take edx instead.
; cmove copies all of edx, but only cl is stored below.
mov dl,byte ptr [newchar]
cmp cl,bl
cmove ecx,edx
mov byte ptr [eax],cl
inc eax
inc esi
; continue while fewer than n bytes have been stored (unsigned)
cmp esi,edi
jb lbl_g_2C
; normal exit: return the buffer start saved in loc_var
lbl_g_4D:
mov eax,dword ptr [loc_var]
jmp lbl_g_54
; allocation failure exit: return 0
lbl_g_52:
xor eax,eax
; common epilogue
lbl_g_54:
pop edi
pop esi
pop ebx
; discard the local storage and restore the caller's frame
mov esp,ebp
pop ebp
; return and remove 16 bytes of arguments
ret 10h
I am attaching the project from JohnZ, with an assembler module containing the two assembly procedures shown above.
I had to write it because there was no other way to debug the problem.