Well, the problem seems to be that I was using the wrong asm supplied by the original poster. My bad!
I was using this
/* NOTE(review): if vec1 is a pointer variable (as it is in the full program
 * further down), the operand `vec1` here names the pointer's own storage,
 * so these unaligned moves would transfer 16 bytes at the pointer variable
 * rather than at the vector data it points to. Confirm against the
 * assembler's operand rules. */
__asm {
movups xmm1, vec1
mulps xmm1, xmm1
movups vec1, xmm1
}
Dancho has it right with this.
Edit: Bryant has it right with this:
/* Copies the pointer value held in vec1 into ecx, loads four packed floats
 * from that address, multiplies each lane by itself, and stores the result
 * back to the same address. movaps is the aligned form, so the address in
 * ecx must be 16-byte aligned. */
__asm {
mov ecx, vec1
movaps xmm1, [ecx]
mulps xmm1, xmm1
movaps [ecx], xmm1
}
The Pelles C malloc does return addresses aligned to 16 bytes; however, the malloc in msvcrt.lib does not, so when using msvcrt.lib one has to use _aligned_malloc.
Here it is working with both the Pelles C malloc and the MS _aligned_malloc.
#include <stdio.h>
#include <stdlib.h>
/* Three-component single-precision vector. */
struct cVector {
    float x;
    float y;
    float z;
};
//#define MS
#ifndef MS
/*
 * Squares the components of a heap-allocated vector in place using SSE and
 * prints the result.
 *
 * movaps faults on addresses that are not 16-byte aligned, so this path
 * assumes malloc hands back 16-byte-aligned blocks (the surrounding text
 * states this for the Pelles C runtime).
 *
 * Returns 0 on success, EXIT_FAILURE if the allocation fails.
 */
int main(void)
{
    /* movaps/mulps work on four packed floats (16 bytes) but struct cVector
     * only holds three. Allocate all four lanes so the 16-byte load and
     * store below stay inside the allocation. */
    enum { SSE_LANES = 4 };
    struct cVector *vec1 = malloc(SSE_LANES * sizeof(float));

    if (vec1 == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    vec1->x = 0.5;
    vec1->y = 1.5;
    vec1->z = -3.141;
    /* Give the unused fourth lane a defined value before it is multiplied. */
    ((float *)vec1)[SSE_LANES - 1] = 0.0f;

    /* ecx <- pointer value; load 4 floats, square each lane, store back. */
    __asm
    {
        mov ecx, vec1
        movaps xmm1, [ecx]
        mulps xmm1, xmm1
        movaps [ecx], xmm1
    }

    printf("PO lib %f %f %f\n", vec1->x, vec1->y, vec1->z);
    free(vec1);
    return 0;
}
#else
#pragma lib "msvcrt.lib"
/* Hand-written prototypes for the aligned allocate/free pair used by main()
 * below; the msvcrt.lib pragma above is what makes them available at link
 * time.
 * NOTE(review): presumably declared here because the included headers do
 * not provide them - confirm. */
void * __cdecl _aligned_malloc( size_t _Size, size_t _Alignment );
void __cdecl _aligned_free( void * _Memory );
/*
 * Squares the components of a heap-allocated vector in place using SSE and
 * prints the result, using _aligned_malloc to obtain the 16-byte alignment
 * that movaps requires.
 *
 * Returns 0 on success, EXIT_FAILURE if the allocation fails.
 */
int main(void)
{
    /* movaps/mulps work on four packed floats (16 bytes) but struct cVector
     * only holds three. Allocate all four lanes so the 16-byte load and
     * store below stay inside the allocation. */
    enum { SSE_LANES = 4, SSE_ALIGNMENT = 16 };
    struct cVector *vec1 = _aligned_malloc(SSE_LANES * sizeof(float), SSE_ALIGNMENT);

    if (vec1 == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    vec1->x = 0.5;
    vec1->y = 1.5;
    vec1->z = -3.141;
    /* Give the unused fourth lane a defined value before it is multiplied. */
    ((float *)vec1)[SSE_LANES - 1] = 0.0f;

    /* ecx <- pointer value; load 4 floats, square each lane, store back. */
    __asm {
        mov ecx, vec1
        movaps xmm1, [ecx]
        mulps xmm1, xmm1
        movaps [ecx], xmm1
    }

    printf( "MS lib %f %f %f\n", vec1->x, vec1->y, vec1->z );
    _aligned_free(vec1);
    return 0;
}
#endif
Of course, we would still like to do this by aligning the struct itself, without using malloc.
John