The following intrinsic call gives a compiler error:
c1=_mm256_fmadd_ps(a1,b1,c1);
Apparently you cannot use c1 as both the destination and one of the arguments.
So I use c2 for the result instead:
c2=_mm256_fmadd_ps(a1,b1,c1);
That works, but then I cannot use c2 again in any of the following fmadd intrinsics!
Can you please provide a little more info...?
1) Compiler options
2) Preferably a small snippet, otherwise a little more info about a1/b1/c1 (Globals? Parameters? Locals? Result of other intrinsics? If so, which ones...?)
Not sure if this is helpful:
#include <immintrin.h>
/*
 * Minimal test case for the reported problem: the result of
 * _mm256_fmadd_ps is assigned back to c1, the same variable that is passed
 * as the third argument.
 *
 * The trailing comment on each statement is the instruction the poster
 * annotated for it (presumably the compiler's output; the compiler and its
 * options are not stated here).
 */
int __cdecl main(void)
{
__m256 a1, b1, c1;
a1 = _mm256_setzero_ps(); // vxorps ymm0, ymm0, ymm0
b1 = _mm256_setzero_ps(); // vxorps ymm1, ymm1, ymm1
c1 = _mm256_setzero_ps(); // vxorps ymm2, ymm2, ymm2
// c1 is both the assignment target and the third argument. Going by the
// instruction reference below, the annotated instruction writes its result
// to its first operand (ymm0), not to ymm2 where c1 was zeroed.
c1 = _mm256_fmadd_ps(a1,b1,c1); // vfmadd213ps ymm0, ymm1, ymm2
return 0;
}/*
Instruction reference entry for the instruction annotated above
(ymm1/ymm2/ymm3 in this entry name operand positions, not the specific
registers used in the snippet):
VEX.256.66.0F38.W0 A8 /r VFMADD213PS ymm1, ymm2, ymm3/m256 A V/V FMA
Multiply packed single-precision floating-point values from ymm1 and ymm2, add to ymm3/mem and put result in ymm1.
*/
#include <immintrin.h>
/*
 * Variant of the test case that stores the _mm256_fmadd_ps result in a
 * separate variable (c2) instead of assigning it back to c1.
 *
 * The trailing comment on each statement is the instruction the poster
 * annotated for it (presumably the compiler's output; the compiler and its
 * options are not stated here).
 */
int __cdecl main(void)
{
__m256 a1, b1, c1, c2;
a1 = _mm256_setzero_ps(); // vxorps ymm0, ymm0, ymm0
b1 = _mm256_setzero_ps(); // vxorps ymm1, ymm1, ymm1
c1 = _mm256_setzero_ps(); // vxorps ymm2, ymm2, ymm2
// NOTE(review): this initial value of c2 is overwritten by the next
// statement before it is ever read. "ymm?" is the poster's own placeholder;
// no register is given for this statement.
c2 = _mm256_setzero_ps(); // vxorps ymm?, ymm?, ymm?
// Same operation as c1 = _mm256_fmadd_ps(a1,b1,c1), but with a distinct
// result variable; the annotated instruction uses only ymm0, ymm1 and ymm2.
c2 = _mm256_fmadd_ps(a1,b1,c1); // vfmadd213ps ymm0, ymm1, ymm2
return 0;
}
Helpful to me at least...
thanks!