I have the following code
Image BYTE 1424124 dup(?)
Masque BYTE 1424124 dup(?)
Result BYTE 1424124 dup(?)
.Code
Start PROC USES RBX RDI RSI PARMAREA=4*QWORD
LOCAL _dwDelta_8:DWORD
LOCAL _dwDelta_4:DWORD
LOCAL __dwInputImageSize:DWORD
MOV RDI,OFFSET Result
MOV RSI,OFFSET Image
MOV RBX,OFFSET Masque
; Image Width : 661...668 pixels
; Image Height : 537 pixels
; A x 4 B x H C / 16 D * 16 C - E F / 8 E + (8 x G) C - H I / 4 H + (4 x J)
; A B C D E F G H I J K L
; 661 2644 1419828 88739 1419824 4 0 1419824 4 1 1419828 DWORD
; 662 2648 1421976 88873 1421968 8 1 1421976 0 0 1421976 QWORD
; 663 2652 1424124 89007 1424112 12 1 1424120 4 1 1424124 QWORD + DWORD
; 664 2656 1426272 89142 1426272 0 0 1426272 0 0 1426272
; 665 2660 1428420 89276 1428416 4 0 1428416 4 1 1428420 DWORD
; 666 2664 1430568 89410 1430560 8 1 1430568 0 0 1430568 QWORD
; 667 2668 1432716 89544 1432704 12 1 1432712 4 1 1432716 QWORD + DWORD
; 668 2672 1434864 89679 1434864 0 0 1434864 0 0 1434864
; Result = Source + Masque
MOV _dwDelta_8,0
MOV _dwDelta_4,0
MOV EAX,__dwInputImageSize ; 663 * 4 * 537 = 1424124
MOV EDX,EAX
SHR EAX,4 ; 1424124 Divided by 16 = 89007
MOV ECX,EAX ; 89007 XMMWORDS
SHL EAX,4 ; 89007 Multiplied by 16 = 1424112
SUB EDX,EAX ; 1424124 - 1424112 = 12 (Possible values : 0, 4, 8, 12)
JZ Loop_0 ; The number (W * 4) * H is multiple of 16
CMP EDX,8
JL @Delta_4
SUB EDX,8 ; 12 - 8 = 4 => 1 DWORD
MOV _dwDelta_8,1 ; 1 QWORD
@Delta_4 :
CMP EDX,4
JNE @Loop_0
MOV _dwDelta_4,1 ; 1 DWORD
Loop_0 :
MOV RDX,16
ALIGN 16
@Loop :
LDDQU XMM1,XMMWORD PTR [RSI] ; Load 16 bytes
PADDUSB XMM1,XMMWORD PTR [RBX] ; Saturate (IF < 0 Result = 0) AND (IF > 255 Result = 255)
MOVNTDQ XMMWORD PTR [RDI],XMM1 ; Store result
ADD RSI,RDX ; Next color
ADD RDI,RDX ; Next destination
ADD RBX,RDX ; Next mask
SUB ECX,1
JNZ @Loop ; Next loop if ECX > 0
; -------------- Loop 128 bits finished --------------
CMP _dwDelta_8,1
JNE @Loop_4
; -------------- Get 8 Bytes --------------
SHR RDX,1 ; RDX = 8
XORPD XMM1,XMM1 ; XMM1 = 0
LDDQU XMM1,QWORD PTR [RSI] ; Load 8 bytes
PADDUSB XMM1,QWORD PTR [RBX] ; Add and Saturate
MOVDQ2Q MM1,XMM1 ; Move XMM1 to MM1 POASM says 'error: Invalid instruction operand.'
MOVNTQ QWORD PTR [RDI],MM1 ; Store the result : MM1 => Memory
ADD RSI,RDX ; Next color
ADD RDI,RDX ; Next destination
ADD RBX,RDX ; Next mask
@Loop_4 :
CMP _dwDelta_4,1
JNE @Loop_0 ; If it is equal to 0, it is finished
; -------------- Traitement de 4 octets --------------
LDDQU XMM1,DWORD PTR [RSI]
; PADDUSB XMM1,DWORD PTR [RBX]
; MOVD DWORD PTR [RBX],XMM1
@Loop_0 :
EMMS ; Release XMM registers
SFENCE ; All writes are done
ret ; Bye
My problem is to move 64 bits from XMM1 to MM1.
But PoAsm does not want!
MOVDQ2QMM1,XMM1; Move XMM1 to MM1POASM says 'error: Invalid instruction operand.'
If someone could help me I would be very happy