您的位置:首页 > 其它

Assembly x64 Intro - SSE2 4x8 Load

2015-12-16 16:04 871 查看
; SSE2_Load4x8p  base, r0, r1, r2, r3, r4
; Loads four consecutive 16-byte rows starting at [%1] and regroups them by
; 64-bit halves with two SSE2_XSawp qdq invocations.
;   %1 = base address expression, used as [%1+offset]
;   %2 = receives row 0 (+0x00); on exit { low64(row0), low64(row2) }
;   %3 = receives row 3 (+0x30); reused as scratch, on exit { high64(row0), high64(row2) }
;   %4 = receives row 1 (+0x10); on exit { low64(row1), low64(row3) }
;   %5 = scratch output;         on exit { high64(row1), high64(row3) }
;   %6 = receives row 2 (+0x20); left holding row 2 unchanged
; ({ x, y } means x in the low 64 bits and y in the high 64 bits.)
; MOVDQ is not defined in this block; the accompanying note in this file says
; it is defined as movdqa.
%macro SSE2_Load4x8p 6

; Load the four rows; note the destination order is %2, %4, %6, %3.
MOVDQ %2, [%1+0x00]

MOVDQ %4, [%1+0x10]

MOVDQ %6, [%1+0x20]

MOVDQ %3, [%1+0x30]

; Original author's question: "why are these two steps needed?"
; What they do: pair row 1 with row 3, then row 0 with row 2, splitting each
; pair into a "low halves" register and a "high halves" register.
; NOTE(review): presumably this prepares the data for a later transpose step
; (SSE2_XSawp is labelled "for TRANSPOSE") -- confirm against the caller.
SSE2_XSawp qdq, %4, %3, %5 ; %4 = low halves of rows 1/3, %5 = high halves of rows 1/3

; %3 (row 3) has been consumed above, so it can now be used as the scratch output.
SSE2_XSawp qdq, %2, %6, %3

%endmacro

Note: MOVDQ is defined as movdqa (an aligned 128-bit move), so the address in %1 must be 16-byte aligned.

;for TRANSPOSE
;
; SSE2_XSawp  suffix, a, b, t
;   %1 = suffix pasted onto punpckl/punpckh (the invocations shown in this file pass qdq)
;   %2 = first source; overwritten with punpckl<%1>(%2, %3)
;   %3 = second source; only read by these three instructions
;   %4 = receives a copy of the original %2, then becomes punpckh<%1>(%4, %3)
; Order matters: %2 must be copied into %4 before punpckl overwrites it.

%macro SSE2_XSawp 4

; Save the original %2 so the high-half unpack can still see it.
movdqa %4, %2

; %2 = interleave of the low halves of %2 and %3.
punpckl%1 %2, %3

; %4 = interleave of the high halves of (original %2) and %3.
punpckh%1 %4, %3

%endmacro

如:

;Load 4x8

SSE2_Load4x8p r4, xmm0, xmm1, xmm4, xmm2, xmm5

=> (r4: int16_t *)

movdqa xmm0, [r4 + 0x00]; // load the 1st row of 8 x 16-bit words into the 128-bit xmm0 => xmm0 = aw7aw6aw5aw4aw3aw2aw1aw0

movdqa xmm4, [r4 + 0x10]; // load the 2nd row of 8 x 16-bit words into xmm4 => xmm4 = bw7bw6bw5bw4bw3bw2bw1bw0

movdqa xmm5, [r4 + 0x20]; // load the 3rd row of 8 x 16-bit words into xmm5 => xmm5 = cw7cw6cw5cw4cw3cw2cw1cw0

movdqa xmm1, [r4 + 0x30]; // load the 4th row of 8 x 16-bit words into xmm1 => xmm1 = dw7dw6dw5dw4dw3dw2dw1dw0

SSE2_XSawp qdq, xmm4, xmm1, xmm2 =>

movdqa xmm2, xmm4 => xmm2 = xmm4 = bw7bw6bw5bw4bw3bw2bw1bw0

punpcklqdq xmm4, xmm1 => xmm1 = dw7dw6dw5dw4dw3dw2dw1dw0, xmm4 = dw3dw2dw1dw0bw3bw2bw1bw0

punpckhqdq xmm2, xmm1 => xmm1 = dw7dw6dw5dw4dw3dw2dw1dw0, xmm2 = dw7dw6dw5dw4bw7bw6bw5bw4

SSE2_XSawp qdq, xmm0, xmm5, xmm1 =>

movdqa xmm1, xmm0 => xmm1 = xmm0 = aw7aw6aw5aw4aw3aw2aw1aw0

punpcklqdq xmm0, xmm5 => xmm5 = cw7cw6cw5cw4cw3cw2cw1cw0, xmm0 = cw3cw2cw1cw0aw3aw2aw1aw0

punpckhqdq xmm1, xmm5 => xmm5 = cw7cw6cw5cw4cw3cw2cw1cw0, xmm1 = cw7cw6cw5cw4aw7aw6aw5aw4

; SSE2_Load4x8p  base, r0, r1, r2, r3, r4
; Loads four 16-byte rows from [%1+0x00..0x30] and regroups them by 64-bit
; halves using SSE2_XSawp qdq.
; On exit ({ low 64 bits, high 64 bits }):
;   %2 = { low64(row0),  low64(row2)  }
;   %3 = { high64(row0), high64(row2) }
;   %4 = { low64(row1),  low64(row3)  }
;   %5 = { high64(row1), high64(row3) }
;   %6 = row 2, unchanged
; MOVDQ is defined outside this block (this file's note says it is movdqa).
%macro SSE2_Load4x8p 6

; Row loads: rows 0, 1, 2, 3 go to %2, %4, %6, %3 respectively.
MOVDQ %2, [%1+0x00]

MOVDQ %4, [%1+0x10]

MOVDQ %6, [%1+0x20]

MOVDQ %3, [%1+0x30]

; Pair rows 1 and 3: low halves end up in %4, high halves in %5.
SSE2_XSawp qdq, %4, %3, %5

; Pair rows 0 and 2: low halves end up in %2, high halves in %3.
; %3 is free to be overwritten here because row 3 was consumed above.
SSE2_XSawp qdq, %2, %6, %3

%endmacro

;for TRANSPOSE
;
; SSE2_XSawp  suffix, a, b, t
; Produces both the low-half and the high-half unpack of (a, b):
;   %2 <- punpckl<%1>(%2, %3)
;   %4 <- punpckh<%1>(original %2, %3)
; %1 is the mnemonic suffix (qdq in the invocations shown in this file).

%macro SSE2_XSawp 4

; Keep a copy of %2 before it is overwritten below.
movdqa %4, %2

; Low-half unpack into %2.
punpckl%1 %2, %3

; High-half unpack into the saved copy in %4.
punpckh%1 %4, %3

%endmacro

;Load 4x8

SSE2_Load4x8p r4, xmm0, xmm1, xmm4, xmm2, xmm5

=> movdqa xmm0, [r4]

movdqa xmm4, [r4 + 16]

movdqa xmm5, [r4 + 32]

movdqa xmm1, [r4 + 48]

; 隔行置换, xmm0 <=> xmm5, xmm4 <=> xmm1

(xmm00、xmm01 等分别表示对应寄存器的低 64 位和高 64 位, 各包含 4 个 16bit 的 word)

xmm00xmm01
xmm40xmm41
xmm50xmm51
xmm10xmm11
SSE2_XSawp qdq, xmm4, xmm1, xmm2 =>

movdqa xmm2, xmm4

punpcklqdq xmm4, xmm1

punpckhqdq xmm2, xmm1

SSE2_XSawp qdq, xmm0, xmm5, xmm1 =>

movdqa xmm1, xmm0

punpcklqdq xmm0, xmm5

punpckhqdq xmm1, xmm5



内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: