BITS 64

; cglobal name
; Declares `name` as a global symbol and defines its label at a 16-byte
; aligned address.
;   %1 = symbol name to export
%macro cglobal 1
    global %1
    ALIGN 16
    %1:
%endmacro

; GLOBAL expands to `wrt rip` when __PIC__ is defined and to nothing otherwise.
; NOTE(review): presumably meant to be appended to symbol references inside
; memory operands so they become RIP-relative in PIC builds -- confirm against
; the files that include this header.
%ifdef __PIC__
    %define GLOBAL wrt rip
%else
    %define GLOBAL
%endif

; Symbolic names for the first eight integer/pointer parameters.
; parmNq is the 64-bit view and parmNd the 32-bit view of the same parameter.
; Parameters 1-6 follow the System V AMD64 register order (rdi, rsi, rdx, rcx,
; r8, r9).
; Parameters 7 and 8 are rsp-relative memory operands, so they refer to a
; different location as soon as rsp changes (push, pop, sub rsp, ...).
; NOTE(review): the +8/+16 offsets look like they assume rsp still holds its
; value at function entry (return address at [rsp]) -- confirm at use sites.
%define parm1q rdi
%define parm2q rsi
%define parm3q rdx
%define parm4q rcx
%define parm5q r8
%define parm6q r9
%define parm7q [rsp+8]
%define parm8q [rsp+16]
%define parm1d edi
%define parm2d esi
%define parm3d edx
%define parm4d ecx
%define parm5d r8d
%define parm6d r9d
; The dword forms of the stack parameters reuse the qword address with an
; explicit 32-bit operand size.
%define parm7d dword parm7q
%define parm8d dword parm8q

; This is needed for ELF, otherwise the GNU linker assumes the stack is
; executable by default.
; The object format keyword is compared literally, so the explicit "elf32" and
; "elf64" spellings are checked as well as plain "elf"; at most one of the
; three tests can match, so the section is declared at most once.
%ifidn __YASM_OBJFMT__,elf
section ".note.GNU-stack" noalloc noexec nowrite progbits
%endif
%ifidn __YASM_OBJFMT__,elf32
section ".note.GNU-stack" noalloc noexec nowrite progbits
%endif
%ifidn __YASM_OBJFMT__,elf64
section ".note.GNU-stack" noalloc noexec nowrite progbits
%endif


; I often want to use macros that permute their arguments. e.g. there's no
; efficient way to implement butterfly or transpose or dct without swapping some
; arguments.
;
; I would like to not have to manually keep track of the permutations:
; If I insert a permutation in the middle of a function, it should automatically
; change everything that follows. For more complex macros I may also have multiple
; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations.
;
; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that
; permutes its arguments. It's equivalent to exchanging the contents of the
; registers, except that this way you exchange the register names instead, so
; the exchange takes no execution resources.
;
; To pass data in (x)mmregs to a function:
; put RESET_MMPERM both at the beginning of the function and before calling it.
; To return data in (x)mmregs from a static function:
; put SAVE_MMPERM at the end of the function.

; RESET_MMPERM
; Resets the register-name mapping to the identity: after this macro, mN
; expands to xmmN for every N in 0..15. Takes no arguments and emits no
; instructions; it only redefines preprocessor symbols.
%macro RESET_MMPERM 0
    %xdefine m0 xmm0
    %xdefine m1 xmm1
    %xdefine m2 xmm2
    %xdefine m3 xmm3
    %xdefine m4 xmm4
    %xdefine m5 xmm5
    %xdefine m6 xmm6
    %xdefine m7 xmm7
    %xdefine m8 xmm8
    %xdefine m9 xmm9
    %xdefine m10 xmm10
    %xdefine m11 xmm11
    %xdefine m12 xmm12
    %xdefine m13 xmm13
    %xdefine m14 xmm14
    %xdefine m15 xmm15
%endmacro

; SAVE_MMPERM name
; Snapshots the current m0..m15 mapping under the prefix `name`, defining
; name_m0 .. name_m15.
;   %1 = prefix for the saved symbols
; %xdefine expands its right-hand side when the definition is made, so each
; name_mN records what mN expands to at this point and is unaffected by later
; SWAP/PERMUTE/RESET_MMPERM calls.
%macro SAVE_MMPERM 1
    %xdefine %1_m0 m0
    %xdefine %1_m1 m1
    %xdefine %1_m2 m2
    %xdefine %1_m3 m3
    %xdefine %1_m4 m4
    %xdefine %1_m5 m5
    %xdefine %1_m6 m6
    %xdefine %1_m7 m7
    %xdefine %1_m8 m8
    %xdefine %1_m9 m9
    %xdefine %1_m10 m10
    %xdefine %1_m11 m11
    %xdefine %1_m12 m12
    %xdefine %1_m13 m13
    %xdefine %1_m14 m14
    %xdefine %1_m15 m15
%endmacro

; LOAD_MMPERM name
; Replaces the current m0..m15 mapping with the one stored in
; name_m0 .. name_m15 (the symbols that SAVE_MMPERM name defines).
;   %1 = prefix of the saved symbols
; No check is made that the name_mN symbols exist; an undefined one is copied
; into mN as a literal token.
%macro LOAD_MMPERM 1
    %xdefine m0 %1_m0
    %xdefine m1 %1_m1
    %xdefine m2 %1_m2
    %xdefine m3 %1_m3
    %xdefine m4 %1_m4
    %xdefine m5 %1_m5
    %xdefine m6 %1_m6
    %xdefine m7 %1_m7
    %xdefine m8 %1_m8
    %xdefine m9 %1_m9
    %xdefine m10 %1_m10
    %xdefine m11 %1_m11
    %xdefine m12 %1_m12
    %xdefine m13 %1_m13
    %xdefine m14 %1_m14
    %xdefine m15 %1_m15
%endmacro

; call target
; Wrapper around the `call` instruction that keeps the register-name mapping
; in sync with the callee.
;   %1 = call target
; It emits the call, then, if target_m0 is defined, adopts the target_mN
; mapping through LOAD_MMPERM. Only target_m0 is tested; the other fifteen
; symbols are taken to exist alongside it.
%macro call 1
    ; A %macro is not re-expanded inside its own body, so this line assembles
    ; as the plain instruction.
    call %1
    %ifdef %1_m0
        LOAD_MMPERM %1
    %endif
%endmacro

; SWAP a, b [, c ...]
; Exchanges register names along a single chain: m<a> and m<b> are swapped,
; then m<b> and m<c>, and so on for each adjacent pair of arguments.
;   %1.. = register indices (the N of mN), at least two
; No instructions are emitted; only the mN preprocessor symbols change.
; The scratch symbol `tmp` is removed after each exchange so it does not stay
; defined and rewrite later identifiers named `tmp`.
%macro SWAP 2-*
%rep %0-1
    %xdefine tmp m%1
    %xdefine m%1 m%2
    %xdefine m%2 tmp
    %undef tmp
    %rotate 1
%endrep
%endmacro

; PERMUTE dst0, src0 [, dst1, src1 ...]
; Renames registers according to a list of (dst, src) pairs: after the macro,
; m<dst> names the register that m<src> named before the macro. All sources
; are captured before any destination is written, so the pairs do not
; interfere with each other.
;   %1.. = register indices (the N of mN), given as (dst, src) pairs
; No instructions are emitted; only the mN preprocessor symbols change.
%macro PERMUTE 2-*
%if %0 & 1
    ; An unpaired trailing argument would make the second pass read a tmpN
    ; that the first pass never set, so reject it instead.
    %error PERMUTE expects an even number of arguments (dst, src pairs)
%else
; Pass 1: snapshot every source mapping.
%rep %0/2
    %xdefine tmp%2 m%2
    %rotate 2
%endrep
; Pass 2: assign every destination from the snapshots.
%rep %0/2
    %xdefine m%1 tmp%2
    %rotate 2
%endrep
; Pass 3: drop the snapshots. This is a separate pass so that a source which
; appears in more than one pair stays readable throughout pass 2.
%rep %0/2
    %undef tmp%2
    %rotate 2
%endrep
%endif
%endmacro

; Establish the identity mapping (mN -> xmmN) at this point in the file so that
; m0..m15 are defined from here on.
RESET_MMPERM
