author    Quentin Carbonneaux    2023-02-28 18:40:29 +0100
committer Quentin Carbonneaux    2023-02-28 18:40:29 +0100
commit    8face096c7b809b28208078a92ebc8b5af8f67b8 (patch)
tree      883a77a9cc9cff7a26acc0a6fe83a4d5cc0c0aaf
parent    8c26966d888f20db77107af524ad73b4d3f00018 (diff)
embed nacl
The nacl build script is obnoxious: it takes ages to run, it is hard to debug, and it is not even all that portable. This patch repackages nacl sources into a simple directory tree and builds them from the main Makefile.
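As a rough sketch (not part of the patch itself), the repackaged layout lets the main Makefile pick the amd64 assembly on x86_64 hosts and the portable C fallback everywhere else with nothing more than a wildcard and an $(if)/$(subst) test. CPU, NACLDIR and NACLSRC mirror the Makefile hunk further down; NACLARCH is only an illustrative intermediate, the patch inlines the test directly:

  CPU     := $(shell uname -m)
  NACLDIR  = sdar/lib/nacl
  # subst result is empty only when CPU is exactly x86_64, so the amd64 assembly is chosen
  NACLARCH = $(if $(subst x86_64,,$(CPU)),generic,amd64)
  NACLSRC  = $(wildcard $(NACLDIR)/*.c $(NACLDIR)/$(NACLARCH)/*.[cs])

The object list is then derived from $(NACLSRC) with the usual obj/%.o pattern substitutions, once for .c inputs and once for .s inputs, as the Makefile diff below does.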
-rw-r--r--  Makefile                                   56
-rw-r--r--  sdar/arch.c                                 2
-rw-r--r--  sdar/key.c                                  4
-rw-r--r--  sdar/lib/nacl/amd64/poly1305_auth.s      2875
-rw-r--r--  sdar/lib/nacl/amd64/salsa20_stream.s     4823
-rw-r--r--  sdar/lib/nacl/box.c                        70
-rw-r--r--  sdar/lib/nacl/curve25519.c                484
-rw-r--r--  sdar/lib/nacl/generic/poly1305_auth.c    1616
-rw-r--r--  sdar/lib/nacl/generic/salsa20.c           134
-rw-r--r--  sdar/lib/nacl/generic/salsa20_stream.c     88
-rw-r--r--  sdar/lib/nacl/hsalsa20.c                  135
-rw-r--r--  sdar/lib/nacl/nacl.h                       70
-rw-r--r--  sdar/lib/nacl/poly1305_verify.c             8
-rw-r--r--  sdar/lib/nacl/randombytes.c                33
-rw-r--r--  sdar/lib/nacl/secretbox.c                  33
-rw-r--r--  sdar/lib/nacl/verify_16.c                  24
-rw-r--r--  sdar/lib/nacl/xsalsa20.c                   32
-rw-r--r--  sdar/slice.c                                2
-rw-r--r--  sdar/stash.c                                2
19 files changed, 10460 insertions, 31 deletions
diff --git a/Makefile b/Makefile
index 2077df6..1dff535 100644
--- a/Makefile
+++ b/Makefile
@@ -1,76 +1,82 @@
BIN = sdar
PREFIX ?= /usr
-UNAME = $(shell uname -s)
-ifeq ($(UNAME),Linux)
+SYS := $(shell uname -s)
+CPU := $(shell uname -m)
+
+ifeq ($(SYS),Linux)
CFLAGS += -D_POSIX_C_SOURCE=200809L -D_DEFAULT_SOURCE
endif
-NACL = nacl-20110221
-NACLURL = https://hyperelliptic.org/nacl/$(NACL).tar.bz2
-NACLDIR = sdar/lib/$(NACL)/build/$(shell hostname | tr -dC [:alnum:])
-NACLINC = $(NACLDIR)/include/amd64
-NACLLIB = -L$(NACLDIR)/lib/amd64 -lnacl $(NACLDIR)/lib/amd64/randombytes.o
+NACLDIR = sdar/lib/nacl
LZ4INC = sdar/lib/lz4/lib
LZ4LIB = sdar/lib/lz4/lib/liblz4.a
BLAKE3INC = sdar/lib/BLAKE3/c
BLAKE3LIB = obj/blake3.a
-CRYPTOLIB = obj/crypto.a
CRYPTOSRC = $(wildcard sdar/lib/crypto/*.c)
CRYPTOOBJ = $(CRYPTOSRC:sdar/%.c=obj/%.o)
+NACLSRC = $(wildcard $(NACLDIR)/*.c \
+ $(NACLDIR)/$(if $(subst x86_64,,$(CPU)),generic,amd64)/*.[cs])
+NACLOBJ = $(NACLSRC:sdar/%.c=obj/%.o)
+NACLOBJ := $(NACLOBJ:sdar/%.s=obj/%.o)
V = @
SRC = $(wildcard sdar/*.c)
OBJ = $(SRC:sdar/%.c=obj/%.o)
-SANFLAGS = -fsanitize=address,undefined
+SANFLAGS = -g -fsanitize=address,undefined
LDFLAGS += $(SANFLAGS) $(NACLLIB) $(LZ4LIB) $(BLAKE3LIB)
-CFLAGS += -fno-omit-frame-pointer -std=c11 -Wall -Wextra -pedantic -g
-CFLAGS += -I$(NACLINC) -I$(LZ4INC) -I$(BLAKE3INC)
+CFLAGS += -fno-omit-frame-pointer -std=c11 -Wall -Wextra -pedantic
+CFLAGS += -I$(NACLDIR) -I$(LZ4INC) -I$(BLAKE3INC)
CFLAGS += -DMMH_KEYSZ=32 -DMMH_VALSZ=16 -DMMH_HDRSZ=8
-B3FLAGS = -O3 -DBLAKE3_NO_SSE2 -DBLAKE3_NO_AVX512
-CRFLAGS = -O3 -I sdar
+LIBFLAGS = -O3 -Isdar -DBLAKE3_NO_SSE2 -DBLAKE3_NO_AVX512
-obj/$(BIN): $(OBJ) $(BLAKE3LIB) $(CRYPTOOBJ)
+obj/$(BIN): $(OBJ) $(NACLOBJ) $(CRYPTOOBJ) $(BLAKE3LIB)
@test -z "$(V)" || echo "ld $@"
$(V)$(CC) $^ -o $@ $(LDFLAGS)
obj/%.o: sdar/%.c
@test -z "$(V)" || echo "cc $<"
$(V)$(CC) $(CFLAGS) \
- $(if $(findstring $<,$(CRYPTOSRC)),$(CRFLAGS),$(SANFLAGS)) \
+ $(if $(findstring sdar/lib,$<),$(LIBFLAGS),$(SANFLAGS)) \
+ -c $< -o $@
+
+obj/%.o: sdar/%.s
+ @test -z "$(V)" || echo "as $<"
+ $(V)$(CC) $(CFLAGS) \
+ $(if $(findstring sdar/lib,$<),$(LIBFLAGS),$(SANFLAGS)) \
-c $< -o $@
$(BLAKE3LIB): obj/timestamp
@test -z "$(V)" || echo "cc/ar $@"
- $(V)$(CC) $(B3FLAGS) -c sdar/lib/BLAKE3/c/blake3.c -o obj/blake3.o
- $(V)$(CC) $(B3FLAGS) -c sdar/lib/BLAKE3/c/blake3_dispatch.c -o obj/blake3_dispatch.o
- $(V)$(CC) $(B3FLAGS) -c sdar/lib/BLAKE3/c/blake3_avx2_x86-64_unix.S -o obj/blake3_avx2.o
- $(V)$(CC) $(B3FLAGS) -c sdar/lib/BLAKE3/c/blake3_sse41_x86-64_unix.S -o obj/blake3_sse41.o
- $(V)$(CC) $(B3FLAGS) -c sdar/lib/BLAKE3/c/blake3_portable.c -o obj/blake3_portable.o
+ $(V)$(CC) $(LIBFLAGS) -c sdar/lib/BLAKE3/c/blake3.c -o obj/blake3.o
+ $(V)$(CC) $(LIBFLAGS) -c sdar/lib/BLAKE3/c/blake3_dispatch.c -o obj/blake3_dispatch.o
+ $(V)$(CC) $(LIBFLAGS) -c sdar/lib/BLAKE3/c/blake3_avx2_x86-64_unix.S -o obj/blake3_avx2.o
+ $(V)$(CC) $(LIBFLAGS) -c sdar/lib/BLAKE3/c/blake3_sse41_x86-64_unix.S -o obj/blake3_sse41.o
+ $(V)$(CC) $(LIBFLAGS) -c sdar/lib/BLAKE3/c/blake3_portable.c -o obj/blake3_portable.o
$(V)ar crs $@ \
obj/blake3.o obj/blake3_dispatch.o \
obj/blake3_avx2.o obj/blake3_sse41.o obj/blake3_portable.o
sdar/lib/timestamp:
- curl $(NACLURL) | tar -jxCsdar/lib
- # patch for crufty osx
- sed -ie 's/[a-z]*cc /&-Wno-implicit-function-declaration /' \
- sdar/lib/$(NACL)/okcompilers/c
- cd sdar/lib/$(NACL) && ./do
make -C sdar/lib/lz4
@touch $@
obj/timestamp:
@mkdir -p obj
@mkdir -p obj/lib/crypto
+ @mkdir -p obj/lib/nacl
+ @mkdir -p obj/lib/nacl/amd64
+ @mkdir -p obj/lib/nacl/generic
@touch $@
$(OBJ): sdar/all.h sdar/mmh.h
$(OBJ): sdar/lib/timestamp
$(OBJ): obj/timestamp
+$(NACLOBJ): $(NACLDIR)/nacl.h
+
clean:
rm -fr obj
diff --git a/sdar/arch.c b/sdar/arch.c
index 2494603..6626457 100644
--- a/sdar/arch.c
+++ b/sdar/arch.c
@@ -7,7 +7,7 @@
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
-#include <crypto_box.h>
+#include <nacl.h>
#include <lz4.h>
typedef struct Cacheblk Cacheblk;
diff --git a/sdar/key.c b/sdar/key.c
index 59c0400..7c9a8fb 100644
--- a/sdar/key.c
+++ b/sdar/key.c
@@ -1,9 +1,7 @@
#include "all.h"
#include <stddef.h>
#include <string.h>
-#include <crypto_box.h>
-#include <crypto_secretbox.h>
-#include <randombytes.h>
+#include <nacl.h>
MAKESURE(nacl_nonce_len_is_Noncesz, crypto_secretbox_NONCEBYTES == Noncesz);
MAKESURE(nacl_key_len_is_Keysz, crypto_secretbox_KEYBYTES == Keysz);
diff --git a/sdar/lib/nacl/amd64/poly1305_auth.s b/sdar/lib/nacl/amd64/poly1305_auth.s
new file mode 100644
index 0000000..21a504c
--- /dev/null
+++ b/sdar/lib/nacl/amd64/poly1305_auth.s
@@ -0,0 +1,2875 @@
+# auth.s
+
+# qhasm: int64 r11_caller
+
+# qhasm: int64 r12_caller
+
+# qhasm: int64 r13_caller
+
+# qhasm: int64 r14_caller
+
+# qhasm: int64 r15_caller
+
+# qhasm: int64 rbx_caller
+
+# qhasm: int64 rbp_caller
+
+# qhasm: caller r11_caller
+
+# qhasm: caller r12_caller
+
+# qhasm: caller r13_caller
+
+# qhasm: caller r14_caller
+
+# qhasm: caller r15_caller
+
+# qhasm: caller rbx_caller
+
+# qhasm: caller rbp_caller
+
+# qhasm: stack64 r11_stack
+
+# qhasm: stack64 r12_stack
+
+# qhasm: stack64 r13_stack
+
+# qhasm: stack64 r14_stack
+
+# qhasm: stack64 r15_stack
+
+# qhasm: stack64 rbx_stack
+
+# qhasm: stack64 rbp_stack
+
+# qhasm: int64 out
+
+# qhasm: stack64 out_stack
+
+# qhasm: int64 m
+
+# qhasm: int64 l
+
+# qhasm: int64 k
+
+# qhasm: stack64 k_stack
+
+# qhasm: int64 m0
+
+# qhasm: int64 m1
+
+# qhasm: int64 m2
+
+# qhasm: int64 m3
+
+# qhasm: float80 a0
+
+# qhasm: float80 a1
+
+# qhasm: float80 a2
+
+# qhasm: float80 a3
+
+# qhasm: float80 h0
+
+# qhasm: float80 h1
+
+# qhasm: float80 h2
+
+# qhasm: float80 h3
+
+# qhasm: float80 x0
+
+# qhasm: float80 x1
+
+# qhasm: float80 x2
+
+# qhasm: float80 x3
+
+# qhasm: float80 y0
+
+# qhasm: float80 y1
+
+# qhasm: float80 y2
+
+# qhasm: float80 y3
+
+# qhasm: float80 r0x0
+
+# qhasm: float80 r1x0
+
+# qhasm: float80 r2x0
+
+# qhasm: float80 r3x0
+
+# qhasm: float80 r0x1
+
+# qhasm: float80 r1x1
+
+# qhasm: float80 r2x1
+
+# qhasm: float80 sr3x1
+
+# qhasm: float80 r0x2
+
+# qhasm: float80 r1x2
+
+# qhasm: float80 sr2x2
+
+# qhasm: float80 sr3x2
+
+# qhasm: float80 r0x3
+
+# qhasm: float80 sr1x3
+
+# qhasm: float80 sr2x3
+
+# qhasm: float80 sr3x3
+
+# qhasm: stack64 d0
+
+# qhasm: stack64 d1
+
+# qhasm: stack64 d2
+
+# qhasm: stack64 d3
+
+# qhasm: stack64 r0
+
+# qhasm: stack64 r1
+
+# qhasm: stack64 r2
+
+# qhasm: stack64 r3
+
+# qhasm: stack64 sr1
+
+# qhasm: stack64 sr2
+
+# qhasm: stack64 sr3
+
+# qhasm: enter crypto_onetimeauth_poly1305_amd64
+.text
+.p2align 5
+.globl _crypto_onetimeauth_poly1305
+.globl crypto_onetimeauth_poly1305
+_crypto_onetimeauth_poly1305:
+crypto_onetimeauth_poly1305:
+mov %rsp,%r11
+and $31,%r11
+add $192,%r11
+sub %r11,%rsp
+
+# qhasm: input out
+
+# qhasm: input m
+
+# qhasm: input l
+
+# qhasm: input k
+
+# qhasm: r11_stack = r11_caller
+# asm 1: movq <r11_caller=int64#9,>r11_stack=stack64#1
+# asm 2: movq <r11_caller=%r11,>r11_stack=32(%rsp)
+movq %r11,32(%rsp)
+
+# qhasm: r12_stack = r12_caller
+# asm 1: movq <r12_caller=int64#10,>r12_stack=stack64#2
+# asm 2: movq <r12_caller=%r12,>r12_stack=40(%rsp)
+movq %r12,40(%rsp)
+
+# qhasm: r13_stack = r13_caller
+# asm 1: movq <r13_caller=int64#11,>r13_stack=stack64#3
+# asm 2: movq <r13_caller=%r13,>r13_stack=48(%rsp)
+movq %r13,48(%rsp)
+
+# qhasm: r14_stack = r14_caller
+# asm 1: movq <r14_caller=int64#12,>r14_stack=stack64#4
+# asm 2: movq <r14_caller=%r14,>r14_stack=56(%rsp)
+movq %r14,56(%rsp)
+
+# qhasm: r15_stack = r15_caller
+# asm 1: movq <r15_caller=int64#13,>r15_stack=stack64#5
+# asm 2: movq <r15_caller=%r15,>r15_stack=64(%rsp)
+movq %r15,64(%rsp)
+
+# qhasm: rbx_stack = rbx_caller
+# asm 1: movq <rbx_caller=int64#14,>rbx_stack=stack64#6
+# asm 2: movq <rbx_caller=%rbx,>rbx_stack=72(%rsp)
+movq %rbx,72(%rsp)
+
+# qhasm: rbp_stack = rbp_caller
+# asm 1: movq <rbp_caller=int64#15,>rbp_stack=stack64#7
+# asm 2: movq <rbp_caller=%rbp,>rbp_stack=80(%rsp)
+movq %rbp,80(%rsp)
+
+# qhasm: round *(uint16 *) &crypto_onetimeauth_poly1305_amd64_rounding
+fldcw crypto_onetimeauth_poly1305_amd64_rounding(%rip)
+
+# qhasm: m0 = *(uint32 *) (k + 0)
+# asm 1: movl 0(<k=int64#4),>m0=int64#5d
+# asm 2: movl 0(<k=%rcx),>m0=%r8d
+movl 0(%rcx),%r8d
+
+# qhasm: m1 = *(uint32 *) (k + 4)
+# asm 1: movl 4(<k=int64#4),>m1=int64#6d
+# asm 2: movl 4(<k=%rcx),>m1=%r9d
+movl 4(%rcx),%r9d
+
+# qhasm: m2 = *(uint32 *) (k + 8)
+# asm 1: movl 8(<k=int64#4),>m2=int64#7d
+# asm 2: movl 8(<k=%rcx),>m2=%eax
+movl 8(%rcx),%eax
+
+# qhasm: m3 = *(uint32 *) (k + 12)
+# asm 1: movl 12(<k=int64#4),>m3=int64#8d
+# asm 2: movl 12(<k=%rcx),>m3=%r10d
+movl 12(%rcx),%r10d
+
+# qhasm: out_stack = out
+# asm 1: movq <out=int64#1,>out_stack=stack64#8
+# asm 2: movq <out=%rdi,>out_stack=88(%rsp)
+movq %rdi,88(%rsp)
+
+# qhasm: k_stack = k
+# asm 1: movq <k=int64#4,>k_stack=stack64#9
+# asm 2: movq <k=%rcx,>k_stack=96(%rsp)
+movq %rcx,96(%rsp)
+
+# qhasm: d0 top = 0x43300000
+# asm 1: movl $0x43300000,>d0=stack64#10
+# asm 2: movl $0x43300000,>d0=108(%rsp)
+movl $0x43300000,108(%rsp)
+
+# qhasm: d1 top = 0x45300000
+# asm 1: movl $0x45300000,>d1=stack64#11
+# asm 2: movl $0x45300000,>d1=116(%rsp)
+movl $0x45300000,116(%rsp)
+
+# qhasm: d2 top = 0x47300000
+# asm 1: movl $0x47300000,>d2=stack64#12
+# asm 2: movl $0x47300000,>d2=124(%rsp)
+movl $0x47300000,124(%rsp)
+
+# qhasm: d3 top = 0x49300000
+# asm 1: movl $0x49300000,>d3=stack64#13
+# asm 2: movl $0x49300000,>d3=132(%rsp)
+movl $0x49300000,132(%rsp)
+
+# qhasm: (uint32) m0 &= 0x0fffffff
+# asm 1: and $0x0fffffff,<m0=int64#5d
+# asm 2: and $0x0fffffff,<m0=%r8d
+and $0x0fffffff,%r8d
+
+# qhasm: (uint32) m1 &= 0x0ffffffc
+# asm 1: and $0x0ffffffc,<m1=int64#6d
+# asm 2: and $0x0ffffffc,<m1=%r9d
+and $0x0ffffffc,%r9d
+
+# qhasm: (uint32) m2 &= 0x0ffffffc
+# asm 1: and $0x0ffffffc,<m2=int64#7d
+# asm 2: and $0x0ffffffc,<m2=%eax
+and $0x0ffffffc,%eax
+
+# qhasm: (uint32) m3 &= 0x0ffffffc
+# asm 1: and $0x0ffffffc,<m3=int64#8d
+# asm 2: and $0x0ffffffc,<m3=%r10d
+and $0x0ffffffc,%r10d
+
+# qhasm: inplace d0 bottom = m0
+# asm 1: movl <m0=int64#5d,<d0=stack64#10
+# asm 2: movl <m0=%r8d,<d0=104(%rsp)
+movl %r8d,104(%rsp)
+
+# qhasm: inplace d1 bottom = m1
+# asm 1: movl <m1=int64#6d,<d1=stack64#11
+# asm 2: movl <m1=%r9d,<d1=112(%rsp)
+movl %r9d,112(%rsp)
+
+# qhasm: inplace d2 bottom = m2
+# asm 1: movl <m2=int64#7d,<d2=stack64#12
+# asm 2: movl <m2=%eax,<d2=120(%rsp)
+movl %eax,120(%rsp)
+
+# qhasm: inplace d3 bottom = m3
+# asm 1: movl <m3=int64#8d,<d3=stack64#13
+# asm 2: movl <m3=%r10d,<d3=128(%rsp)
+movl %r10d,128(%rsp)
+
+# qhasm: a0 = *(float64 *) &d0
+# asm 1: fldl <d0=stack64#10
+# asm 2: fldl <d0=104(%rsp)
+fldl 104(%rsp)
+# comment:fpstackfrombottom:<a0#28:
+
+# qhasm: a0 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset0
+fsubl crypto_onetimeauth_poly1305_amd64_doffset0(%rip)
+# comment:fpstackfrombottom:<a0#28:
+
+# qhasm: a1 = *(float64 *) &d1
+# asm 1: fldl <d1=stack64#11
+# asm 2: fldl <d1=112(%rsp)
+fldl 112(%rsp)
+# comment:fpstackfrombottom:<a0#28:<a1#29:
+
+# qhasm: a1 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset1
+fsubl crypto_onetimeauth_poly1305_amd64_doffset1(%rip)
+# comment:fpstackfrombottom:<a0#28:<a1#29:
+
+# qhasm: a2 = *(float64 *) &d2
+# asm 1: fldl <d2=stack64#12
+# asm 2: fldl <d2=120(%rsp)
+fldl 120(%rsp)
+# comment:fpstackfrombottom:<a0#28:<a1#29:<a2#30:
+
+# qhasm: a2 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset2
+fsubl crypto_onetimeauth_poly1305_amd64_doffset2(%rip)
+# comment:fpstackfrombottom:<a0#28:<a1#29:<a2#30:
+
+# qhasm: a3 = *(float64 *) &d3
+# asm 1: fldl <d3=stack64#13
+# asm 2: fldl <d3=128(%rsp)
+fldl 128(%rsp)
+# comment:fpstackfrombottom:<a0#28:<a1#29:<a2#30:<a3#31:
+
+# qhasm: a3 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset3
+fsubl crypto_onetimeauth_poly1305_amd64_doffset3(%rip)
+# comment:fpstackfrombottom:<a0#28:<a1#29:<a2#30:<a3#31:
+
+# qhasm: internal stacktop a0
+# asm 1: fxch <a0=float80#4
+# asm 2: fxch <a0=%st(3)
+fxch %st(3)
+
+# qhasm: *(float64 *) &r0 = a0
+# asm 1: fstpl >r0=stack64#14
+# asm 2: fstpl >r0=136(%rsp)
+fstpl 136(%rsp)
+# comment:fpstackfrombottom:<a3#31:<a1#29:<a2#30:
+
+# qhasm: internal stacktop a1
+# asm 1: fxch <a1=float80#2
+# asm 2: fxch <a1=%st(1)
+fxch %st(1)
+
+# qhasm: *(float64 *) &r1 = a1
+# asm 1: fstl >r1=stack64#15
+# asm 2: fstl >r1=144(%rsp)
+fstl 144(%rsp)
+# comment:fpstackfrombottom:<a3#31:<a2#30:<a1#29:
+
+# qhasm: a1 *= *(float64 *) &crypto_onetimeauth_poly1305_amd64_scale
+fmull crypto_onetimeauth_poly1305_amd64_scale(%rip)
+# comment:fpstackfrombottom:<a3#31:<a2#30:<a1#29:
+
+# qhasm: *(float64 *) &sr1 = a1
+# asm 1: fstpl >sr1=stack64#16
+# asm 2: fstpl >sr1=152(%rsp)
+fstpl 152(%rsp)
+# comment:fpstackfrombottom:<a3#31:<a2#30:
+
+# qhasm: *(float64 *) &r2 = a2
+# asm 1: fstl >r2=stack64#17
+# asm 2: fstl >r2=160(%rsp)
+fstl 160(%rsp)
+# comment:fpstackfrombottom:<a3#31:<a2#30:
+
+# qhasm: a2 *= *(float64 *) &crypto_onetimeauth_poly1305_amd64_scale
+fmull crypto_onetimeauth_poly1305_amd64_scale(%rip)
+# comment:fpstackfrombottom:<a3#31:<a2#30:
+
+# qhasm: *(float64 *) &sr2 = a2
+# asm 1: fstpl >sr2=stack64#18
+# asm 2: fstpl >sr2=168(%rsp)
+fstpl 168(%rsp)
+# comment:fpstackfrombottom:<a3#31:
+
+# qhasm: *(float64 *) &r3 = a3
+# asm 1: fstl >r3=stack64#19
+# asm 2: fstl >r3=176(%rsp)
+fstl 176(%rsp)
+# comment:fpstackfrombottom:<a3#31:
+
+# qhasm: a3 *= *(float64 *) &crypto_onetimeauth_poly1305_amd64_scale
+fmull crypto_onetimeauth_poly1305_amd64_scale(%rip)
+# comment:fpstackfrombottom:<a3#31:
+
+# qhasm: *(float64 *) &sr3 = a3
+# asm 1: fstpl >sr3=stack64#20
+# asm 2: fstpl >sr3=184(%rsp)
+fstpl 184(%rsp)
+# comment:fpstackfrombottom:
+
+# qhasm: h3 = 0
+fldz
+# comment:fpstackfrombottom:<h3#39:
+
+# qhasm: h2 = 0
+fldz
+# comment:fpstackfrombottom:<h3#39:<h2#40:
+
+# qhasm: h1 = 0
+fldz
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:
+
+# qhasm: h0 = 0
+fldz
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: unsigned<? l - 16
+# asm 1: cmp $16,<l=int64#3
+# asm 2: cmp $16,<l=%rdx
+cmp $16,%rdx
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fp stack unchanged by jump
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: goto addatmost15bytes if unsigned<
+jb ._addatmost15bytes
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: initialatleast16bytes:
+._initialatleast16bytes:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: m3 = *(uint32 *) (m + 12)
+# asm 1: movl 12(<m=int64#2),>m3=int64#1d
+# asm 2: movl 12(<m=%rsi),>m3=%edi
+movl 12(%rsi),%edi
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: m2 = *(uint32 *) (m + 8)
+# asm 1: movl 8(<m=int64#2),>m2=int64#4d
+# asm 2: movl 8(<m=%rsi),>m2=%ecx
+movl 8(%rsi),%ecx
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: m1 = *(uint32 *) (m + 4)
+# asm 1: movl 4(<m=int64#2),>m1=int64#5d
+# asm 2: movl 4(<m=%rsi),>m1=%r8d
+movl 4(%rsi),%r8d
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: m0 = *(uint32 *) (m + 0)
+# asm 1: movl 0(<m=int64#2),>m0=int64#6d
+# asm 2: movl 0(<m=%rsi),>m0=%r9d
+movl 0(%rsi),%r9d
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: inplace d3 bottom = m3
+# asm 1: movl <m3=int64#1d,<d3=stack64#13
+# asm 2: movl <m3=%edi,<d3=128(%rsp)
+movl %edi,128(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: inplace d2 bottom = m2
+# asm 1: movl <m2=int64#4d,<d2=stack64#12
+# asm 2: movl <m2=%ecx,<d2=120(%rsp)
+movl %ecx,120(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: inplace d1 bottom = m1
+# asm 1: movl <m1=int64#5d,<d1=stack64#11
+# asm 2: movl <m1=%r8d,<d1=112(%rsp)
+movl %r8d,112(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: inplace d0 bottom = m0
+# asm 1: movl <m0=int64#6d,<d0=stack64#10
+# asm 2: movl <m0=%r9d,<d0=104(%rsp)
+movl %r9d,104(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: m += 16
+# asm 1: add $16,<m=int64#2
+# asm 2: add $16,<m=%rsi
+add $16,%rsi
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: l -= 16
+# asm 1: sub $16,<l=int64#3
+# asm 2: sub $16,<l=%rdx
+sub $16,%rdx
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#4
+# asm 2: fxch <h3=%st(3)
+fxch %st(3)
+
+# qhasm: h3 += *(float64 *) &d3
+# asm 1: faddl <d3=stack64#13
+# asm 2: faddl <d3=128(%rsp)
+faddl 128(%rsp)
+# comment:fpstackfrombottom:<h0#42:<h2#40:<h1#41:<h3#39:
+
+# qhasm: h3 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset3minustwo128
+fsubl crypto_onetimeauth_poly1305_amd64_doffset3minustwo128(%rip)
+# comment:fpstackfrombottom:<h0#42:<h2#40:<h1#41:<h3#39:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#2
+# asm 2: fxch <h1=%st(1)
+fxch %st(1)
+
+# qhasm: h1 += *(float64 *) &d1
+# asm 1: faddl <d1=stack64#11
+# asm 2: faddl <d1=112(%rsp)
+faddl 112(%rsp)
+# comment:fpstackfrombottom:<h0#42:<h2#40:<h3#39:<h1#41:
+
+# qhasm: h1 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset1
+fsubl crypto_onetimeauth_poly1305_amd64_doffset1(%rip)
+# comment:fpstackfrombottom:<h0#42:<h2#40:<h3#39:<h1#41:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#3
+# asm 2: fxch <h2=%st(2)
+fxch %st(2)
+
+# qhasm: h2 += *(float64 *) &d2
+# asm 1: faddl <d2=stack64#12
+# asm 2: faddl <d2=120(%rsp)
+faddl 120(%rsp)
+# comment:fpstackfrombottom:<h0#42:<h1#41:<h3#39:<h2#40:
+
+# qhasm: h2 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset2
+fsubl crypto_onetimeauth_poly1305_amd64_doffset2(%rip)
+# comment:fpstackfrombottom:<h0#42:<h1#41:<h3#39:<h2#40:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#4
+# asm 2: fxch <h0=%st(3)
+fxch %st(3)
+
+# qhasm: h0 += *(float64 *) &d0
+# asm 1: faddl <d0=stack64#10
+# asm 2: faddl <d0=104(%rsp)
+faddl 104(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: h0 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset0
+fsubl crypto_onetimeauth_poly1305_amd64_doffset0(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: unsigned<? l - 16
+# asm 1: cmp $16,<l=int64#3
+# asm 2: cmp $16,<l=%rdx
+cmp $16,%rdx
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+# comment:fp stack unchanged by jump
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: goto multiplyaddatmost15bytes if unsigned<
+jb ._multiplyaddatmost15bytes
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: multiplyaddatleast16bytes:
+._multiplyaddatleast16bytes:
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: m3 = *(uint32 *) (m + 12)
+# asm 1: movl 12(<m=int64#2),>m3=int64#1d
+# asm 2: movl 12(<m=%rsi),>m3=%edi
+movl 12(%rsi),%edi
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: m2 = *(uint32 *) (m + 8)
+# asm 1: movl 8(<m=int64#2),>m2=int64#4d
+# asm 2: movl 8(<m=%rsi),>m2=%ecx
+movl 8(%rsi),%ecx
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: m1 = *(uint32 *) (m + 4)
+# asm 1: movl 4(<m=int64#2),>m1=int64#5d
+# asm 2: movl 4(<m=%rsi),>m1=%r8d
+movl 4(%rsi),%r8d
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: m0 = *(uint32 *) (m + 0)
+# asm 1: movl 0(<m=int64#2),>m0=int64#6d
+# asm 2: movl 0(<m=%rsi),>m0=%r9d
+movl 0(%rsi),%r9d
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: inplace d3 bottom = m3
+# asm 1: movl <m3=int64#1d,<d3=stack64#13
+# asm 2: movl <m3=%edi,<d3=128(%rsp)
+movl %edi,128(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: inplace d2 bottom = m2
+# asm 1: movl <m2=int64#4d,<d2=stack64#12
+# asm 2: movl <m2=%ecx,<d2=120(%rsp)
+movl %ecx,120(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: inplace d1 bottom = m1
+# asm 1: movl <m1=int64#5d,<d1=stack64#11
+# asm 2: movl <m1=%r8d,<d1=112(%rsp)
+movl %r8d,112(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: inplace d0 bottom = m0
+# asm 1: movl <m0=int64#6d,<d0=stack64#10
+# asm 2: movl <m0=%r9d,<d0=104(%rsp)
+movl %r9d,104(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: m += 16
+# asm 1: add $16,<m=int64#2
+# asm 2: add $16,<m=%rsi
+add $16,%rsi
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: l -= 16
+# asm 1: sub $16,<l=int64#3
+# asm 2: sub $16,<l=%rdx
+sub $16,%rdx
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: x0 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha130
+fldl crypto_onetimeauth_poly1305_amd64_alpha130(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:
+
+# qhasm: x0 += h3
+# asm 1: fadd <h3=float80#3,<x0=float80#1
+# asm 2: fadd <h3=%st(2),<x0=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:
+
+# qhasm: x0 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha130
+fsubl crypto_onetimeauth_poly1305_amd64_alpha130(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:
+
+# qhasm: h3 -= x0
+# asm 1: fsubr <x0=float80#1,<h3=float80#3
+# asm 2: fsubr <x0=%st(0),<h3=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:
+
+# qhasm: x0 *= *(float64 *) &crypto_onetimeauth_poly1305_amd64_scale
+fmull crypto_onetimeauth_poly1305_amd64_scale(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:
+
+# qhasm: x1 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha32
+fldl crypto_onetimeauth_poly1305_amd64_alpha32(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:<x1#54:
+
+# qhasm: x1 += h0
+# asm 1: fadd <h0=float80#3,<x1=float80#1
+# asm 2: fadd <h0=%st(2),<x1=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:<x1#54:
+
+# qhasm: x1 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha32
+fsubl crypto_onetimeauth_poly1305_amd64_alpha32(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:<x1#54:
+
+# qhasm: h0 -= x1
+# asm 1: fsubr <x1=float80#1,<h0=float80#3
+# asm 2: fsubr <x1=%st(0),<h0=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:<x1#54:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#3
+# asm 2: fxch <h0=%st(2)
+fxch %st(2)
+
+# qhasm: x0 += h0
+# asm 1: faddp <h0=float80#1,<x0=float80#2
+# asm 2: faddp <h0=%st(0),<x0=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:
+
+# qhasm: x2 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha64
+fldl crypto_onetimeauth_poly1305_amd64_alpha64(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:
+
+# qhasm: x2 += h1
+# asm 1: fadd <h1=float80#5,<x2=float80#1
+# asm 2: fadd <h1=%st(4),<x2=%st(0)
+fadd %st(4),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:
+
+# qhasm: x2 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha64
+fsubl crypto_onetimeauth_poly1305_amd64_alpha64(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:
+
+# qhasm: h1 -= x2
+# asm 1: fsubr <x2=float80#1,<h1=float80#5
+# asm 2: fsubr <x2=%st(0),<h1=%st(4)
+fsubr %st(0),%st(4)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:
+
+# qhasm: x3 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha96
+fldl crypto_onetimeauth_poly1305_amd64_alpha96(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:<x3#56:
+
+# qhasm: x3 += h2
+# asm 1: fadd <h2=float80#7,<x3=float80#1
+# asm 2: fadd <h2=%st(6),<x3=%st(0)
+fadd %st(6),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:<x3#56:
+
+# qhasm: x3 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha96
+fsubl crypto_onetimeauth_poly1305_amd64_alpha96(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:<x3#56:
+
+# qhasm: h2 -= x3
+# asm 1: fsubr <x3=float80#1,<h2=float80#7
+# asm 2: fsubr <x3=%st(0),<h2=%st(6)
+fsubr %st(0),%st(6)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:<x3#56:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#7
+# asm 2: fxch <h2=%st(6)
+fxch %st(6)
+
+# qhasm: x2 += h2
+# asm 1: faddp <h2=float80#1,<x2=float80#2
+# asm 2: faddp <h2=%st(0),<x2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#56:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#4
+# asm 2: fxch <h3=%st(3)
+fxch %st(3)
+
+# qhasm: x3 += h3
+# asm 1: faddp <h3=float80#1,<x3=float80#6
+# asm 2: faddp <h3=%st(0),<x3=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<x3#56:<h1#41:<x2#55:<x1#54:<x0#53:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#4
+# asm 2: fxch <h1=%st(3)
+fxch %st(3)
+
+# qhasm: x1 += h1
+# asm 1: faddp <h1=float80#1,<x1=float80#2
+# asm 2: faddp <h1=%st(0),<x1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#56:<x0#53:<x2#55:<x1#54:
+
+# qhasm: h3 = *(float64 *) &r3
+# asm 1: fldl <r3=stack64#19
+# asm 2: fldl <r3=176(%rsp)
+fldl 176(%rsp)
+# comment:fpstackfrombottom:<x3#56:<x0#53:<x2#55:<x1#54:<h3#39:
+
+# qhasm: h3 *= x0
+# asm 1: fmul <x0=float80#4,<h3=float80#1
+# asm 2: fmul <x0=%st(3),<h3=%st(0)
+fmul %st(3),%st(0)
+# comment:fpstackfrombottom:<x3#56:<x0#53:<x2#55:<x1#54:<h3#39:
+
+# qhasm: h2 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#17
+# asm 2: fldl <r2=160(%rsp)
+fldl 160(%rsp)
+# comment:fpstackfrombottom:<x3#56:<x0#53:<x2#55:<x1#54:<h3#39:<h2#40:
+
+# qhasm: h2 *= x0
+# asm 1: fmul <x0=float80#5,<h2=float80#1
+# asm 2: fmul <x0=%st(4),<h2=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#56:<x0#53:<x2#55:<x1#54:<h3#39:<h2#40:
+
+# qhasm: h1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x3#56:<x0#53:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:
+
+# qhasm: h1 *= x0
+# asm 1: fmul <x0=float80#6,<h1=float80#1
+# asm 2: fmul <x0=%st(5),<h1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#56:<x0#53:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:
+
+# qhasm: h0 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x3#56:<x0#53:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: h0 *= x0
+# asm 1: fmulp <x0=float80#1,<h0=float80#7
+# asm 2: fmulp <x0=%st(0),<h0=%st(6)
+fmulp %st(0),%st(6)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:
+
+# qhasm: r2x1 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#17
+# asm 2: fldl <r2=160(%rsp)
+fldl 160(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:<r2x1#57:
+
+# qhasm: r2x1 *= x1
+# asm 1: fmul <x1=float80#5,<r2x1=float80#1
+# asm 2: fmul <x1=%st(4),<r2x1=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:<r2x1#57:
+
+# qhasm: h3 += r2x1
+# asm 1: faddp <r2x1=float80#1,<h3=float80#4
+# asm 2: faddp <r2x1=%st(0),<h3=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:
+
+# qhasm: r1x1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:<r1x1#58:
+
+# qhasm: r1x1 *= x1
+# asm 1: fmul <x1=float80#5,<r1x1=float80#1
+# asm 2: fmul <x1=%st(4),<r1x1=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:<r1x1#58:
+
+# qhasm: h2 += r1x1
+# asm 1: faddp <r1x1=float80#1,<h2=float80#3
+# asm 2: faddp <r1x1=%st(0),<h2=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:
+
+# qhasm: r0x1 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:<r0x1#59:
+
+# qhasm: r0x1 *= x1
+# asm 1: fmul <x1=float80#5,<r0x1=float80#1
+# asm 2: fmul <x1=%st(4),<r0x1=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:<r0x1#59:
+
+# qhasm: h1 += r0x1
+# asm 1: faddp <r0x1=float80#1,<h1=float80#2
+# asm 2: faddp <r0x1=%st(0),<h1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:
+
+# qhasm: sr3x1 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:<sr3x1#60:
+
+# qhasm: sr3x1 *= x1
+# asm 1: fmulp <x1=float80#1,<sr3x1=float80#5
+# asm 2: fmulp <x1=%st(0),<sr3x1=%st(4)
+fmulp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<sr3x1#60:<h3#39:<h2#40:<h1#41:
+
+# qhasm: internal stacktop sr3x1
+# asm 1: fxch <sr3x1=float80#4
+# asm 2: fxch <sr3x1=%st(3)
+fxch %st(3)
+
+# qhasm: h0 += sr3x1
+# asm 1: faddp <sr3x1=float80#1,<h0=float80#6
+# asm 2: faddp <sr3x1=%st(0),<h0=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:
+
+# qhasm: r1x2 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:<r1x2#61:
+
+# qhasm: r1x2 *= x2
+# asm 1: fmul <x2=float80#5,<r1x2=float80#1
+# asm 2: fmul <x2=%st(4),<r1x2=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:<r1x2#61:
+
+# qhasm: h3 += r1x2
+# asm 1: faddp <r1x2=float80#1,<h3=float80#3
+# asm 2: faddp <r1x2=%st(0),<h3=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:
+
+# qhasm: r0x2 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:<r0x2#62:
+
+# qhasm: r0x2 *= x2
+# asm 1: fmul <x2=float80#5,<r0x2=float80#1
+# asm 2: fmul <x2=%st(4),<r0x2=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:<r0x2#62:
+
+# qhasm: h2 += r0x2
+# asm 1: faddp <r0x2=float80#1,<h2=float80#2
+# asm 2: faddp <r0x2=%st(0),<h2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:
+
+# qhasm: sr3x2 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:<sr3x2#63:
+
+# qhasm: sr3x2 *= x2
+# asm 1: fmul <x2=float80#5,<sr3x2=float80#1
+# asm 2: fmul <x2=%st(4),<sr3x2=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:<sr3x2#63:
+
+# qhasm: h1 += sr3x2
+# asm 1: faddp <sr3x2=float80#1,<h1=float80#4
+# asm 2: faddp <sr3x2=%st(0),<h1=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:
+
+# qhasm: sr2x2 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#18
+# asm 2: fldl <sr2=168(%rsp)
+fldl 168(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:<sr2x2#64:
+
+# qhasm: sr2x2 *= x2
+# asm 1: fmulp <x2=float80#1,<sr2x2=float80#5
+# asm 2: fmulp <x2=%st(0),<sr2x2=%st(4)
+fmulp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<sr2x2#64:<h1#41:<h3#39:<h2#40:
+
+# qhasm: internal stacktop sr2x2
+# asm 1: fxch <sr2x2=float80#4
+# asm 2: fxch <sr2x2=%st(3)
+fxch %st(3)
+
+# qhasm: h0 += sr2x2
+# asm 1: faddp <sr2x2=float80#1,<h0=float80#5
+# asm 2: faddp <sr2x2=%st(0),<h0=%st(4)
+faddp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<h2#40:<h1#41:<h3#39:
+
+# qhasm: r0x3 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<h2#40:<h1#41:<h3#39:<r0x3#65:
+
+# qhasm: r0x3 *= x3
+# asm 1: fmul <x3=float80#6,<r0x3=float80#1
+# asm 2: fmul <x3=%st(5),<r0x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<h2#40:<h1#41:<h3#39:<r0x3#65:
+
+# qhasm: h3 += r0x3
+# asm 1: faddp <r0x3=float80#1,<h3=float80#2
+# asm 2: faddp <r0x3=%st(0),<h3=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<h2#40:<h1#41:<h3#39:
+
+# qhasm: stacktop h0
+# asm 1: fxch <h0=float80#4
+# asm 2: fxch <h0=%st(3)
+fxch %st(3)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: sr3x3 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h1#41:<h0#42:<sr3x3#66:
+
+# qhasm: sr3x3 *= x3
+# asm 1: fmul <x3=float80#6,<sr3x3=float80#1
+# asm 2: fmul <x3=%st(5),<sr3x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h1#41:<h0#42:<sr3x3#66:
+
+# qhasm: h2 += sr3x3
+# asm 1: faddp <sr3x3=float80#1,<h2=float80#4
+# asm 2: faddp <sr3x3=%st(0),<h2=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: stacktop h1
+# asm 1: fxch <h1=float80#2
+# asm 2: fxch <h1=%st(1)
+fxch %st(1)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h0#42:<h1#41:
+
+# qhasm: sr2x3 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#18
+# asm 2: fldl <sr2=168(%rsp)
+fldl 168(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h0#42:<h1#41:<sr2x3#67:
+
+# qhasm: sr2x3 *= x3
+# asm 1: fmul <x3=float80#6,<sr2x3=float80#1
+# asm 2: fmul <x3=%st(5),<sr2x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h0#42:<h1#41:<sr2x3#67:
+
+# qhasm: h1 += sr2x3
+# asm 1: faddp <sr2x3=float80#1,<h1=float80#2
+# asm 2: faddp <sr2x3=%st(0),<h1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h0#42:<h1#41:
+
+# qhasm: sr1x3 = *(float64 *) &sr1
+# asm 1: fldl <sr1=stack64#16
+# asm 2: fldl <sr1=152(%rsp)
+fldl 152(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h0#42:<h1#41:<sr1x3#68:
+
+# qhasm: sr1x3 *= x3
+# asm 1: fmulp <x3=float80#1,<sr1x3=float80#6
+# asm 2: fmulp <x3=%st(0),<sr1x3=%st(5)
+fmulp %st(0),%st(5)
+# comment:fpstackfrombottom:<sr1x3#68:<h3#39:<h2#40:<h0#42:<h1#41:
+
+# qhasm: internal stacktop sr1x3
+# asm 1: fxch <sr1x3=float80#5
+# asm 2: fxch <sr1x3=%st(4)
+fxch %st(4)
+
+# qhasm: h0 += sr1x3
+# asm 1: faddp <sr1x3=float80#1,<h0=float80#2
+# asm 2: faddp <sr1x3=%st(0),<h0=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:
+
+# qhasm: unsigned<? l - 16
+# asm 1: cmp $16,<l=int64#3
+# asm 2: cmp $16,<l=%rdx
+cmp $16,%rdx
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:
+
+# qhasm: stacktop h3
+# asm 1: fxch <h3=float80#3
+# asm 2: fxch <h3=%st(2)
+fxch %st(2)
+# comment:fpstackfrombottom:<h1#41:<h0#42:<h2#40:<h3#39:
+
+# qhasm: y3 = *(float64 *) &d3
+# asm 1: fldl <d3=stack64#13
+# asm 2: fldl <d3=128(%rsp)
+fldl 128(%rsp)
+# comment:fpstackfrombottom:<h1#41:<h0#42:<h2#40:<h3#39:<y3#70:
+
+# qhasm: y3 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset3minustwo128
+fsubl crypto_onetimeauth_poly1305_amd64_doffset3minustwo128(%rip)
+# comment:fpstackfrombottom:<h1#41:<h0#42:<h2#40:<h3#39:<y3#70:
+
+# qhasm: h3 += y3
+# asm 1: faddp <y3=float80#1,<h3=float80#2
+# asm 2: faddp <y3=%st(0),<h3=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h1#41:<h0#42:<h2#40:<h3#39:
+
+# qhasm: stacktop h2
+# asm 1: fxch <h2=float80#2
+# asm 2: fxch <h2=%st(1)
+fxch %st(1)
+# comment:fpstackfrombottom:<h1#41:<h0#42:<h3#39:<h2#40:
+
+# qhasm: y2 = *(float64 *) &d2
+# asm 1: fldl <d2=stack64#12
+# asm 2: fldl <d2=120(%rsp)
+fldl 120(%rsp)
+# comment:fpstackfrombottom:<h1#41:<h0#42:<h3#39:<h2#40:<y2#71:
+
+# qhasm: y2 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset2
+fsubl crypto_onetimeauth_poly1305_amd64_doffset2(%rip)
+# comment:fpstackfrombottom:<h1#41:<h0#42:<h3#39:<h2#40:<y2#71:
+
+# qhasm: h2 += y2
+# asm 1: faddp <y2=float80#1,<h2=float80#2
+# asm 2: faddp <y2=%st(0),<h2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h1#41:<h0#42:<h3#39:<h2#40:
+
+# qhasm: stacktop h1
+# asm 1: fxch <h1=float80#4
+# asm 2: fxch <h1=%st(3)
+fxch %st(3)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h3#39:<h1#41:
+
+# qhasm: y1 = *(float64 *) &d1
+# asm 1: fldl <d1=stack64#11
+# asm 2: fldl <d1=112(%rsp)
+fldl 112(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h3#39:<h1#41:<y1#72:
+
+# qhasm: y1 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset1
+fsubl crypto_onetimeauth_poly1305_amd64_doffset1(%rip)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h3#39:<h1#41:<y1#72:
+
+# qhasm: h1 += y1
+# asm 1: faddp <y1=float80#1,<h1=float80#2
+# asm 2: faddp <y1=%st(0),<h1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h3#39:<h1#41:
+
+# qhasm: stacktop h0
+# asm 1: fxch <h0=float80#3
+# asm 2: fxch <h0=%st(2)
+fxch %st(2)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: y0 = *(float64 *) &d0
+# asm 1: fldl <d0=stack64#10
+# asm 2: fldl <d0=104(%rsp)
+fldl 104(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<y0#73:
+
+# qhasm: y0 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset0
+fsubl crypto_onetimeauth_poly1305_amd64_doffset0(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<y0#73:
+
+# qhasm: h0 += y0
+# asm 1: faddp <y0=float80#1,<h0=float80#2
+# asm 2: faddp <y0=%st(0),<h0=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+# comment:fp stack unchanged by jump
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: goto multiplyaddatleast16bytes if !unsigned<
+jae ._multiplyaddatleast16bytes
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+# comment:fp stack unchanged by fallthrough
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: multiplyaddatmost15bytes:
+._multiplyaddatmost15bytes:
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: x0 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha130
+fldl crypto_onetimeauth_poly1305_amd64_alpha130(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:
+
+# qhasm: x0 += h3
+# asm 1: fadd <h3=float80#3,<x0=float80#1
+# asm 2: fadd <h3=%st(2),<x0=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:
+
+# qhasm: x0 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha130
+fsubl crypto_onetimeauth_poly1305_amd64_alpha130(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:
+
+# qhasm: h3 -= x0
+# asm 1: fsubr <x0=float80#1,<h3=float80#3
+# asm 2: fsubr <x0=%st(0),<h3=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:
+
+# qhasm: x0 *= *(float64 *) &crypto_onetimeauth_poly1305_amd64_scale
+fmull crypto_onetimeauth_poly1305_amd64_scale(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:
+
+# qhasm: x1 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha32
+fldl crypto_onetimeauth_poly1305_amd64_alpha32(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:
+
+# qhasm: x1 += h0
+# asm 1: fadd <h0=float80#3,<x1=float80#1
+# asm 2: fadd <h0=%st(2),<x1=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:
+
+# qhasm: x1 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha32
+fsubl crypto_onetimeauth_poly1305_amd64_alpha32(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:
+
+# qhasm: h0 -= x1
+# asm 1: fsubr <x1=float80#1,<h0=float80#3
+# asm 2: fsubr <x1=%st(0),<h0=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:
+
+# qhasm: x2 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha64
+fldl crypto_onetimeauth_poly1305_amd64_alpha64(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:
+
+# qhasm: x2 += h1
+# asm 1: fadd <h1=float80#6,<x2=float80#1
+# asm 2: fadd <h1=%st(5),<x2=%st(0)
+fadd %st(5),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:
+
+# qhasm: x2 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha64
+fsubl crypto_onetimeauth_poly1305_amd64_alpha64(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:
+
+# qhasm: h1 -= x2
+# asm 1: fsubr <x2=float80#1,<h1=float80#6
+# asm 2: fsubr <x2=%st(0),<h1=%st(5)
+fsubr %st(0),%st(5)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:
+
+# qhasm: x3 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha96
+fldl crypto_onetimeauth_poly1305_amd64_alpha96(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:<x3#77:
+
+# qhasm: x3 += h2
+# asm 1: fadd <h2=float80#8,<x3=float80#1
+# asm 2: fadd <h2=%st(7),<x3=%st(0)
+fadd %st(7),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:<x3#77:
+
+# qhasm: x3 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha96
+fsubl crypto_onetimeauth_poly1305_amd64_alpha96(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:<x3#77:
+
+# qhasm: h2 -= x3
+# asm 1: fsubr <x3=float80#1,<h2=float80#8
+# asm 2: fsubr <x3=%st(0),<h2=%st(7)
+fsubr %st(0),%st(7)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:<x3#77:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#8
+# asm 2: fxch <h2=%st(7)
+fxch %st(7)
+
+# qhasm: x2 += h2
+# asm 1: faddp <h2=float80#1,<x2=float80#2
+# asm 2: faddp <h2=%st(0),<x2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#77:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#6
+# asm 2: fxch <h1=%st(5)
+fxch %st(5)
+
+# qhasm: x1 += h1
+# asm 1: faddp <h1=float80#1,<x1=float80#2
+# asm 2: faddp <h1=%st(0),<x1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h3#39:<h0#42:<x0#74:<x1#75:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#4
+# asm 2: fxch <h3=%st(3)
+fxch %st(3)
+
+# qhasm: x3 += h3
+# asm 1: faddp <h3=float80#1,<x3=float80#6
+# asm 2: faddp <h3=%st(0),<x3=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<x0#74:
+
+# qhasm: x0 += h0
+# asm 1: faddp <h0=float80#1,<x0=float80#2
+# asm 2: faddp <h0=%st(0),<x0=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<x0#74:
+
+# qhasm: h3 = *(float64 *) &r3
+# asm 1: fldl <r3=stack64#19
+# asm 2: fldl <r3=176(%rsp)
+fldl 176(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<x0#74:<h3#39:
+
+# qhasm: h3 *= x0
+# asm 1: fmul <x0=float80#2,<h3=float80#1
+# asm 2: fmul <x0=%st(1),<h3=%st(0)
+fmul %st(1),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<x0#74:<h3#39:
+
+# qhasm: h2 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#17
+# asm 2: fldl <r2=160(%rsp)
+fldl 160(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<x0#74:<h3#39:<h2#40:
+
+# qhasm: h2 *= x0
+# asm 1: fmul <x0=float80#3,<h2=float80#1
+# asm 2: fmul <x0=%st(2),<h2=%st(0)
+fmul %st(2),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<x0#74:<h3#39:<h2#40:
+
+# qhasm: h1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<x0#74:<h3#39:<h2#40:<h1#41:
+
+# qhasm: h1 *= x0
+# asm 1: fmul <x0=float80#4,<h1=float80#1
+# asm 2: fmul <x0=%st(3),<h1=%st(0)
+fmul %st(3),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<x0#74:<h3#39:<h2#40:<h1#41:
+
+# qhasm: h0 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<x0#74:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: h0 *= x0
+# asm 1: fmulp <x0=float80#1,<h0=float80#5
+# asm 2: fmulp <x0=%st(0),<h0=%st(4)
+fmulp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:
+
+# qhasm: r2x1 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#17
+# asm 2: fldl <r2=160(%rsp)
+fldl 160(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:<r2x1#78:
+
+# qhasm: r2x1 *= x1
+# asm 1: fmul <x1=float80#6,<r2x1=float80#1
+# asm 2: fmul <x1=%st(5),<r2x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:<r2x1#78:
+
+# qhasm: h3 += r2x1
+# asm 1: faddp <r2x1=float80#1,<h3=float80#4
+# asm 2: faddp <r2x1=%st(0),<h3=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:
+
+# qhasm: r1x1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:<r1x1#79:
+
+# qhasm: r1x1 *= x1
+# asm 1: fmul <x1=float80#6,<r1x1=float80#1
+# asm 2: fmul <x1=%st(5),<r1x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:<r1x1#79:
+
+# qhasm: h2 += r1x1
+# asm 1: faddp <r1x1=float80#1,<h2=float80#3
+# asm 2: faddp <r1x1=%st(0),<h2=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:
+
+# qhasm: r0x1 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:<r0x1#80:
+
+# qhasm: r0x1 *= x1
+# asm 1: fmul <x1=float80#6,<r0x1=float80#1
+# asm 2: fmul <x1=%st(5),<r0x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:<r0x1#80:
+
+# qhasm: h1 += r0x1
+# asm 1: faddp <r0x1=float80#1,<h1=float80#2
+# asm 2: faddp <r0x1=%st(0),<h1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:
+
+# qhasm: sr3x1 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:<sr3x1#81:
+
+# qhasm: sr3x1 *= x1
+# asm 1: fmulp <x1=float80#1,<sr3x1=float80#6
+# asm 2: fmulp <x1=%st(0),<sr3x1=%st(5)
+fmulp %st(0),%st(5)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<sr3x1#81:<h0#42:<h3#39:<h2#40:<h1#41:
+
+# qhasm: internal stacktop sr3x1
+# asm 1: fxch <sr3x1=float80#5
+# asm 2: fxch <sr3x1=%st(4)
+fxch %st(4)
+
+# qhasm: h0 += sr3x1
+# asm 1: faddp <sr3x1=float80#1,<h0=float80#4
+# asm 2: faddp <sr3x1=%st(0),<h0=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:
+
+# qhasm: r1x2 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:<r1x2#82:
+
+# qhasm: r1x2 *= x2
+# asm 1: fmul <x2=float80#6,<r1x2=float80#1
+# asm 2: fmul <x2=%st(5),<r1x2=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:<r1x2#82:
+
+# qhasm: h3 += r1x2
+# asm 1: faddp <r1x2=float80#1,<h3=float80#3
+# asm 2: faddp <r1x2=%st(0),<h3=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:
+
+# qhasm: r0x2 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:<r0x2#83:
+
+# qhasm: r0x2 *= x2
+# asm 1: fmul <x2=float80#6,<r0x2=float80#1
+# asm 2: fmul <x2=%st(5),<r0x2=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:<r0x2#83:
+
+# qhasm: h2 += r0x2
+# asm 1: faddp <r0x2=float80#1,<h2=float80#2
+# asm 2: faddp <r0x2=%st(0),<h2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:
+
+# qhasm: sr3x2 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:<sr3x2#84:
+
+# qhasm: sr3x2 *= x2
+# asm 1: fmul <x2=float80#6,<sr3x2=float80#1
+# asm 2: fmul <x2=%st(5),<sr3x2=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:<sr3x2#84:
+
+# qhasm: h1 += sr3x2
+# asm 1: faddp <sr3x2=float80#1,<h1=float80#5
+# asm 2: faddp <sr3x2=%st(0),<h1=%st(4)
+faddp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:
+
+# qhasm: sr2x2 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#18
+# asm 2: fldl <sr2=168(%rsp)
+fldl 168(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:<sr2x2#85:
+
+# qhasm: sr2x2 *= x2
+# asm 1: fmulp <x2=float80#1,<sr2x2=float80#6
+# asm 2: fmulp <x2=%st(0),<sr2x2=%st(5)
+fmulp %st(0),%st(5)
+# comment:fpstackfrombottom:<x3#77:<sr2x2#85:<h1#41:<h0#42:<h3#39:<h2#40:
+
+# qhasm: internal stacktop sr2x2
+# asm 1: fxch <sr2x2=float80#5
+# asm 2: fxch <sr2x2=%st(4)
+fxch %st(4)
+
+# qhasm: h0 += sr2x2
+# asm 1: faddp <sr2x2=float80#1,<h0=float80#3
+# asm 2: faddp <sr2x2=%st(0),<h0=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:
+
+# qhasm: r0x3 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:<r0x3#86:
+
+# qhasm: r0x3 *= x3
+# asm 1: fmul <x3=float80#6,<r0x3=float80#1
+# asm 2: fmul <x3=%st(5),<r0x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:<r0x3#86:
+
+# qhasm: h3 += r0x3
+# asm 1: faddp <r0x3=float80#1,<h3=float80#2
+# asm 2: faddp <r0x3=%st(0),<h3=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:
+
+# qhasm: sr3x3 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:<sr3x3#87:
+
+# qhasm: sr3x3 *= x3
+# asm 1: fmul <x3=float80#6,<sr3x3=float80#1
+# asm 2: fmul <x3=%st(5),<sr3x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:<sr3x3#87:
+
+# qhasm: h2 += sr3x3
+# asm 1: faddp <sr3x3=float80#1,<h2=float80#5
+# asm 2: faddp <sr3x3=%st(0),<h2=%st(4)
+faddp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:
+
+# qhasm: sr2x3 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#18
+# asm 2: fldl <sr2=168(%rsp)
+fldl 168(%rsp)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:<sr2x3#88:
+
+# qhasm: sr2x3 *= x3
+# asm 1: fmul <x3=float80#6,<sr2x3=float80#1
+# asm 2: fmul <x3=%st(5),<sr2x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:<sr2x3#88:
+
+# qhasm: h1 += sr2x3
+# asm 1: faddp <sr2x3=float80#1,<h1=float80#4
+# asm 2: faddp <sr2x3=%st(0),<h1=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:
+
+# qhasm: sr1x3 = *(float64 *) &sr1
+# asm 1: fldl <sr1=stack64#16
+# asm 2: fldl <sr1=152(%rsp)
+fldl 152(%rsp)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:<sr1x3#89:
+
+# qhasm: sr1x3 *= x3
+# asm 1: fmulp <x3=float80#1,<sr1x3=float80#6
+# asm 2: fmulp <x3=%st(0),<sr1x3=%st(5)
+fmulp %st(0),%st(5)
+# comment:fpstackfrombottom:<sr1x3#89:<h2#40:<h1#41:<h0#42:<h3#39:
+
+# qhasm: internal stacktop sr1x3
+# asm 1: fxch <sr1x3=float80#5
+# asm 2: fxch <sr1x3=%st(4)
+fxch %st(4)
+
+# qhasm: h0 += sr1x3
+# asm 1: faddp <sr1x3=float80#1,<h0=float80#2
+# asm 2: faddp <sr1x3=%st(0),<h0=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fp stack unchanged by fallthrough
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: addatmost15bytes:
+._addatmost15bytes:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: =? l - 0
+# asm 1: cmp $0,<l=int64#3
+# asm 2: cmp $0,<l=%rdx
+cmp $0,%rdx
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fp stack unchanged by jump
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: goto nomorebytes if =
+je ._nomorebytes
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: stack128 lastchunk
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: int64 destination
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: int64 numbytes
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: ((uint32 *)&lastchunk)[0] = 0
+# asm 1: movl $0,>lastchunk=stack128#1
+# asm 2: movl $0,>lastchunk=0(%rsp)
+movl $0,0(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: ((uint32 *)&lastchunk)[1] = 0
+# asm 1: movl $0,4+<lastchunk=stack128#1
+# asm 2: movl $0,4+<lastchunk=0(%rsp)
+movl $0,4+0(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: ((uint32 *)&lastchunk)[2] = 0
+# asm 1: movl $0,8+<lastchunk=stack128#1
+# asm 2: movl $0,8+<lastchunk=0(%rsp)
+movl $0,8+0(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: ((uint32 *)&lastchunk)[3] = 0
+# asm 1: movl $0,12+<lastchunk=stack128#1
+# asm 2: movl $0,12+<lastchunk=0(%rsp)
+movl $0,12+0(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: destination = &lastchunk
+# asm 1: leaq <lastchunk=stack128#1,>destination=int64#1
+# asm 2: leaq <lastchunk=0(%rsp),>destination=%rdi
+leaq 0(%rsp),%rdi
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: numbytes = l
+# asm 1: mov <l=int64#3,>numbytes=int64#4
+# asm 2: mov <l=%rdx,>numbytes=%rcx
+mov %rdx,%rcx
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: while (numbytes) { *destination++ = *m++; --numbytes }
+rep movsb
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: *(uint8 *) (destination + 0) = 1
+# asm 1: movb $1,0(<destination=int64#1)
+# asm 2: movb $1,0(<destination=%rdi)
+movb $1,0(%rdi)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: m3 = ((uint32 *)&lastchunk)[3]
+# asm 1: movl 12+<lastchunk=stack128#1,>m3=int64#1d
+# asm 2: movl 12+<lastchunk=0(%rsp),>m3=%edi
+movl 12+0(%rsp),%edi
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: m2 = ((uint32 *)&lastchunk)[2]
+# asm 1: movl 8+<lastchunk=stack128#1,>m2=int64#2d
+# asm 2: movl 8+<lastchunk=0(%rsp),>m2=%esi
+movl 8+0(%rsp),%esi
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: m1 = ((uint32 *)&lastchunk)[1]
+# asm 1: movl 4+<lastchunk=stack128#1,>m1=int64#3d
+# asm 2: movl 4+<lastchunk=0(%rsp),>m1=%edx
+movl 4+0(%rsp),%edx
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: m0 = ((uint32 *)&lastchunk)[0]
+# asm 1: movl <lastchunk=stack128#1,>m0=int64#4d
+# asm 2: movl <lastchunk=0(%rsp),>m0=%ecx
+movl 0(%rsp),%ecx
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: inplace d3 bottom = m3
+# asm 1: movl <m3=int64#1d,<d3=stack64#13
+# asm 2: movl <m3=%edi,<d3=128(%rsp)
+movl %edi,128(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: inplace d2 bottom = m2
+# asm 1: movl <m2=int64#2d,<d2=stack64#12
+# asm 2: movl <m2=%esi,<d2=120(%rsp)
+movl %esi,120(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: inplace d1 bottom = m1
+# asm 1: movl <m1=int64#3d,<d1=stack64#11
+# asm 2: movl <m1=%edx,<d1=112(%rsp)
+movl %edx,112(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: inplace d0 bottom = m0
+# asm 1: movl <m0=int64#4d,<d0=stack64#10
+# asm 2: movl <m0=%ecx,<d0=104(%rsp)
+movl %ecx,104(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#4
+# asm 2: fxch <h3=%st(3)
+fxch %st(3)
+
+# qhasm: h3 += *(float64 *) &d3
+# asm 1: faddl <d3=stack64#13
+# asm 2: faddl <d3=128(%rsp)
+faddl 128(%rsp)
+# comment:fpstackfrombottom:<h0#42:<h2#40:<h1#41:<h3#39:
+
+# qhasm: h3 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset3
+fsubl crypto_onetimeauth_poly1305_amd64_doffset3(%rip)
+# comment:fpstackfrombottom:<h0#42:<h2#40:<h1#41:<h3#39:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#3
+# asm 2: fxch <h2=%st(2)
+fxch %st(2)
+
+# qhasm: h2 += *(float64 *) &d2
+# asm 1: faddl <d2=stack64#12
+# asm 2: faddl <d2=120(%rsp)
+faddl 120(%rsp)
+# comment:fpstackfrombottom:<h0#42:<h3#39:<h1#41:<h2#40:
+
+# qhasm: h2 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset2
+fsubl crypto_onetimeauth_poly1305_amd64_doffset2(%rip)
+# comment:fpstackfrombottom:<h0#42:<h3#39:<h1#41:<h2#40:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#2
+# asm 2: fxch <h1=%st(1)
+fxch %st(1)
+
+# qhasm: h1 += *(float64 *) &d1
+# asm 1: faddl <d1=stack64#11
+# asm 2: faddl <d1=112(%rsp)
+faddl 112(%rsp)
+# comment:fpstackfrombottom:<h0#42:<h3#39:<h2#40:<h1#41:
+
+# qhasm: h1 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset1
+fsubl crypto_onetimeauth_poly1305_amd64_doffset1(%rip)
+# comment:fpstackfrombottom:<h0#42:<h3#39:<h2#40:<h1#41:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#4
+# asm 2: fxch <h0=%st(3)
+fxch %st(3)
+
+# qhasm: h0 += *(float64 *) &d0
+# asm 1: faddl <d0=stack64#10
+# asm 2: faddl <d0=104(%rsp)
+faddl 104(%rsp)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:
+
+# qhasm: h0 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset0
+fsubl crypto_onetimeauth_poly1305_amd64_doffset0(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:
+
+# qhasm: x0 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha130
+fldl crypto_onetimeauth_poly1305_amd64_alpha130(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:
+
+# qhasm: x0 += h3
+# asm 1: fadd <h3=float80#4,<x0=float80#1
+# asm 2: fadd <h3=%st(3),<x0=%st(0)
+fadd %st(3),%st(0)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:
+
+# qhasm: x0 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha130
+fsubl crypto_onetimeauth_poly1305_amd64_alpha130(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:
+
+# qhasm: h3 -= x0
+# asm 1: fsubr <x0=float80#1,<h3=float80#4
+# asm 2: fsubr <x0=%st(0),<h3=%st(3)
+fsubr %st(0),%st(3)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:
+
+# qhasm: x0 *= *(float64 *) &crypto_onetimeauth_poly1305_amd64_scale
+fmull crypto_onetimeauth_poly1305_amd64_scale(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:
+
+# qhasm: x1 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha32
+fldl crypto_onetimeauth_poly1305_amd64_alpha32(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:
+
+# qhasm: x1 += h0
+# asm 1: fadd <h0=float80#3,<x1=float80#1
+# asm 2: fadd <h0=%st(2),<x1=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:
+
+# qhasm: x1 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha32
+fsubl crypto_onetimeauth_poly1305_amd64_alpha32(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:
+
+# qhasm: h0 -= x1
+# asm 1: fsubr <x1=float80#1,<h0=float80#3
+# asm 2: fsubr <x1=%st(0),<h0=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:
+
+# qhasm: x2 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha64
+fldl crypto_onetimeauth_poly1305_amd64_alpha64(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:<x2#100:
+
+# qhasm: x2 += h1
+# asm 1: fadd <h1=float80#7,<x2=float80#1
+# asm 2: fadd <h1=%st(6),<x2=%st(0)
+fadd %st(6),%st(0)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:<x2#100:
+
+# qhasm: x2 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha64
+fsubl crypto_onetimeauth_poly1305_amd64_alpha64(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:<x2#100:
+
+# qhasm: h1 -= x2
+# asm 1: fsubr <x2=float80#1,<h1=float80#7
+# asm 2: fsubr <x2=%st(0),<h1=%st(6)
+fsubr %st(0),%st(6)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:<x2#100:
+
+# qhasm: x3 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha96
+fldl crypto_onetimeauth_poly1305_amd64_alpha96(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:<x2#100:<x3#101:
+
+# qhasm: x3 += h2
+# asm 1: fadd <h2=float80#6,<x3=float80#1
+# asm 2: fadd <h2=%st(5),<x3=%st(0)
+fadd %st(5),%st(0)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:<x2#100:<x3#101:
+
+# qhasm: x3 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha96
+fsubl crypto_onetimeauth_poly1305_amd64_alpha96(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:<x2#100:<x3#101:
+
+# qhasm: h2 -= x3
+# asm 1: fsubr <x3=float80#1,<h2=float80#6
+# asm 2: fsubr <x3=%st(0),<h2=%st(5)
+fsubr %st(0),%st(5)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:<x2#100:<x3#101:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#5
+# asm 2: fxch <h0=%st(4)
+fxch %st(4)
+
+# qhasm: x0 += h0
+# asm 1: faddp <h0=float80#1,<x0=float80#4
+# asm 2: faddp <h0=%st(0),<x0=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<x3#101:<x0#98:<x1#99:<x2#100:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#7
+# asm 2: fxch <h1=%st(6)
+fxch %st(6)
+
+# qhasm: x1 += h1
+# asm 1: faddp <h1=float80#1,<x1=float80#2
+# asm 2: faddp <h1=%st(0),<x1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x2#100:<h3#39:<h2#40:<x3#101:<x0#98:<x1#99:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#4
+# asm 2: fxch <h2=%st(3)
+fxch %st(3)
+
+# qhasm: x2 += h2
+# asm 1: faddp <h2=float80#1,<x2=float80#6
+# asm 2: faddp <h2=%st(0),<x2=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<x2#100:<h3#39:<x1#99:<x3#101:<x0#98:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#4
+# asm 2: fxch <h3=%st(3)
+fxch %st(3)
+
+# qhasm: x3 += h3
+# asm 1: faddp <h3=float80#1,<x3=float80#2
+# asm 2: faddp <h3=%st(0),<x3=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:
+
+# qhasm: h3 = *(float64 *) &r3
+# asm 1: fldl <r3=stack64#19
+# asm 2: fldl <r3=176(%rsp)
+fldl 176(%rsp)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#39:
+
+# qhasm: h3 *= x0
+# asm 1: fmul <x0=float80#4,<h3=float80#1
+# asm 2: fmul <x0=%st(3),<h3=%st(0)
+fmul %st(3),%st(0)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#39:
+
+# qhasm: h2 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#17
+# asm 2: fldl <r2=160(%rsp)
+fldl 160(%rsp)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#39:<h2#40:
+
+# qhasm: h2 *= x0
+# asm 1: fmul <x0=float80#5,<h2=float80#1
+# asm 2: fmul <x0=%st(4),<h2=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#39:<h2#40:
+
+# qhasm: h1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:
+
+# qhasm: h1 *= x0
+# asm 1: fmul <x0=float80#6,<h1=float80#1
+# asm 2: fmul <x0=%st(5),<h1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:
+
+# qhasm: h0 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: h0 *= x0
+# asm 1: fmulp <x0=float80#1,<h0=float80#7
+# asm 2: fmulp <x0=%st(0),<h0=%st(6)
+fmulp %st(0),%st(6)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:
+
+# qhasm: r2x1 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#17
+# asm 2: fldl <r2=160(%rsp)
+fldl 160(%rsp)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:<r2x1#102:
+
+# qhasm: r2x1 *= x1
+# asm 1: fmul <x1=float80#6,<r2x1=float80#1
+# asm 2: fmul <x1=%st(5),<r2x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:<r2x1#102:
+
+# qhasm: h3 += r2x1
+# asm 1: faddp <r2x1=float80#1,<h3=float80#4
+# asm 2: faddp <r2x1=%st(0),<h3=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:
+
+# qhasm: r1x1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:<r1x1#103:
+
+# qhasm: r1x1 *= x1
+# asm 1: fmul <x1=float80#6,<r1x1=float80#1
+# asm 2: fmul <x1=%st(5),<r1x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:<r1x1#103:
+
+# qhasm: h2 += r1x1
+# asm 1: faddp <r1x1=float80#1,<h2=float80#3
+# asm 2: faddp <r1x1=%st(0),<h2=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:
+
+# qhasm: r0x1 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:<r0x1#104:
+
+# qhasm: r0x1 *= x1
+# asm 1: fmul <x1=float80#6,<r0x1=float80#1
+# asm 2: fmul <x1=%st(5),<r0x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:<r0x1#104:
+
+# qhasm: h1 += r0x1
+# asm 1: faddp <r0x1=float80#1,<h1=float80#2
+# asm 2: faddp <r0x1=%st(0),<h1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:
+
+# qhasm: sr3x1 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:<sr3x1#105:
+
+# qhasm: sr3x1 *= x1
+# asm 1: fmulp <x1=float80#1,<sr3x1=float80#6
+# asm 2: fmulp <x1=%st(0),<sr3x1=%st(5)
+fmulp %st(0),%st(5)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<sr3x1#105:<x3#101:<h3#39:<h2#40:<h1#41:
+
+# qhasm: internal stacktop sr3x1
+# asm 1: fxch <sr3x1=float80#5
+# asm 2: fxch <sr3x1=%st(4)
+fxch %st(4)
+
+# qhasm: h0 += sr3x1
+# asm 1: faddp <sr3x1=float80#1,<h0=float80#6
+# asm 2: faddp <sr3x1=%st(0),<h0=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:
+
+# qhasm: r1x2 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:<r1x2#106:
+
+# qhasm: r1x2 *= x2
+# asm 1: fmul <x2=float80#7,<r1x2=float80#1
+# asm 2: fmul <x2=%st(6),<r1x2=%st(0)
+fmul %st(6),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:<r1x2#106:
+
+# qhasm: h3 += r1x2
+# asm 1: faddp <r1x2=float80#1,<h3=float80#3
+# asm 2: faddp <r1x2=%st(0),<h3=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:
+
+# qhasm: r0x2 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:<r0x2#107:
+
+# qhasm: r0x2 *= x2
+# asm 1: fmul <x2=float80#7,<r0x2=float80#1
+# asm 2: fmul <x2=%st(6),<r0x2=%st(0)
+fmul %st(6),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:<r0x2#107:
+
+# qhasm: h2 += r0x2
+# asm 1: faddp <r0x2=float80#1,<h2=float80#2
+# asm 2: faddp <r0x2=%st(0),<h2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:
+
+# qhasm: sr3x2 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:<sr3x2#108:
+
+# qhasm: sr3x2 *= x2
+# asm 1: fmul <x2=float80#7,<sr3x2=float80#1
+# asm 2: fmul <x2=%st(6),<sr3x2=%st(0)
+fmul %st(6),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:<sr3x2#108:
+
+# qhasm: h1 += sr3x2
+# asm 1: faddp <sr3x2=float80#1,<h1=float80#5
+# asm 2: faddp <sr3x2=%st(0),<h1=%st(4)
+faddp %st(0),%st(4)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:
+
+# qhasm: sr2x2 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#18
+# asm 2: fldl <sr2=168(%rsp)
+fldl 168(%rsp)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:<sr2x2#109:
+
+# qhasm: sr2x2 *= x2
+# asm 1: fmulp <x2=float80#1,<sr2x2=float80#7
+# asm 2: fmulp <x2=%st(0),<sr2x2=%st(6)
+fmulp %st(0),%st(6)
+# comment:fpstackfrombottom:<sr2x2#109:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:
+
+# qhasm: internal stacktop sr2x2
+# asm 1: fxch <sr2x2=float80#6
+# asm 2: fxch <sr2x2=%st(5)
+fxch %st(5)
+
+# qhasm: h0 += sr2x2
+# asm 1: faddp <sr2x2=float80#1,<h0=float80#5
+# asm 2: faddp <sr2x2=%st(0),<h0=%st(4)
+faddp %st(0),%st(4)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:
+
+# qhasm: r0x3 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:<r0x3#110:
+
+# qhasm: r0x3 *= x3
+# asm 1: fmul <x3=float80#3,<r0x3=float80#1
+# asm 2: fmul <x3=%st(2),<r0x3=%st(0)
+fmul %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:<r0x3#110:
+
+# qhasm: h3 += r0x3
+# asm 1: faddp <r0x3=float80#1,<h3=float80#2
+# asm 2: faddp <r0x3=%st(0),<h3=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:
+
+# qhasm: sr3x3 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:<sr3x3#111:
+
+# qhasm: sr3x3 *= x3
+# asm 1: fmul <x3=float80#3,<sr3x3=float80#1
+# asm 2: fmul <x3=%st(2),<sr3x3=%st(0)
+fmul %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:<sr3x3#111:
+
+# qhasm: h2 += sr3x3
+# asm 1: faddp <sr3x3=float80#1,<h2=float80#6
+# asm 2: faddp <sr3x3=%st(0),<h2=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:
+
+# qhasm: sr2x3 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#18
+# asm 2: fldl <sr2=168(%rsp)
+fldl 168(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:<sr2x3#112:
+
+# qhasm: sr2x3 *= x3
+# asm 1: fmul <x3=float80#3,<sr2x3=float80#1
+# asm 2: fmul <x3=%st(2),<sr2x3=%st(0)
+fmul %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:<sr2x3#112:
+
+# qhasm: h1 += sr2x3
+# asm 1: faddp <sr2x3=float80#1,<h1=float80#4
+# asm 2: faddp <sr2x3=%st(0),<h1=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:
+
+# qhasm: sr1x3 = *(float64 *) &sr1
+# asm 1: fldl <sr1=stack64#16
+# asm 2: fldl <sr1=152(%rsp)
+fldl 152(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:<sr1x3#113:
+
+# qhasm: sr1x3 *= x3
+# asm 1: fmulp <x3=float80#1,<sr1x3=float80#3
+# asm 2: fmulp <x3=%st(0),<sr1x3=%st(2)
+fmulp %st(0),%st(2)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<sr1x3#113:<h3#39:
+
+# qhasm: internal stacktop sr1x3
+# asm 1: fxch <sr1x3=float80#2
+# asm 2: fxch <sr1x3=%st(1)
+fxch %st(1)
+
+# qhasm: h0 += sr1x3
+# asm 1: faddp <sr1x3=float80#1,<h0=float80#4
+# asm 2: faddp <sr1x3=%st(0),<h0=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<h3#39:
+# comment:automatically reorganizing fp stack for fallthrough
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#4
+# asm 2: fxch <h2=%st(3)
+fxch %st(3)
+# comment:fpstackfrombottom:<h3#39:<h0#42:<h1#41:<h2#40:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#3
+# asm 2: fxch <h0=%st(2)
+fxch %st(2)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: nomorebytes:
+._nomorebytes:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: x0 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha130
+fldl crypto_onetimeauth_poly1305_amd64_alpha130(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:
+
+# qhasm: x0 += h3
+# asm 1: fadd <h3=float80#5,<x0=float80#1
+# asm 2: fadd <h3=%st(4),<x0=%st(0)
+fadd %st(4),%st(0)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:
+
+# qhasm: x0 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha130
+fsubl crypto_onetimeauth_poly1305_amd64_alpha130(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:
+
+# qhasm: h3 -= x0
+# asm 1: fsubr <x0=float80#1,<h3=float80#5
+# asm 2: fsubr <x0=%st(0),<h3=%st(4)
+fsubr %st(0),%st(4)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:
+
+# qhasm: x0 *= *(float64 *) &crypto_onetimeauth_poly1305_amd64_scale
+fmull crypto_onetimeauth_poly1305_amd64_scale(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:
+
+# qhasm: x1 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha32
+fldl crypto_onetimeauth_poly1305_amd64_alpha32(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:
+
+# qhasm: x1 += h0
+# asm 1: fadd <h0=float80#3,<x1=float80#1
+# asm 2: fadd <h0=%st(2),<x1=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:
+
+# qhasm: x1 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha32
+fsubl crypto_onetimeauth_poly1305_amd64_alpha32(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:
+
+# qhasm: h0 -= x1
+# asm 1: fsubr <x1=float80#1,<h0=float80#3
+# asm 2: fsubr <x1=%st(0),<h0=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:
+
+# qhasm: x2 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha64
+fldl crypto_onetimeauth_poly1305_amd64_alpha64(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:
+
+# qhasm: x2 += h1
+# asm 1: fadd <h1=float80#5,<x2=float80#1
+# asm 2: fadd <h1=%st(4),<x2=%st(0)
+fadd %st(4),%st(0)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:
+
+# qhasm: x2 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha64
+fsubl crypto_onetimeauth_poly1305_amd64_alpha64(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:
+
+# qhasm: h1 -= x2
+# asm 1: fsubr <x2=float80#1,<h1=float80#5
+# asm 2: fsubr <x2=%st(0),<h1=%st(4)
+fsubr %st(0),%st(4)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:
+
+# qhasm: x3 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha96
+fldl crypto_onetimeauth_poly1305_amd64_alpha96(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:<x3#117:
+
+# qhasm: x3 += h2
+# asm 1: fadd <h2=float80#7,<x3=float80#1
+# asm 2: fadd <h2=%st(6),<x3=%st(0)
+fadd %st(6),%st(0)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:<x3#117:
+
+# qhasm: x3 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha96
+fsubl crypto_onetimeauth_poly1305_amd64_alpha96(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:<x3#117:
+
+# qhasm: stacktop h2
+# asm 1: fxch <h2=float80#7
+# asm 2: fxch <h2=%st(6)
+fxch %st(6)
+# comment:fpstackfrombottom:<h3#39:<x3#117:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:<h2#40:
+
+# qhasm: h2 -= x3
+# asm 1: fsub <x3=float80#7,<h2=float80#1
+# asm 2: fsub <x3=%st(6),<h2=%st(0)
+fsub %st(6),%st(0)
+# comment:fpstackfrombottom:<h3#39:<x3#117:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:<h2#40:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#5
+# asm 2: fxch <h0=%st(4)
+fxch %st(4)
+
+# qhasm: x0 += h0
+# asm 1: faddp <h0=float80#1,<x0=float80#4
+# asm 2: faddp <h0=%st(0),<x0=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<h3#39:<x3#117:<h1#41:<h2#40:<x0#114:<x1#115:<x2#116:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#5
+# asm 2: fxch <h1=%st(4)
+fxch %st(4)
+
+# qhasm: x1 += h1
+# asm 1: faddp <h1=float80#1,<x1=float80#2
+# asm 2: faddp <h1=%st(0),<x1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h3#39:<x3#117:<x2#116:<h2#40:<x0#114:<x1#115:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#3
+# asm 2: fxch <h2=%st(2)
+fxch %st(2)
+
+# qhasm: x2 += h2
+# asm 1: faddp <h2=float80#1,<x2=float80#4
+# asm 2: faddp <h2=%st(0),<x2=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<h3#39:<x3#117:<x2#116:<x1#115:<x0#114:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#5
+# asm 2: fxch <h3=%st(4)
+fxch %st(4)
+
+# qhasm: x3 += h3
+# asm 1: faddp <h3=float80#1,<x3=float80#4
+# asm 2: faddp <h3=%st(0),<x3=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x0#114:<x3#117:<x2#116:<x1#115:
+
+# qhasm: internal stacktop x0
+# asm 1: fxch <x0=float80#4
+# asm 2: fxch <x0=%st(3)
+fxch %st(3)
+
+# qhasm: x0 += *(float64 *) &crypto_onetimeauth_poly1305_amd64_hoffset0
+faddl crypto_onetimeauth_poly1305_amd64_hoffset0(%rip)
+# comment:fpstackfrombottom:<x1#115:<x3#117:<x2#116:<x0#114:
+
+# qhasm: internal stacktop x1
+# asm 1: fxch <x1=float80#4
+# asm 2: fxch <x1=%st(3)
+fxch %st(3)
+
+# qhasm: x1 += *(float64 *) &crypto_onetimeauth_poly1305_amd64_hoffset1
+faddl crypto_onetimeauth_poly1305_amd64_hoffset1(%rip)
+# comment:fpstackfrombottom:<x0#114:<x3#117:<x2#116:<x1#115:
+
+# qhasm: internal stacktop x2
+# asm 1: fxch <x2=float80#2
+# asm 2: fxch <x2=%st(1)
+fxch %st(1)
+
+# qhasm: x2 += *(float64 *) &crypto_onetimeauth_poly1305_amd64_hoffset2
+faddl crypto_onetimeauth_poly1305_amd64_hoffset2(%rip)
+# comment:fpstackfrombottom:<x0#114:<x3#117:<x1#115:<x2#116:
+
+# qhasm: internal stacktop x3
+# asm 1: fxch <x3=float80#3
+# asm 2: fxch <x3=%st(2)
+fxch %st(2)
+
+# qhasm: x3 += *(float64 *) &crypto_onetimeauth_poly1305_amd64_hoffset3
+faddl crypto_onetimeauth_poly1305_amd64_hoffset3(%rip)
+# comment:fpstackfrombottom:<x0#114:<x2#116:<x1#115:<x3#117:
+
+# qhasm: internal stacktop x0
+# asm 1: fxch <x0=float80#4
+# asm 2: fxch <x0=%st(3)
+fxch %st(3)
+
+# qhasm: *(float64 *) &d0 = x0
+# asm 1: fstpl >d0=stack64#10
+# asm 2: fstpl >d0=104(%rsp)
+fstpl 104(%rsp)
+# comment:fpstackfrombottom:<x3#117:<x2#116:<x1#115:
+
+# qhasm: *(float64 *) &d1 = x1
+# asm 1: fstpl >d1=stack64#11
+# asm 2: fstpl >d1=112(%rsp)
+fstpl 112(%rsp)
+# comment:fpstackfrombottom:<x3#117:<x2#116:
+
+# qhasm: *(float64 *) &d2 = x2
+# asm 1: fstpl >d2=stack64#12
+# asm 2: fstpl >d2=120(%rsp)
+fstpl 120(%rsp)
+# comment:fpstackfrombottom:<x3#117:
+
+# qhasm: *(float64 *) &d3 = x3
+# asm 1: fstpl >d3=stack64#13
+# asm 2: fstpl >d3=128(%rsp)
+fstpl 128(%rsp)
+# comment:fpstackfrombottom:
+
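+# note: finalization.  The limbs held in d0..d3 are extracted as 32-bit
+# words, combined with a carry chain, reduced modulo 2^130-5 (the f/notf
+# masks pick either h or h+5-2^130), added to the second half of the key
+# at k+16..k+31, and stored as the 16-byte authenticator at out.
+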
+# qhasm: int64 f0
+
+# qhasm: int64 f1
+
+# qhasm: int64 f2
+
+# qhasm: int64 f3
+
+# qhasm: int64 f4
+
+# qhasm: int64 g0
+
+# qhasm: int64 g1
+
+# qhasm: int64 g2
+
+# qhasm: int64 g3
+
+# qhasm: int64 f
+
+# qhasm: int64 notf
+
+# qhasm: stack64 f1_stack
+
+# qhasm: stack64 f2_stack
+
+# qhasm: stack64 f3_stack
+
+# qhasm: stack64 f4_stack
+
+# qhasm: stack64 g0_stack
+
+# qhasm: stack64 g1_stack
+
+# qhasm: stack64 g2_stack
+
+# qhasm: stack64 g3_stack
+
+# qhasm: g0 = top d0
+# asm 1: movl <d0=stack64#10,>g0=int64#1d
+# asm 2: movl <d0=108(%rsp),>g0=%edi
+movl 108(%rsp),%edi
+
+# qhasm: (uint32) g0 &= 63
+# asm 1: and $63,<g0=int64#1d
+# asm 2: and $63,<g0=%edi
+and $63,%edi
+
+# qhasm: g1 = top d1
+# asm 1: movl <d1=stack64#11,>g1=int64#2d
+# asm 2: movl <d1=116(%rsp),>g1=%esi
+movl 116(%rsp),%esi
+
+# qhasm: (uint32) g1 &= 63
+# asm 1: and $63,<g1=int64#2d
+# asm 2: and $63,<g1=%esi
+and $63,%esi
+
+# qhasm: g2 = top d2
+# asm 1: movl <d2=stack64#12,>g2=int64#3d
+# asm 2: movl <d2=124(%rsp),>g2=%edx
+movl 124(%rsp),%edx
+
+# qhasm: (uint32) g2 &= 63
+# asm 1: and $63,<g2=int64#3d
+# asm 2: and $63,<g2=%edx
+and $63,%edx
+
+# qhasm: g3 = top d3
+# asm 1: movl <d3=stack64#13,>g3=int64#4d
+# asm 2: movl <d3=132(%rsp),>g3=%ecx
+movl 132(%rsp),%ecx
+
+# qhasm: (uint32) g3 &= 63
+# asm 1: and $63,<g3=int64#4d
+# asm 2: and $63,<g3=%ecx
+and $63,%ecx
+
+# qhasm: f1 = bottom d1
+# asm 1: movl <d1=stack64#11,>f1=int64#5d
+# asm 2: movl <d1=112(%rsp),>f1=%r8d
+movl 112(%rsp),%r8d
+
+# qhasm: carry? (uint32) f1 += g0
+# asm 1: add <g0=int64#1d,<f1=int64#5d
+# asm 2: add <g0=%edi,<f1=%r8d
+add %edi,%r8d
+
+# qhasm: f1_stack = f1
+# asm 1: movq <f1=int64#5,>f1_stack=stack64#11
+# asm 2: movq <f1=%r8,>f1_stack=112(%rsp)
+movq %r8,112(%rsp)
+
+# qhasm: f2 = bottom d2
+# asm 1: movl <d2=stack64#12,>f2=int64#1d
+# asm 2: movl <d2=120(%rsp),>f2=%edi
+movl 120(%rsp),%edi
+
+# qhasm: carry? (uint32) f2 += g1 + carry
+# asm 1: adc <g1=int64#2d,<f2=int64#1d
+# asm 2: adc <g1=%esi,<f2=%edi
+adc %esi,%edi
+
+# qhasm: f2_stack = f2
+# asm 1: movq <f2=int64#1,>f2_stack=stack64#12
+# asm 2: movq <f2=%rdi,>f2_stack=120(%rsp)
+movq %rdi,120(%rsp)
+
+# qhasm: f3 = bottom d3
+# asm 1: movl <d3=stack64#13,>f3=int64#1d
+# asm 2: movl <d3=128(%rsp),>f3=%edi
+movl 128(%rsp),%edi
+
+# qhasm: carry? (uint32) f3 += g2 + carry
+# asm 1: adc <g2=int64#3d,<f3=int64#1d
+# asm 2: adc <g2=%edx,<f3=%edi
+adc %edx,%edi
+
+# qhasm: f3_stack = f3
+# asm 1: movq <f3=int64#1,>f3_stack=stack64#13
+# asm 2: movq <f3=%rdi,>f3_stack=128(%rsp)
+movq %rdi,128(%rsp)
+
+# qhasm: f4 = 0
+# asm 1: mov $0,>f4=int64#1
+# asm 2: mov $0,>f4=%rdi
+mov $0,%rdi
+
+# qhasm: carry? (uint32) f4 += g3 + carry
+# asm 1: adc <g3=int64#4d,<f4=int64#1d
+# asm 2: adc <g3=%ecx,<f4=%edi
+adc %ecx,%edi
+
+# qhasm: f4_stack = f4
+# asm 1: movq <f4=int64#1,>f4_stack=stack64#14
+# asm 2: movq <f4=%rdi,>f4_stack=136(%rsp)
+movq %rdi,136(%rsp)
+
+# qhasm: g0 = 5
+# asm 1: mov $5,>g0=int64#1
+# asm 2: mov $5,>g0=%rdi
+mov $5,%rdi
+
+# qhasm: f0 = bottom d0
+# asm 1: movl <d0=stack64#10,>f0=int64#2d
+# asm 2: movl <d0=104(%rsp),>f0=%esi
+movl 104(%rsp),%esi
+
+# qhasm: carry? (uint32) g0 += f0
+# asm 1: add <f0=int64#2d,<g0=int64#1d
+# asm 2: add <f0=%esi,<g0=%edi
+add %esi,%edi
+
+# qhasm: g0_stack = g0
+# asm 1: movq <g0=int64#1,>g0_stack=stack64#10
+# asm 2: movq <g0=%rdi,>g0_stack=104(%rsp)
+movq %rdi,104(%rsp)
+
+# qhasm: g1 = 0
+# asm 1: mov $0,>g1=int64#1
+# asm 2: mov $0,>g1=%rdi
+mov $0,%rdi
+
+# qhasm: f1 = f1_stack
+# asm 1: movq <f1_stack=stack64#11,>f1=int64#3
+# asm 2: movq <f1_stack=112(%rsp),>f1=%rdx
+movq 112(%rsp),%rdx
+
+# qhasm: carry? (uint32) g1 += f1 + carry
+# asm 1: adc <f1=int64#3d,<g1=int64#1d
+# asm 2: adc <f1=%edx,<g1=%edi
+adc %edx,%edi
+
+# qhasm: g1_stack = g1
+# asm 1: movq <g1=int64#1,>g1_stack=stack64#11
+# asm 2: movq <g1=%rdi,>g1_stack=112(%rsp)
+movq %rdi,112(%rsp)
+
+# qhasm: g2 = 0
+# asm 1: mov $0,>g2=int64#1
+# asm 2: mov $0,>g2=%rdi
+mov $0,%rdi
+
+# qhasm: f2 = f2_stack
+# asm 1: movq <f2_stack=stack64#12,>f2=int64#4
+# asm 2: movq <f2_stack=120(%rsp),>f2=%rcx
+movq 120(%rsp),%rcx
+
+# qhasm: carry? (uint32) g2 += f2 + carry
+# asm 1: adc <f2=int64#4d,<g2=int64#1d
+# asm 2: adc <f2=%ecx,<g2=%edi
+adc %ecx,%edi
+
+# qhasm: g2_stack = g2
+# asm 1: movq <g2=int64#1,>g2_stack=stack64#12
+# asm 2: movq <g2=%rdi,>g2_stack=120(%rsp)
+movq %rdi,120(%rsp)
+
+# qhasm: g3 = 0
+# asm 1: mov $0,>g3=int64#1
+# asm 2: mov $0,>g3=%rdi
+mov $0,%rdi
+
+# qhasm: f3 = f3_stack
+# asm 1: movq <f3_stack=stack64#13,>f3=int64#5
+# asm 2: movq <f3_stack=128(%rsp),>f3=%r8
+movq 128(%rsp),%r8
+
+# qhasm: carry? (uint32) g3 += f3 + carry
+# asm 1: adc <f3=int64#5d,<g3=int64#1d
+# asm 2: adc <f3=%r8d,<g3=%edi
+adc %r8d,%edi
+
+# qhasm: g3_stack = g3
+# asm 1: movq <g3=int64#1,>g3_stack=stack64#13
+# asm 2: movq <g3=%rdi,>g3_stack=128(%rsp)
+movq %rdi,128(%rsp)
+
+# qhasm: f = 0xfffffffc
+# asm 1: mov $0xfffffffc,>f=int64#1
+# asm 2: mov $0xfffffffc,>f=%rdi
+mov $0xfffffffc,%rdi
+
+# qhasm: f4 = f4_stack
+# asm 1: movq <f4_stack=stack64#14,>f4=int64#6
+# asm 2: movq <f4_stack=136(%rsp),>f4=%r9
+movq 136(%rsp),%r9
+
+# qhasm: carry? (uint32) f += f4 + carry
+# asm 1: adc <f4=int64#6d,<f=int64#1d
+# asm 2: adc <f4=%r9d,<f=%edi
+adc %r9d,%edi
+
+# qhasm: (int32) f >>= 16
+# asm 1: sar $16,<f=int64#1d
+# asm 2: sar $16,<f=%edi
+sar $16,%edi
+
+# qhasm: notf = f
+# asm 1: mov <f=int64#1,>notf=int64#6
+# asm 2: mov <f=%rdi,>notf=%r9
+mov %rdi,%r9
+
+# qhasm: (uint32) notf ^= 0xffffffff
+# asm 1: xor $0xffffffff,<notf=int64#6d
+# asm 2: xor $0xffffffff,<notf=%r9d
+xor $0xffffffff,%r9d
+
+# qhasm: f0 &= f
+# asm 1: and <f=int64#1,<f0=int64#2
+# asm 2: and <f=%rdi,<f0=%rsi
+and %rdi,%rsi
+
+# qhasm: g0 = g0_stack
+# asm 1: movq <g0_stack=stack64#10,>g0=int64#7
+# asm 2: movq <g0_stack=104(%rsp),>g0=%rax
+movq 104(%rsp),%rax
+
+# qhasm: g0 &= notf
+# asm 1: and <notf=int64#6,<g0=int64#7
+# asm 2: and <notf=%r9,<g0=%rax
+and %r9,%rax
+
+# qhasm: f0 |= g0
+# asm 1: or <g0=int64#7,<f0=int64#2
+# asm 2: or <g0=%rax,<f0=%rsi
+or %rax,%rsi
+
+# qhasm: f1 &= f
+# asm 1: and <f=int64#1,<f1=int64#3
+# asm 2: and <f=%rdi,<f1=%rdx
+and %rdi,%rdx
+
+# qhasm: g1 = g1_stack
+# asm 1: movq <g1_stack=stack64#11,>g1=int64#7
+# asm 2: movq <g1_stack=112(%rsp),>g1=%rax
+movq 112(%rsp),%rax
+
+# qhasm: g1 &= notf
+# asm 1: and <notf=int64#6,<g1=int64#7
+# asm 2: and <notf=%r9,<g1=%rax
+and %r9,%rax
+
+# qhasm: f1 |= g1
+# asm 1: or <g1=int64#7,<f1=int64#3
+# asm 2: or <g1=%rax,<f1=%rdx
+or %rax,%rdx
+
+# qhasm: f2 &= f
+# asm 1: and <f=int64#1,<f2=int64#4
+# asm 2: and <f=%rdi,<f2=%rcx
+and %rdi,%rcx
+
+# qhasm: g2 = g2_stack
+# asm 1: movq <g2_stack=stack64#12,>g2=int64#7
+# asm 2: movq <g2_stack=120(%rsp),>g2=%rax
+movq 120(%rsp),%rax
+
+# qhasm: g2 &= notf
+# asm 1: and <notf=int64#6,<g2=int64#7
+# asm 2: and <notf=%r9,<g2=%rax
+and %r9,%rax
+
+# qhasm: f2 |= g2
+# asm 1: or <g2=int64#7,<f2=int64#4
+# asm 2: or <g2=%rax,<f2=%rcx
+or %rax,%rcx
+
+# qhasm: f3 &= f
+# asm 1: and <f=int64#1,<f3=int64#5
+# asm 2: and <f=%rdi,<f3=%r8
+and %rdi,%r8
+
+# qhasm: g3 = g3_stack
+# asm 1: movq <g3_stack=stack64#13,>g3=int64#1
+# asm 2: movq <g3_stack=128(%rsp),>g3=%rdi
+movq 128(%rsp),%rdi
+
+# qhasm: g3 &= notf
+# asm 1: and <notf=int64#6,<g3=int64#1
+# asm 2: and <notf=%r9,<g3=%rdi
+and %r9,%rdi
+
+# qhasm: f3 |= g3
+# asm 1: or <g3=int64#1,<f3=int64#5
+# asm 2: or <g3=%rdi,<f3=%r8
+or %rdi,%r8
+
+# qhasm: out = out_stack
+# asm 1: movq <out_stack=stack64#8,>out=int64#1
+# asm 2: movq <out_stack=88(%rsp),>out=%rdi
+movq 88(%rsp),%rdi
+
+# qhasm: k = k_stack
+# asm 1: movq <k_stack=stack64#9,>k=int64#6
+# asm 2: movq <k_stack=96(%rsp),>k=%r9
+movq 96(%rsp),%r9
+
+# qhasm: carry? (uint32) f0 += *(uint32 *) (k + 16)
+# asm 1: addl 16(<k=int64#6),<f0=int64#2d
+# asm 2: addl 16(<k=%r9),<f0=%esi
+addl 16(%r9),%esi
+
+# qhasm: carry? (uint32) f1 += *(uint32 *) (k + 20) + carry
+# asm 1: adcl 20(<k=int64#6),<f1=int64#3d
+# asm 2: adcl 20(<k=%r9),<f1=%edx
+adcl 20(%r9),%edx
+
+# qhasm: carry? (uint32) f2 += *(uint32 *) (k + 24) + carry
+# asm 1: adcl 24(<k=int64#6),<f2=int64#4d
+# asm 2: adcl 24(<k=%r9),<f2=%ecx
+adcl 24(%r9),%ecx
+
+# qhasm: carry? (uint32) f3 += *(uint32 *) (k + 28) + carry
+# asm 1: adcl 28(<k=int64#6),<f3=int64#5d
+# asm 2: adcl 28(<k=%r9),<f3=%r8d
+adcl 28(%r9),%r8d
+
+# qhasm: *(uint32 *) (out + 0) = f0
+# asm 1: movl <f0=int64#2d,0(<out=int64#1)
+# asm 2: movl <f0=%esi,0(<out=%rdi)
+movl %esi,0(%rdi)
+
+# qhasm: *(uint32 *) (out + 4) = f1
+# asm 1: movl <f1=int64#3d,4(<out=int64#1)
+# asm 2: movl <f1=%edx,4(<out=%rdi)
+movl %edx,4(%rdi)
+
+# qhasm: *(uint32 *) (out + 8) = f2
+# asm 1: movl <f2=int64#4d,8(<out=int64#1)
+# asm 2: movl <f2=%ecx,8(<out=%rdi)
+movl %ecx,8(%rdi)
+
+# qhasm: *(uint32 *) (out + 12) = f3
+# asm 1: movl <f3=int64#5d,12(<out=int64#1)
+# asm 2: movl <f3=%r8d,12(<out=%rdi)
+movl %r8d,12(%rdi)
+
+# qhasm: r11_caller = r11_stack
+# asm 1: movq <r11_stack=stack64#1,>r11_caller=int64#9
+# asm 2: movq <r11_stack=32(%rsp),>r11_caller=%r11
+movq 32(%rsp),%r11
+
+# qhasm: r12_caller = r12_stack
+# asm 1: movq <r12_stack=stack64#2,>r12_caller=int64#10
+# asm 2: movq <r12_stack=40(%rsp),>r12_caller=%r12
+movq 40(%rsp),%r12
+
+# qhasm: r13_caller = r13_stack
+# asm 1: movq <r13_stack=stack64#3,>r13_caller=int64#11
+# asm 2: movq <r13_stack=48(%rsp),>r13_caller=%r13
+movq 48(%rsp),%r13
+
+# qhasm: r14_caller = r14_stack
+# asm 1: movq <r14_stack=stack64#4,>r14_caller=int64#12
+# asm 2: movq <r14_stack=56(%rsp),>r14_caller=%r14
+movq 56(%rsp),%r14
+
+# qhasm: r15_caller = r15_stack
+# asm 1: movq <r15_stack=stack64#5,>r15_caller=int64#13
+# asm 2: movq <r15_stack=64(%rsp),>r15_caller=%r15
+movq 64(%rsp),%r15
+
+# qhasm: rbx_caller = rbx_stack
+# asm 1: movq <rbx_stack=stack64#6,>rbx_caller=int64#14
+# asm 2: movq <rbx_stack=72(%rsp),>rbx_caller=%rbx
+movq 72(%rsp),%rbx
+
+# qhasm: rbp_caller = rbp_stack
+# asm 1: movq <rbp_stack=stack64#7,>rbp_caller=int64#15
+# asm 2: movq <rbp_stack=80(%rsp),>rbp_caller=%rbp
+movq 80(%rsp),%rbp
+
+# qhasm: leave
+add %r11,%rsp
+xor %rax,%rax
+xor %rdx,%rdx
+ret
+
+# constants.s
+# version 20080913
+# D. J. Bernstein
+# Public domain.
+
+.data
+.section .rodata
+.p2align 5
+
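+# note: each constant below is an IEEE-754 double stored as a (low,high)
+# pair of 32-bit words and is accessed with fldl/faddl/fsubl/fmull above;
+# for example doffset0 is 2^52, two32 is 2^32 and two64 is 2^64.
+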
+.globl _crypto_onetimeauth_poly1305_amd64_constants
+.globl crypto_onetimeauth_poly1305_amd64_constants
+.globl crypto_onetimeauth_poly1305_amd64_scale
+.globl crypto_onetimeauth_poly1305_amd64_two32
+.globl crypto_onetimeauth_poly1305_amd64_two64
+.globl crypto_onetimeauth_poly1305_amd64_two96
+.globl crypto_onetimeauth_poly1305_amd64_alpha32
+.globl crypto_onetimeauth_poly1305_amd64_alpha64
+.globl crypto_onetimeauth_poly1305_amd64_alpha96
+.globl crypto_onetimeauth_poly1305_amd64_alpha130
+.globl crypto_onetimeauth_poly1305_amd64_doffset0
+.globl crypto_onetimeauth_poly1305_amd64_doffset1
+.globl crypto_onetimeauth_poly1305_amd64_doffset2
+.globl crypto_onetimeauth_poly1305_amd64_doffset3
+.globl crypto_onetimeauth_poly1305_amd64_doffset3minustwo128
+.globl crypto_onetimeauth_poly1305_amd64_hoffset0
+.globl crypto_onetimeauth_poly1305_amd64_hoffset1
+.globl crypto_onetimeauth_poly1305_amd64_hoffset2
+.globl crypto_onetimeauth_poly1305_amd64_hoffset3
+.globl crypto_onetimeauth_poly1305_amd64_rounding
+
+_crypto_onetimeauth_poly1305_amd64_constants:
+crypto_onetimeauth_poly1305_amd64_constants:
+crypto_onetimeauth_poly1305_amd64_scale:
+.long 0x0,0x37f40000
+
+crypto_onetimeauth_poly1305_amd64_two32:
+.long 0x0,0x41f00000
+
+crypto_onetimeauth_poly1305_amd64_two64:
+.long 0x0,0x43f00000
+
+crypto_onetimeauth_poly1305_amd64_two96:
+.long 0x0,0x45f00000
+
+crypto_onetimeauth_poly1305_amd64_alpha32:
+.long 0x0,0x45e80000
+
+crypto_onetimeauth_poly1305_amd64_alpha64:
+.long 0x0,0x47e80000
+
+crypto_onetimeauth_poly1305_amd64_alpha96:
+.long 0x0,0x49e80000
+
+crypto_onetimeauth_poly1305_amd64_alpha130:
+.long 0x0,0x4c080000
+
+crypto_onetimeauth_poly1305_amd64_doffset0:
+.long 0x0,0x43300000
+
+crypto_onetimeauth_poly1305_amd64_doffset1:
+.long 0x0,0x45300000
+
+crypto_onetimeauth_poly1305_amd64_doffset2:
+.long 0x0,0x47300000
+
+crypto_onetimeauth_poly1305_amd64_doffset3:
+.long 0x0,0x49300000
+
+crypto_onetimeauth_poly1305_amd64_doffset3minustwo128:
+.long 0x0,0x492ffffe
+
+crypto_onetimeauth_poly1305_amd64_hoffset0:
+.long 0xfffffffb,0x43300001
+
+crypto_onetimeauth_poly1305_amd64_hoffset1:
+.long 0xfffffffe,0x45300001
+
+crypto_onetimeauth_poly1305_amd64_hoffset2:
+.long 0xfffffffe,0x47300001
+
+crypto_onetimeauth_poly1305_amd64_hoffset3:
+.long 0xfffffffe,0x49300003
+
+crypto_onetimeauth_poly1305_amd64_rounding:
+.byte 0x7f
+.byte 0x13
diff --git a/sdar/lib/nacl/amd64/salsa20_stream.s b/sdar/lib/nacl/amd64/salsa20_stream.s
new file mode 100644
index 0000000..ff131fd
--- /dev/null
+++ b/sdar/lib/nacl/amd64/salsa20_stream.s
@@ -0,0 +1,4823 @@
+
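+# note: qhasm-generated amd64 SSE2 implementation of the Salsa20 stream
+# cipher.  It exports crypto_stream_salsa20 (keystream only) and
+# crypto_stream_salsa20_xor (keystream XORed into a message); the main
+# loop computes four 64-byte blocks at a time in xmm registers.
+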
+# qhasm: int64 r11_caller
+
+# qhasm: int64 r12_caller
+
+# qhasm: int64 r13_caller
+
+# qhasm: int64 r14_caller
+
+# qhasm: int64 r15_caller
+
+# qhasm: int64 rbx_caller
+
+# qhasm: int64 rbp_caller
+
+# qhasm: caller r11_caller
+
+# qhasm: caller r12_caller
+
+# qhasm: caller r13_caller
+
+# qhasm: caller r14_caller
+
+# qhasm: caller r15_caller
+
+# qhasm: caller rbx_caller
+
+# qhasm: caller rbp_caller
+
+# qhasm: stack64 r11_stack
+
+# qhasm: stack64 r12_stack
+
+# qhasm: stack64 r13_stack
+
+# qhasm: stack64 r14_stack
+
+# qhasm: stack64 r15_stack
+
+# qhasm: stack64 rbx_stack
+
+# qhasm: stack64 rbp_stack
+
+# qhasm: int64 a
+
+# qhasm: int64 arg1
+
+# qhasm: int64 arg2
+
+# qhasm: int64 arg3
+
+# qhasm: int64 arg4
+
+# qhasm: int64 arg5
+
+# qhasm: input arg1
+
+# qhasm: input arg2
+
+# qhasm: input arg3
+
+# qhasm: input arg4
+
+# qhasm: input arg5
+
+# qhasm: int64 k
+
+# qhasm: int64 kbits
+
+# qhasm: int64 iv
+
+# qhasm: int64 i
+
+# qhasm: stack128 x0
+
+# qhasm: stack128 x1
+
+# qhasm: stack128 x2
+
+# qhasm: stack128 x3
+
+# qhasm: int64 m
+
+# qhasm: int64 out
+
+# qhasm: int64 bytes
+
+# qhasm: stack32 eax_stack
+
+# qhasm: stack32 ebx_stack
+
+# qhasm: stack32 esi_stack
+
+# qhasm: stack32 edi_stack
+
+# qhasm: stack32 ebp_stack
+
+# qhasm: int6464 diag0
+
+# qhasm: int6464 diag1
+
+# qhasm: int6464 diag2
+
+# qhasm: int6464 diag3
+
+# qhasm: int6464 a0
+
+# qhasm: int6464 a1
+
+# qhasm: int6464 a2
+
+# qhasm: int6464 a3
+
+# qhasm: int6464 a4
+
+# qhasm: int6464 a5
+
+# qhasm: int6464 a6
+
+# qhasm: int6464 a7
+
+# qhasm: int6464 b0
+
+# qhasm: int6464 b1
+
+# qhasm: int6464 b2
+
+# qhasm: int6464 b3
+
+# qhasm: int6464 b4
+
+# qhasm: int6464 b5
+
+# qhasm: int6464 b6
+
+# qhasm: int6464 b7
+
+# qhasm: int6464 z0
+
+# qhasm: int6464 z1
+
+# qhasm: int6464 z2
+
+# qhasm: int6464 z3
+
+# qhasm: int6464 z4
+
+# qhasm: int6464 z5
+
+# qhasm: int6464 z6
+
+# qhasm: int6464 z7
+
+# qhasm: int6464 z8
+
+# qhasm: int6464 z9
+
+# qhasm: int6464 z10
+
+# qhasm: int6464 z11
+
+# qhasm: int6464 z12
+
+# qhasm: int6464 z13
+
+# qhasm: int6464 z14
+
+# qhasm: int6464 z15
+
+# qhasm: stack128 z0_stack
+
+# qhasm: stack128 z1_stack
+
+# qhasm: stack128 z2_stack
+
+# qhasm: stack128 z3_stack
+
+# qhasm: stack128 z4_stack
+
+# qhasm: stack128 z5_stack
+
+# qhasm: stack128 z6_stack
+
+# qhasm: stack128 z7_stack
+
+# qhasm: stack128 z8_stack
+
+# qhasm: stack128 z9_stack
+
+# qhasm: stack128 z10_stack
+
+# qhasm: stack128 z11_stack
+
+# qhasm: stack128 z12_stack
+
+# qhasm: stack128 z13_stack
+
+# qhasm: stack128 z14_stack
+
+# qhasm: stack128 z15_stack
+
+# qhasm: int6464 y0
+
+# qhasm: int6464 y1
+
+# qhasm: int6464 y2
+
+# qhasm: int6464 y3
+
+# qhasm: int6464 y4
+
+# qhasm: int6464 y5
+
+# qhasm: int6464 y6
+
+# qhasm: int6464 y7
+
+# qhasm: int6464 y8
+
+# qhasm: int6464 y9
+
+# qhasm: int6464 y10
+
+# qhasm: int6464 y11
+
+# qhasm: int6464 y12
+
+# qhasm: int6464 y13
+
+# qhasm: int6464 y14
+
+# qhasm: int6464 y15
+
+# qhasm: int6464 r0
+
+# qhasm: int6464 r1
+
+# qhasm: int6464 r2
+
+# qhasm: int6464 r3
+
+# qhasm: int6464 r4
+
+# qhasm: int6464 r5
+
+# qhasm: int6464 r6
+
+# qhasm: int6464 r7
+
+# qhasm: int6464 r8
+
+# qhasm: int6464 r9
+
+# qhasm: int6464 r10
+
+# qhasm: int6464 r11
+
+# qhasm: int6464 r12
+
+# qhasm: int6464 r13
+
+# qhasm: int6464 r14
+
+# qhasm: int6464 r15
+
+# qhasm: stack128 orig0
+
+# qhasm: stack128 orig1
+
+# qhasm: stack128 orig2
+
+# qhasm: stack128 orig3
+
+# qhasm: stack128 orig4
+
+# qhasm: stack128 orig5
+
+# qhasm: stack128 orig6
+
+# qhasm: stack128 orig7
+
+# qhasm: stack128 orig8
+
+# qhasm: stack128 orig9
+
+# qhasm: stack128 orig10
+
+# qhasm: stack128 orig11
+
+# qhasm: stack128 orig12
+
+# qhasm: stack128 orig13
+
+# qhasm: stack128 orig14
+
+# qhasm: stack128 orig15
+
+# qhasm: int64 in0
+
+# qhasm: int64 in1
+
+# qhasm: int64 in2
+
+# qhasm: int64 in3
+
+# qhasm: int64 in4
+
+# qhasm: int64 in5
+
+# qhasm: int64 in6
+
+# qhasm: int64 in7
+
+# qhasm: int64 in8
+
+# qhasm: int64 in9
+
+# qhasm: int64 in10
+
+# qhasm: int64 in11
+
+# qhasm: int64 in12
+
+# qhasm: int64 in13
+
+# qhasm: int64 in14
+
+# qhasm: int64 in15
+
+# qhasm: stack512 tmp
+
+# qhasm: int64 ctarget
+
+# qhasm: stack64 bytes_backup
+
+# qhasm: enter crypto_stream_salsa20_amd64_xmm6
+.text
+.p2align 5
+.globl _crypto_stream_salsa20
+.globl crypto_stream_salsa20
+_crypto_stream_salsa20:
+crypto_stream_salsa20:
+mov %rsp,%r11
+and $31,%r11
+add $480,%r11
+sub %r11,%rsp
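+# note: the prologue above rounds %rsp down to a 32-byte boundary and
+# reserves 480 bytes of scratch space; the adjustment stays in %r11 and is
+# saved to r11_stack just below so the epilogue can restore the caller's
+# stack pointer.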
+
+# qhasm: r11_stack = r11_caller
+# asm 1: movq <r11_caller=int64#9,>r11_stack=stack64#1
+# asm 2: movq <r11_caller=%r11,>r11_stack=352(%rsp)
+movq %r11,352(%rsp)
+
+# qhasm: r12_stack = r12_caller
+# asm 1: movq <r12_caller=int64#10,>r12_stack=stack64#2
+# asm 2: movq <r12_caller=%r12,>r12_stack=360(%rsp)
+movq %r12,360(%rsp)
+
+# qhasm: r13_stack = r13_caller
+# asm 1: movq <r13_caller=int64#11,>r13_stack=stack64#3
+# asm 2: movq <r13_caller=%r13,>r13_stack=368(%rsp)
+movq %r13,368(%rsp)
+
+# qhasm: r14_stack = r14_caller
+# asm 1: movq <r14_caller=int64#12,>r14_stack=stack64#4
+# asm 2: movq <r14_caller=%r14,>r14_stack=376(%rsp)
+movq %r14,376(%rsp)
+
+# qhasm: r15_stack = r15_caller
+# asm 1: movq <r15_caller=int64#13,>r15_stack=stack64#5
+# asm 2: movq <r15_caller=%r15,>r15_stack=384(%rsp)
+movq %r15,384(%rsp)
+
+# qhasm: rbx_stack = rbx_caller
+# asm 1: movq <rbx_caller=int64#14,>rbx_stack=stack64#6
+# asm 2: movq <rbx_caller=%rbx,>rbx_stack=392(%rsp)
+movq %rbx,392(%rsp)
+
+# qhasm: rbp_stack = rbp_caller
+# asm 1: movq <rbp_caller=int64#15,>rbp_stack=stack64#7
+# asm 2: movq <rbp_caller=%rbp,>rbp_stack=400(%rsp)
+movq %rbp,400(%rsp)
+
+# qhasm: bytes = arg2
+# asm 1: mov <arg2=int64#2,>bytes=int64#6
+# asm 2: mov <arg2=%rsi,>bytes=%r9
+mov %rsi,%r9
+
+# qhasm: out = arg1
+# asm 1: mov <arg1=int64#1,>out=int64#1
+# asm 2: mov <arg1=%rdi,>out=%rdi
+mov %rdi,%rdi
+
+# qhasm: m = out
+# asm 1: mov <out=int64#1,>m=int64#2
+# asm 2: mov <out=%rdi,>m=%rsi
+mov %rdi,%rsi
+
+# qhasm: iv = arg3
+# asm 1: mov <arg3=int64#3,>iv=int64#3
+# asm 2: mov <arg3=%rdx,>iv=%rdx
+mov %rdx,%rdx
+
+# qhasm: k = arg4
+# asm 1: mov <arg4=int64#4,>k=int64#8
+# asm 2: mov <arg4=%rcx,>k=%r10
+mov %rcx,%r10
+
+# qhasm: unsigned>? bytes - 0
+# asm 1: cmp $0,<bytes=int64#6
+# asm 2: cmp $0,<bytes=%r9
+cmp $0,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm: goto done if !unsigned>
+jbe ._done
+
+# qhasm: a = 0
+# asm 1: mov $0,>a=int64#7
+# asm 2: mov $0,>a=%rax
+mov $0,%rax
+
+# qhasm: i = bytes
+# asm 1: mov <bytes=int64#6,>i=int64#4
+# asm 2: mov <bytes=%r9,>i=%rcx
+mov %r9,%rcx
+
+# qhasm: while (i) { *out++ = a; --i }
+rep stosb
+
+# qhasm: out -= bytes
+# asm 1: sub <bytes=int64#6,<out=int64#1
+# asm 2: sub <bytes=%r9,<out=%rdi
+sub %r9,%rdi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto start
+jmp ._start
+
+# qhasm: enter crypto_stream_salsa20_amd64_xmm6_xor
+.text
+.p2align 5
+.globl _crypto_stream_salsa20_xor
+.globl crypto_stream_salsa20_xor
+_crypto_stream_salsa20_xor:
+crypto_stream_salsa20_xor:
+mov %rsp,%r11
+and $31,%r11
+add $480,%r11
+sub %r11,%rsp
+
+# qhasm: r11_stack = r11_caller
+# asm 1: movq <r11_caller=int64#9,>r11_stack=stack64#1
+# asm 2: movq <r11_caller=%r11,>r11_stack=352(%rsp)
+movq %r11,352(%rsp)
+
+# qhasm: r12_stack = r12_caller
+# asm 1: movq <r12_caller=int64#10,>r12_stack=stack64#2
+# asm 2: movq <r12_caller=%r12,>r12_stack=360(%rsp)
+movq %r12,360(%rsp)
+
+# qhasm: r13_stack = r13_caller
+# asm 1: movq <r13_caller=int64#11,>r13_stack=stack64#3
+# asm 2: movq <r13_caller=%r13,>r13_stack=368(%rsp)
+movq %r13,368(%rsp)
+
+# qhasm: r14_stack = r14_caller
+# asm 1: movq <r14_caller=int64#12,>r14_stack=stack64#4
+# asm 2: movq <r14_caller=%r14,>r14_stack=376(%rsp)
+movq %r14,376(%rsp)
+
+# qhasm: r15_stack = r15_caller
+# asm 1: movq <r15_caller=int64#13,>r15_stack=stack64#5
+# asm 2: movq <r15_caller=%r15,>r15_stack=384(%rsp)
+movq %r15,384(%rsp)
+
+# qhasm: rbx_stack = rbx_caller
+# asm 1: movq <rbx_caller=int64#14,>rbx_stack=stack64#6
+# asm 2: movq <rbx_caller=%rbx,>rbx_stack=392(%rsp)
+movq %rbx,392(%rsp)
+
+# qhasm: rbp_stack = rbp_caller
+# asm 1: movq <rbp_caller=int64#15,>rbp_stack=stack64#7
+# asm 2: movq <rbp_caller=%rbp,>rbp_stack=400(%rsp)
+movq %rbp,400(%rsp)
+
+# qhasm: out = arg1
+# asm 1: mov <arg1=int64#1,>out=int64#1
+# asm 2: mov <arg1=%rdi,>out=%rdi
+mov %rdi,%rdi
+
+# qhasm: m = arg2
+# asm 1: mov <arg2=int64#2,>m=int64#2
+# asm 2: mov <arg2=%rsi,>m=%rsi
+mov %rsi,%rsi
+
+# qhasm: bytes = arg3
+# asm 1: mov <arg3=int64#3,>bytes=int64#6
+# asm 2: mov <arg3=%rdx,>bytes=%r9
+mov %rdx,%r9
+
+# qhasm: iv = arg4
+# asm 1: mov <arg4=int64#4,>iv=int64#3
+# asm 2: mov <arg4=%rcx,>iv=%rdx
+mov %rcx,%rdx
+
+# qhasm: k = arg5
+# asm 1: mov <arg5=int64#5,>k=int64#8
+# asm 2: mov <arg5=%r8,>k=%r10
+mov %r8,%r10
+
+# qhasm: unsigned>? bytes - 0
+# asm 1: cmp $0,<bytes=int64#6
+# asm 2: cmp $0,<bytes=%r9
+cmp $0,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm: goto done if !unsigned>
+jbe ._done
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: start:
+._start:
+
+# qhasm: in12 = *(uint32 *) (k + 20)
+# asm 1: movl 20(<k=int64#8),>in12=int64#4d
+# asm 2: movl 20(<k=%r10),>in12=%ecx
+movl 20(%r10),%ecx
+
+# qhasm: in1 = *(uint32 *) (k + 0)
+# asm 1: movl 0(<k=int64#8),>in1=int64#5d
+# asm 2: movl 0(<k=%r10),>in1=%r8d
+movl 0(%r10),%r8d
+
+# qhasm: in6 = *(uint32 *) (iv + 0)
+# asm 1: movl 0(<iv=int64#3),>in6=int64#7d
+# asm 2: movl 0(<iv=%rdx),>in6=%eax
+movl 0(%rdx),%eax
+
+# qhasm: in11 = *(uint32 *) (k + 16)
+# asm 1: movl 16(<k=int64#8),>in11=int64#9d
+# asm 2: movl 16(<k=%r10),>in11=%r11d
+movl 16(%r10),%r11d
+
+# qhasm: ((uint32 *)&x1)[0] = in12
+# asm 1: movl <in12=int64#4d,>x1=stack128#1
+# asm 2: movl <in12=%ecx,>x1=0(%rsp)
+movl %ecx,0(%rsp)
+
+# qhasm: ((uint32 *)&x1)[1] = in1
+# asm 1: movl <in1=int64#5d,4+<x1=stack128#1
+# asm 2: movl <in1=%r8d,4+<x1=0(%rsp)
+movl %r8d,4+0(%rsp)
+
+# qhasm: ((uint32 *)&x1)[2] = in6
+# asm 1: movl <in6=int64#7d,8+<x1=stack128#1
+# asm 2: movl <in6=%eax,8+<x1=0(%rsp)
+movl %eax,8+0(%rsp)
+
+# qhasm: ((uint32 *)&x1)[3] = in11
+# asm 1: movl <in11=int64#9d,12+<x1=stack128#1
+# asm 2: movl <in11=%r11d,12+<x1=0(%rsp)
+movl %r11d,12+0(%rsp)
+
+# qhasm: in8 = 0
+# asm 1: mov $0,>in8=int64#4
+# asm 2: mov $0,>in8=%rcx
+mov $0,%rcx
+
+# qhasm: in13 = *(uint32 *) (k + 24)
+# asm 1: movl 24(<k=int64#8),>in13=int64#5d
+# asm 2: movl 24(<k=%r10),>in13=%r8d
+movl 24(%r10),%r8d
+
+# qhasm: in2 = *(uint32 *) (k + 4)
+# asm 1: movl 4(<k=int64#8),>in2=int64#7d
+# asm 2: movl 4(<k=%r10),>in2=%eax
+movl 4(%r10),%eax
+
+# qhasm: in7 = *(uint32 *) (iv + 4)
+# asm 1: movl 4(<iv=int64#3),>in7=int64#3d
+# asm 2: movl 4(<iv=%rdx),>in7=%edx
+movl 4(%rdx),%edx
+
+# qhasm: ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int64#4d,>x2=stack128#2
+# asm 2: movl <in8=%ecx,>x2=16(%rsp)
+movl %ecx,16(%rsp)
+
+# qhasm: ((uint32 *)&x2)[1] = in13
+# asm 1: movl <in13=int64#5d,4+<x2=stack128#2
+# asm 2: movl <in13=%r8d,4+<x2=16(%rsp)
+movl %r8d,4+16(%rsp)
+
+# qhasm: ((uint32 *)&x2)[2] = in2
+# asm 1: movl <in2=int64#7d,8+<x2=stack128#2
+# asm 2: movl <in2=%eax,8+<x2=16(%rsp)
+movl %eax,8+16(%rsp)
+
+# qhasm: ((uint32 *)&x2)[3] = in7
+# asm 1: movl <in7=int64#3d,12+<x2=stack128#2
+# asm 2: movl <in7=%edx,12+<x2=16(%rsp)
+movl %edx,12+16(%rsp)
+
+# qhasm: in4 = *(uint32 *) (k + 12)
+# asm 1: movl 12(<k=int64#8),>in4=int64#3d
+# asm 2: movl 12(<k=%r10),>in4=%edx
+movl 12(%r10),%edx
+
+# qhasm: in9 = 0
+# asm 1: mov $0,>in9=int64#4
+# asm 2: mov $0,>in9=%rcx
+mov $0,%rcx
+
+# qhasm: in14 = *(uint32 *) (k + 28)
+# asm 1: movl 28(<k=int64#8),>in14=int64#5d
+# asm 2: movl 28(<k=%r10),>in14=%r8d
+movl 28(%r10),%r8d
+
+# qhasm: in3 = *(uint32 *) (k + 8)
+# asm 1: movl 8(<k=int64#8),>in3=int64#7d
+# asm 2: movl 8(<k=%r10),>in3=%eax
+movl 8(%r10),%eax
+
+# qhasm: ((uint32 *)&x3)[0] = in4
+# asm 1: movl <in4=int64#3d,>x3=stack128#3
+# asm 2: movl <in4=%edx,>x3=32(%rsp)
+movl %edx,32(%rsp)
+
+# qhasm: ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int64#4d,4+<x3=stack128#3
+# asm 2: movl <in9=%ecx,4+<x3=32(%rsp)
+movl %ecx,4+32(%rsp)
+
+# qhasm: ((uint32 *)&x3)[2] = in14
+# asm 1: movl <in14=int64#5d,8+<x3=stack128#3
+# asm 2: movl <in14=%r8d,8+<x3=32(%rsp)
+movl %r8d,8+32(%rsp)
+
+# qhasm: ((uint32 *)&x3)[3] = in3
+# asm 1: movl <in3=int64#7d,12+<x3=stack128#3
+# asm 2: movl <in3=%eax,12+<x3=32(%rsp)
+movl %eax,12+32(%rsp)
+
+# qhasm: in0 = 1634760805
+# asm 1: mov $1634760805,>in0=int64#3
+# asm 2: mov $1634760805,>in0=%rdx
+mov $1634760805,%rdx
+
+# qhasm: in5 = 857760878
+# asm 1: mov $857760878,>in5=int64#4
+# asm 2: mov $857760878,>in5=%rcx
+mov $857760878,%rcx
+
+# qhasm: in10 = 2036477234
+# asm 1: mov $2036477234,>in10=int64#5
+# asm 2: mov $2036477234,>in10=%r8
+mov $2036477234,%r8
+
+# qhasm: in15 = 1797285236
+# asm 1: mov $1797285236,>in15=int64#7
+# asm 2: mov $1797285236,>in15=%rax
+mov $1797285236,%rax
+
+# qhasm: ((uint32 *)&x0)[0] = in0
+# asm 1: movl <in0=int64#3d,>x0=stack128#4
+# asm 2: movl <in0=%edx,>x0=48(%rsp)
+movl %edx,48(%rsp)
+
+# qhasm: ((uint32 *)&x0)[1] = in5
+# asm 1: movl <in5=int64#4d,4+<x0=stack128#4
+# asm 2: movl <in5=%ecx,4+<x0=48(%rsp)
+movl %ecx,4+48(%rsp)
+
+# qhasm: ((uint32 *)&x0)[2] = in10
+# asm 1: movl <in10=int64#5d,8+<x0=stack128#4
+# asm 2: movl <in10=%r8d,8+<x0=48(%rsp)
+movl %r8d,8+48(%rsp)
+
+# qhasm: ((uint32 *)&x0)[3] = in15
+# asm 1: movl <in15=int64#7d,12+<x0=stack128#4
+# asm 2: movl <in15=%eax,12+<x0=48(%rsp)
+movl %eax,12+48(%rsp)
+
+# qhasm: unsigned<? bytes - 256
+# asm 1: cmp $256,<bytes=int64#6
+# asm 2: cmp $256,<bytes=%r9
+cmp $256,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm: goto bytesbetween1and255 if unsigned<
+jb ._bytesbetween1and255
+
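+# note: at least 256 bytes remain, so the 4-way path is prepared: each of
+# the 16 state words is broadcast into all four lanes of an xmm register
+# with pshufd and saved as origN, allowing four blocks to be computed in
+# parallel.
+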
+# qhasm: z0 = x0
+# asm 1: movdqa <x0=stack128#4,>z0=int6464#1
+# asm 2: movdqa <x0=48(%rsp),>z0=%xmm0
+movdqa 48(%rsp),%xmm0
+
+# qhasm: z5 = z0[1,1,1,1]
+# asm 1: pshufd $0x55,<z0=int6464#1,>z5=int6464#2
+# asm 2: pshufd $0x55,<z0=%xmm0,>z5=%xmm1
+pshufd $0x55,%xmm0,%xmm1
+
+# qhasm: z10 = z0[2,2,2,2]
+# asm 1: pshufd $0xaa,<z0=int6464#1,>z10=int6464#3
+# asm 2: pshufd $0xaa,<z0=%xmm0,>z10=%xmm2
+pshufd $0xaa,%xmm0,%xmm2
+
+# qhasm: z15 = z0[3,3,3,3]
+# asm 1: pshufd $0xff,<z0=int6464#1,>z15=int6464#4
+# asm 2: pshufd $0xff,<z0=%xmm0,>z15=%xmm3
+pshufd $0xff,%xmm0,%xmm3
+
+# qhasm: z0 = z0[0,0,0,0]
+# asm 1: pshufd $0x00,<z0=int6464#1,>z0=int6464#1
+# asm 2: pshufd $0x00,<z0=%xmm0,>z0=%xmm0
+pshufd $0x00,%xmm0,%xmm0
+
+# qhasm: orig5 = z5
+# asm 1: movdqa <z5=int6464#2,>orig5=stack128#5
+# asm 2: movdqa <z5=%xmm1,>orig5=64(%rsp)
+movdqa %xmm1,64(%rsp)
+
+# qhasm: orig10 = z10
+# asm 1: movdqa <z10=int6464#3,>orig10=stack128#6
+# asm 2: movdqa <z10=%xmm2,>orig10=80(%rsp)
+movdqa %xmm2,80(%rsp)
+
+# qhasm: orig15 = z15
+# asm 1: movdqa <z15=int6464#4,>orig15=stack128#7
+# asm 2: movdqa <z15=%xmm3,>orig15=96(%rsp)
+movdqa %xmm3,96(%rsp)
+
+# qhasm: orig0 = z0
+# asm 1: movdqa <z0=int6464#1,>orig0=stack128#8
+# asm 2: movdqa <z0=%xmm0,>orig0=112(%rsp)
+movdqa %xmm0,112(%rsp)
+
+# qhasm: z1 = x1
+# asm 1: movdqa <x1=stack128#1,>z1=int6464#1
+# asm 2: movdqa <x1=0(%rsp),>z1=%xmm0
+movdqa 0(%rsp),%xmm0
+
+# qhasm: z6 = z1[2,2,2,2]
+# asm 1: pshufd $0xaa,<z1=int6464#1,>z6=int6464#2
+# asm 2: pshufd $0xaa,<z1=%xmm0,>z6=%xmm1
+pshufd $0xaa,%xmm0,%xmm1
+
+# qhasm: z11 = z1[3,3,3,3]
+# asm 1: pshufd $0xff,<z1=int6464#1,>z11=int6464#3
+# asm 2: pshufd $0xff,<z1=%xmm0,>z11=%xmm2
+pshufd $0xff,%xmm0,%xmm2
+
+# qhasm: z12 = z1[0,0,0,0]
+# asm 1: pshufd $0x00,<z1=int6464#1,>z12=int6464#4
+# asm 2: pshufd $0x00,<z1=%xmm0,>z12=%xmm3
+pshufd $0x00,%xmm0,%xmm3
+
+# qhasm: z1 = z1[1,1,1,1]
+# asm 1: pshufd $0x55,<z1=int6464#1,>z1=int6464#1
+# asm 2: pshufd $0x55,<z1=%xmm0,>z1=%xmm0
+pshufd $0x55,%xmm0,%xmm0
+
+# qhasm: orig6 = z6
+# asm 1: movdqa <z6=int6464#2,>orig6=stack128#9
+# asm 2: movdqa <z6=%xmm1,>orig6=128(%rsp)
+movdqa %xmm1,128(%rsp)
+
+# qhasm: orig11 = z11
+# asm 1: movdqa <z11=int6464#3,>orig11=stack128#10
+# asm 2: movdqa <z11=%xmm2,>orig11=144(%rsp)
+movdqa %xmm2,144(%rsp)
+
+# qhasm: orig12 = z12
+# asm 1: movdqa <z12=int6464#4,>orig12=stack128#11
+# asm 2: movdqa <z12=%xmm3,>orig12=160(%rsp)
+movdqa %xmm3,160(%rsp)
+
+# qhasm: orig1 = z1
+# asm 1: movdqa <z1=int6464#1,>orig1=stack128#12
+# asm 2: movdqa <z1=%xmm0,>orig1=176(%rsp)
+movdqa %xmm0,176(%rsp)
+
+# qhasm: z2 = x2
+# asm 1: movdqa <x2=stack128#2,>z2=int6464#1
+# asm 2: movdqa <x2=16(%rsp),>z2=%xmm0
+movdqa 16(%rsp),%xmm0
+
+# qhasm: z7 = z2[3,3,3,3]
+# asm 1: pshufd $0xff,<z2=int6464#1,>z7=int6464#2
+# asm 2: pshufd $0xff,<z2=%xmm0,>z7=%xmm1
+pshufd $0xff,%xmm0,%xmm1
+
+# qhasm: z13 = z2[1,1,1,1]
+# asm 1: pshufd $0x55,<z2=int6464#1,>z13=int6464#3
+# asm 2: pshufd $0x55,<z2=%xmm0,>z13=%xmm2
+pshufd $0x55,%xmm0,%xmm2
+
+# qhasm: z2 = z2[2,2,2,2]
+# asm 1: pshufd $0xaa,<z2=int6464#1,>z2=int6464#1
+# asm 2: pshufd $0xaa,<z2=%xmm0,>z2=%xmm0
+pshufd $0xaa,%xmm0,%xmm0
+
+# qhasm: orig7 = z7
+# asm 1: movdqa <z7=int6464#2,>orig7=stack128#13
+# asm 2: movdqa <z7=%xmm1,>orig7=192(%rsp)
+movdqa %xmm1,192(%rsp)
+
+# qhasm: orig13 = z13
+# asm 1: movdqa <z13=int6464#3,>orig13=stack128#14
+# asm 2: movdqa <z13=%xmm2,>orig13=208(%rsp)
+movdqa %xmm2,208(%rsp)
+
+# qhasm: orig2 = z2
+# asm 1: movdqa <z2=int6464#1,>orig2=stack128#15
+# asm 2: movdqa <z2=%xmm0,>orig2=224(%rsp)
+movdqa %xmm0,224(%rsp)
+
+# qhasm: z3 = x3
+# asm 1: movdqa <x3=stack128#3,>z3=int6464#1
+# asm 2: movdqa <x3=32(%rsp),>z3=%xmm0
+movdqa 32(%rsp),%xmm0
+
+# qhasm: z4 = z3[0,0,0,0]
+# asm 1: pshufd $0x00,<z3=int6464#1,>z4=int6464#2
+# asm 2: pshufd $0x00,<z3=%xmm0,>z4=%xmm1
+pshufd $0x00,%xmm0,%xmm1
+
+# qhasm: z14 = z3[2,2,2,2]
+# asm 1: pshufd $0xaa,<z3=int6464#1,>z14=int6464#3
+# asm 2: pshufd $0xaa,<z3=%xmm0,>z14=%xmm2
+pshufd $0xaa,%xmm0,%xmm2
+
+# qhasm: z3 = z3[3,3,3,3]
+# asm 1: pshufd $0xff,<z3=int6464#1,>z3=int6464#1
+# asm 2: pshufd $0xff,<z3=%xmm0,>z3=%xmm0
+pshufd $0xff,%xmm0,%xmm0
+
+# qhasm: orig4 = z4
+# asm 1: movdqa <z4=int6464#2,>orig4=stack128#16
+# asm 2: movdqa <z4=%xmm1,>orig4=240(%rsp)
+movdqa %xmm1,240(%rsp)
+
+# qhasm: orig14 = z14
+# asm 1: movdqa <z14=int6464#3,>orig14=stack128#17
+# asm 2: movdqa <z14=%xmm2,>orig14=256(%rsp)
+movdqa %xmm2,256(%rsp)
+
+# qhasm: orig3 = z3
+# asm 1: movdqa <z3=int6464#1,>orig3=stack128#18
+# asm 2: movdqa <z3=%xmm0,>orig3=272(%rsp)
+movdqa %xmm0,272(%rsp)
+
+# qhasm: bytesatleast256:
+._bytesatleast256:
+
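+# note: state words 8 and 9 form the 64-bit block counter.  The code below
+# stores counter, counter+1, counter+2 and counter+3 into the four lanes of
+# orig8/orig9 and writes counter+4 back into x2[0]/x3[1] for the next pass.
+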
+# qhasm: in8 = ((uint32 *)&x2)[0]
+# asm 1: movl <x2=stack128#2,>in8=int64#3d
+# asm 2: movl <x2=16(%rsp),>in8=%edx
+movl 16(%rsp),%edx
+
+# qhasm: in9 = ((uint32 *)&x3)[1]
+# asm 1: movl 4+<x3=stack128#3,>in9=int64#4d
+# asm 2: movl 4+<x3=32(%rsp),>in9=%ecx
+movl 4+32(%rsp),%ecx
+
+# qhasm: ((uint32 *) &orig8)[0] = in8
+# asm 1: movl <in8=int64#3d,>orig8=stack128#19
+# asm 2: movl <in8=%edx,>orig8=288(%rsp)
+movl %edx,288(%rsp)
+
+# qhasm: ((uint32 *) &orig9)[0] = in9
+# asm 1: movl <in9=int64#4d,>orig9=stack128#20
+# asm 2: movl <in9=%ecx,>orig9=304(%rsp)
+movl %ecx,304(%rsp)
+
+# qhasm: in8 += 1
+# asm 1: add $1,<in8=int64#3
+# asm 2: add $1,<in8=%rdx
+add $1,%rdx
+
+# qhasm: in9 <<= 32
+# asm 1: shl $32,<in9=int64#4
+# asm 2: shl $32,<in9=%rcx
+shl $32,%rcx
+
+# qhasm: in8 += in9
+# asm 1: add <in9=int64#4,<in8=int64#3
+# asm 2: add <in9=%rcx,<in8=%rdx
+add %rcx,%rdx
+
+# qhasm: in9 = in8
+# asm 1: mov <in8=int64#3,>in9=int64#4
+# asm 2: mov <in8=%rdx,>in9=%rcx
+mov %rdx,%rcx
+
+# qhasm: (uint64) in9 >>= 32
+# asm 1: shr $32,<in9=int64#4
+# asm 2: shr $32,<in9=%rcx
+shr $32,%rcx
+
+# qhasm: ((uint32 *) &orig8)[1] = in8
+# asm 1: movl <in8=int64#3d,4+<orig8=stack128#19
+# asm 2: movl <in8=%edx,4+<orig8=288(%rsp)
+movl %edx,4+288(%rsp)
+
+# qhasm: ((uint32 *) &orig9)[1] = in9
+# asm 1: movl <in9=int64#4d,4+<orig9=stack128#20
+# asm 2: movl <in9=%ecx,4+<orig9=304(%rsp)
+movl %ecx,4+304(%rsp)
+
+# qhasm: in8 += 1
+# asm 1: add $1,<in8=int64#3
+# asm 2: add $1,<in8=%rdx
+add $1,%rdx
+
+# qhasm: in9 <<= 32
+# asm 1: shl $32,<in9=int64#4
+# asm 2: shl $32,<in9=%rcx
+shl $32,%rcx
+
+# qhasm: in8 += in9
+# asm 1: add <in9=int64#4,<in8=int64#3
+# asm 2: add <in9=%rcx,<in8=%rdx
+add %rcx,%rdx
+
+# qhasm: in9 = in8
+# asm 1: mov <in8=int64#3,>in9=int64#4
+# asm 2: mov <in8=%rdx,>in9=%rcx
+mov %rdx,%rcx
+
+# qhasm: (uint64) in9 >>= 32
+# asm 1: shr $32,<in9=int64#4
+# asm 2: shr $32,<in9=%rcx
+shr $32,%rcx
+
+# qhasm: ((uint32 *) &orig8)[2] = in8
+# asm 1: movl <in8=int64#3d,8+<orig8=stack128#19
+# asm 2: movl <in8=%edx,8+<orig8=288(%rsp)
+movl %edx,8+288(%rsp)
+
+# qhasm: ((uint32 *) &orig9)[2] = in9
+# asm 1: movl <in9=int64#4d,8+<orig9=stack128#20
+# asm 2: movl <in9=%ecx,8+<orig9=304(%rsp)
+movl %ecx,8+304(%rsp)
+
+# qhasm: in8 += 1
+# asm 1: add $1,<in8=int64#3
+# asm 2: add $1,<in8=%rdx
+add $1,%rdx
+
+# qhasm: in9 <<= 32
+# asm 1: shl $32,<in9=int64#4
+# asm 2: shl $32,<in9=%rcx
+shl $32,%rcx
+
+# qhasm: in8 += in9
+# asm 1: add <in9=int64#4,<in8=int64#3
+# asm 2: add <in9=%rcx,<in8=%rdx
+add %rcx,%rdx
+
+# qhasm: in9 = in8
+# asm 1: mov <in8=int64#3,>in9=int64#4
+# asm 2: mov <in8=%rdx,>in9=%rcx
+mov %rdx,%rcx
+
+# qhasm: (uint64) in9 >>= 32
+# asm 1: shr $32,<in9=int64#4
+# asm 2: shr $32,<in9=%rcx
+shr $32,%rcx
+
+# qhasm: ((uint32 *) &orig8)[3] = in8
+# asm 1: movl <in8=int64#3d,12+<orig8=stack128#19
+# asm 2: movl <in8=%edx,12+<orig8=288(%rsp)
+movl %edx,12+288(%rsp)
+
+# qhasm: ((uint32 *) &orig9)[3] = in9
+# asm 1: movl <in9=int64#4d,12+<orig9=stack128#20
+# asm 2: movl <in9=%ecx,12+<orig9=304(%rsp)
+movl %ecx,12+304(%rsp)
+
+# qhasm: in8 += 1
+# asm 1: add $1,<in8=int64#3
+# asm 2: add $1,<in8=%rdx
+add $1,%rdx
+
+# qhasm: in9 <<= 32
+# asm 1: shl $32,<in9=int64#4
+# asm 2: shl $32,<in9=%rcx
+shl $32,%rcx
+
+# qhasm: in8 += in9
+# asm 1: add <in9=int64#4,<in8=int64#3
+# asm 2: add <in9=%rcx,<in8=%rdx
+add %rcx,%rdx
+
+# qhasm: in9 = in8
+# asm 1: mov <in8=int64#3,>in9=int64#4
+# asm 2: mov <in8=%rdx,>in9=%rcx
+mov %rdx,%rcx
+
+# qhasm: (uint64) in9 >>= 32
+# asm 1: shr $32,<in9=int64#4
+# asm 2: shr $32,<in9=%rcx
+shr $32,%rcx
+
+# qhasm: ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int64#3d,>x2=stack128#2
+# asm 2: movl <in8=%edx,>x2=16(%rsp)
+movl %edx,16(%rsp)
+
+# qhasm: ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int64#4d,4+<x3=stack128#3
+# asm 2: movl <in9=%ecx,4+<x3=32(%rsp)
+movl %ecx,4+32(%rsp)
+
+# qhasm: bytes_backup = bytes
+# asm 1: movq <bytes=int64#6,>bytes_backup=stack64#8
+# asm 2: movq <bytes=%r9,>bytes_backup=408(%rsp)
+movq %r9,408(%rsp)
+
+# qhasm: i = 20
+# asm 1: mov $20,>i=int64#3
+# asm 2: mov $20,>i=%rdx
+mov $20,%rdx
+
+# qhasm: z5 = orig5
+# asm 1: movdqa <orig5=stack128#5,>z5=int6464#1
+# asm 2: movdqa <orig5=64(%rsp),>z5=%xmm0
+movdqa 64(%rsp),%xmm0
+
+# qhasm: z10 = orig10
+# asm 1: movdqa <orig10=stack128#6,>z10=int6464#2
+# asm 2: movdqa <orig10=80(%rsp),>z10=%xmm1
+movdqa 80(%rsp),%xmm1
+
+# qhasm: z15 = orig15
+# asm 1: movdqa <orig15=stack128#7,>z15=int6464#3
+# asm 2: movdqa <orig15=96(%rsp),>z15=%xmm2
+movdqa 96(%rsp),%xmm2
+
+# qhasm: z14 = orig14
+# asm 1: movdqa <orig14=stack128#17,>z14=int6464#4
+# asm 2: movdqa <orig14=256(%rsp),>z14=%xmm3
+movdqa 256(%rsp),%xmm3
+
+# qhasm: z3 = orig3
+# asm 1: movdqa <orig3=stack128#18,>z3=int6464#5
+# asm 2: movdqa <orig3=272(%rsp),>z3=%xmm4
+movdqa 272(%rsp),%xmm4
+
+# qhasm: z6 = orig6
+# asm 1: movdqa <orig6=stack128#9,>z6=int6464#6
+# asm 2: movdqa <orig6=128(%rsp),>z6=%xmm5
+movdqa 128(%rsp),%xmm5
+
+# qhasm: z11 = orig11
+# asm 1: movdqa <orig11=stack128#10,>z11=int6464#7
+# asm 2: movdqa <orig11=144(%rsp),>z11=%xmm6
+movdqa 144(%rsp),%xmm6
+
+# qhasm: z1 = orig1
+# asm 1: movdqa <orig1=stack128#12,>z1=int6464#8
+# asm 2: movdqa <orig1=176(%rsp),>z1=%xmm7
+movdqa 176(%rsp),%xmm7
+
+# qhasm: z7 = orig7
+# asm 1: movdqa <orig7=stack128#13,>z7=int6464#9
+# asm 2: movdqa <orig7=192(%rsp),>z7=%xmm8
+movdqa 192(%rsp),%xmm8
+
+# qhasm: z13 = orig13
+# asm 1: movdqa <orig13=stack128#14,>z13=int6464#10
+# asm 2: movdqa <orig13=208(%rsp),>z13=%xmm9
+movdqa 208(%rsp),%xmm9
+
+# qhasm: z2 = orig2
+# asm 1: movdqa <orig2=stack128#15,>z2=int6464#11
+# asm 2: movdqa <orig2=224(%rsp),>z2=%xmm10
+movdqa 224(%rsp),%xmm10
+
+# qhasm: z9 = orig9
+# asm 1: movdqa <orig9=stack128#20,>z9=int6464#12
+# asm 2: movdqa <orig9=304(%rsp),>z9=%xmm11
+movdqa 304(%rsp),%xmm11
+
+# qhasm: z0 = orig0
+# asm 1: movdqa <orig0=stack128#8,>z0=int6464#13
+# asm 2: movdqa <orig0=112(%rsp),>z0=%xmm12
+movdqa 112(%rsp),%xmm12
+
+# qhasm: z12 = orig12
+# asm 1: movdqa <orig12=stack128#11,>z12=int6464#14
+# asm 2: movdqa <orig12=160(%rsp),>z12=%xmm13
+movdqa 160(%rsp),%xmm13
+
+# qhasm: z4 = orig4
+# asm 1: movdqa <orig4=stack128#16,>z4=int6464#15
+# asm 2: movdqa <orig4=240(%rsp),>z4=%xmm14
+movdqa 240(%rsp),%xmm14
+
+# qhasm: z8 = orig8
+# asm 1: movdqa <orig8=stack128#19,>z8=int6464#16
+# asm 2: movdqa <orig8=288(%rsp),>z8=%xmm15
+movdqa 288(%rsp),%xmm15
+
+# qhasm: mainloop1:
+._mainloop1:
+
+# qhasm: z10_stack = z10
+# asm 1: movdqa <z10=int6464#2,>z10_stack=stack128#21
+# asm 2: movdqa <z10=%xmm1,>z10_stack=320(%rsp)
+movdqa %xmm1,320(%rsp)
+
+# qhasm: z15_stack = z15
+# asm 1: movdqa <z15=int6464#3,>z15_stack=stack128#22
+# asm 2: movdqa <z15=%xmm2,>z15_stack=336(%rsp)
+movdqa %xmm2,336(%rsp)
+
+# qhasm: y4 = z12
+# asm 1: movdqa <z12=int6464#14,>y4=int6464#2
+# asm 2: movdqa <z12=%xmm13,>y4=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm: uint32323232 y4 += z0
+# asm 1: paddd <z0=int6464#13,<y4=int6464#2
+# asm 2: paddd <z0=%xmm12,<y4=%xmm1
+paddd %xmm12,%xmm1
+
+# qhasm: r4 = y4
+# asm 1: movdqa <y4=int6464#2,>r4=int6464#3
+# asm 2: movdqa <y4=%xmm1,>r4=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232 y4 <<= 7
+# asm 1: pslld $7,<y4=int6464#2
+# asm 2: pslld $7,<y4=%xmm1
+pslld $7,%xmm1
+
+# qhasm: z4 ^= y4
+# asm 1: pxor <y4=int6464#2,<z4=int6464#15
+# asm 2: pxor <y4=%xmm1,<z4=%xmm14
+pxor %xmm1,%xmm14
+
+# qhasm: uint32323232 r4 >>= 25
+# asm 1: psrld $25,<r4=int6464#3
+# asm 2: psrld $25,<r4=%xmm2
+psrld $25,%xmm2
+
+# qhasm: z4 ^= r4
+# asm 1: pxor <r4=int6464#3,<z4=int6464#15
+# asm 2: pxor <r4=%xmm2,<z4=%xmm14
+pxor %xmm2,%xmm14
+
+# qhasm: y9 = z1
+# asm 1: movdqa <z1=int6464#8,>y9=int6464#2
+# asm 2: movdqa <z1=%xmm7,>y9=%xmm1
+movdqa %xmm7,%xmm1
+
+# qhasm: uint32323232 y9 += z5
+# asm 1: paddd <z5=int6464#1,<y9=int6464#2
+# asm 2: paddd <z5=%xmm0,<y9=%xmm1
+paddd %xmm0,%xmm1
+
+# qhasm: r9 = y9
+# asm 1: movdqa <y9=int6464#2,>r9=int6464#3
+# asm 2: movdqa <y9=%xmm1,>r9=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232 y9 <<= 7
+# asm 1: pslld $7,<y9=int6464#2
+# asm 2: pslld $7,<y9=%xmm1
+pslld $7,%xmm1
+
+# qhasm: z9 ^= y9
+# asm 1: pxor <y9=int6464#2,<z9=int6464#12
+# asm 2: pxor <y9=%xmm1,<z9=%xmm11
+pxor %xmm1,%xmm11
+
+# qhasm: uint32323232 r9 >>= 25
+# asm 1: psrld $25,<r9=int6464#3
+# asm 2: psrld $25,<r9=%xmm2
+psrld $25,%xmm2
+
+# qhasm: z9 ^= r9
+# asm 1: pxor <r9=int6464#3,<z9=int6464#12
+# asm 2: pxor <r9=%xmm2,<z9=%xmm11
+pxor %xmm2,%xmm11
+
+# qhasm: y8 = z0
+# asm 1: movdqa <z0=int6464#13,>y8=int6464#2
+# asm 2: movdqa <z0=%xmm12,>y8=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: uint32323232 y8 += z4
+# asm 1: paddd <z4=int6464#15,<y8=int6464#2
+# asm 2: paddd <z4=%xmm14,<y8=%xmm1
+paddd %xmm14,%xmm1
+
+# qhasm: r8 = y8
+# asm 1: movdqa <y8=int6464#2,>r8=int6464#3
+# asm 2: movdqa <y8=%xmm1,>r8=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232 y8 <<= 9
+# asm 1: pslld $9,<y8=int6464#2
+# asm 2: pslld $9,<y8=%xmm1
+pslld $9,%xmm1
+
+# qhasm: z8 ^= y8
+# asm 1: pxor <y8=int6464#2,<z8=int6464#16
+# asm 2: pxor <y8=%xmm1,<z8=%xmm15
+pxor %xmm1,%xmm15
+
+# qhasm: uint32323232 r8 >>= 23
+# asm 1: psrld $23,<r8=int6464#3
+# asm 2: psrld $23,<r8=%xmm2
+psrld $23,%xmm2
+
+# qhasm: z8 ^= r8
+# asm 1: pxor <r8=int6464#3,<z8=int6464#16
+# asm 2: pxor <r8=%xmm2,<z8=%xmm15
+pxor %xmm2,%xmm15
+
+# qhasm: y13 = z5
+# asm 1: movdqa <z5=int6464#1,>y13=int6464#2
+# asm 2: movdqa <z5=%xmm0,>y13=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232 y13 += z9
+# asm 1: paddd <z9=int6464#12,<y13=int6464#2
+# asm 2: paddd <z9=%xmm11,<y13=%xmm1
+paddd %xmm11,%xmm1
+
+# qhasm: r13 = y13
+# asm 1: movdqa <y13=int6464#2,>r13=int6464#3
+# asm 2: movdqa <y13=%xmm1,>r13=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232 y13 <<= 9
+# asm 1: pslld $9,<y13=int6464#2
+# asm 2: pslld $9,<y13=%xmm1
+pslld $9,%xmm1
+
+# qhasm: z13 ^= y13
+# asm 1: pxor <y13=int6464#2,<z13=int6464#10
+# asm 2: pxor <y13=%xmm1,<z13=%xmm9
+pxor %xmm1,%xmm9
+
+# qhasm: uint32323232 r13 >>= 23
+# asm 1: psrld $23,<r13=int6464#3
+# asm 2: psrld $23,<r13=%xmm2
+psrld $23,%xmm2
+
+# qhasm: z13 ^= r13
+# asm 1: pxor <r13=int6464#3,<z13=int6464#10
+# asm 2: pxor <r13=%xmm2,<z13=%xmm9
+pxor %xmm2,%xmm9
+
+# qhasm: y12 = z4
+# asm 1: movdqa <z4=int6464#15,>y12=int6464#2
+# asm 2: movdqa <z4=%xmm14,>y12=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: uint32323232 y12 += z8
+# asm 1: paddd <z8=int6464#16,<y12=int6464#2
+# asm 2: paddd <z8=%xmm15,<y12=%xmm1
+paddd %xmm15,%xmm1
+
+# qhasm: r12 = y12
+# asm 1: movdqa <y12=int6464#2,>r12=int6464#3
+# asm 2: movdqa <y12=%xmm1,>r12=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232 y12 <<= 13
+# asm 1: pslld $13,<y12=int6464#2
+# asm 2: pslld $13,<y12=%xmm1
+pslld $13,%xmm1
+
+# qhasm: z12 ^= y12
+# asm 1: pxor <y12=int6464#2,<z12=int6464#14
+# asm 2: pxor <y12=%xmm1,<z12=%xmm13
+pxor %xmm1,%xmm13
+
+# qhasm: uint32323232 r12 >>= 19
+# asm 1: psrld $19,<r12=int6464#3
+# asm 2: psrld $19,<r12=%xmm2
+psrld $19,%xmm2
+
+# qhasm: z12 ^= r12
+# asm 1: pxor <r12=int6464#3,<z12=int6464#14
+# asm 2: pxor <r12=%xmm2,<z12=%xmm13
+pxor %xmm2,%xmm13
+
+# qhasm: y1 = z9
+# asm 1: movdqa <z9=int6464#12,>y1=int6464#2
+# asm 2: movdqa <z9=%xmm11,>y1=%xmm1
+movdqa %xmm11,%xmm1
+
+# qhasm: uint32323232 y1 += z13
+# asm 1: paddd <z13=int6464#10,<y1=int6464#2
+# asm 2: paddd <z13=%xmm9,<y1=%xmm1
+paddd %xmm9,%xmm1
+
+# qhasm: r1 = y1
+# asm 1: movdqa <y1=int6464#2,>r1=int6464#3
+# asm 2: movdqa <y1=%xmm1,>r1=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232 y1 <<= 13
+# asm 1: pslld $13,<y1=int6464#2
+# asm 2: pslld $13,<y1=%xmm1
+pslld $13,%xmm1
+
+# qhasm: z1 ^= y1
+# asm 1: pxor <y1=int6464#2,<z1=int6464#8
+# asm 2: pxor <y1=%xmm1,<z1=%xmm7
+pxor %xmm1,%xmm7
+
+# qhasm: uint32323232 r1 >>= 19
+# asm 1: psrld $19,<r1=int6464#3
+# asm 2: psrld $19,<r1=%xmm2
+psrld $19,%xmm2
+
+# qhasm: z1 ^= r1
+# asm 1: pxor <r1=int6464#3,<z1=int6464#8
+# asm 2: pxor <r1=%xmm2,<z1=%xmm7
+pxor %xmm2,%xmm7
+
+# qhasm: y0 = z8
+# asm 1: movdqa <z8=int6464#16,>y0=int6464#2
+# asm 2: movdqa <z8=%xmm15,>y0=%xmm1
+movdqa %xmm15,%xmm1
+
+# qhasm: uint32323232 y0 += z12
+# asm 1: paddd <z12=int6464#14,<y0=int6464#2
+# asm 2: paddd <z12=%xmm13,<y0=%xmm1
+paddd %xmm13,%xmm1
+
+# qhasm: r0 = y0
+# asm 1: movdqa <y0=int6464#2,>r0=int6464#3
+# asm 2: movdqa <y0=%xmm1,>r0=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232 y0 <<= 18
+# asm 1: pslld $18,<y0=int6464#2
+# asm 2: pslld $18,<y0=%xmm1
+pslld $18,%xmm1
+
+# qhasm: z0 ^= y0
+# asm 1: pxor <y0=int6464#2,<z0=int6464#13
+# asm 2: pxor <y0=%xmm1,<z0=%xmm12
+pxor %xmm1,%xmm12
+
+# qhasm: uint32323232 r0 >>= 14
+# asm 1: psrld $14,<r0=int6464#3
+# asm 2: psrld $14,<r0=%xmm2
+psrld $14,%xmm2
+
+# qhasm: z0 ^= r0
+# asm 1: pxor <r0=int6464#3,<z0=int6464#13
+# asm 2: pxor <r0=%xmm2,<z0=%xmm12
+pxor %xmm2,%xmm12
+
+# qhasm: z10 = z10_stack
+# asm 1: movdqa <z10_stack=stack128#21,>z10=int6464#2
+# asm 2: movdqa <z10_stack=320(%rsp),>z10=%xmm1
+movdqa 320(%rsp),%xmm1
+
+# qhasm: z0_stack = z0
+# asm 1: movdqa <z0=int6464#13,>z0_stack=stack128#21
+# asm 2: movdqa <z0=%xmm12,>z0_stack=320(%rsp)
+movdqa %xmm12,320(%rsp)
+
+# qhasm: y5 = z13
+# asm 1: movdqa <z13=int6464#10,>y5=int6464#3
+# asm 2: movdqa <z13=%xmm9,>y5=%xmm2
+movdqa %xmm9,%xmm2
+
+# qhasm: uint32323232 y5 += z1
+# asm 1: paddd <z1=int6464#8,<y5=int6464#3
+# asm 2: paddd <z1=%xmm7,<y5=%xmm2
+paddd %xmm7,%xmm2
+
+# qhasm: r5 = y5
+# asm 1: movdqa <y5=int6464#3,>r5=int6464#13
+# asm 2: movdqa <y5=%xmm2,>r5=%xmm12
+movdqa %xmm2,%xmm12
+
+# qhasm: uint32323232 y5 <<= 18
+# asm 1: pslld $18,<y5=int6464#3
+# asm 2: pslld $18,<y5=%xmm2
+pslld $18,%xmm2
+
+# qhasm: z5 ^= y5
+# asm 1: pxor <y5=int6464#3,<z5=int6464#1
+# asm 2: pxor <y5=%xmm2,<z5=%xmm0
+pxor %xmm2,%xmm0
+
+# qhasm: uint32323232 r5 >>= 14
+# asm 1: psrld $14,<r5=int6464#13
+# asm 2: psrld $14,<r5=%xmm12
+psrld $14,%xmm12
+
+# qhasm: z5 ^= r5
+# asm 1: pxor <r5=int6464#13,<z5=int6464#1
+# asm 2: pxor <r5=%xmm12,<z5=%xmm0
+pxor %xmm12,%xmm0
+
+# qhasm: y14 = z6
+# asm 1: movdqa <z6=int6464#6,>y14=int6464#3
+# asm 2: movdqa <z6=%xmm5,>y14=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm: uint32323232 y14 += z10
+# asm 1: paddd <z10=int6464#2,<y14=int6464#3
+# asm 2: paddd <z10=%xmm1,<y14=%xmm2
+paddd %xmm1,%xmm2
+
+# qhasm: r14 = y14
+# asm 1: movdqa <y14=int6464#3,>r14=int6464#13
+# asm 2: movdqa <y14=%xmm2,>r14=%xmm12
+movdqa %xmm2,%xmm12
+
+# qhasm: uint32323232 y14 <<= 7
+# asm 1: pslld $7,<y14=int6464#3
+# asm 2: pslld $7,<y14=%xmm2
+pslld $7,%xmm2
+
+# qhasm: z14 ^= y14
+# asm 1: pxor <y14=int6464#3,<z14=int6464#4
+# asm 2: pxor <y14=%xmm2,<z14=%xmm3
+pxor %xmm2,%xmm3
+
+# qhasm: uint32323232 r14 >>= 25
+# asm 1: psrld $25,<r14=int6464#13
+# asm 2: psrld $25,<r14=%xmm12
+psrld $25,%xmm12
+
+# qhasm: z14 ^= r14
+# asm 1: pxor <r14=int6464#13,<z14=int6464#4
+# asm 2: pxor <r14=%xmm12,<z14=%xmm3
+pxor %xmm12,%xmm3
+
+# qhasm: z15 = z15_stack
+# asm 1: movdqa <z15_stack=stack128#22,>z15=int6464#3
+# asm 2: movdqa <z15_stack=336(%rsp),>z15=%xmm2
+movdqa 336(%rsp),%xmm2
+
+# qhasm: z5_stack = z5
+# asm 1: movdqa <z5=int6464#1,>z5_stack=stack128#22
+# asm 2: movdqa <z5=%xmm0,>z5_stack=336(%rsp)
+movdqa %xmm0,336(%rsp)
+
+# qhasm: y3 = z11
+# asm 1: movdqa <z11=int6464#7,>y3=int6464#1
+# asm 2: movdqa <z11=%xmm6,>y3=%xmm0
+movdqa %xmm6,%xmm0
+
+# qhasm: uint32323232 y3 += z15
+# asm 1: paddd <z15=int6464#3,<y3=int6464#1
+# asm 2: paddd <z15=%xmm2,<y3=%xmm0
+paddd %xmm2,%xmm0
+
+# qhasm: r3 = y3
+# asm 1: movdqa <y3=int6464#1,>r3=int6464#13
+# asm 2: movdqa <y3=%xmm0,>r3=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232 y3 <<= 7
+# asm 1: pslld $7,<y3=int6464#1
+# asm 2: pslld $7,<y3=%xmm0
+pslld $7,%xmm0
+
+# qhasm: z3 ^= y3
+# asm 1: pxor <y3=int6464#1,<z3=int6464#5
+# asm 2: pxor <y3=%xmm0,<z3=%xmm4
+pxor %xmm0,%xmm4
+
+# qhasm: uint32323232 r3 >>= 25
+# asm 1: psrld $25,<r3=int6464#13
+# asm 2: psrld $25,<r3=%xmm12
+psrld $25,%xmm12
+
+# qhasm: z3 ^= r3
+# asm 1: pxor <r3=int6464#13,<z3=int6464#5
+# asm 2: pxor <r3=%xmm12,<z3=%xmm4
+pxor %xmm12,%xmm4
+
+# qhasm: y2 = z10
+# asm 1: movdqa <z10=int6464#2,>y2=int6464#1
+# asm 2: movdqa <z10=%xmm1,>y2=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: uint32323232 y2 += z14
+# asm 1: paddd <z14=int6464#4,<y2=int6464#1
+# asm 2: paddd <z14=%xmm3,<y2=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm: r2 = y2
+# asm 1: movdqa <y2=int6464#1,>r2=int6464#13
+# asm 2: movdqa <y2=%xmm0,>r2=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232 y2 <<= 9
+# asm 1: pslld $9,<y2=int6464#1
+# asm 2: pslld $9,<y2=%xmm0
+pslld $9,%xmm0
+
+# qhasm: z2 ^= y2
+# asm 1: pxor <y2=int6464#1,<z2=int6464#11
+# asm 2: pxor <y2=%xmm0,<z2=%xmm10
+pxor %xmm0,%xmm10
+
+# qhasm: uint32323232 r2 >>= 23
+# asm 1: psrld $23,<r2=int6464#13
+# asm 2: psrld $23,<r2=%xmm12
+psrld $23,%xmm12
+
+# qhasm: z2 ^= r2
+# asm 1: pxor <r2=int6464#13,<z2=int6464#11
+# asm 2: pxor <r2=%xmm12,<z2=%xmm10
+pxor %xmm12,%xmm10
+
+# qhasm: y7 = z15
+# asm 1: movdqa <z15=int6464#3,>y7=int6464#1
+# asm 2: movdqa <z15=%xmm2,>y7=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232 y7 += z3
+# asm 1: paddd <z3=int6464#5,<y7=int6464#1
+# asm 2: paddd <z3=%xmm4,<y7=%xmm0
+paddd %xmm4,%xmm0
+
+# qhasm: r7 = y7
+# asm 1: movdqa <y7=int6464#1,>r7=int6464#13
+# asm 2: movdqa <y7=%xmm0,>r7=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232 y7 <<= 9
+# asm 1: pslld $9,<y7=int6464#1
+# asm 2: pslld $9,<y7=%xmm0
+pslld $9,%xmm0
+
+# qhasm: z7 ^= y7
+# asm 1: pxor <y7=int6464#1,<z7=int6464#9
+# asm 2: pxor <y7=%xmm0,<z7=%xmm8
+pxor %xmm0,%xmm8
+
+# qhasm: uint32323232 r7 >>= 23
+# asm 1: psrld $23,<r7=int6464#13
+# asm 2: psrld $23,<r7=%xmm12
+psrld $23,%xmm12
+
+# qhasm: z7 ^= r7
+# asm 1: pxor <r7=int6464#13,<z7=int6464#9
+# asm 2: pxor <r7=%xmm12,<z7=%xmm8
+pxor %xmm12,%xmm8
+
+# qhasm: y6 = z14
+# asm 1: movdqa <z14=int6464#4,>y6=int6464#1
+# asm 2: movdqa <z14=%xmm3,>y6=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232 y6 += z2
+# asm 1: paddd <z2=int6464#11,<y6=int6464#1
+# asm 2: paddd <z2=%xmm10,<y6=%xmm0
+paddd %xmm10,%xmm0
+
+# qhasm: r6 = y6
+# asm 1: movdqa <y6=int6464#1,>r6=int6464#13
+# asm 2: movdqa <y6=%xmm0,>r6=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232 y6 <<= 13
+# asm 1: pslld $13,<y6=int6464#1
+# asm 2: pslld $13,<y6=%xmm0
+pslld $13,%xmm0
+
+# qhasm: z6 ^= y6
+# asm 1: pxor <y6=int6464#1,<z6=int6464#6
+# asm 2: pxor <y6=%xmm0,<z6=%xmm5
+pxor %xmm0,%xmm5
+
+# qhasm: uint32323232 r6 >>= 19
+# asm 1: psrld $19,<r6=int6464#13
+# asm 2: psrld $19,<r6=%xmm12
+psrld $19,%xmm12
+
+# qhasm: z6 ^= r6
+# asm 1: pxor <r6=int6464#13,<z6=int6464#6
+# asm 2: pxor <r6=%xmm12,<z6=%xmm5
+pxor %xmm12,%xmm5
+
+# qhasm: y11 = z3
+# asm 1: movdqa <z3=int6464#5,>y11=int6464#1
+# asm 2: movdqa <z3=%xmm4,>y11=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: uint32323232 y11 += z7
+# asm 1: paddd <z7=int6464#9,<y11=int6464#1
+# asm 2: paddd <z7=%xmm8,<y11=%xmm0
+paddd %xmm8,%xmm0
+
+# qhasm: r11 = y11
+# asm 1: movdqa <y11=int6464#1,>r11=int6464#13
+# asm 2: movdqa <y11=%xmm0,>r11=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232 y11 <<= 13
+# asm 1: pslld $13,<y11=int6464#1
+# asm 2: pslld $13,<y11=%xmm0
+pslld $13,%xmm0
+
+# qhasm: z11 ^= y11
+# asm 1: pxor <y11=int6464#1,<z11=int6464#7
+# asm 2: pxor <y11=%xmm0,<z11=%xmm6
+pxor %xmm0,%xmm6
+
+# qhasm: uint32323232 r11 >>= 19
+# asm 1: psrld $19,<r11=int6464#13
+# asm 2: psrld $19,<r11=%xmm12
+psrld $19,%xmm12
+
+# qhasm: z11 ^= r11
+# asm 1: pxor <r11=int6464#13,<z11=int6464#7
+# asm 2: pxor <r11=%xmm12,<z11=%xmm6
+pxor %xmm12,%xmm6
+
+# qhasm: y10 = z2
+# asm 1: movdqa <z2=int6464#11,>y10=int6464#1
+# asm 2: movdqa <z2=%xmm10,>y10=%xmm0
+movdqa %xmm10,%xmm0
+
+# qhasm: uint32323232 y10 += z6
+# asm 1: paddd <z6=int6464#6,<y10=int6464#1
+# asm 2: paddd <z6=%xmm5,<y10=%xmm0
+paddd %xmm5,%xmm0
+
+# qhasm: r10 = y10
+# asm 1: movdqa <y10=int6464#1,>r10=int6464#13
+# asm 2: movdqa <y10=%xmm0,>r10=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232 y10 <<= 18
+# asm 1: pslld $18,<y10=int6464#1
+# asm 2: pslld $18,<y10=%xmm0
+pslld $18,%xmm0
+
+# qhasm: z10 ^= y10
+# asm 1: pxor <y10=int6464#1,<z10=int6464#2
+# asm 2: pxor <y10=%xmm0,<z10=%xmm1
+pxor %xmm0,%xmm1
+
+# qhasm: uint32323232 r10 >>= 14
+# asm 1: psrld $14,<r10=int6464#13
+# asm 2: psrld $14,<r10=%xmm12
+psrld $14,%xmm12
+
+# qhasm: z10 ^= r10
+# asm 1: pxor <r10=int6464#13,<z10=int6464#2
+# asm 2: pxor <r10=%xmm12,<z10=%xmm1
+pxor %xmm12,%xmm1
+
+# qhasm: z0 = z0_stack
+# asm 1: movdqa <z0_stack=stack128#21,>z0=int6464#1
+# asm 2: movdqa <z0_stack=320(%rsp),>z0=%xmm0
+movdqa 320(%rsp),%xmm0
+
+# qhasm: z10_stack = z10
+# asm 1: movdqa <z10=int6464#2,>z10_stack=stack128#21
+# asm 2: movdqa <z10=%xmm1,>z10_stack=320(%rsp)
+movdqa %xmm1,320(%rsp)
+
+# qhasm: y1 = z3
+# asm 1: movdqa <z3=int6464#5,>y1=int6464#2
+# asm 2: movdqa <z3=%xmm4,>y1=%xmm1
+movdqa %xmm4,%xmm1
+
+# qhasm: uint32323232 y1 += z0
+# asm 1: paddd <z0=int6464#1,<y1=int6464#2
+# asm 2: paddd <z0=%xmm0,<y1=%xmm1
+paddd %xmm0,%xmm1
+
+# qhasm: r1 = y1
+# asm 1: movdqa <y1=int6464#2,>r1=int6464#13
+# asm 2: movdqa <y1=%xmm1,>r1=%xmm12
+movdqa %xmm1,%xmm12
+
+# qhasm: uint32323232 y1 <<= 7
+# asm 1: pslld $7,<y1=int6464#2
+# asm 2: pslld $7,<y1=%xmm1
+pslld $7,%xmm1
+
+# qhasm: z1 ^= y1
+# asm 1: pxor <y1=int6464#2,<z1=int6464#8
+# asm 2: pxor <y1=%xmm1,<z1=%xmm7
+pxor %xmm1,%xmm7
+
+# qhasm: uint32323232 r1 >>= 25
+# asm 1: psrld $25,<r1=int6464#13
+# asm 2: psrld $25,<r1=%xmm12
+psrld $25,%xmm12
+
+# qhasm: z1 ^= r1
+# asm 1: pxor <r1=int6464#13,<z1=int6464#8
+# asm 2: pxor <r1=%xmm12,<z1=%xmm7
+pxor %xmm12,%xmm7
+
+# qhasm: y15 = z7
+# asm 1: movdqa <z7=int6464#9,>y15=int6464#2
+# asm 2: movdqa <z7=%xmm8,>y15=%xmm1
+movdqa %xmm8,%xmm1
+
+# qhasm: uint32323232 y15 += z11
+# asm 1: paddd <z11=int6464#7,<y15=int6464#2
+# asm 2: paddd <z11=%xmm6,<y15=%xmm1
+paddd %xmm6,%xmm1
+
+# qhasm: r15 = y15
+# asm 1: movdqa <y15=int6464#2,>r15=int6464#13
+# asm 2: movdqa <y15=%xmm1,>r15=%xmm12
+movdqa %xmm1,%xmm12
+
+# qhasm: uint32323232 y15 <<= 18
+# asm 1: pslld $18,<y15=int6464#2
+# asm 2: pslld $18,<y15=%xmm1
+pslld $18,%xmm1
+
+# qhasm: z15 ^= y15
+# asm 1: pxor <y15=int6464#2,<z15=int6464#3
+# asm 2: pxor <y15=%xmm1,<z15=%xmm2
+pxor %xmm1,%xmm2
+
+# qhasm: uint32323232 r15 >>= 14
+# asm 1: psrld $14,<r15=int6464#13
+# asm 2: psrld $14,<r15=%xmm12
+psrld $14,%xmm12
+
+# qhasm: z15 ^= r15
+# asm 1: pxor <r15=int6464#13,<z15=int6464#3
+# asm 2: pxor <r15=%xmm12,<z15=%xmm2
+pxor %xmm12,%xmm2
+
+# qhasm: z5 = z5_stack
+# asm 1: movdqa <z5_stack=stack128#22,>z5=int6464#13
+# asm 2: movdqa <z5_stack=336(%rsp),>z5=%xmm12
+movdqa 336(%rsp),%xmm12
+
+# qhasm: z15_stack = z15
+# asm 1: movdqa <z15=int6464#3,>z15_stack=stack128#22
+# asm 2: movdqa <z15=%xmm2,>z15_stack=336(%rsp)
+movdqa %xmm2,336(%rsp)
+
+# qhasm: y6 = z4
+# asm 1: movdqa <z4=int6464#15,>y6=int6464#2
+# asm 2: movdqa <z4=%xmm14,>y6=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: uint32323232 y6 += z5
+# asm 1: paddd <z5=int6464#13,<y6=int6464#2
+# asm 2: paddd <z5=%xmm12,<y6=%xmm1
+paddd %xmm12,%xmm1
+
+# qhasm: r6 = y6
+# asm 1: movdqa <y6=int6464#2,>r6=int6464#3
+# asm 2: movdqa <y6=%xmm1,>r6=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232 y6 <<= 7
+# asm 1: pslld $7,<y6=int6464#2
+# asm 2: pslld $7,<y6=%xmm1
+pslld $7,%xmm1
+
+# qhasm: z6 ^= y6
+# asm 1: pxor <y6=int6464#2,<z6=int6464#6
+# asm 2: pxor <y6=%xmm1,<z6=%xmm5
+pxor %xmm1,%xmm5
+
+# qhasm: uint32323232 r6 >>= 25
+# asm 1: psrld $25,<r6=int6464#3
+# asm 2: psrld $25,<r6=%xmm2
+psrld $25,%xmm2
+
+# qhasm: z6 ^= r6
+# asm 1: pxor <r6=int6464#3,<z6=int6464#6
+# asm 2: pxor <r6=%xmm2,<z6=%xmm5
+pxor %xmm2,%xmm5
+
+# qhasm: y2 = z0
+# asm 1: movdqa <z0=int6464#1,>y2=int6464#2
+# asm 2: movdqa <z0=%xmm0,>y2=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232 y2 += z1
+# asm 1: paddd <z1=int6464#8,<y2=int6464#2
+# asm 2: paddd <z1=%xmm7,<y2=%xmm1
+paddd %xmm7,%xmm1
+
+# qhasm: r2 = y2
+# asm 1: movdqa <y2=int6464#2,>r2=int6464#3
+# asm 2: movdqa <y2=%xmm1,>r2=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232 y2 <<= 9
+# asm 1: pslld $9,<y2=int6464#2
+# asm 2: pslld $9,<y2=%xmm1
+pslld $9,%xmm1
+
+# qhasm: z2 ^= y2
+# asm 1: pxor <y2=int6464#2,<z2=int6464#11
+# asm 2: pxor <y2=%xmm1,<z2=%xmm10
+pxor %xmm1,%xmm10
+
+# qhasm: uint32323232 r2 >>= 23
+# asm 1: psrld $23,<r2=int6464#3
+# asm 2: psrld $23,<r2=%xmm2
+psrld $23,%xmm2
+
+# qhasm: z2 ^= r2
+# asm 1: pxor <r2=int6464#3,<z2=int6464#11
+# asm 2: pxor <r2=%xmm2,<z2=%xmm10
+pxor %xmm2,%xmm10
+
+# qhasm: y7 = z5
+# asm 1: movdqa <z5=int6464#13,>y7=int6464#2
+# asm 2: movdqa <z5=%xmm12,>y7=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: uint32323232 y7 += z6
+# asm 1: paddd <z6=int6464#6,<y7=int6464#2
+# asm 2: paddd <z6=%xmm5,<y7=%xmm1
+paddd %xmm5,%xmm1
+
+# qhasm: r7 = y7
+# asm 1: movdqa <y7=int6464#2,>r7=int6464#3
+# asm 2: movdqa <y7=%xmm1,>r7=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232 y7 <<= 9
+# asm 1: pslld $9,<y7=int6464#2
+# asm 2: pslld $9,<y7=%xmm1
+pslld $9,%xmm1
+
+# qhasm: z7 ^= y7
+# asm 1: pxor <y7=int6464#2,<z7=int6464#9
+# asm 2: pxor <y7=%xmm1,<z7=%xmm8
+pxor %xmm1,%xmm8
+
+# qhasm: uint32323232 r7 >>= 23
+# asm 1: psrld $23,<r7=int6464#3
+# asm 2: psrld $23,<r7=%xmm2
+psrld $23,%xmm2
+
+# qhasm: z7 ^= r7
+# asm 1: pxor <r7=int6464#3,<z7=int6464#9
+# asm 2: pxor <r7=%xmm2,<z7=%xmm8
+pxor %xmm2,%xmm8
+
+# qhasm: y3 = z1
+# asm 1: movdqa <z1=int6464#8,>y3=int6464#2
+# asm 2: movdqa <z1=%xmm7,>y3=%xmm1
+movdqa %xmm7,%xmm1
+
+# qhasm: uint32323232 y3 += z2
+# asm 1: paddd <z2=int6464#11,<y3=int6464#2
+# asm 2: paddd <z2=%xmm10,<y3=%xmm1
+paddd %xmm10,%xmm1
+
+# qhasm: r3 = y3
+# asm 1: movdqa <y3=int6464#2,>r3=int6464#3
+# asm 2: movdqa <y3=%xmm1,>r3=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232 y3 <<= 13
+# asm 1: pslld $13,<y3=int6464#2
+# asm 2: pslld $13,<y3=%xmm1
+pslld $13,%xmm1
+
+# qhasm: z3 ^= y3
+# asm 1: pxor <y3=int6464#2,<z3=int6464#5
+# asm 2: pxor <y3=%xmm1,<z3=%xmm4
+pxor %xmm1,%xmm4
+
+# qhasm: uint32323232 r3 >>= 19
+# asm 1: psrld $19,<r3=int6464#3
+# asm 2: psrld $19,<r3=%xmm2
+psrld $19,%xmm2
+
+# qhasm: z3 ^= r3
+# asm 1: pxor <r3=int6464#3,<z3=int6464#5
+# asm 2: pxor <r3=%xmm2,<z3=%xmm4
+pxor %xmm2,%xmm4
+
+# qhasm: y4 = z6
+# asm 1: movdqa <z6=int6464#6,>y4=int6464#2
+# asm 2: movdqa <z6=%xmm5,>y4=%xmm1
+movdqa %xmm5,%xmm1
+
+# qhasm: uint32323232 y4 += z7
+# asm 1: paddd <z7=int6464#9,<y4=int6464#2
+# asm 2: paddd <z7=%xmm8,<y4=%xmm1
+paddd %xmm8,%xmm1
+
+# qhasm: r4 = y4
+# asm 1: movdqa <y4=int6464#2,>r4=int6464#3
+# asm 2: movdqa <y4=%xmm1,>r4=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232 y4 <<= 13
+# asm 1: pslld $13,<y4=int6464#2
+# asm 2: pslld $13,<y4=%xmm1
+pslld $13,%xmm1
+
+# qhasm: z4 ^= y4
+# asm 1: pxor <y4=int6464#2,<z4=int6464#15
+# asm 2: pxor <y4=%xmm1,<z4=%xmm14
+pxor %xmm1,%xmm14
+
+# qhasm: uint32323232 r4 >>= 19
+# asm 1: psrld $19,<r4=int6464#3
+# asm 2: psrld $19,<r4=%xmm2
+psrld $19,%xmm2
+
+# qhasm: z4 ^= r4
+# asm 1: pxor <r4=int6464#3,<z4=int6464#15
+# asm 2: pxor <r4=%xmm2,<z4=%xmm14
+pxor %xmm2,%xmm14
+
+# qhasm: y0 = z2
+# asm 1: movdqa <z2=int6464#11,>y0=int6464#2
+# asm 2: movdqa <z2=%xmm10,>y0=%xmm1
+movdqa %xmm10,%xmm1
+
+# qhasm: uint32323232 y0 += z3
+# asm 1: paddd <z3=int6464#5,<y0=int6464#2
+# asm 2: paddd <z3=%xmm4,<y0=%xmm1
+paddd %xmm4,%xmm1
+
+# qhasm: r0 = y0
+# asm 1: movdqa <y0=int6464#2,>r0=int6464#3
+# asm 2: movdqa <y0=%xmm1,>r0=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232 y0 <<= 18
+# asm 1: pslld $18,<y0=int6464#2
+# asm 2: pslld $18,<y0=%xmm1
+pslld $18,%xmm1
+
+# qhasm: z0 ^= y0
+# asm 1: pxor <y0=int6464#2,<z0=int6464#1
+# asm 2: pxor <y0=%xmm1,<z0=%xmm0
+pxor %xmm1,%xmm0
+
+# qhasm: uint32323232 r0 >>= 14
+# asm 1: psrld $14,<r0=int6464#3
+# asm 2: psrld $14,<r0=%xmm2
+psrld $14,%xmm2
+
+# qhasm: z0 ^= r0
+# asm 1: pxor <r0=int6464#3,<z0=int6464#1
+# asm 2: pxor <r0=%xmm2,<z0=%xmm0
+pxor %xmm2,%xmm0
+
+# qhasm: z10 = z10_stack
+# asm 1: movdqa <z10_stack=stack128#21,>z10=int6464#2
+# asm 2: movdqa <z10_stack=320(%rsp),>z10=%xmm1
+movdqa 320(%rsp),%xmm1
+
+# qhasm: z0_stack = z0
+# asm 1: movdqa <z0=int6464#1,>z0_stack=stack128#21
+# asm 2: movdqa <z0=%xmm0,>z0_stack=320(%rsp)
+movdqa %xmm0,320(%rsp)
+
+# qhasm: y5 = z7
+# asm 1: movdqa <z7=int6464#9,>y5=int6464#1
+# asm 2: movdqa <z7=%xmm8,>y5=%xmm0
+movdqa %xmm8,%xmm0
+
+# qhasm: uint32323232 y5 += z4
+# asm 1: paddd <z4=int6464#15,<y5=int6464#1
+# asm 2: paddd <z4=%xmm14,<y5=%xmm0
+paddd %xmm14,%xmm0
+
+# qhasm: r5 = y5
+# asm 1: movdqa <y5=int6464#1,>r5=int6464#3
+# asm 2: movdqa <y5=%xmm0,>r5=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232 y5 <<= 18
+# asm 1: pslld $18,<y5=int6464#1
+# asm 2: pslld $18,<y5=%xmm0
+pslld $18,%xmm0
+
+# qhasm: z5 ^= y5
+# asm 1: pxor <y5=int6464#1,<z5=int6464#13
+# asm 2: pxor <y5=%xmm0,<z5=%xmm12
+pxor %xmm0,%xmm12
+
+# qhasm: uint32323232 r5 >>= 14
+# asm 1: psrld $14,<r5=int6464#3
+# asm 2: psrld $14,<r5=%xmm2
+psrld $14,%xmm2
+
+# qhasm: z5 ^= r5
+# asm 1: pxor <r5=int6464#3,<z5=int6464#13
+# asm 2: pxor <r5=%xmm2,<z5=%xmm12
+pxor %xmm2,%xmm12
+
+# qhasm: y11 = z9
+# asm 1: movdqa <z9=int6464#12,>y11=int6464#1
+# asm 2: movdqa <z9=%xmm11,>y11=%xmm0
+movdqa %xmm11,%xmm0
+
+# qhasm: uint32323232 y11 += z10
+# asm 1: paddd <z10=int6464#2,<y11=int6464#1
+# asm 2: paddd <z10=%xmm1,<y11=%xmm0
+paddd %xmm1,%xmm0
+
+# qhasm: r11 = y11
+# asm 1: movdqa <y11=int6464#1,>r11=int6464#3
+# asm 2: movdqa <y11=%xmm0,>r11=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232 y11 <<= 7
+# asm 1: pslld $7,<y11=int6464#1
+# asm 2: pslld $7,<y11=%xmm0
+pslld $7,%xmm0
+
+# qhasm: z11 ^= y11
+# asm 1: pxor <y11=int6464#1,<z11=int6464#7
+# asm 2: pxor <y11=%xmm0,<z11=%xmm6
+pxor %xmm0,%xmm6
+
+# qhasm: uint32323232 r11 >>= 25
+# asm 1: psrld $25,<r11=int6464#3
+# asm 2: psrld $25,<r11=%xmm2
+psrld $25,%xmm2
+
+# qhasm: z11 ^= r11
+# asm 1: pxor <r11=int6464#3,<z11=int6464#7
+# asm 2: pxor <r11=%xmm2,<z11=%xmm6
+pxor %xmm2,%xmm6
+
+# qhasm: z15 = z15_stack
+# asm 1: movdqa <z15_stack=stack128#22,>z15=int6464#3
+# asm 2: movdqa <z15_stack=336(%rsp),>z15=%xmm2
+movdqa 336(%rsp),%xmm2
+
+# qhasm: z5_stack = z5
+# asm 1: movdqa <z5=int6464#13,>z5_stack=stack128#22
+# asm 2: movdqa <z5=%xmm12,>z5_stack=336(%rsp)
+movdqa %xmm12,336(%rsp)
+
+# qhasm: y12 = z14
+# asm 1: movdqa <z14=int6464#4,>y12=int6464#1
+# asm 2: movdqa <z14=%xmm3,>y12=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232 y12 += z15
+# asm 1: paddd <z15=int6464#3,<y12=int6464#1
+# asm 2: paddd <z15=%xmm2,<y12=%xmm0
+paddd %xmm2,%xmm0
+
+# qhasm: r12 = y12
+# asm 1: movdqa <y12=int6464#1,>r12=int6464#13
+# asm 2: movdqa <y12=%xmm0,>r12=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232 y12 <<= 7
+# asm 1: pslld $7,<y12=int6464#1
+# asm 2: pslld $7,<y12=%xmm0
+pslld $7,%xmm0
+
+# qhasm: z12 ^= y12
+# asm 1: pxor <y12=int6464#1,<z12=int6464#14
+# asm 2: pxor <y12=%xmm0,<z12=%xmm13
+pxor %xmm0,%xmm13
+
+# qhasm: uint32323232 r12 >>= 25
+# asm 1: psrld $25,<r12=int6464#13
+# asm 2: psrld $25,<r12=%xmm12
+psrld $25,%xmm12
+
+# qhasm: z12 ^= r12
+# asm 1: pxor <r12=int6464#13,<z12=int6464#14
+# asm 2: pxor <r12=%xmm12,<z12=%xmm13
+pxor %xmm12,%xmm13
+
+# qhasm: y8 = z10
+# asm 1: movdqa <z10=int6464#2,>y8=int6464#1
+# asm 2: movdqa <z10=%xmm1,>y8=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: uint32323232 y8 += z11
+# asm 1: paddd <z11=int6464#7,<y8=int6464#1
+# asm 2: paddd <z11=%xmm6,<y8=%xmm0
+paddd %xmm6,%xmm0
+
+# qhasm: r8 = y8
+# asm 1: movdqa <y8=int6464#1,>r8=int6464#13
+# asm 2: movdqa <y8=%xmm0,>r8=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232 y8 <<= 9
+# asm 1: pslld $9,<y8=int6464#1
+# asm 2: pslld $9,<y8=%xmm0
+pslld $9,%xmm0
+
+# qhasm: z8 ^= y8
+# asm 1: pxor <y8=int6464#1,<z8=int6464#16
+# asm 2: pxor <y8=%xmm0,<z8=%xmm15
+pxor %xmm0,%xmm15
+
+# qhasm: uint32323232 r8 >>= 23
+# asm 1: psrld $23,<r8=int6464#13
+# asm 2: psrld $23,<r8=%xmm12
+psrld $23,%xmm12
+
+# qhasm: z8 ^= r8
+# asm 1: pxor <r8=int6464#13,<z8=int6464#16
+# asm 2: pxor <r8=%xmm12,<z8=%xmm15
+pxor %xmm12,%xmm15
+
+# qhasm: y13 = z15
+# asm 1: movdqa <z15=int6464#3,>y13=int6464#1
+# asm 2: movdqa <z15=%xmm2,>y13=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232 y13 += z12
+# asm 1: paddd <z12=int6464#14,<y13=int6464#1
+# asm 2: paddd <z12=%xmm13,<y13=%xmm0
+paddd %xmm13,%xmm0
+
+# qhasm: r13 = y13
+# asm 1: movdqa <y13=int6464#1,>r13=int6464#13
+# asm 2: movdqa <y13=%xmm0,>r13=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232 y13 <<= 9
+# asm 1: pslld $9,<y13=int6464#1
+# asm 2: pslld $9,<y13=%xmm0
+pslld $9,%xmm0
+
+# qhasm: z13 ^= y13
+# asm 1: pxor <y13=int6464#1,<z13=int6464#10
+# asm 2: pxor <y13=%xmm0,<z13=%xmm9
+pxor %xmm0,%xmm9
+
+# qhasm: uint32323232 r13 >>= 23
+# asm 1: psrld $23,<r13=int6464#13
+# asm 2: psrld $23,<r13=%xmm12
+psrld $23,%xmm12
+
+# qhasm: z13 ^= r13
+# asm 1: pxor <r13=int6464#13,<z13=int6464#10
+# asm 2: pxor <r13=%xmm12,<z13=%xmm9
+pxor %xmm12,%xmm9
+
+# qhasm: y9 = z11
+# asm 1: movdqa <z11=int6464#7,>y9=int6464#1
+# asm 2: movdqa <z11=%xmm6,>y9=%xmm0
+movdqa %xmm6,%xmm0
+
+# qhasm: uint32323232 y9 += z8
+# asm 1: paddd <z8=int6464#16,<y9=int6464#1
+# asm 2: paddd <z8=%xmm15,<y9=%xmm0
+paddd %xmm15,%xmm0
+
+# qhasm: r9 = y9
+# asm 1: movdqa <y9=int6464#1,>r9=int6464#13
+# asm 2: movdqa <y9=%xmm0,>r9=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232 y9 <<= 13
+# asm 1: pslld $13,<y9=int6464#1
+# asm 2: pslld $13,<y9=%xmm0
+pslld $13,%xmm0
+
+# qhasm: z9 ^= y9
+# asm 1: pxor <y9=int6464#1,<z9=int6464#12
+# asm 2: pxor <y9=%xmm0,<z9=%xmm11
+pxor %xmm0,%xmm11
+
+# qhasm: uint32323232 r9 >>= 19
+# asm 1: psrld $19,<r9=int6464#13
+# asm 2: psrld $19,<r9=%xmm12
+psrld $19,%xmm12
+
+# qhasm: z9 ^= r9
+# asm 1: pxor <r9=int6464#13,<z9=int6464#12
+# asm 2: pxor <r9=%xmm12,<z9=%xmm11
+pxor %xmm12,%xmm11
+
+# qhasm: y14 = z12
+# asm 1: movdqa <z12=int6464#14,>y14=int6464#1
+# asm 2: movdqa <z12=%xmm13,>y14=%xmm0
+movdqa %xmm13,%xmm0
+
+# qhasm: uint32323232 y14 += z13
+# asm 1: paddd <z13=int6464#10,<y14=int6464#1
+# asm 2: paddd <z13=%xmm9,<y14=%xmm0
+paddd %xmm9,%xmm0
+
+# qhasm: r14 = y14
+# asm 1: movdqa <y14=int6464#1,>r14=int6464#13
+# asm 2: movdqa <y14=%xmm0,>r14=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232 y14 <<= 13
+# asm 1: pslld $13,<y14=int6464#1
+# asm 2: pslld $13,<y14=%xmm0
+pslld $13,%xmm0
+
+# qhasm: z14 ^= y14
+# asm 1: pxor <y14=int6464#1,<z14=int6464#4
+# asm 2: pxor <y14=%xmm0,<z14=%xmm3
+pxor %xmm0,%xmm3
+
+# qhasm: uint32323232 r14 >>= 19
+# asm 1: psrld $19,<r14=int6464#13
+# asm 2: psrld $19,<r14=%xmm12
+psrld $19,%xmm12
+
+# qhasm: z14 ^= r14
+# asm 1: pxor <r14=int6464#13,<z14=int6464#4
+# asm 2: pxor <r14=%xmm12,<z14=%xmm3
+pxor %xmm12,%xmm3
+
+# qhasm: y10 = z8
+# asm 1: movdqa <z8=int6464#16,>y10=int6464#1
+# asm 2: movdqa <z8=%xmm15,>y10=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm: uint32323232 y10 += z9
+# asm 1: paddd <z9=int6464#12,<y10=int6464#1
+# asm 2: paddd <z9=%xmm11,<y10=%xmm0
+paddd %xmm11,%xmm0
+
+# qhasm: r10 = y10
+# asm 1: movdqa <y10=int6464#1,>r10=int6464#13
+# asm 2: movdqa <y10=%xmm0,>r10=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232 y10 <<= 18
+# asm 1: pslld $18,<y10=int6464#1
+# asm 2: pslld $18,<y10=%xmm0
+pslld $18,%xmm0
+
+# qhasm: z10 ^= y10
+# asm 1: pxor <y10=int6464#1,<z10=int6464#2
+# asm 2: pxor <y10=%xmm0,<z10=%xmm1
+pxor %xmm0,%xmm1
+
+# qhasm: uint32323232 r10 >>= 14
+# asm 1: psrld $14,<r10=int6464#13
+# asm 2: psrld $14,<r10=%xmm12
+psrld $14,%xmm12
+
+# qhasm: z10 ^= r10
+# asm 1: pxor <r10=int6464#13,<z10=int6464#2
+# asm 2: pxor <r10=%xmm12,<z10=%xmm1
+pxor %xmm12,%xmm1
+
+# qhasm: y15 = z13
+# asm 1: movdqa <z13=int6464#10,>y15=int6464#1
+# asm 2: movdqa <z13=%xmm9,>y15=%xmm0
+movdqa %xmm9,%xmm0
+
+# qhasm: uint32323232 y15 += z14
+# asm 1: paddd <z14=int6464#4,<y15=int6464#1
+# asm 2: paddd <z14=%xmm3,<y15=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm: r15 = y15
+# asm 1: movdqa <y15=int6464#1,>r15=int6464#13
+# asm 2: movdqa <y15=%xmm0,>r15=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232 y15 <<= 18
+# asm 1: pslld $18,<y15=int6464#1
+# asm 2: pslld $18,<y15=%xmm0
+pslld $18,%xmm0
+
+# qhasm: z15 ^= y15
+# asm 1: pxor <y15=int6464#1,<z15=int6464#3
+# asm 2: pxor <y15=%xmm0,<z15=%xmm2
+pxor %xmm0,%xmm2
+
+# qhasm: uint32323232 r15 >>= 14
+# asm 1: psrld $14,<r15=int6464#13
+# asm 2: psrld $14,<r15=%xmm12
+psrld $14,%xmm12
+
+# qhasm: z15 ^= r15
+# asm 1: pxor <r15=int6464#13,<z15=int6464#3
+# asm 2: pxor <r15=%xmm12,<z15=%xmm2
+pxor %xmm12,%xmm2
+
+# qhasm: z0 = z0_stack
+# asm 1: movdqa <z0_stack=stack128#21,>z0=int6464#13
+# asm 2: movdqa <z0_stack=320(%rsp),>z0=%xmm12
+movdqa 320(%rsp),%xmm12
+
+# qhasm: z5 = z5_stack
+# asm 1: movdqa <z5_stack=stack128#22,>z5=int6464#1
+# asm 2: movdqa <z5_stack=336(%rsp),>z5=%xmm0
+movdqa 336(%rsp),%xmm0
+
+# qhasm: unsigned>? i -= 2
+# asm 1: sub $2,<i=int64#3
+# asm 2: sub $2,<i=%rdx
+sub $2,%rdx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto mainloop1 if unsigned>
+ja ._mainloop1
+
+# qhasm: uint32323232 z0 += orig0
+# asm 1: paddd <orig0=stack128#8,<z0=int6464#13
+# asm 2: paddd <orig0=112(%rsp),<z0=%xmm12
+paddd 112(%rsp),%xmm12
+
+# qhasm: uint32323232 z1 += orig1
+# asm 1: paddd <orig1=stack128#12,<z1=int6464#8
+# asm 2: paddd <orig1=176(%rsp),<z1=%xmm7
+paddd 176(%rsp),%xmm7
+
+# qhasm: uint32323232 z2 += orig2
+# asm 1: paddd <orig2=stack128#15,<z2=int6464#11
+# asm 2: paddd <orig2=224(%rsp),<z2=%xmm10
+paddd 224(%rsp),%xmm10
+
+# qhasm: uint32323232 z3 += orig3
+# asm 1: paddd <orig3=stack128#18,<z3=int6464#5
+# asm 2: paddd <orig3=272(%rsp),<z3=%xmm4
+paddd 272(%rsp),%xmm4
+
+# qhasm: in0 = z0
+# asm 1: movd <z0=int6464#13,>in0=int64#3
+# asm 2: movd <z0=%xmm12,>in0=%rdx
+movd %xmm12,%rdx
+
+# qhasm: in1 = z1
+# asm 1: movd <z1=int6464#8,>in1=int64#4
+# asm 2: movd <z1=%xmm7,>in1=%rcx
+movd %xmm7,%rcx
+
+# qhasm: in2 = z2
+# asm 1: movd <z2=int6464#11,>in2=int64#5
+# asm 2: movd <z2=%xmm10,>in2=%r8
+movd %xmm10,%r8
+
+# qhasm: in3 = z3
+# asm 1: movd <z3=int6464#5,>in3=int64#6
+# asm 2: movd <z3=%xmm4,>in3=%r9
+movd %xmm4,%r9
+
+# qhasm: z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#13,<z0=int6464#13
+# asm 2: pshufd $0x39,<z0=%xmm12,<z0=%xmm12
+pshufd $0x39,%xmm12,%xmm12
+
+# qhasm: z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#8,<z1=int6464#8
+# asm 2: pshufd $0x39,<z1=%xmm7,<z1=%xmm7
+pshufd $0x39,%xmm7,%xmm7
+
+# qhasm: z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#11,<z2=int6464#11
+# asm 2: pshufd $0x39,<z2=%xmm10,<z2=%xmm10
+pshufd $0x39,%xmm10,%xmm10
+
+# qhasm: z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#5,<z3=int6464#5
+# asm 2: pshufd $0x39,<z3=%xmm4,<z3=%xmm4
+pshufd $0x39,%xmm4,%xmm4
+
+# qhasm: (uint32) in0 ^= *(uint32 *) (m + 0)
+# asm 1: xorl 0(<m=int64#2),<in0=int64#3d
+# asm 2: xorl 0(<m=%rsi),<in0=%edx
+xorl 0(%rsi),%edx
+
+# qhasm: (uint32) in1 ^= *(uint32 *) (m + 4)
+# asm 1: xorl 4(<m=int64#2),<in1=int64#4d
+# asm 2: xorl 4(<m=%rsi),<in1=%ecx
+xorl 4(%rsi),%ecx
+
+# qhasm: (uint32) in2 ^= *(uint32 *) (m + 8)
+# asm 1: xorl 8(<m=int64#2),<in2=int64#5d
+# asm 2: xorl 8(<m=%rsi),<in2=%r8d
+xorl 8(%rsi),%r8d
+
+# qhasm: (uint32) in3 ^= *(uint32 *) (m + 12)
+# asm 1: xorl 12(<m=int64#2),<in3=int64#6d
+# asm 2: xorl 12(<m=%rsi),<in3=%r9d
+xorl 12(%rsi),%r9d
+
+# qhasm: *(uint32 *) (out + 0) = in0
+# asm 1: movl <in0=int64#3d,0(<out=int64#1)
+# asm 2: movl <in0=%edx,0(<out=%rdi)
+movl %edx,0(%rdi)
+
+# qhasm: *(uint32 *) (out + 4) = in1
+# asm 1: movl <in1=int64#4d,4(<out=int64#1)
+# asm 2: movl <in1=%ecx,4(<out=%rdi)
+movl %ecx,4(%rdi)
+
+# qhasm: *(uint32 *) (out + 8) = in2
+# asm 1: movl <in2=int64#5d,8(<out=int64#1)
+# asm 2: movl <in2=%r8d,8(<out=%rdi)
+movl %r8d,8(%rdi)
+
+# qhasm: *(uint32 *) (out + 12) = in3
+# asm 1: movl <in3=int64#6d,12(<out=int64#1)
+# asm 2: movl <in3=%r9d,12(<out=%rdi)
+movl %r9d,12(%rdi)
+
+# qhasm: in0 = z0
+# asm 1: movd <z0=int6464#13,>in0=int64#3
+# asm 2: movd <z0=%xmm12,>in0=%rdx
+movd %xmm12,%rdx
+
+# qhasm: in1 = z1
+# asm 1: movd <z1=int6464#8,>in1=int64#4
+# asm 2: movd <z1=%xmm7,>in1=%rcx
+movd %xmm7,%rcx
+
+# qhasm: in2 = z2
+# asm 1: movd <z2=int6464#11,>in2=int64#5
+# asm 2: movd <z2=%xmm10,>in2=%r8
+movd %xmm10,%r8
+
+# qhasm: in3 = z3
+# asm 1: movd <z3=int6464#5,>in3=int64#6
+# asm 2: movd <z3=%xmm4,>in3=%r9
+movd %xmm4,%r9
+
+# qhasm: z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#13,<z0=int6464#13
+# asm 2: pshufd $0x39,<z0=%xmm12,<z0=%xmm12
+pshufd $0x39,%xmm12,%xmm12
+
+# qhasm: z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#8,<z1=int6464#8
+# asm 2: pshufd $0x39,<z1=%xmm7,<z1=%xmm7
+pshufd $0x39,%xmm7,%xmm7
+
+# qhasm: z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#11,<z2=int6464#11
+# asm 2: pshufd $0x39,<z2=%xmm10,<z2=%xmm10
+pshufd $0x39,%xmm10,%xmm10
+
+# qhasm: z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#5,<z3=int6464#5
+# asm 2: pshufd $0x39,<z3=%xmm4,<z3=%xmm4
+pshufd $0x39,%xmm4,%xmm4
+
+# qhasm: (uint32) in0 ^= *(uint32 *) (m + 64)
+# asm 1: xorl 64(<m=int64#2),<in0=int64#3d
+# asm 2: xorl 64(<m=%rsi),<in0=%edx
+xorl 64(%rsi),%edx
+
+# qhasm: (uint32) in1 ^= *(uint32 *) (m + 68)
+# asm 1: xorl 68(<m=int64#2),<in1=int64#4d
+# asm 2: xorl 68(<m=%rsi),<in1=%ecx
+xorl 68(%rsi),%ecx
+
+# qhasm: (uint32) in2 ^= *(uint32 *) (m + 72)
+# asm 1: xorl 72(<m=int64#2),<in2=int64#5d
+# asm 2: xorl 72(<m=%rsi),<in2=%r8d
+xorl 72(%rsi),%r8d
+
+# qhasm: (uint32) in3 ^= *(uint32 *) (m + 76)
+# asm 1: xorl 76(<m=int64#2),<in3=int64#6d
+# asm 2: xorl 76(<m=%rsi),<in3=%r9d
+xorl 76(%rsi),%r9d
+
+# qhasm: *(uint32 *) (out + 64) = in0
+# asm 1: movl <in0=int64#3d,64(<out=int64#1)
+# asm 2: movl <in0=%edx,64(<out=%rdi)
+movl %edx,64(%rdi)
+
+# qhasm: *(uint32 *) (out + 68) = in1
+# asm 1: movl <in1=int64#4d,68(<out=int64#1)
+# asm 2: movl <in1=%ecx,68(<out=%rdi)
+movl %ecx,68(%rdi)
+
+# qhasm: *(uint32 *) (out + 72) = in2
+# asm 1: movl <in2=int64#5d,72(<out=int64#1)
+# asm 2: movl <in2=%r8d,72(<out=%rdi)
+movl %r8d,72(%rdi)
+
+# qhasm: *(uint32 *) (out + 76) = in3
+# asm 1: movl <in3=int64#6d,76(<out=int64#1)
+# asm 2: movl <in3=%r9d,76(<out=%rdi)
+movl %r9d,76(%rdi)
+
+# qhasm: in0 = z0
+# asm 1: movd <z0=int6464#13,>in0=int64#3
+# asm 2: movd <z0=%xmm12,>in0=%rdx
+movd %xmm12,%rdx
+
+# qhasm: in1 = z1
+# asm 1: movd <z1=int6464#8,>in1=int64#4
+# asm 2: movd <z1=%xmm7,>in1=%rcx
+movd %xmm7,%rcx
+
+# qhasm: in2 = z2
+# asm 1: movd <z2=int6464#11,>in2=int64#5
+# asm 2: movd <z2=%xmm10,>in2=%r8
+movd %xmm10,%r8
+
+# qhasm: in3 = z3
+# asm 1: movd <z3=int6464#5,>in3=int64#6
+# asm 2: movd <z3=%xmm4,>in3=%r9
+movd %xmm4,%r9
+
+# qhasm: z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#13,<z0=int6464#13
+# asm 2: pshufd $0x39,<z0=%xmm12,<z0=%xmm12
+pshufd $0x39,%xmm12,%xmm12
+
+# qhasm: z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#8,<z1=int6464#8
+# asm 2: pshufd $0x39,<z1=%xmm7,<z1=%xmm7
+pshufd $0x39,%xmm7,%xmm7
+
+# qhasm: z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#11,<z2=int6464#11
+# asm 2: pshufd $0x39,<z2=%xmm10,<z2=%xmm10
+pshufd $0x39,%xmm10,%xmm10
+
+# qhasm: z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#5,<z3=int6464#5
+# asm 2: pshufd $0x39,<z3=%xmm4,<z3=%xmm4
+pshufd $0x39,%xmm4,%xmm4
+
+# qhasm: (uint32) in0 ^= *(uint32 *) (m + 128)
+# asm 1: xorl 128(<m=int64#2),<in0=int64#3d
+# asm 2: xorl 128(<m=%rsi),<in0=%edx
+xorl 128(%rsi),%edx
+
+# qhasm: (uint32) in1 ^= *(uint32 *) (m + 132)
+# asm 1: xorl 132(<m=int64#2),<in1=int64#4d
+# asm 2: xorl 132(<m=%rsi),<in1=%ecx
+xorl 132(%rsi),%ecx
+
+# qhasm: (uint32) in2 ^= *(uint32 *) (m + 136)
+# asm 1: xorl 136(<m=int64#2),<in2=int64#5d
+# asm 2: xorl 136(<m=%rsi),<in2=%r8d
+xorl 136(%rsi),%r8d
+
+# qhasm: (uint32) in3 ^= *(uint32 *) (m + 140)
+# asm 1: xorl 140(<m=int64#2),<in3=int64#6d
+# asm 2: xorl 140(<m=%rsi),<in3=%r9d
+xorl 140(%rsi),%r9d
+
+# qhasm: *(uint32 *) (out + 128) = in0
+# asm 1: movl <in0=int64#3d,128(<out=int64#1)
+# asm 2: movl <in0=%edx,128(<out=%rdi)
+movl %edx,128(%rdi)
+
+# qhasm: *(uint32 *) (out + 132) = in1
+# asm 1: movl <in1=int64#4d,132(<out=int64#1)
+# asm 2: movl <in1=%ecx,132(<out=%rdi)
+movl %ecx,132(%rdi)
+
+# qhasm: *(uint32 *) (out + 136) = in2
+# asm 1: movl <in2=int64#5d,136(<out=int64#1)
+# asm 2: movl <in2=%r8d,136(<out=%rdi)
+movl %r8d,136(%rdi)
+
+# qhasm: *(uint32 *) (out + 140) = in3
+# asm 1: movl <in3=int64#6d,140(<out=int64#1)
+# asm 2: movl <in3=%r9d,140(<out=%rdi)
+movl %r9d,140(%rdi)
+
+# qhasm: in0 = z0
+# asm 1: movd <z0=int6464#13,>in0=int64#3
+# asm 2: movd <z0=%xmm12,>in0=%rdx
+movd %xmm12,%rdx
+
+# qhasm: in1 = z1
+# asm 1: movd <z1=int6464#8,>in1=int64#4
+# asm 2: movd <z1=%xmm7,>in1=%rcx
+movd %xmm7,%rcx
+
+# qhasm: in2 = z2
+# asm 1: movd <z2=int6464#11,>in2=int64#5
+# asm 2: movd <z2=%xmm10,>in2=%r8
+movd %xmm10,%r8
+
+# qhasm: in3 = z3
+# asm 1: movd <z3=int6464#5,>in3=int64#6
+# asm 2: movd <z3=%xmm4,>in3=%r9
+movd %xmm4,%r9
+
+# qhasm: (uint32) in0 ^= *(uint32 *) (m + 192)
+# asm 1: xorl 192(<m=int64#2),<in0=int64#3d
+# asm 2: xorl 192(<m=%rsi),<in0=%edx
+xorl 192(%rsi),%edx
+
+# qhasm: (uint32) in1 ^= *(uint32 *) (m + 196)
+# asm 1: xorl 196(<m=int64#2),<in1=int64#4d
+# asm 2: xorl 196(<m=%rsi),<in1=%ecx
+xorl 196(%rsi),%ecx
+
+# qhasm: (uint32) in2 ^= *(uint32 *) (m + 200)
+# asm 1: xorl 200(<m=int64#2),<in2=int64#5d
+# asm 2: xorl 200(<m=%rsi),<in2=%r8d
+xorl 200(%rsi),%r8d
+
+# qhasm: (uint32) in3 ^= *(uint32 *) (m + 204)
+# asm 1: xorl 204(<m=int64#2),<in3=int64#6d
+# asm 2: xorl 204(<m=%rsi),<in3=%r9d
+xorl 204(%rsi),%r9d
+
+# qhasm: *(uint32 *) (out + 192) = in0
+# asm 1: movl <in0=int64#3d,192(<out=int64#1)
+# asm 2: movl <in0=%edx,192(<out=%rdi)
+movl %edx,192(%rdi)
+
+# qhasm: *(uint32 *) (out + 196) = in1
+# asm 1: movl <in1=int64#4d,196(<out=int64#1)
+# asm 2: movl <in1=%ecx,196(<out=%rdi)
+movl %ecx,196(%rdi)
+
+# qhasm: *(uint32 *) (out + 200) = in2
+# asm 1: movl <in2=int64#5d,200(<out=int64#1)
+# asm 2: movl <in2=%r8d,200(<out=%rdi)
+movl %r8d,200(%rdi)
+
+# qhasm: *(uint32 *) (out + 204) = in3
+# asm 1: movl <in3=int64#6d,204(<out=int64#1)
+# asm 2: movl <in3=%r9d,204(<out=%rdi)
+movl %r9d,204(%rdi)
+
+# qhasm: uint32323232 z4 += orig4
+# asm 1: paddd <orig4=stack128#16,<z4=int6464#15
+# asm 2: paddd <orig4=240(%rsp),<z4=%xmm14
+paddd 240(%rsp),%xmm14
+
+# qhasm: uint32323232 z5 += orig5
+# asm 1: paddd <orig5=stack128#5,<z5=int6464#1
+# asm 2: paddd <orig5=64(%rsp),<z5=%xmm0
+paddd 64(%rsp),%xmm0
+
+# qhasm: uint32323232 z6 += orig6
+# asm 1: paddd <orig6=stack128#9,<z6=int6464#6
+# asm 2: paddd <orig6=128(%rsp),<z6=%xmm5
+paddd 128(%rsp),%xmm5
+
+# qhasm: uint32323232 z7 += orig7
+# asm 1: paddd <orig7=stack128#13,<z7=int6464#9
+# asm 2: paddd <orig7=192(%rsp),<z7=%xmm8
+paddd 192(%rsp),%xmm8
+
+# qhasm: in4 = z4
+# asm 1: movd <z4=int6464#15,>in4=int64#3
+# asm 2: movd <z4=%xmm14,>in4=%rdx
+movd %xmm14,%rdx
+
+# qhasm: in5 = z5
+# asm 1: movd <z5=int6464#1,>in5=int64#4
+# asm 2: movd <z5=%xmm0,>in5=%rcx
+movd %xmm0,%rcx
+
+# qhasm: in6 = z6
+# asm 1: movd <z6=int6464#6,>in6=int64#5
+# asm 2: movd <z6=%xmm5,>in6=%r8
+movd %xmm5,%r8
+
+# qhasm: in7 = z7
+# asm 1: movd <z7=int6464#9,>in7=int64#6
+# asm 2: movd <z7=%xmm8,>in7=%r9
+movd %xmm8,%r9
+
+# qhasm: z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#15,<z4=int6464#15
+# asm 2: pshufd $0x39,<z4=%xmm14,<z4=%xmm14
+pshufd $0x39,%xmm14,%xmm14
+
+# qhasm: z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#1,<z5=int6464#1
+# asm 2: pshufd $0x39,<z5=%xmm0,<z5=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#6,<z6=int6464#6
+# asm 2: pshufd $0x39,<z6=%xmm5,<z6=%xmm5
+pshufd $0x39,%xmm5,%xmm5
+
+# qhasm: z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#9,<z7=int6464#9
+# asm 2: pshufd $0x39,<z7=%xmm8,<z7=%xmm8
+pshufd $0x39,%xmm8,%xmm8
+
+# qhasm: (uint32) in4 ^= *(uint32 *) (m + 16)
+# asm 1: xorl 16(<m=int64#2),<in4=int64#3d
+# asm 2: xorl 16(<m=%rsi),<in4=%edx
+xorl 16(%rsi),%edx
+
+# qhasm: (uint32) in5 ^= *(uint32 *) (m + 20)
+# asm 1: xorl 20(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 20(<m=%rsi),<in5=%ecx
+xorl 20(%rsi),%ecx
+
+# qhasm: (uint32) in6 ^= *(uint32 *) (m + 24)
+# asm 1: xorl 24(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 24(<m=%rsi),<in6=%r8d
+xorl 24(%rsi),%r8d
+
+# qhasm: (uint32) in7 ^= *(uint32 *) (m + 28)
+# asm 1: xorl 28(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 28(<m=%rsi),<in7=%r9d
+xorl 28(%rsi),%r9d
+
+# qhasm: *(uint32 *) (out + 16) = in4
+# asm 1: movl <in4=int64#3d,16(<out=int64#1)
+# asm 2: movl <in4=%edx,16(<out=%rdi)
+movl %edx,16(%rdi)
+
+# qhasm: *(uint32 *) (out + 20) = in5
+# asm 1: movl <in5=int64#4d,20(<out=int64#1)
+# asm 2: movl <in5=%ecx,20(<out=%rdi)
+movl %ecx,20(%rdi)
+
+# qhasm: *(uint32 *) (out + 24) = in6
+# asm 1: movl <in6=int64#5d,24(<out=int64#1)
+# asm 2: movl <in6=%r8d,24(<out=%rdi)
+movl %r8d,24(%rdi)
+
+# qhasm: *(uint32 *) (out + 28) = in7
+# asm 1: movl <in7=int64#6d,28(<out=int64#1)
+# asm 2: movl <in7=%r9d,28(<out=%rdi)
+movl %r9d,28(%rdi)
+
+# qhasm: in4 = z4
+# asm 1: movd <z4=int6464#15,>in4=int64#3
+# asm 2: movd <z4=%xmm14,>in4=%rdx
+movd %xmm14,%rdx
+
+# qhasm: in5 = z5
+# asm 1: movd <z5=int6464#1,>in5=int64#4
+# asm 2: movd <z5=%xmm0,>in5=%rcx
+movd %xmm0,%rcx
+
+# qhasm: in6 = z6
+# asm 1: movd <z6=int6464#6,>in6=int64#5
+# asm 2: movd <z6=%xmm5,>in6=%r8
+movd %xmm5,%r8
+
+# qhasm: in7 = z7
+# asm 1: movd <z7=int6464#9,>in7=int64#6
+# asm 2: movd <z7=%xmm8,>in7=%r9
+movd %xmm8,%r9
+
+# qhasm: z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#15,<z4=int6464#15
+# asm 2: pshufd $0x39,<z4=%xmm14,<z4=%xmm14
+pshufd $0x39,%xmm14,%xmm14
+
+# qhasm: z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#1,<z5=int6464#1
+# asm 2: pshufd $0x39,<z5=%xmm0,<z5=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#6,<z6=int6464#6
+# asm 2: pshufd $0x39,<z6=%xmm5,<z6=%xmm5
+pshufd $0x39,%xmm5,%xmm5
+
+# qhasm: z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#9,<z7=int6464#9
+# asm 2: pshufd $0x39,<z7=%xmm8,<z7=%xmm8
+pshufd $0x39,%xmm8,%xmm8
+
+# qhasm: (uint32) in4 ^= *(uint32 *) (m + 80)
+# asm 1: xorl 80(<m=int64#2),<in4=int64#3d
+# asm 2: xorl 80(<m=%rsi),<in4=%edx
+xorl 80(%rsi),%edx
+
+# qhasm: (uint32) in5 ^= *(uint32 *) (m + 84)
+# asm 1: xorl 84(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 84(<m=%rsi),<in5=%ecx
+xorl 84(%rsi),%ecx
+
+# qhasm: (uint32) in6 ^= *(uint32 *) (m + 88)
+# asm 1: xorl 88(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 88(<m=%rsi),<in6=%r8d
+xorl 88(%rsi),%r8d
+
+# qhasm: (uint32) in7 ^= *(uint32 *) (m + 92)
+# asm 1: xorl 92(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 92(<m=%rsi),<in7=%r9d
+xorl 92(%rsi),%r9d
+
+# qhasm: *(uint32 *) (out + 80) = in4
+# asm 1: movl <in4=int64#3d,80(<out=int64#1)
+# asm 2: movl <in4=%edx,80(<out=%rdi)
+movl %edx,80(%rdi)
+
+# qhasm: *(uint32 *) (out + 84) = in5
+# asm 1: movl <in5=int64#4d,84(<out=int64#1)
+# asm 2: movl <in5=%ecx,84(<out=%rdi)
+movl %ecx,84(%rdi)
+
+# qhasm: *(uint32 *) (out + 88) = in6
+# asm 1: movl <in6=int64#5d,88(<out=int64#1)
+# asm 2: movl <in6=%r8d,88(<out=%rdi)
+movl %r8d,88(%rdi)
+
+# qhasm: *(uint32 *) (out + 92) = in7
+# asm 1: movl <in7=int64#6d,92(<out=int64#1)
+# asm 2: movl <in7=%r9d,92(<out=%rdi)
+movl %r9d,92(%rdi)
+
+# qhasm: in4 = z4
+# asm 1: movd <z4=int6464#15,>in4=int64#3
+# asm 2: movd <z4=%xmm14,>in4=%rdx
+movd %xmm14,%rdx
+
+# qhasm: in5 = z5
+# asm 1: movd <z5=int6464#1,>in5=int64#4
+# asm 2: movd <z5=%xmm0,>in5=%rcx
+movd %xmm0,%rcx
+
+# qhasm: in6 = z6
+# asm 1: movd <z6=int6464#6,>in6=int64#5
+# asm 2: movd <z6=%xmm5,>in6=%r8
+movd %xmm5,%r8
+
+# qhasm: in7 = z7
+# asm 1: movd <z7=int6464#9,>in7=int64#6
+# asm 2: movd <z7=%xmm8,>in7=%r9
+movd %xmm8,%r9
+
+# qhasm: z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#15,<z4=int6464#15
+# asm 2: pshufd $0x39,<z4=%xmm14,<z4=%xmm14
+pshufd $0x39,%xmm14,%xmm14
+
+# qhasm: z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#1,<z5=int6464#1
+# asm 2: pshufd $0x39,<z5=%xmm0,<z5=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#6,<z6=int6464#6
+# asm 2: pshufd $0x39,<z6=%xmm5,<z6=%xmm5
+pshufd $0x39,%xmm5,%xmm5
+
+# qhasm: z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#9,<z7=int6464#9
+# asm 2: pshufd $0x39,<z7=%xmm8,<z7=%xmm8
+pshufd $0x39,%xmm8,%xmm8
+
+# qhasm: (uint32) in4 ^= *(uint32 *) (m + 144)
+# asm 1: xorl 144(<m=int64#2),<in4=int64#3d
+# asm 2: xorl 144(<m=%rsi),<in4=%edx
+xorl 144(%rsi),%edx
+
+# qhasm: (uint32) in5 ^= *(uint32 *) (m + 148)
+# asm 1: xorl 148(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 148(<m=%rsi),<in5=%ecx
+xorl 148(%rsi),%ecx
+
+# qhasm: (uint32) in6 ^= *(uint32 *) (m + 152)
+# asm 1: xorl 152(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 152(<m=%rsi),<in6=%r8d
+xorl 152(%rsi),%r8d
+
+# qhasm: (uint32) in7 ^= *(uint32 *) (m + 156)
+# asm 1: xorl 156(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 156(<m=%rsi),<in7=%r9d
+xorl 156(%rsi),%r9d
+
+# qhasm: *(uint32 *) (out + 144) = in4
+# asm 1: movl <in4=int64#3d,144(<out=int64#1)
+# asm 2: movl <in4=%edx,144(<out=%rdi)
+movl %edx,144(%rdi)
+
+# qhasm: *(uint32 *) (out + 148) = in5
+# asm 1: movl <in5=int64#4d,148(<out=int64#1)
+# asm 2: movl <in5=%ecx,148(<out=%rdi)
+movl %ecx,148(%rdi)
+
+# qhasm: *(uint32 *) (out + 152) = in6
+# asm 1: movl <in6=int64#5d,152(<out=int64#1)
+# asm 2: movl <in6=%r8d,152(<out=%rdi)
+movl %r8d,152(%rdi)
+
+# qhasm: *(uint32 *) (out + 156) = in7
+# asm 1: movl <in7=int64#6d,156(<out=int64#1)
+# asm 2: movl <in7=%r9d,156(<out=%rdi)
+movl %r9d,156(%rdi)
+
+# qhasm: in4 = z4
+# asm 1: movd <z4=int6464#15,>in4=int64#3
+# asm 2: movd <z4=%xmm14,>in4=%rdx
+movd %xmm14,%rdx
+
+# qhasm: in5 = z5
+# asm 1: movd <z5=int6464#1,>in5=int64#4
+# asm 2: movd <z5=%xmm0,>in5=%rcx
+movd %xmm0,%rcx
+
+# qhasm: in6 = z6
+# asm 1: movd <z6=int6464#6,>in6=int64#5
+# asm 2: movd <z6=%xmm5,>in6=%r8
+movd %xmm5,%r8
+
+# qhasm: in7 = z7
+# asm 1: movd <z7=int6464#9,>in7=int64#6
+# asm 2: movd <z7=%xmm8,>in7=%r9
+movd %xmm8,%r9
+
+# qhasm: (uint32) in4 ^= *(uint32 *) (m + 208)
+# asm 1: xorl 208(<m=int64#2),<in4=int64#3d
+# asm 2: xorl 208(<m=%rsi),<in4=%edx
+xorl 208(%rsi),%edx
+
+# qhasm: (uint32) in5 ^= *(uint32 *) (m + 212)
+# asm 1: xorl 212(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 212(<m=%rsi),<in5=%ecx
+xorl 212(%rsi),%ecx
+
+# qhasm: (uint32) in6 ^= *(uint32 *) (m + 216)
+# asm 1: xorl 216(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 216(<m=%rsi),<in6=%r8d
+xorl 216(%rsi),%r8d
+
+# qhasm: (uint32) in7 ^= *(uint32 *) (m + 220)
+# asm 1: xorl 220(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 220(<m=%rsi),<in7=%r9d
+xorl 220(%rsi),%r9d
+
+# qhasm: *(uint32 *) (out + 208) = in4
+# asm 1: movl <in4=int64#3d,208(<out=int64#1)
+# asm 2: movl <in4=%edx,208(<out=%rdi)
+movl %edx,208(%rdi)
+
+# qhasm: *(uint32 *) (out + 212) = in5
+# asm 1: movl <in5=int64#4d,212(<out=int64#1)
+# asm 2: movl <in5=%ecx,212(<out=%rdi)
+movl %ecx,212(%rdi)
+
+# qhasm: *(uint32 *) (out + 216) = in6
+# asm 1: movl <in6=int64#5d,216(<out=int64#1)
+# asm 2: movl <in6=%r8d,216(<out=%rdi)
+movl %r8d,216(%rdi)
+
+# qhasm: *(uint32 *) (out + 220) = in7
+# asm 1: movl <in7=int64#6d,220(<out=int64#1)
+# asm 2: movl <in7=%r9d,220(<out=%rdi)
+movl %r9d,220(%rdi)
+
+# qhasm: uint32323232 z8 += orig8
+# asm 1: paddd <orig8=stack128#19,<z8=int6464#16
+# asm 2: paddd <orig8=288(%rsp),<z8=%xmm15
+paddd 288(%rsp),%xmm15
+
+# qhasm: uint32323232 z9 += orig9
+# asm 1: paddd <orig9=stack128#20,<z9=int6464#12
+# asm 2: paddd <orig9=304(%rsp),<z9=%xmm11
+paddd 304(%rsp),%xmm11
+
+# qhasm: uint32323232 z10 += orig10
+# asm 1: paddd <orig10=stack128#6,<z10=int6464#2
+# asm 2: paddd <orig10=80(%rsp),<z10=%xmm1
+paddd 80(%rsp),%xmm1
+
+# qhasm: uint32323232 z11 += orig11
+# asm 1: paddd <orig11=stack128#10,<z11=int6464#7
+# asm 2: paddd <orig11=144(%rsp),<z11=%xmm6
+paddd 144(%rsp),%xmm6
+
+# qhasm: in8 = z8
+# asm 1: movd <z8=int6464#16,>in8=int64#3
+# asm 2: movd <z8=%xmm15,>in8=%rdx
+movd %xmm15,%rdx
+
+# qhasm: in9 = z9
+# asm 1: movd <z9=int6464#12,>in9=int64#4
+# asm 2: movd <z9=%xmm11,>in9=%rcx
+movd %xmm11,%rcx
+
+# qhasm: in10 = z10
+# asm 1: movd <z10=int6464#2,>in10=int64#5
+# asm 2: movd <z10=%xmm1,>in10=%r8
+movd %xmm1,%r8
+
+# qhasm: in11 = z11
+# asm 1: movd <z11=int6464#7,>in11=int64#6
+# asm 2: movd <z11=%xmm6,>in11=%r9
+movd %xmm6,%r9
+
+# qhasm: z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#16,<z8=int6464#16
+# asm 2: pshufd $0x39,<z8=%xmm15,<z8=%xmm15
+pshufd $0x39,%xmm15,%xmm15
+
+# qhasm: z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#12,<z9=int6464#12
+# asm 2: pshufd $0x39,<z9=%xmm11,<z9=%xmm11
+pshufd $0x39,%xmm11,%xmm11
+
+# qhasm: z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#2,<z10=int6464#2
+# asm 2: pshufd $0x39,<z10=%xmm1,<z10=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#7,<z11=int6464#7
+# asm 2: pshufd $0x39,<z11=%xmm6,<z11=%xmm6
+pshufd $0x39,%xmm6,%xmm6
+
+# qhasm: (uint32) in8 ^= *(uint32 *) (m + 32)
+# asm 1: xorl 32(<m=int64#2),<in8=int64#3d
+# asm 2: xorl 32(<m=%rsi),<in8=%edx
+xorl 32(%rsi),%edx
+
+# qhasm: (uint32) in9 ^= *(uint32 *) (m + 36)
+# asm 1: xorl 36(<m=int64#2),<in9=int64#4d
+# asm 2: xorl 36(<m=%rsi),<in9=%ecx
+xorl 36(%rsi),%ecx
+
+# qhasm: (uint32) in10 ^= *(uint32 *) (m + 40)
+# asm 1: xorl 40(<m=int64#2),<in10=int64#5d
+# asm 2: xorl 40(<m=%rsi),<in10=%r8d
+xorl 40(%rsi),%r8d
+
+# qhasm: (uint32) in11 ^= *(uint32 *) (m + 44)
+# asm 1: xorl 44(<m=int64#2),<in11=int64#6d
+# asm 2: xorl 44(<m=%rsi),<in11=%r9d
+xorl 44(%rsi),%r9d
+
+# qhasm: *(uint32 *) (out + 32) = in8
+# asm 1: movl <in8=int64#3d,32(<out=int64#1)
+# asm 2: movl <in8=%edx,32(<out=%rdi)
+movl %edx,32(%rdi)
+
+# qhasm: *(uint32 *) (out + 36) = in9
+# asm 1: movl <in9=int64#4d,36(<out=int64#1)
+# asm 2: movl <in9=%ecx,36(<out=%rdi)
+movl %ecx,36(%rdi)
+
+# qhasm: *(uint32 *) (out + 40) = in10
+# asm 1: movl <in10=int64#5d,40(<out=int64#1)
+# asm 2: movl <in10=%r8d,40(<out=%rdi)
+movl %r8d,40(%rdi)
+
+# qhasm: *(uint32 *) (out + 44) = in11
+# asm 1: movl <in11=int64#6d,44(<out=int64#1)
+# asm 2: movl <in11=%r9d,44(<out=%rdi)
+movl %r9d,44(%rdi)
+
+# qhasm: in8 = z8
+# asm 1: movd <z8=int6464#16,>in8=int64#3
+# asm 2: movd <z8=%xmm15,>in8=%rdx
+movd %xmm15,%rdx
+
+# qhasm: in9 = z9
+# asm 1: movd <z9=int6464#12,>in9=int64#4
+# asm 2: movd <z9=%xmm11,>in9=%rcx
+movd %xmm11,%rcx
+
+# qhasm: in10 = z10
+# asm 1: movd <z10=int6464#2,>in10=int64#5
+# asm 2: movd <z10=%xmm1,>in10=%r8
+movd %xmm1,%r8
+
+# qhasm: in11 = z11
+# asm 1: movd <z11=int6464#7,>in11=int64#6
+# asm 2: movd <z11=%xmm6,>in11=%r9
+movd %xmm6,%r9
+
+# qhasm: z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#16,<z8=int6464#16
+# asm 2: pshufd $0x39,<z8=%xmm15,<z8=%xmm15
+pshufd $0x39,%xmm15,%xmm15
+
+# qhasm: z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#12,<z9=int6464#12
+# asm 2: pshufd $0x39,<z9=%xmm11,<z9=%xmm11
+pshufd $0x39,%xmm11,%xmm11
+
+# qhasm: z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#2,<z10=int6464#2
+# asm 2: pshufd $0x39,<z10=%xmm1,<z10=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#7,<z11=int6464#7
+# asm 2: pshufd $0x39,<z11=%xmm6,<z11=%xmm6
+pshufd $0x39,%xmm6,%xmm6
+
+# qhasm: (uint32) in8 ^= *(uint32 *) (m + 96)
+# asm 1: xorl 96(<m=int64#2),<in8=int64#3d
+# asm 2: xorl 96(<m=%rsi),<in8=%edx
+xorl 96(%rsi),%edx
+
+# qhasm: (uint32) in9 ^= *(uint32 *) (m + 100)
+# asm 1: xorl 100(<m=int64#2),<in9=int64#4d
+# asm 2: xorl 100(<m=%rsi),<in9=%ecx
+xorl 100(%rsi),%ecx
+
+# qhasm: (uint32) in10 ^= *(uint32 *) (m + 104)
+# asm 1: xorl 104(<m=int64#2),<in10=int64#5d
+# asm 2: xorl 104(<m=%rsi),<in10=%r8d
+xorl 104(%rsi),%r8d
+
+# qhasm: (uint32) in11 ^= *(uint32 *) (m + 108)
+# asm 1: xorl 108(<m=int64#2),<in11=int64#6d
+# asm 2: xorl 108(<m=%rsi),<in11=%r9d
+xorl 108(%rsi),%r9d
+
+# qhasm: *(uint32 *) (out + 96) = in8
+# asm 1: movl <in8=int64#3d,96(<out=int64#1)
+# asm 2: movl <in8=%edx,96(<out=%rdi)
+movl %edx,96(%rdi)
+
+# qhasm: *(uint32 *) (out + 100) = in9
+# asm 1: movl <in9=int64#4d,100(<out=int64#1)
+# asm 2: movl <in9=%ecx,100(<out=%rdi)
+movl %ecx,100(%rdi)
+
+# qhasm: *(uint32 *) (out + 104) = in10
+# asm 1: movl <in10=int64#5d,104(<out=int64#1)
+# asm 2: movl <in10=%r8d,104(<out=%rdi)
+movl %r8d,104(%rdi)
+
+# qhasm: *(uint32 *) (out + 108) = in11
+# asm 1: movl <in11=int64#6d,108(<out=int64#1)
+# asm 2: movl <in11=%r9d,108(<out=%rdi)
+movl %r9d,108(%rdi)
+
+# qhasm: in8 = z8
+# asm 1: movd <z8=int6464#16,>in8=int64#3
+# asm 2: movd <z8=%xmm15,>in8=%rdx
+movd %xmm15,%rdx
+
+# qhasm: in9 = z9
+# asm 1: movd <z9=int6464#12,>in9=int64#4
+# asm 2: movd <z9=%xmm11,>in9=%rcx
+movd %xmm11,%rcx
+
+# qhasm: in10 = z10
+# asm 1: movd <z10=int6464#2,>in10=int64#5
+# asm 2: movd <z10=%xmm1,>in10=%r8
+movd %xmm1,%r8
+
+# qhasm: in11 = z11
+# asm 1: movd <z11=int6464#7,>in11=int64#6
+# asm 2: movd <z11=%xmm6,>in11=%r9
+movd %xmm6,%r9
+
+# qhasm: z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#16,<z8=int6464#16
+# asm 2: pshufd $0x39,<z8=%xmm15,<z8=%xmm15
+pshufd $0x39,%xmm15,%xmm15
+
+# qhasm: z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#12,<z9=int6464#12
+# asm 2: pshufd $0x39,<z9=%xmm11,<z9=%xmm11
+pshufd $0x39,%xmm11,%xmm11
+
+# qhasm: z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#2,<z10=int6464#2
+# asm 2: pshufd $0x39,<z10=%xmm1,<z10=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#7,<z11=int6464#7
+# asm 2: pshufd $0x39,<z11=%xmm6,<z11=%xmm6
+pshufd $0x39,%xmm6,%xmm6
+
+# qhasm: (uint32) in8 ^= *(uint32 *) (m + 160)
+# asm 1: xorl 160(<m=int64#2),<in8=int64#3d
+# asm 2: xorl 160(<m=%rsi),<in8=%edx
+xorl 160(%rsi),%edx
+
+# qhasm: (uint32) in9 ^= *(uint32 *) (m + 164)
+# asm 1: xorl 164(<m=int64#2),<in9=int64#4d
+# asm 2: xorl 164(<m=%rsi),<in9=%ecx
+xorl 164(%rsi),%ecx
+
+# qhasm: (uint32) in10 ^= *(uint32 *) (m + 168)
+# asm 1: xorl 168(<m=int64#2),<in10=int64#5d
+# asm 2: xorl 168(<m=%rsi),<in10=%r8d
+xorl 168(%rsi),%r8d
+
+# qhasm: (uint32) in11 ^= *(uint32 *) (m + 172)
+# asm 1: xorl 172(<m=int64#2),<in11=int64#6d
+# asm 2: xorl 172(<m=%rsi),<in11=%r9d
+xorl 172(%rsi),%r9d
+
+# qhasm: *(uint32 *) (out + 160) = in8
+# asm 1: movl <in8=int64#3d,160(<out=int64#1)
+# asm 2: movl <in8=%edx,160(<out=%rdi)
+movl %edx,160(%rdi)
+
+# qhasm: *(uint32 *) (out + 164) = in9
+# asm 1: movl <in9=int64#4d,164(<out=int64#1)
+# asm 2: movl <in9=%ecx,164(<out=%rdi)
+movl %ecx,164(%rdi)
+
+# qhasm: *(uint32 *) (out + 168) = in10
+# asm 1: movl <in10=int64#5d,168(<out=int64#1)
+# asm 2: movl <in10=%r8d,168(<out=%rdi)
+movl %r8d,168(%rdi)
+
+# qhasm: *(uint32 *) (out + 172) = in11
+# asm 1: movl <in11=int64#6d,172(<out=int64#1)
+# asm 2: movl <in11=%r9d,172(<out=%rdi)
+movl %r9d,172(%rdi)
+
+# qhasm: in8 = z8
+# asm 1: movd <z8=int6464#16,>in8=int64#3
+# asm 2: movd <z8=%xmm15,>in8=%rdx
+movd %xmm15,%rdx
+
+# qhasm: in9 = z9
+# asm 1: movd <z9=int6464#12,>in9=int64#4
+# asm 2: movd <z9=%xmm11,>in9=%rcx
+movd %xmm11,%rcx
+
+# qhasm: in10 = z10
+# asm 1: movd <z10=int6464#2,>in10=int64#5
+# asm 2: movd <z10=%xmm1,>in10=%r8
+movd %xmm1,%r8
+
+# qhasm: in11 = z11
+# asm 1: movd <z11=int6464#7,>in11=int64#6
+# asm 2: movd <z11=%xmm6,>in11=%r9
+movd %xmm6,%r9
+
+# qhasm: (uint32) in8 ^= *(uint32 *) (m + 224)
+# asm 1: xorl 224(<m=int64#2),<in8=int64#3d
+# asm 2: xorl 224(<m=%rsi),<in8=%edx
+xorl 224(%rsi),%edx
+
+# qhasm: (uint32) in9 ^= *(uint32 *) (m + 228)
+# asm 1: xorl 228(<m=int64#2),<in9=int64#4d
+# asm 2: xorl 228(<m=%rsi),<in9=%ecx
+xorl 228(%rsi),%ecx
+
+# qhasm: (uint32) in10 ^= *(uint32 *) (m + 232)
+# asm 1: xorl 232(<m=int64#2),<in10=int64#5d
+# asm 2: xorl 232(<m=%rsi),<in10=%r8d
+xorl 232(%rsi),%r8d
+
+# qhasm: (uint32) in11 ^= *(uint32 *) (m + 236)
+# asm 1: xorl 236(<m=int64#2),<in11=int64#6d
+# asm 2: xorl 236(<m=%rsi),<in11=%r9d
+xorl 236(%rsi),%r9d
+
+# qhasm: *(uint32 *) (out + 224) = in8
+# asm 1: movl <in8=int64#3d,224(<out=int64#1)
+# asm 2: movl <in8=%edx,224(<out=%rdi)
+movl %edx,224(%rdi)
+
+# qhasm: *(uint32 *) (out + 228) = in9
+# asm 1: movl <in9=int64#4d,228(<out=int64#1)
+# asm 2: movl <in9=%ecx,228(<out=%rdi)
+movl %ecx,228(%rdi)
+
+# qhasm: *(uint32 *) (out + 232) = in10
+# asm 1: movl <in10=int64#5d,232(<out=int64#1)
+# asm 2: movl <in10=%r8d,232(<out=%rdi)
+movl %r8d,232(%rdi)
+
+# qhasm: *(uint32 *) (out + 236) = in11
+# asm 1: movl <in11=int64#6d,236(<out=int64#1)
+# asm 2: movl <in11=%r9d,236(<out=%rdi)
+movl %r9d,236(%rdi)
+
+# qhasm: uint32323232 z12 += orig12
+# asm 1: paddd <orig12=stack128#11,<z12=int6464#14
+# asm 2: paddd <orig12=160(%rsp),<z12=%xmm13
+paddd 160(%rsp),%xmm13
+
+# qhasm: uint32323232 z13 += orig13
+# asm 1: paddd <orig13=stack128#14,<z13=int6464#10
+# asm 2: paddd <orig13=208(%rsp),<z13=%xmm9
+paddd 208(%rsp),%xmm9
+
+# qhasm: uint32323232 z14 += orig14
+# asm 1: paddd <orig14=stack128#17,<z14=int6464#4
+# asm 2: paddd <orig14=256(%rsp),<z14=%xmm3
+paddd 256(%rsp),%xmm3
+
+# qhasm: uint32323232 z15 += orig15
+# asm 1: paddd <orig15=stack128#7,<z15=int6464#3
+# asm 2: paddd <orig15=96(%rsp),<z15=%xmm2
+paddd 96(%rsp),%xmm2
+
+# qhasm: in12 = z12
+# asm 1: movd <z12=int6464#14,>in12=int64#3
+# asm 2: movd <z12=%xmm13,>in12=%rdx
+movd %xmm13,%rdx
+
+# qhasm: in13 = z13
+# asm 1: movd <z13=int6464#10,>in13=int64#4
+# asm 2: movd <z13=%xmm9,>in13=%rcx
+movd %xmm9,%rcx
+
+# qhasm: in14 = z14
+# asm 1: movd <z14=int6464#4,>in14=int64#5
+# asm 2: movd <z14=%xmm3,>in14=%r8
+movd %xmm3,%r8
+
+# qhasm: in15 = z15
+# asm 1: movd <z15=int6464#3,>in15=int64#6
+# asm 2: movd <z15=%xmm2,>in15=%r9
+movd %xmm2,%r9
+
+# qhasm: z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#14,<z12=int6464#14
+# asm 2: pshufd $0x39,<z12=%xmm13,<z12=%xmm13
+pshufd $0x39,%xmm13,%xmm13
+
+# qhasm: z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#10,<z13=int6464#10
+# asm 2: pshufd $0x39,<z13=%xmm9,<z13=%xmm9
+pshufd $0x39,%xmm9,%xmm9
+
+# qhasm: z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#4,<z14=int6464#4
+# asm 2: pshufd $0x39,<z14=%xmm3,<z14=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#3,<z15=int6464#3
+# asm 2: pshufd $0x39,<z15=%xmm2,<z15=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: (uint32) in12 ^= *(uint32 *) (m + 48)
+# asm 1: xorl 48(<m=int64#2),<in12=int64#3d
+# asm 2: xorl 48(<m=%rsi),<in12=%edx
+xorl 48(%rsi),%edx
+
+# qhasm: (uint32) in13 ^= *(uint32 *) (m + 52)
+# asm 1: xorl 52(<m=int64#2),<in13=int64#4d
+# asm 2: xorl 52(<m=%rsi),<in13=%ecx
+xorl 52(%rsi),%ecx
+
+# qhasm: (uint32) in14 ^= *(uint32 *) (m + 56)
+# asm 1: xorl 56(<m=int64#2),<in14=int64#5d
+# asm 2: xorl 56(<m=%rsi),<in14=%r8d
+xorl 56(%rsi),%r8d
+
+# qhasm: (uint32) in15 ^= *(uint32 *) (m + 60)
+# asm 1: xorl 60(<m=int64#2),<in15=int64#6d
+# asm 2: xorl 60(<m=%rsi),<in15=%r9d
+xorl 60(%rsi),%r9d
+
+# qhasm: *(uint32 *) (out + 48) = in12
+# asm 1: movl <in12=int64#3d,48(<out=int64#1)
+# asm 2: movl <in12=%edx,48(<out=%rdi)
+movl %edx,48(%rdi)
+
+# qhasm: *(uint32 *) (out + 52) = in13
+# asm 1: movl <in13=int64#4d,52(<out=int64#1)
+# asm 2: movl <in13=%ecx,52(<out=%rdi)
+movl %ecx,52(%rdi)
+
+# qhasm: *(uint32 *) (out + 56) = in14
+# asm 1: movl <in14=int64#5d,56(<out=int64#1)
+# asm 2: movl <in14=%r8d,56(<out=%rdi)
+movl %r8d,56(%rdi)
+
+# qhasm: *(uint32 *) (out + 60) = in15
+# asm 1: movl <in15=int64#6d,60(<out=int64#1)
+# asm 2: movl <in15=%r9d,60(<out=%rdi)
+movl %r9d,60(%rdi)
+
+# qhasm: in12 = z12
+# asm 1: movd <z12=int6464#14,>in12=int64#3
+# asm 2: movd <z12=%xmm13,>in12=%rdx
+movd %xmm13,%rdx
+
+# qhasm: in13 = z13
+# asm 1: movd <z13=int6464#10,>in13=int64#4
+# asm 2: movd <z13=%xmm9,>in13=%rcx
+movd %xmm9,%rcx
+
+# qhasm: in14 = z14
+# asm 1: movd <z14=int6464#4,>in14=int64#5
+# asm 2: movd <z14=%xmm3,>in14=%r8
+movd %xmm3,%r8
+
+# qhasm: in15 = z15
+# asm 1: movd <z15=int6464#3,>in15=int64#6
+# asm 2: movd <z15=%xmm2,>in15=%r9
+movd %xmm2,%r9
+
+# qhasm: z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#14,<z12=int6464#14
+# asm 2: pshufd $0x39,<z12=%xmm13,<z12=%xmm13
+pshufd $0x39,%xmm13,%xmm13
+
+# qhasm: z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#10,<z13=int6464#10
+# asm 2: pshufd $0x39,<z13=%xmm9,<z13=%xmm9
+pshufd $0x39,%xmm9,%xmm9
+
+# qhasm: z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#4,<z14=int6464#4
+# asm 2: pshufd $0x39,<z14=%xmm3,<z14=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#3,<z15=int6464#3
+# asm 2: pshufd $0x39,<z15=%xmm2,<z15=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: (uint32) in12 ^= *(uint32 *) (m + 112)
+# asm 1: xorl 112(<m=int64#2),<in12=int64#3d
+# asm 2: xorl 112(<m=%rsi),<in12=%edx
+xorl 112(%rsi),%edx
+
+# qhasm: (uint32) in13 ^= *(uint32 *) (m + 116)
+# asm 1: xorl 116(<m=int64#2),<in13=int64#4d
+# asm 2: xorl 116(<m=%rsi),<in13=%ecx
+xorl 116(%rsi),%ecx
+
+# qhasm: (uint32) in14 ^= *(uint32 *) (m + 120)
+# asm 1: xorl 120(<m=int64#2),<in14=int64#5d
+# asm 2: xorl 120(<m=%rsi),<in14=%r8d
+xorl 120(%rsi),%r8d
+
+# qhasm: (uint32) in15 ^= *(uint32 *) (m + 124)
+# asm 1: xorl 124(<m=int64#2),<in15=int64#6d
+# asm 2: xorl 124(<m=%rsi),<in15=%r9d
+xorl 124(%rsi),%r9d
+
+# qhasm: *(uint32 *) (out + 112) = in12
+# asm 1: movl <in12=int64#3d,112(<out=int64#1)
+# asm 2: movl <in12=%edx,112(<out=%rdi)
+movl %edx,112(%rdi)
+
+# qhasm: *(uint32 *) (out + 116) = in13
+# asm 1: movl <in13=int64#4d,116(<out=int64#1)
+# asm 2: movl <in13=%ecx,116(<out=%rdi)
+movl %ecx,116(%rdi)
+
+# qhasm: *(uint32 *) (out + 120) = in14
+# asm 1: movl <in14=int64#5d,120(<out=int64#1)
+# asm 2: movl <in14=%r8d,120(<out=%rdi)
+movl %r8d,120(%rdi)
+
+# qhasm: *(uint32 *) (out + 124) = in15
+# asm 1: movl <in15=int64#6d,124(<out=int64#1)
+# asm 2: movl <in15=%r9d,124(<out=%rdi)
+movl %r9d,124(%rdi)
+
+# qhasm: in12 = z12
+# asm 1: movd <z12=int6464#14,>in12=int64#3
+# asm 2: movd <z12=%xmm13,>in12=%rdx
+movd %xmm13,%rdx
+
+# qhasm: in13 = z13
+# asm 1: movd <z13=int6464#10,>in13=int64#4
+# asm 2: movd <z13=%xmm9,>in13=%rcx
+movd %xmm9,%rcx
+
+# qhasm: in14 = z14
+# asm 1: movd <z14=int6464#4,>in14=int64#5
+# asm 2: movd <z14=%xmm3,>in14=%r8
+movd %xmm3,%r8
+
+# qhasm: in15 = z15
+# asm 1: movd <z15=int6464#3,>in15=int64#6
+# asm 2: movd <z15=%xmm2,>in15=%r9
+movd %xmm2,%r9
+
+# qhasm: z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#14,<z12=int6464#14
+# asm 2: pshufd $0x39,<z12=%xmm13,<z12=%xmm13
+pshufd $0x39,%xmm13,%xmm13
+
+# qhasm: z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#10,<z13=int6464#10
+# asm 2: pshufd $0x39,<z13=%xmm9,<z13=%xmm9
+pshufd $0x39,%xmm9,%xmm9
+
+# qhasm: z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#4,<z14=int6464#4
+# asm 2: pshufd $0x39,<z14=%xmm3,<z14=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#3,<z15=int6464#3
+# asm 2: pshufd $0x39,<z15=%xmm2,<z15=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: (uint32) in12 ^= *(uint32 *) (m + 176)
+# asm 1: xorl 176(<m=int64#2),<in12=int64#3d
+# asm 2: xorl 176(<m=%rsi),<in12=%edx
+xorl 176(%rsi),%edx
+
+# qhasm: (uint32) in13 ^= *(uint32 *) (m + 180)
+# asm 1: xorl 180(<m=int64#2),<in13=int64#4d
+# asm 2: xorl 180(<m=%rsi),<in13=%ecx
+xorl 180(%rsi),%ecx
+
+# qhasm: (uint32) in14 ^= *(uint32 *) (m + 184)
+# asm 1: xorl 184(<m=int64#2),<in14=int64#5d
+# asm 2: xorl 184(<m=%rsi),<in14=%r8d
+xorl 184(%rsi),%r8d
+
+# qhasm: (uint32) in15 ^= *(uint32 *) (m + 188)
+# asm 1: xorl 188(<m=int64#2),<in15=int64#6d
+# asm 2: xorl 188(<m=%rsi),<in15=%r9d
+xorl 188(%rsi),%r9d
+
+# qhasm: *(uint32 *) (out + 176) = in12
+# asm 1: movl <in12=int64#3d,176(<out=int64#1)
+# asm 2: movl <in12=%edx,176(<out=%rdi)
+movl %edx,176(%rdi)
+
+# qhasm: *(uint32 *) (out + 180) = in13
+# asm 1: movl <in13=int64#4d,180(<out=int64#1)
+# asm 2: movl <in13=%ecx,180(<out=%rdi)
+movl %ecx,180(%rdi)
+
+# qhasm: *(uint32 *) (out + 184) = in14
+# asm 1: movl <in14=int64#5d,184(<out=int64#1)
+# asm 2: movl <in14=%r8d,184(<out=%rdi)
+movl %r8d,184(%rdi)
+
+# qhasm: *(uint32 *) (out + 188) = in15
+# asm 1: movl <in15=int64#6d,188(<out=int64#1)
+# asm 2: movl <in15=%r9d,188(<out=%rdi)
+movl %r9d,188(%rdi)
+
+# qhasm: in12 = z12
+# asm 1: movd <z12=int6464#14,>in12=int64#3
+# asm 2: movd <z12=%xmm13,>in12=%rdx
+movd %xmm13,%rdx
+
+# qhasm: in13 = z13
+# asm 1: movd <z13=int6464#10,>in13=int64#4
+# asm 2: movd <z13=%xmm9,>in13=%rcx
+movd %xmm9,%rcx
+
+# qhasm: in14 = z14
+# asm 1: movd <z14=int6464#4,>in14=int64#5
+# asm 2: movd <z14=%xmm3,>in14=%r8
+movd %xmm3,%r8
+
+# qhasm: in15 = z15
+# asm 1: movd <z15=int6464#3,>in15=int64#6
+# asm 2: movd <z15=%xmm2,>in15=%r9
+movd %xmm2,%r9
+
+# qhasm: (uint32) in12 ^= *(uint32 *) (m + 240)
+# asm 1: xorl 240(<m=int64#2),<in12=int64#3d
+# asm 2: xorl 240(<m=%rsi),<in12=%edx
+xorl 240(%rsi),%edx
+
+# qhasm: (uint32) in13 ^= *(uint32 *) (m + 244)
+# asm 1: xorl 244(<m=int64#2),<in13=int64#4d
+# asm 2: xorl 244(<m=%rsi),<in13=%ecx
+xorl 244(%rsi),%ecx
+
+# qhasm: (uint32) in14 ^= *(uint32 *) (m + 248)
+# asm 1: xorl 248(<m=int64#2),<in14=int64#5d
+# asm 2: xorl 248(<m=%rsi),<in14=%r8d
+xorl 248(%rsi),%r8d
+
+# qhasm: (uint32) in15 ^= *(uint32 *) (m + 252)
+# asm 1: xorl 252(<m=int64#2),<in15=int64#6d
+# asm 2: xorl 252(<m=%rsi),<in15=%r9d
+xorl 252(%rsi),%r9d
+
+# qhasm: *(uint32 *) (out + 240) = in12
+# asm 1: movl <in12=int64#3d,240(<out=int64#1)
+# asm 2: movl <in12=%edx,240(<out=%rdi)
+movl %edx,240(%rdi)
+
+# qhasm: *(uint32 *) (out + 244) = in13
+# asm 1: movl <in13=int64#4d,244(<out=int64#1)
+# asm 2: movl <in13=%ecx,244(<out=%rdi)
+movl %ecx,244(%rdi)
+
+# qhasm: *(uint32 *) (out + 248) = in14
+# asm 1: movl <in14=int64#5d,248(<out=int64#1)
+# asm 2: movl <in14=%r8d,248(<out=%rdi)
+movl %r8d,248(%rdi)
+
+# qhasm: *(uint32 *) (out + 252) = in15
+# asm 1: movl <in15=int64#6d,252(<out=int64#1)
+# asm 2: movl <in15=%r9d,252(<out=%rdi)
+movl %r9d,252(%rdi)
+
+# qhasm: bytes = bytes_backup
+# asm 1: movq <bytes_backup=stack64#8,>bytes=int64#6
+# asm 2: movq <bytes_backup=408(%rsp),>bytes=%r9
+movq 408(%rsp),%r9
+
+# qhasm: bytes -= 256
+# asm 1: sub $256,<bytes=int64#6
+# asm 2: sub $256,<bytes=%r9
+sub $256,%r9
+
+# qhasm: m += 256
+# asm 1: add $256,<m=int64#2
+# asm 2: add $256,<m=%rsi
+add $256,%rsi
+
+# qhasm: out += 256
+# asm 1: add $256,<out=int64#1
+# asm 2: add $256,<out=%rdi
+add $256,%rdi
+
+# qhasm: unsigned<? bytes - 256
+# asm 1: cmp $256,<bytes=int64#6
+# asm 2: cmp $256,<bytes=%r9
+cmp $256,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm: goto bytesatleast256 if !unsigned<
+jae ._bytesatleast256
+
+# qhasm: unsigned>? bytes - 0
+# asm 1: cmp $0,<bytes=int64#6
+# asm 2: cmp $0,<bytes=%r9
+cmp $0,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm: goto done if !unsigned>
+jbe ._done
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: bytesbetween1and255:
+._bytesbetween1and255:
+
+# qhasm: unsigned<? bytes - 64
+# asm 1: cmp $64,<bytes=int64#6
+# asm 2: cmp $64,<bytes=%r9
+cmp $64,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm: goto nocopy if !unsigned<
+jae ._nocopy
+
+# qhasm: ctarget = out
+# asm 1: mov <out=int64#1,>ctarget=int64#3
+# asm 2: mov <out=%rdi,>ctarget=%rdx
+mov %rdi,%rdx
+
+# qhasm: out = &tmp
+# asm 1: leaq <tmp=stack512#1,>out=int64#1
+# asm 2: leaq <tmp=416(%rsp),>out=%rdi
+leaq 416(%rsp),%rdi
+
+# qhasm: i = bytes
+# asm 1: mov <bytes=int64#6,>i=int64#4
+# asm 2: mov <bytes=%r9,>i=%rcx
+mov %r9,%rcx
+
+# qhasm: while (i) { *out++ = *m++; --i }
+rep movsb
+
+# qhasm: out = &tmp
+# asm 1: leaq <tmp=stack512#1,>out=int64#1
+# asm 2: leaq <tmp=416(%rsp),>out=%rdi
+leaq 416(%rsp),%rdi
+
+# qhasm: m = &tmp
+# asm 1: leaq <tmp=stack512#1,>m=int64#2
+# asm 2: leaq <tmp=416(%rsp),>m=%rsi
+leaq 416(%rsp),%rsi
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: nocopy:
+._nocopy:
+
+# qhasm: bytes_backup = bytes
+# asm 1: movq <bytes=int64#6,>bytes_backup=stack64#8
+# asm 2: movq <bytes=%r9,>bytes_backup=408(%rsp)
+movq %r9,408(%rsp)
+
+# qhasm: diag0 = x0
+# asm 1: movdqa <x0=stack128#4,>diag0=int6464#1
+# asm 2: movdqa <x0=48(%rsp),>diag0=%xmm0
+movdqa 48(%rsp),%xmm0
+
+# qhasm: diag1 = x1
+# asm 1: movdqa <x1=stack128#1,>diag1=int6464#2
+# asm 2: movdqa <x1=0(%rsp),>diag1=%xmm1
+movdqa 0(%rsp),%xmm1
+
+# qhasm: diag2 = x2
+# asm 1: movdqa <x2=stack128#2,>diag2=int6464#3
+# asm 2: movdqa <x2=16(%rsp),>diag2=%xmm2
+movdqa 16(%rsp),%xmm2
+
+# qhasm: diag3 = x3
+# asm 1: movdqa <x3=stack128#3,>diag3=int6464#4
+# asm 2: movdqa <x3=32(%rsp),>diag3=%xmm3
+movdqa 32(%rsp),%xmm3
+
+# qhasm: a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm: i = 20
+# asm 1: mov $20,>i=int64#4
+# asm 2: mov $20,>i=%rcx
+mov $20,%rcx
+
+# qhasm: mainloop2:
+._mainloop2:
+
+# qhasm: uint32323232 a0 += diag0
+# asm 1: paddd <diag0=int6464#1,<a0=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a0=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm: a1 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a1=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a1=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm: b0 = a0
+# asm 1: movdqa <a0=int6464#5,>b0=int6464#7
+# asm 2: movdqa <a0=%xmm4,>b0=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232 a0 <<= 7
+# asm 1: pslld $7,<a0=int6464#5
+# asm 2: pslld $7,<a0=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232 b0 >>= 25
+# asm 1: psrld $25,<b0=int6464#7
+# asm 2: psrld $25,<b0=%xmm6
+psrld $25,%xmm6
+
+# qhasm: diag3 ^= a0
+# asm 1: pxor <a0=int6464#5,<diag3=int6464#4
+# asm 2: pxor <a0=%xmm4,<diag3=%xmm3
+pxor %xmm4,%xmm3
+
+# qhasm: diag3 ^= b0
+# asm 1: pxor <b0=int6464#7,<diag3=int6464#4
+# asm 2: pxor <b0=%xmm6,<diag3=%xmm3
+pxor %xmm6,%xmm3
+
+# qhasm: uint32323232 a1 += diag3
+# asm 1: paddd <diag3=int6464#4,<a1=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a1=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm: a2 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a2=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a2=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm: b1 = a1
+# asm 1: movdqa <a1=int6464#6,>b1=int6464#7
+# asm 2: movdqa <a1=%xmm5,>b1=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232 a1 <<= 9
+# asm 1: pslld $9,<a1=int6464#6
+# asm 2: pslld $9,<a1=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232 b1 >>= 23
+# asm 1: psrld $23,<b1=int6464#7
+# asm 2: psrld $23,<b1=%xmm6
+psrld $23,%xmm6
+
+# qhasm: diag2 ^= a1
+# asm 1: pxor <a1=int6464#6,<diag2=int6464#3
+# asm 2: pxor <a1=%xmm5,<diag2=%xmm2
+pxor %xmm5,%xmm2
+
+# qhasm: diag3 <<<= 32
+# asm 1: pshufd $0x93,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x93,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x93,%xmm3,%xmm3
+
+# qhasm: diag2 ^= b1
+# asm 1: pxor <b1=int6464#7,<diag2=int6464#3
+# asm 2: pxor <b1=%xmm6,<diag2=%xmm2
+pxor %xmm6,%xmm2
+
+# qhasm: uint32323232 a2 += diag2
+# asm 1: paddd <diag2=int6464#3,<a2=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a2=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm: a3 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a3=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a3=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm: b2 = a2
+# asm 1: movdqa <a2=int6464#5,>b2=int6464#7
+# asm 2: movdqa <a2=%xmm4,>b2=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232 a2 <<= 13
+# asm 1: pslld $13,<a2=int6464#5
+# asm 2: pslld $13,<a2=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232 b2 >>= 19
+# asm 1: psrld $19,<b2=int6464#7
+# asm 2: psrld $19,<b2=%xmm6
+psrld $19,%xmm6
+
+# qhasm: diag1 ^= a2
+# asm 1: pxor <a2=int6464#5,<diag1=int6464#2
+# asm 2: pxor <a2=%xmm4,<diag1=%xmm1
+pxor %xmm4,%xmm1
+
+# qhasm: diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm: diag1 ^= b2
+# asm 1: pxor <b2=int6464#7,<diag1=int6464#2
+# asm 2: pxor <b2=%xmm6,<diag1=%xmm1
+pxor %xmm6,%xmm1
+
+# qhasm: uint32323232 a3 += diag1
+# asm 1: paddd <diag1=int6464#2,<a3=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a3=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm: a4 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a4=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a4=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm: b3 = a3
+# asm 1: movdqa <a3=int6464#6,>b3=int6464#7
+# asm 2: movdqa <a3=%xmm5,>b3=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232 a3 <<= 18
+# asm 1: pslld $18,<a3=int6464#6
+# asm 2: pslld $18,<a3=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232 b3 >>= 14
+# asm 1: psrld $14,<b3=int6464#7
+# asm 2: psrld $14,<b3=%xmm6
+psrld $14,%xmm6
+
+# qhasm: diag0 ^= a3
+# asm 1: pxor <a3=int6464#6,<diag0=int6464#1
+# asm 2: pxor <a3=%xmm5,<diag0=%xmm0
+pxor %xmm5,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag0 ^= b3
+# asm 1: pxor <b3=int6464#7,<diag0=int6464#1
+# asm 2: pxor <b3=%xmm6,<diag0=%xmm0
+pxor %xmm6,%xmm0
+
+# qhasm: uint32323232 a4 += diag0
+# asm 1: paddd <diag0=int6464#1,<a4=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a4=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm: a5 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a5=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a5=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm: b4 = a4
+# asm 1: movdqa <a4=int6464#5,>b4=int6464#7
+# asm 2: movdqa <a4=%xmm4,>b4=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232 a4 <<= 7
+# asm 1: pslld $7,<a4=int6464#5
+# asm 2: pslld $7,<a4=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232 b4 >>= 25
+# asm 1: psrld $25,<b4=int6464#7
+# asm 2: psrld $25,<b4=%xmm6
+psrld $25,%xmm6
+
+# qhasm: diag1 ^= a4
+# asm 1: pxor <a4=int6464#5,<diag1=int6464#2
+# asm 2: pxor <a4=%xmm4,<diag1=%xmm1
+pxor %xmm4,%xmm1
+
+# qhasm: diag1 ^= b4
+# asm 1: pxor <b4=int6464#7,<diag1=int6464#2
+# asm 2: pxor <b4=%xmm6,<diag1=%xmm1
+pxor %xmm6,%xmm1
+
+# qhasm: uint32323232 a5 += diag1
+# asm 1: paddd <diag1=int6464#2,<a5=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a5=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm: a6 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a6=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a6=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm: b5 = a5
+# asm 1: movdqa <a5=int6464#6,>b5=int6464#7
+# asm 2: movdqa <a5=%xmm5,>b5=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232 a5 <<= 9
+# asm 1: pslld $9,<a5=int6464#6
+# asm 2: pslld $9,<a5=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232 b5 >>= 23
+# asm 1: psrld $23,<b5=int6464#7
+# asm 2: psrld $23,<b5=%xmm6
+psrld $23,%xmm6
+
+# qhasm: diag2 ^= a5
+# asm 1: pxor <a5=int6464#6,<diag2=int6464#3
+# asm 2: pxor <a5=%xmm5,<diag2=%xmm2
+pxor %xmm5,%xmm2
+
+# qhasm: diag1 <<<= 32
+# asm 1: pshufd $0x93,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x93,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x93,%xmm1,%xmm1
+
+# qhasm: diag2 ^= b5
+# asm 1: pxor <b5=int6464#7,<diag2=int6464#3
+# asm 2: pxor <b5=%xmm6,<diag2=%xmm2
+pxor %xmm6,%xmm2
+
+# qhasm: uint32323232 a6 += diag2
+# asm 1: paddd <diag2=int6464#3,<a6=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a6=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm: a7 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a7=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm: b6 = a6
+# asm 1: movdqa <a6=int6464#5,>b6=int6464#7
+# asm 2: movdqa <a6=%xmm4,>b6=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232 a6 <<= 13
+# asm 1: pslld $13,<a6=int6464#5
+# asm 2: pslld $13,<a6=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232 b6 >>= 19
+# asm 1: psrld $19,<b6=int6464#7
+# asm 2: psrld $19,<b6=%xmm6
+psrld $19,%xmm6
+
+# qhasm: diag3 ^= a6
+# asm 1: pxor <a6=int6464#5,<diag3=int6464#4
+# asm 2: pxor <a6=%xmm4,<diag3=%xmm3
+pxor %xmm4,%xmm3
+
+# qhasm: diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm: diag3 ^= b6
+# asm 1: pxor <b6=int6464#7,<diag3=int6464#4
+# asm 2: pxor <b6=%xmm6,<diag3=%xmm3
+pxor %xmm6,%xmm3
+
+# qhasm: uint32323232 a7 += diag3
+# asm 1: paddd <diag3=int6464#4,<a7=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a7=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm: a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm: b7 = a7
+# asm 1: movdqa <a7=int6464#6,>b7=int6464#7
+# asm 2: movdqa <a7=%xmm5,>b7=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232 a7 <<= 18
+# asm 1: pslld $18,<a7=int6464#6
+# asm 2: pslld $18,<a7=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232 b7 >>= 14
+# asm 1: psrld $14,<b7=int6464#7
+# asm 2: psrld $14,<b7=%xmm6
+psrld $14,%xmm6
+
+# qhasm: diag0 ^= a7
+# asm 1: pxor <a7=int6464#6,<diag0=int6464#1
+# asm 2: pxor <a7=%xmm5,<diag0=%xmm0
+pxor %xmm5,%xmm0
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: diag0 ^= b7
+# asm 1: pxor <b7=int6464#7,<diag0=int6464#1
+# asm 2: pxor <b7=%xmm6,<diag0=%xmm0
+pxor %xmm6,%xmm0
+
+# qhasm: uint32323232 a0 += diag0
+# asm 1: paddd <diag0=int6464#1,<a0=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a0=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm: a1 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a1=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a1=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm: b0 = a0
+# asm 1: movdqa <a0=int6464#5,>b0=int6464#7
+# asm 2: movdqa <a0=%xmm4,>b0=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232 a0 <<= 7
+# asm 1: pslld $7,<a0=int6464#5
+# asm 2: pslld $7,<a0=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232 b0 >>= 25
+# asm 1: psrld $25,<b0=int6464#7
+# asm 2: psrld $25,<b0=%xmm6
+psrld $25,%xmm6
+
+# qhasm: diag3 ^= a0
+# asm 1: pxor <a0=int6464#5,<diag3=int6464#4
+# asm 2: pxor <a0=%xmm4,<diag3=%xmm3
+pxor %xmm4,%xmm3
+
+# qhasm: diag3 ^= b0
+# asm 1: pxor <b0=int6464#7,<diag3=int6464#4
+# asm 2: pxor <b0=%xmm6,<diag3=%xmm3
+pxor %xmm6,%xmm3
+
+# qhasm: uint32323232 a1 += diag3
+# asm 1: paddd <diag3=int6464#4,<a1=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a1=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm: a2 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a2=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a2=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm: b1 = a1
+# asm 1: movdqa <a1=int6464#6,>b1=int6464#7
+# asm 2: movdqa <a1=%xmm5,>b1=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232 a1 <<= 9
+# asm 1: pslld $9,<a1=int6464#6
+# asm 2: pslld $9,<a1=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232 b1 >>= 23
+# asm 1: psrld $23,<b1=int6464#7
+# asm 2: psrld $23,<b1=%xmm6
+psrld $23,%xmm6
+
+# qhasm: diag2 ^= a1
+# asm 1: pxor <a1=int6464#6,<diag2=int6464#3
+# asm 2: pxor <a1=%xmm5,<diag2=%xmm2
+pxor %xmm5,%xmm2
+
+# qhasm: diag3 <<<= 32
+# asm 1: pshufd $0x93,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x93,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x93,%xmm3,%xmm3
+
+# qhasm: diag2 ^= b1
+# asm 1: pxor <b1=int6464#7,<diag2=int6464#3
+# asm 2: pxor <b1=%xmm6,<diag2=%xmm2
+pxor %xmm6,%xmm2
+
+# qhasm: uint32323232 a2 += diag2
+# asm 1: paddd <diag2=int6464#3,<a2=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a2=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm: a3 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a3=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a3=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm: b2 = a2
+# asm 1: movdqa <a2=int6464#5,>b2=int6464#7
+# asm 2: movdqa <a2=%xmm4,>b2=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232 a2 <<= 13
+# asm 1: pslld $13,<a2=int6464#5
+# asm 2: pslld $13,<a2=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232 b2 >>= 19
+# asm 1: psrld $19,<b2=int6464#7
+# asm 2: psrld $19,<b2=%xmm6
+psrld $19,%xmm6
+
+# qhasm: diag1 ^= a2
+# asm 1: pxor <a2=int6464#5,<diag1=int6464#2
+# asm 2: pxor <a2=%xmm4,<diag1=%xmm1
+pxor %xmm4,%xmm1
+
+# qhasm: diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm: diag1 ^= b2
+# asm 1: pxor <b2=int6464#7,<diag1=int6464#2
+# asm 2: pxor <b2=%xmm6,<diag1=%xmm1
+pxor %xmm6,%xmm1
+
+# qhasm: uint32323232 a3 += diag1
+# asm 1: paddd <diag1=int6464#2,<a3=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a3=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm: a4 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a4=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a4=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm: b3 = a3
+# asm 1: movdqa <a3=int6464#6,>b3=int6464#7
+# asm 2: movdqa <a3=%xmm5,>b3=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232 a3 <<= 18
+# asm 1: pslld $18,<a3=int6464#6
+# asm 2: pslld $18,<a3=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232 b3 >>= 14
+# asm 1: psrld $14,<b3=int6464#7
+# asm 2: psrld $14,<b3=%xmm6
+psrld $14,%xmm6
+
+# qhasm: diag0 ^= a3
+# asm 1: pxor <a3=int6464#6,<diag0=int6464#1
+# asm 2: pxor <a3=%xmm5,<diag0=%xmm0
+pxor %xmm5,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag0 ^= b3
+# asm 1: pxor <b3=int6464#7,<diag0=int6464#1
+# asm 2: pxor <b3=%xmm6,<diag0=%xmm0
+pxor %xmm6,%xmm0
+
+# qhasm: uint32323232 a4 += diag0
+# asm 1: paddd <diag0=int6464#1,<a4=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a4=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm: a5 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a5=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a5=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm: b4 = a4
+# asm 1: movdqa <a4=int6464#5,>b4=int6464#7
+# asm 2: movdqa <a4=%xmm4,>b4=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232 a4 <<= 7
+# asm 1: pslld $7,<a4=int6464#5
+# asm 2: pslld $7,<a4=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232 b4 >>= 25
+# asm 1: psrld $25,<b4=int6464#7
+# asm 2: psrld $25,<b4=%xmm6
+psrld $25,%xmm6
+
+# qhasm: diag1 ^= a4
+# asm 1: pxor <a4=int6464#5,<diag1=int6464#2
+# asm 2: pxor <a4=%xmm4,<diag1=%xmm1
+pxor %xmm4,%xmm1
+
+# qhasm: diag1 ^= b4
+# asm 1: pxor <b4=int6464#7,<diag1=int6464#2
+# asm 2: pxor <b4=%xmm6,<diag1=%xmm1
+pxor %xmm6,%xmm1
+
+# qhasm: uint32323232 a5 += diag1
+# asm 1: paddd <diag1=int6464#2,<a5=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a5=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm: a6 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a6=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a6=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm: b5 = a5
+# asm 1: movdqa <a5=int6464#6,>b5=int6464#7
+# asm 2: movdqa <a5=%xmm5,>b5=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232 a5 <<= 9
+# asm 1: pslld $9,<a5=int6464#6
+# asm 2: pslld $9,<a5=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232 b5 >>= 23
+# asm 1: psrld $23,<b5=int6464#7
+# asm 2: psrld $23,<b5=%xmm6
+psrld $23,%xmm6
+
+# qhasm: diag2 ^= a5
+# asm 1: pxor <a5=int6464#6,<diag2=int6464#3
+# asm 2: pxor <a5=%xmm5,<diag2=%xmm2
+pxor %xmm5,%xmm2
+
+# qhasm: diag1 <<<= 32
+# asm 1: pshufd $0x93,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x93,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x93,%xmm1,%xmm1
+
+# qhasm: diag2 ^= b5
+# asm 1: pxor <b5=int6464#7,<diag2=int6464#3
+# asm 2: pxor <b5=%xmm6,<diag2=%xmm2
+pxor %xmm6,%xmm2
+
+# qhasm: uint32323232 a6 += diag2
+# asm 1: paddd <diag2=int6464#3,<a6=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a6=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm: a7 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a7=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm: b6 = a6
+# asm 1: movdqa <a6=int6464#5,>b6=int6464#7
+# asm 2: movdqa <a6=%xmm4,>b6=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232 a6 <<= 13
+# asm 1: pslld $13,<a6=int6464#5
+# asm 2: pslld $13,<a6=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232 b6 >>= 19
+# asm 1: psrld $19,<b6=int6464#7
+# asm 2: psrld $19,<b6=%xmm6
+psrld $19,%xmm6
+
+# qhasm: diag3 ^= a6
+# asm 1: pxor <a6=int6464#5,<diag3=int6464#4
+# asm 2: pxor <a6=%xmm4,<diag3=%xmm3
+pxor %xmm4,%xmm3
+
+# qhasm: diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm: diag3 ^= b6
+# asm 1: pxor <b6=int6464#7,<diag3=int6464#4
+# asm 2: pxor <b6=%xmm6,<diag3=%xmm3
+pxor %xmm6,%xmm3
+
+# qhasm: unsigned>? i -= 4
+# asm 1: sub $4,<i=int64#4
+# asm 2: sub $4,<i=%rcx
+sub $4,%rcx
+
+# qhasm: uint32323232 a7 += diag3
+# asm 1: paddd <diag3=int6464#4,<a7=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a7=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm: a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm: b7 = a7
+# asm 1: movdqa <a7=int6464#6,>b7=int6464#7
+# asm 2: movdqa <a7=%xmm5,>b7=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232 a7 <<= 18
+# asm 1: pslld $18,<a7=int6464#6
+# asm 2: pslld $18,<a7=%xmm5
+pslld $18,%xmm5
+
+# qhasm: b0 = 0
+# asm 1: pxor >b0=int6464#8,>b0=int6464#8
+# asm 2: pxor >b0=%xmm7,>b0=%xmm7
+pxor %xmm7,%xmm7
+
+# qhasm: uint32323232 b7 >>= 14
+# asm 1: psrld $14,<b7=int6464#7
+# asm 2: psrld $14,<b7=%xmm6
+psrld $14,%xmm6
+
+# qhasm: diag0 ^= a7
+# asm 1: pxor <a7=int6464#6,<diag0=int6464#1
+# asm 2: pxor <a7=%xmm5,<diag0=%xmm0
+pxor %xmm5,%xmm0
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: diag0 ^= b7
+# asm 1: pxor <b7=int6464#7,<diag0=int6464#1
+# asm 2: pxor <b7=%xmm6,<diag0=%xmm0
+pxor %xmm6,%xmm0
+# comment:fp stack unchanged by jump
+
+# qhasm: goto mainloop2 if unsigned>
+ja ._mainloop2
+
+# qhasm: uint32323232 diag0 += x0
+# asm 1: paddd <x0=stack128#4,<diag0=int6464#1
+# asm 2: paddd <x0=48(%rsp),<diag0=%xmm0
+paddd 48(%rsp),%xmm0
+
+# qhasm: uint32323232 diag1 += x1
+# asm 1: paddd <x1=stack128#1,<diag1=int6464#2
+# asm 2: paddd <x1=0(%rsp),<diag1=%xmm1
+paddd 0(%rsp),%xmm1
+
+# qhasm: uint32323232 diag2 += x2
+# asm 1: paddd <x2=stack128#2,<diag2=int6464#3
+# asm 2: paddd <x2=16(%rsp),<diag2=%xmm2
+paddd 16(%rsp),%xmm2
+
+# qhasm: uint32323232 diag3 += x3
+# asm 1: paddd <x3=stack128#3,<diag3=int6464#4
+# asm 2: paddd <x3=32(%rsp),<diag3=%xmm3
+paddd 32(%rsp),%xmm3
+
+# qhasm: in0 = diag0
+# asm 1: movd <diag0=int6464#1,>in0=int64#4
+# asm 2: movd <diag0=%xmm0,>in0=%rcx
+movd %xmm0,%rcx
+
+# qhasm: in12 = diag1
+# asm 1: movd <diag1=int6464#2,>in12=int64#5
+# asm 2: movd <diag1=%xmm1,>in12=%r8
+movd %xmm1,%r8
+
+# qhasm: in8 = diag2
+# asm 1: movd <diag2=int6464#3,>in8=int64#6
+# asm 2: movd <diag2=%xmm2,>in8=%r9
+movd %xmm2,%r9
+
+# qhasm: in4 = diag3
+# asm 1: movd <diag3=int6464#4,>in4=int64#7
+# asm 2: movd <diag3=%xmm3,>in4=%rax
+movd %xmm3,%rax
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: (uint32) in0 ^= *(uint32 *) (m + 0)
+# asm 1: xorl 0(<m=int64#2),<in0=int64#4d
+# asm 2: xorl 0(<m=%rsi),<in0=%ecx
+xorl 0(%rsi),%ecx
+
+# qhasm: (uint32) in12 ^= *(uint32 *) (m + 48)
+# asm 1: xorl 48(<m=int64#2),<in12=int64#5d
+# asm 2: xorl 48(<m=%rsi),<in12=%r8d
+xorl 48(%rsi),%r8d
+
+# qhasm: (uint32) in8 ^= *(uint32 *) (m + 32)
+# asm 1: xorl 32(<m=int64#2),<in8=int64#6d
+# asm 2: xorl 32(<m=%rsi),<in8=%r9d
+xorl 32(%rsi),%r9d
+
+# qhasm: (uint32) in4 ^= *(uint32 *) (m + 16)
+# asm 1: xorl 16(<m=int64#2),<in4=int64#7d
+# asm 2: xorl 16(<m=%rsi),<in4=%eax
+xorl 16(%rsi),%eax
+
+# qhasm: *(uint32 *) (out + 0) = in0
+# asm 1: movl <in0=int64#4d,0(<out=int64#1)
+# asm 2: movl <in0=%ecx,0(<out=%rdi)
+movl %ecx,0(%rdi)
+
+# qhasm: *(uint32 *) (out + 48) = in12
+# asm 1: movl <in12=int64#5d,48(<out=int64#1)
+# asm 2: movl <in12=%r8d,48(<out=%rdi)
+movl %r8d,48(%rdi)
+
+# qhasm: *(uint32 *) (out + 32) = in8
+# asm 1: movl <in8=int64#6d,32(<out=int64#1)
+# asm 2: movl <in8=%r9d,32(<out=%rdi)
+movl %r9d,32(%rdi)
+
+# qhasm: *(uint32 *) (out + 16) = in4
+# asm 1: movl <in4=int64#7d,16(<out=int64#1)
+# asm 2: movl <in4=%eax,16(<out=%rdi)
+movl %eax,16(%rdi)
+
+# qhasm: in5 = diag0
+# asm 1: movd <diag0=int6464#1,>in5=int64#4
+# asm 2: movd <diag0=%xmm0,>in5=%rcx
+movd %xmm0,%rcx
+
+# qhasm: in1 = diag1
+# asm 1: movd <diag1=int6464#2,>in1=int64#5
+# asm 2: movd <diag1=%xmm1,>in1=%r8
+movd %xmm1,%r8
+
+# qhasm: in13 = diag2
+# asm 1: movd <diag2=int6464#3,>in13=int64#6
+# asm 2: movd <diag2=%xmm2,>in13=%r9
+movd %xmm2,%r9
+
+# qhasm: in9 = diag3
+# asm 1: movd <diag3=int6464#4,>in9=int64#7
+# asm 2: movd <diag3=%xmm3,>in9=%rax
+movd %xmm3,%rax
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: (uint32) in5 ^= *(uint32 *) (m + 20)
+# asm 1: xorl 20(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 20(<m=%rsi),<in5=%ecx
+xorl 20(%rsi),%ecx
+
+# qhasm: (uint32) in1 ^= *(uint32 *) (m + 4)
+# asm 1: xorl 4(<m=int64#2),<in1=int64#5d
+# asm 2: xorl 4(<m=%rsi),<in1=%r8d
+xorl 4(%rsi),%r8d
+
+# qhasm: (uint32) in13 ^= *(uint32 *) (m + 52)
+# asm 1: xorl 52(<m=int64#2),<in13=int64#6d
+# asm 2: xorl 52(<m=%rsi),<in13=%r9d
+xorl 52(%rsi),%r9d
+
+# qhasm: (uint32) in9 ^= *(uint32 *) (m + 36)
+# asm 1: xorl 36(<m=int64#2),<in9=int64#7d
+# asm 2: xorl 36(<m=%rsi),<in9=%eax
+xorl 36(%rsi),%eax
+
+# qhasm: *(uint32 *) (out + 20) = in5
+# asm 1: movl <in5=int64#4d,20(<out=int64#1)
+# asm 2: movl <in5=%ecx,20(<out=%rdi)
+movl %ecx,20(%rdi)
+
+# qhasm: *(uint32 *) (out + 4) = in1
+# asm 1: movl <in1=int64#5d,4(<out=int64#1)
+# asm 2: movl <in1=%r8d,4(<out=%rdi)
+movl %r8d,4(%rdi)
+
+# qhasm: *(uint32 *) (out + 52) = in13
+# asm 1: movl <in13=int64#6d,52(<out=int64#1)
+# asm 2: movl <in13=%r9d,52(<out=%rdi)
+movl %r9d,52(%rdi)
+
+# qhasm: *(uint32 *) (out + 36) = in9
+# asm 1: movl <in9=int64#7d,36(<out=int64#1)
+# asm 2: movl <in9=%eax,36(<out=%rdi)
+movl %eax,36(%rdi)
+
+# qhasm: in10 = diag0
+# asm 1: movd <diag0=int6464#1,>in10=int64#4
+# asm 2: movd <diag0=%xmm0,>in10=%rcx
+movd %xmm0,%rcx
+
+# qhasm: in6 = diag1
+# asm 1: movd <diag1=int6464#2,>in6=int64#5
+# asm 2: movd <diag1=%xmm1,>in6=%r8
+movd %xmm1,%r8
+
+# qhasm: in2 = diag2
+# asm 1: movd <diag2=int6464#3,>in2=int64#6
+# asm 2: movd <diag2=%xmm2,>in2=%r9
+movd %xmm2,%r9
+
+# qhasm: in14 = diag3
+# asm 1: movd <diag3=int6464#4,>in14=int64#7
+# asm 2: movd <diag3=%xmm3,>in14=%rax
+movd %xmm3,%rax
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: (uint32) in10 ^= *(uint32 *) (m + 40)
+# asm 1: xorl 40(<m=int64#2),<in10=int64#4d
+# asm 2: xorl 40(<m=%rsi),<in10=%ecx
+xorl 40(%rsi),%ecx
+
+# qhasm: (uint32) in6 ^= *(uint32 *) (m + 24)
+# asm 1: xorl 24(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 24(<m=%rsi),<in6=%r8d
+xorl 24(%rsi),%r8d
+
+# qhasm: (uint32) in2 ^= *(uint32 *) (m + 8)
+# asm 1: xorl 8(<m=int64#2),<in2=int64#6d
+# asm 2: xorl 8(<m=%rsi),<in2=%r9d
+xorl 8(%rsi),%r9d
+
+# qhasm: (uint32) in14 ^= *(uint32 *) (m + 56)
+# asm 1: xorl 56(<m=int64#2),<in14=int64#7d
+# asm 2: xorl 56(<m=%rsi),<in14=%eax
+xorl 56(%rsi),%eax
+
+# qhasm: *(uint32 *) (out + 40) = in10
+# asm 1: movl <in10=int64#4d,40(<out=int64#1)
+# asm 2: movl <in10=%ecx,40(<out=%rdi)
+movl %ecx,40(%rdi)
+
+# qhasm: *(uint32 *) (out + 24) = in6
+# asm 1: movl <in6=int64#5d,24(<out=int64#1)
+# asm 2: movl <in6=%r8d,24(<out=%rdi)
+movl %r8d,24(%rdi)
+
+# qhasm: *(uint32 *) (out + 8) = in2
+# asm 1: movl <in2=int64#6d,8(<out=int64#1)
+# asm 2: movl <in2=%r9d,8(<out=%rdi)
+movl %r9d,8(%rdi)
+
+# qhasm: *(uint32 *) (out + 56) = in14
+# asm 1: movl <in14=int64#7d,56(<out=int64#1)
+# asm 2: movl <in14=%eax,56(<out=%rdi)
+movl %eax,56(%rdi)
+
+# qhasm: in15 = diag0
+# asm 1: movd <diag0=int6464#1,>in15=int64#4
+# asm 2: movd <diag0=%xmm0,>in15=%rcx
+movd %xmm0,%rcx
+
+# qhasm: in11 = diag1
+# asm 1: movd <diag1=int6464#2,>in11=int64#5
+# asm 2: movd <diag1=%xmm1,>in11=%r8
+movd %xmm1,%r8
+
+# qhasm: in7 = diag2
+# asm 1: movd <diag2=int6464#3,>in7=int64#6
+# asm 2: movd <diag2=%xmm2,>in7=%r9
+movd %xmm2,%r9
+
+# qhasm: in3 = diag3
+# asm 1: movd <diag3=int6464#4,>in3=int64#7
+# asm 2: movd <diag3=%xmm3,>in3=%rax
+movd %xmm3,%rax
+
+# qhasm: (uint32) in15 ^= *(uint32 *) (m + 60)
+# asm 1: xorl 60(<m=int64#2),<in15=int64#4d
+# asm 2: xorl 60(<m=%rsi),<in15=%ecx
+xorl 60(%rsi),%ecx
+
+# qhasm: (uint32) in11 ^= *(uint32 *) (m + 44)
+# asm 1: xorl 44(<m=int64#2),<in11=int64#5d
+# asm 2: xorl 44(<m=%rsi),<in11=%r8d
+xorl 44(%rsi),%r8d
+
+# qhasm: (uint32) in7 ^= *(uint32 *) (m + 28)
+# asm 1: xorl 28(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 28(<m=%rsi),<in7=%r9d
+xorl 28(%rsi),%r9d
+
+# qhasm: (uint32) in3 ^= *(uint32 *) (m + 12)
+# asm 1: xorl 12(<m=int64#2),<in3=int64#7d
+# asm 2: xorl 12(<m=%rsi),<in3=%eax
+xorl 12(%rsi),%eax
+
+# qhasm: *(uint32 *) (out + 60) = in15
+# asm 1: movl <in15=int64#4d,60(<out=int64#1)
+# asm 2: movl <in15=%ecx,60(<out=%rdi)
+movl %ecx,60(%rdi)
+
+# qhasm: *(uint32 *) (out + 44) = in11
+# asm 1: movl <in11=int64#5d,44(<out=int64#1)
+# asm 2: movl <in11=%r8d,44(<out=%rdi)
+movl %r8d,44(%rdi)
+
+# qhasm: *(uint32 *) (out + 28) = in7
+# asm 1: movl <in7=int64#6d,28(<out=int64#1)
+# asm 2: movl <in7=%r9d,28(<out=%rdi)
+movl %r9d,28(%rdi)
+
+# qhasm: *(uint32 *) (out + 12) = in3
+# asm 1: movl <in3=int64#7d,12(<out=int64#1)
+# asm 2: movl <in3=%eax,12(<out=%rdi)
+movl %eax,12(%rdi)
+
+# qhasm: bytes = bytes_backup
+# asm 1: movq <bytes_backup=stack64#8,>bytes=int64#6
+# asm 2: movq <bytes_backup=408(%rsp),>bytes=%r9
+movq 408(%rsp),%r9
+
+# qhasm: in8 = ((uint32 *)&x2)[0]
+# asm 1: movl <x2=stack128#2,>in8=int64#4d
+# asm 2: movl <x2=16(%rsp),>in8=%ecx
+movl 16(%rsp),%ecx
+
+# qhasm: in9 = ((uint32 *)&x3)[1]
+# asm 1: movl 4+<x3=stack128#3,>in9=int64#5d
+# asm 2: movl 4+<x3=32(%rsp),>in9=%r8d
+movl 4+32(%rsp),%r8d
+
+# qhasm: in8 += 1
+# asm 1: add $1,<in8=int64#4
+# asm 2: add $1,<in8=%rcx
+add $1,%rcx
+
+# qhasm: in9 <<= 32
+# asm 1: shl $32,<in9=int64#5
+# asm 2: shl $32,<in9=%r8
+shl $32,%r8
+
+# qhasm: in8 += in9
+# asm 1: add <in9=int64#5,<in8=int64#4
+# asm 2: add <in9=%r8,<in8=%rcx
+add %r8,%rcx
+
+# qhasm: in9 = in8
+# asm 1: mov <in8=int64#4,>in9=int64#5
+# asm 2: mov <in8=%rcx,>in9=%r8
+mov %rcx,%r8
+
+# qhasm: (uint64) in9 >>= 32
+# asm 1: shr $32,<in9=int64#5
+# asm 2: shr $32,<in9=%r8
+shr $32,%r8
+
+# qhasm: ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int64#4d,>x2=stack128#2
+# asm 2: movl <in8=%ecx,>x2=16(%rsp)
+movl %ecx,16(%rsp)
+
+# qhasm: ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int64#5d,4+<x3=stack128#3
+# asm 2: movl <in9=%r8d,4+<x3=32(%rsp)
+movl %r8d,4+32(%rsp)
+
+# qhasm: unsigned>? unsigned<? bytes - 64
+# asm 1: cmp $64,<bytes=int64#6
+# asm 2: cmp $64,<bytes=%r9
+cmp $64,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm: goto bytesatleast65 if unsigned>
+ja ._bytesatleast65
+# comment:fp stack unchanged by jump
+
+# qhasm: goto bytesatleast64 if !unsigned<
+jae ._bytesatleast64
+
+# qhasm: m = out
+# asm 1: mov <out=int64#1,>m=int64#2
+# asm 2: mov <out=%rdi,>m=%rsi
+mov %rdi,%rsi
+
+# qhasm: out = ctarget
+# asm 1: mov <ctarget=int64#3,>out=int64#1
+# asm 2: mov <ctarget=%rdx,>out=%rdi
+mov %rdx,%rdi
+
+# qhasm: i = bytes
+# asm 1: mov <bytes=int64#6,>i=int64#4
+# asm 2: mov <bytes=%r9,>i=%rcx
+mov %r9,%rcx
+
+# qhasm: while (i) { *out++ = *m++; --i }
+rep movsb
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: bytesatleast64:
+._bytesatleast64:
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: done:
+._done:
+
+# qhasm: r11_caller = r11_stack
+# asm 1: movq <r11_stack=stack64#1,>r11_caller=int64#9
+# asm 2: movq <r11_stack=352(%rsp),>r11_caller=%r11
+movq 352(%rsp),%r11
+
+# qhasm: r12_caller = r12_stack
+# asm 1: movq <r12_stack=stack64#2,>r12_caller=int64#10
+# asm 2: movq <r12_stack=360(%rsp),>r12_caller=%r12
+movq 360(%rsp),%r12
+
+# qhasm: r13_caller = r13_stack
+# asm 1: movq <r13_stack=stack64#3,>r13_caller=int64#11
+# asm 2: movq <r13_stack=368(%rsp),>r13_caller=%r13
+movq 368(%rsp),%r13
+
+# qhasm: r14_caller = r14_stack
+# asm 1: movq <r14_stack=stack64#4,>r14_caller=int64#12
+# asm 2: movq <r14_stack=376(%rsp),>r14_caller=%r14
+movq 376(%rsp),%r14
+
+# qhasm: r15_caller = r15_stack
+# asm 1: movq <r15_stack=stack64#5,>r15_caller=int64#13
+# asm 2: movq <r15_stack=384(%rsp),>r15_caller=%r15
+movq 384(%rsp),%r15
+
+# qhasm: rbx_caller = rbx_stack
+# asm 1: movq <rbx_stack=stack64#6,>rbx_caller=int64#14
+# asm 2: movq <rbx_stack=392(%rsp),>rbx_caller=%rbx
+movq 392(%rsp),%rbx
+
+# qhasm: rbp_caller = rbp_stack
+# asm 1: movq <rbp_stack=stack64#7,>rbp_caller=int64#15
+# asm 2: movq <rbp_stack=400(%rsp),>rbp_caller=%rbp
+movq 400(%rsp),%rbp
+
+# qhasm: leave
+add %r11,%rsp
+xor %rax,%rax
+xor %rdx,%rdx
+ret
+
+# qhasm: bytesatleast65:
+._bytesatleast65:
+
+# qhasm: bytes -= 64
+# asm 1: sub $64,<bytes=int64#6
+# asm 2: sub $64,<bytes=%r9
+sub $64,%r9
+
+# qhasm: out += 64
+# asm 1: add $64,<out=int64#1
+# asm 2: add $64,<out=%rdi
+add $64,%rdi
+
+# qhasm: m += 64
+# asm 1: add $64,<m=int64#2
+# asm 2: add $64,<m=%rsi
+add $64,%rsi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto bytesbetween1and255
+jmp ._bytesbetween1and255
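Each paddd / pslld / psrld / pxor group in the assembly above is one step of the Salsa20 quarter-round, b ^= (a + d) <<< k, with the rotation counts 7, 9, 13 and 18; the pshufd $0x93 / $0x4e / $0x39 shuffles re-diagonalize the state between steps. A rough scalar sketch of one quarter-round, with illustrative names only:

  #include <stdint.h>

  static uint32_t rotl32(uint32_t x, int n)
  {
    return (x << n) | (x >> (32 - n));
  }

  /* One Salsa20 quarter-round; the SSE2 code above performs four such
   * rotations at a time, one per 32-bit lane. */
  static void quarterround(uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
  {
    *b ^= rotl32(*a + *d,  7);
    *c ^= rotl32(*b + *a,  9);
    *d ^= rotl32(*c + *b, 13);
    *a ^= rotl32(*d + *c, 18);
  }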
diff --git a/sdar/lib/nacl/box.c b/sdar/lib/nacl/box.c
new file mode 100644
index 0000000..8119596
--- /dev/null
+++ b/sdar/lib/nacl/box.c
@@ -0,0 +1,70 @@
+#include <nacl.h>
+
+int crypto_box_keypair(
+ unsigned char *pk,
+ unsigned char *sk
+)
+{
+ randombytes(sk,32);
+ return crypto_scalarmult_curve25519_base(pk,sk);
+}
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+static const unsigned char n[16] = {0};
+
+int crypto_box_beforenm(
+ unsigned char *k,
+ const unsigned char *pk,
+ const unsigned char *sk
+)
+{
+ unsigned char s[32];
+ crypto_scalarmult_curve25519(s,sk,pk);
+ return crypto_core_hsalsa20(k,n,s,sigma);
+}
+
+int crypto_box_afternm(
+ unsigned char *c,
+ const unsigned char *m,unsigned long long mlen,
+ const unsigned char *n,
+ const unsigned char *k
+)
+{
+ return crypto_secretbox(c,m,mlen,n,k);
+}
+
+int crypto_box_open_afternm(
+ unsigned char *m,
+ const unsigned char *c,unsigned long long clen,
+ const unsigned char *n,
+ const unsigned char *k
+)
+{
+ return crypto_secretbox_open(m,c,clen,n,k);
+}
+
+int crypto_box(
+ unsigned char *c,
+ const unsigned char *m,unsigned long long mlen,
+ const unsigned char *n,
+ const unsigned char *pk,
+ const unsigned char *sk
+)
+{
+ unsigned char k[crypto_box_BEFORENMBYTES];
+ crypto_box_beforenm(k,pk,sk);
+ return crypto_box_afternm(c,m,mlen,n,k);
+}
+
+int crypto_box_open(
+ unsigned char *m,
+ const unsigned char *c,unsigned long long clen,
+ const unsigned char *n,
+ const unsigned char *pk,
+ const unsigned char *sk
+)
+{
+ unsigned char k[crypto_box_BEFORENMBYTES];
+ crypto_box_beforenm(k,pk,sk);
+ return crypto_box_open_afternm(m,c,clen,n,k);
+}
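The box functions above are only glue: curve25519 key agreement, hsalsa20 key derivation, then crypto_secretbox for the actual encryption. A minimal round-trip through these functions might look as follows (hypothetical caller; it assumes the usual NaCl sizes of 32-byte keys, a 24-byte nonce, and 32 zero bytes of plaintext padding):

  #include <string.h>
  #include <nacl.h>

  int box_roundtrip(void)
  {
    unsigned char apk[32], ask[32], bpk[32], bsk[32];
    unsigned char nonce[24] = {0};
    unsigned char m[48] = {0}, c[48], m2[48];

    crypto_box_keypair(apk, ask);
    crypto_box_keypair(bpk, bsk);
    memcpy(m + 32, "hello, world", 13);   /* plaintext follows 32 zero bytes */
    crypto_box(c, m, sizeof m, nonce, bpk, ask);
    /* 0 on success, nonzero if authentication fails */
    return crypto_box_open(m2, c, sizeof c, nonce, apk, bsk);
  }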
diff --git a/sdar/lib/nacl/curve25519.c b/sdar/lib/nacl/curve25519.c
new file mode 100644
index 0000000..51937ae
--- /dev/null
+++ b/sdar/lib/nacl/curve25519.c
@@ -0,0 +1,484 @@
+/* Copyright 2008, Google Inc.
+ * All rights reserved.
+ *
+ * Code released into the public domain.
+ *
+ * curve25519-donna: Curve25519 elliptic curve, public key function
+ *
+ * http://code.google.com/p/curve25519-donna/
+ *
+ * Adam Langley <agl@imperialviolet.org>
+ *
+ * Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.to>
+ *
+ * More information about curve25519 can be found here
+ * http://cr.yp.to/ecdh.html
+ *
+ * djb's sample implementation of curve25519 is written in a special assembly
+ * language called qhasm and uses the floating point registers.
+ *
+ * This is, almost, a clean room reimplementation from the curve25519 paper. It
+ * uses many of the tricks described therein. Only the crecip function is taken
+ * from the sample implementation.
+ */
+
+#include <nacl.h>
+#include <string.h>
+#include <stdint.h>
+
+typedef uint8_t u8;
+typedef uint64_t felem;
+// This is a special gcc mode for 128-bit integers. It's implemented on 64-bit
+// platforms only as far as I know.
+typedef unsigned uint128_t __attribute__((mode(TI)));
+
+/* Sum two numbers: output += in */
+static void fsum(felem *output, const felem *in) {
+ unsigned i;
+ for (i = 0; i < 5; ++i) output[i] += in[i];
+}
+
+/* Find the difference of two numbers: output = in - output
+ * (note the order of the arguments!)
+ */
+static void fdifference_backwards(felem *ioutput, const felem *iin) {
+ static const int64_t twotothe51 = (1l << 51);
+ const int64_t *in = (const int64_t *) iin;
+ int64_t *out = (int64_t *) ioutput;
+
+ out[0] = in[0] - out[0];
+ out[1] = in[1] - out[1];
+ out[2] = in[2] - out[2];
+ out[3] = in[3] - out[3];
+ out[4] = in[4] - out[4];
+
+ // An arithmetic shift right of 63 places turns a positive number to 0 and a
+ // negative number to all 1's. This gives us a bitmask that lets us avoid
+ // side-channel prone branches.
+ int64_t t;
+
+#define NEGCHAIN(a,b) \
+ t = out[a] >> 63; \
+ out[a] += twotothe51 & t; \
+ out[b] -= 1 & t;
+
+#define NEGCHAIN19(a,b) \
+ t = out[a] >> 63; \
+ out[a] += twotothe51 & t; \
+ out[b] -= 19 & t;
+
+ NEGCHAIN(0, 1);
+ NEGCHAIN(1, 2);
+ NEGCHAIN(2, 3);
+ NEGCHAIN(3, 4);
+ NEGCHAIN19(4, 0);
+ NEGCHAIN(0, 1);
+ NEGCHAIN(1, 2);
+ NEGCHAIN(2, 3);
+ NEGCHAIN(3, 4);
+}
+
+/* Multiply a number by a scalar: output = in * scalar */
+static void fscalar_product(felem *output, const felem *in, const felem scalar) {
+ uint128_t a;
+
+ a = ((uint128_t) in[0]) * scalar;
+ output[0] = a & 0x7ffffffffffff;
+
+ a = ((uint128_t) in[1]) * scalar + (a >> 51);
+ output[1] = a & 0x7ffffffffffff;
+
+ a = ((uint128_t) in[2]) * scalar + (a >> 51);
+ output[2] = a & 0x7ffffffffffff;
+
+ a = ((uint128_t) in[3]) * scalar + (a >> 51);
+ output[3] = a & 0x7ffffffffffff;
+
+ a = ((uint128_t) in[4]) * scalar + (a >> 51);
+ output[4] = a & 0x7ffffffffffff;
+
+ output[0] += (a >> 51) * 19;
+}
+
+/* Multiply two numbers: output = in2 * in
+ *
+ * output must be distinct to both inputs. The inputs are reduced coefficient
+ * form, the output is not.
+ */
+static void fmul(felem *output, const felem *in2, const felem *in) {
+ uint128_t t[9];
+
+ t[0] = ((uint128_t) in[0]) * in2[0];
+ t[1] = ((uint128_t) in[0]) * in2[1] +
+ ((uint128_t) in[1]) * in2[0];
+ t[2] = ((uint128_t) in[0]) * in2[2] +
+ ((uint128_t) in[2]) * in2[0] +
+ ((uint128_t) in[1]) * in2[1];
+ t[3] = ((uint128_t) in[0]) * in2[3] +
+ ((uint128_t) in[3]) * in2[0] +
+ ((uint128_t) in[1]) * in2[2] +
+ ((uint128_t) in[2]) * in2[1];
+ t[4] = ((uint128_t) in[0]) * in2[4] +
+ ((uint128_t) in[4]) * in2[0] +
+ ((uint128_t) in[3]) * in2[1] +
+ ((uint128_t) in[1]) * in2[3] +
+ ((uint128_t) in[2]) * in2[2];
+ t[5] = ((uint128_t) in[4]) * in2[1] +
+ ((uint128_t) in[1]) * in2[4] +
+ ((uint128_t) in[2]) * in2[3] +
+ ((uint128_t) in[3]) * in2[2];
+ t[6] = ((uint128_t) in[4]) * in2[2] +
+ ((uint128_t) in[2]) * in2[4] +
+ ((uint128_t) in[3]) * in2[3];
+ t[7] = ((uint128_t) in[3]) * in2[4] +
+ ((uint128_t) in[4]) * in2[3];
+ t[8] = ((uint128_t) in[4]) * in2[4];
+
+ t[0] += t[5] * 19;
+ t[1] += t[6] * 19;
+ t[2] += t[7] * 19;
+ t[3] += t[8] * 19;
+
+ t[1] += t[0] >> 51;
+ t[0] &= 0x7ffffffffffff;
+ t[2] += t[1] >> 51;
+ t[1] &= 0x7ffffffffffff;
+ t[3] += t[2] >> 51;
+ t[2] &= 0x7ffffffffffff;
+ t[4] += t[3] >> 51;
+ t[3] &= 0x7ffffffffffff;
+ t[0] += 19 * (t[4] >> 51);
+ t[4] &= 0x7ffffffffffff;
+ t[1] += t[0] >> 51;
+ t[0] &= 0x7ffffffffffff;
+ t[2] += t[1] >> 51;
+ t[1] &= 0x7ffffffffffff;
+
+ output[0] = t[0];
+ output[1] = t[1];
+ output[2] = t[2];
+ output[3] = t[3];
+ output[4] = t[4];
+}
+
+static void
+fsquare(felem *output, const felem *in) {
+ uint128_t t[9];
+
+ t[0] = ((uint128_t) in[0]) * in[0];
+ t[1] = ((uint128_t) in[0]) * in[1] * 2;
+ t[2] = ((uint128_t) in[0]) * in[2] * 2 +
+ ((uint128_t) in[1]) * in[1];
+ t[3] = ((uint128_t) in[0]) * in[3] * 2 +
+ ((uint128_t) in[1]) * in[2] * 2;
+ t[4] = ((uint128_t) in[0]) * in[4] * 2 +
+ ((uint128_t) in[3]) * in[1] * 2 +
+ ((uint128_t) in[2]) * in[2];
+ t[5] = ((uint128_t) in[4]) * in[1] * 2 +
+ ((uint128_t) in[2]) * in[3] * 2;
+ t[6] = ((uint128_t) in[4]) * in[2] * 2 +
+ ((uint128_t) in[3]) * in[3];
+ t[7] = ((uint128_t) in[3]) * in[4] * 2;
+ t[8] = ((uint128_t) in[4]) * in[4];
+
+ t[0] += t[5] * 19;
+ t[1] += t[6] * 19;
+ t[2] += t[7] * 19;
+ t[3] += t[8] * 19;
+
+ t[1] += t[0] >> 51;
+ t[0] &= 0x7ffffffffffff;
+ t[2] += t[1] >> 51;
+ t[1] &= 0x7ffffffffffff;
+ t[3] += t[2] >> 51;
+ t[2] &= 0x7ffffffffffff;
+ t[4] += t[3] >> 51;
+ t[3] &= 0x7ffffffffffff;
+ t[0] += 19 * (t[4] >> 51);
+ t[4] &= 0x7ffffffffffff;
+ t[1] += t[0] >> 51;
+ t[0] &= 0x7ffffffffffff;
+
+ output[0] = t[0];
+ output[1] = t[1];
+ output[2] = t[2];
+ output[3] = t[3];
+ output[4] = t[4];
+}
+
+/* Take a little-endian, 32-byte number and expand it into polynomial form */
+static void
+fexpand(felem *output, const u8 *in) {
+ output[0] = *((const uint64_t *)(in)) & 0x7ffffffffffff;
+ output[1] = (*((const uint64_t *)(in+6)) >> 3) & 0x7ffffffffffff;
+ output[2] = (*((const uint64_t *)(in+12)) >> 6) & 0x7ffffffffffff;
+ output[3] = (*((const uint64_t *)(in+19)) >> 1) & 0x7ffffffffffff;
+ output[4] = (*((const uint64_t *)(in+25)) >> 4) & 0x7ffffffffffff;
+}
+
+/* Take a fully reduced polynomial form number and contract it into a
+ * little-endian, 32-byte array
+ */
+static void
+fcontract(u8 *output, const felem *input) {
+ uint128_t t[5];
+
+ t[0] = input[0];
+ t[1] = input[1];
+ t[2] = input[2];
+ t[3] = input[3];
+ t[4] = input[4];
+
+ t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
+ t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
+ t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
+ t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
+ t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
+
+ t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
+ t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
+ t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
+ t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
+ t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
+
+ /* now t is between 0 and 2^255-1, properly carried. */
+ /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
+
+ t[0] += 19;
+
+ t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
+ t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
+ t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
+ t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
+ t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
+
+ /* now between 19 and 2^255-1 in both cases, and offset by 19. */
+
+ t[0] += 0x8000000000000 - 19;
+ t[1] += 0x8000000000000 - 1;
+ t[2] += 0x8000000000000 - 1;
+ t[3] += 0x8000000000000 - 1;
+ t[4] += 0x8000000000000 - 1;
+
+ /* now between 2^255 and 2^256-20, and offset by 2^255. */
+
+ t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
+ t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
+ t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
+ t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
+ t[4] &= 0x7ffffffffffff;
+
+ *((uint64_t *)(output)) = t[0] | (t[1] << 51);
+ *((uint64_t *)(output+8)) = (t[1] >> 13) | (t[2] << 38);
+ *((uint64_t *)(output+16)) = (t[2] >> 26) | (t[3] << 25);
+ *((uint64_t *)(output+24)) = (t[3] >> 39) | (t[4] << 12);
+}
+
+/* Input: Q, Q', Q-Q'
+ * Output: 2Q, Q+Q'
+ *
+ * x2 z2: long form
+ * x3 z3: long form
+ * x z: short form, destroyed
+ * xprime zprime: short form, destroyed
+ * qmqp: short form, preserved
+ */
+static void
+fmonty(felem *x2, felem *z2, /* output 2Q */
+ felem *x3, felem *z3, /* output Q + Q' */
+ felem *x, felem *z, /* input Q */
+ felem *xprime, felem *zprime, /* input Q' */
+ const felem *qmqp /* input Q - Q' */) {
+ felem origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5],
+ zzprime[5], zzzprime[5];
+
+ memcpy(origx, x, 5 * sizeof(felem));
+ fsum(x, z);
+ fdifference_backwards(z, origx); // does x - z
+
+ memcpy(origxprime, xprime, sizeof(felem) * 5);
+ fsum(xprime, zprime);
+ fdifference_backwards(zprime, origxprime);
+ fmul(xxprime, xprime, z);
+ fmul(zzprime, x, zprime);
+ memcpy(origxprime, xxprime, sizeof(felem) * 5);
+ fsum(xxprime, zzprime);
+ fdifference_backwards(zzprime, origxprime);
+ fsquare(x3, xxprime);
+ fsquare(zzzprime, zzprime);
+ fmul(z3, zzzprime, qmqp);
+
+ fsquare(xx, x);
+ fsquare(zz, z);
+ fmul(x2, xx, zz);
+ fdifference_backwards(zz, xx); // does zz = xx - zz
+ fscalar_product(zzz, zz, 121665);
+ fsum(zzz, xx);
+ fmul(z2, zz, zzz);
+}
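+
+/* Sketch of the doubling half of fmonty: with s = (x+z)^2 and d = (x-z)^2 it
+ * computes x2 = s*d and z2 = (s-d)*(s + 121665*(s-d)), where s-d = 4xz and
+ * 121665 = (486662 - 2)/4 comes from the curve coefficient A = 486662.  This
+ * is the usual Montgomery doubling z2 = (s-d)*(d + ((A+2)/4)*(s-d)) with the
+ * first factor of the sum regrouped. */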
+
+// -----------------------------------------------------------------------------
+// Maybe swap the contents of two felem arrays (@a and @b), each @len elements
+// long. Perform the swap iff @iswap is non-zero.
+//
+// This function performs the swap without leaking any side-channel
+// information.
+// -----------------------------------------------------------------------------
+static void
+swap_conditional(felem *a, felem *b, unsigned len, felem iswap) {
+ unsigned i;
+ const felem swap = -iswap;
+
+ for (i = 0; i < len; ++i) {
+ const felem x = swap & (a[i] ^ b[i]);
+ a[i] ^= x;
+ b[i] ^= x;
+ }
+}
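+
+/* How the masking works: with iswap = 1 the negation yields an all-ones mask,
+ * so x = a[i] ^ b[i] and the two XORs exchange the limbs; with iswap = 0 the
+ * mask is 0, x is 0 and nothing changes.  Either way the same loads, XORs and
+ * stores execute, so timing and memory access patterns are independent of the
+ * secret bit. */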
+
+/* Calculates nQ where Q is the x-coordinate of a point on the curve
+ *
+ * resultx/resultz: the x coordinate of the resulting curve point (short form)
+ * n: a little endian, 32-byte number
+ * q: a point on the curve (short form)
+ */
+static void
+cmult(felem *resultx, felem *resultz, const u8 *n, const felem *q) {
+ felem a[5] = {0}, b[5] = {1}, c[5] = {1}, d[5] = {0};
+ felem *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
+ felem e[5] = {0}, f[5] = {1}, g[5] = {0}, h[5] = {1};
+ felem *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
+
+ unsigned i, j;
+
+ memcpy(nqpqx, q, sizeof(felem) * 5);
+
+ for (i = 0; i < 32; ++i) {
+ u8 byte = n[31 - i];
+ for (j = 0; j < 8; ++j) {
+ const felem bit = byte >> 7;
+
+ swap_conditional(nqx, nqpqx, 5, bit);
+ swap_conditional(nqz, nqpqz, 5, bit);
+ fmonty(nqx2, nqz2,
+ nqpqx2, nqpqz2,
+ nqx, nqz,
+ nqpqx, nqpqz,
+ q);
+ swap_conditional(nqx2, nqpqx2, 5, bit);
+ swap_conditional(nqz2, nqpqz2, 5, bit);
+
+ t = nqx;
+ nqx = nqx2;
+ nqx2 = t;
+ t = nqz;
+ nqz = nqz2;
+ nqz2 = t;
+ t = nqpqx;
+ nqpqx = nqpqx2;
+ nqpqx2 = t;
+ t = nqpqz;
+ nqpqz = nqpqz2;
+ nqpqz2 = t;
+
+ byte <<= 1;
+ }
+ }
+
+ memcpy(resultx, nqx, sizeof(felem) * 5);
+ memcpy(resultz, nqz, sizeof(felem) * 5);
+}
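+
+/* cmult is a Montgomery ladder: it walks the scalar from the most significant
+ * bit down, maintaining the pair (nQ, nQ+Q), and performs one fmonty per bit
+ * to double one element of the pair and add the two together.  The
+ * conditional swaps before and after the call decide which accumulator plays
+ * which role, so every bit runs the exact same sequence of field operations. */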
+
+// -----------------------------------------------------------------------------
+// Shamelessly copied from djb's code
+// -----------------------------------------------------------------------------
+static void
+crecip(felem *out, const felem *z) {
+ felem z2[5];
+ felem z9[5];
+ felem z11[5];
+ felem z2_5_0[5];
+ felem z2_10_0[5];
+ felem z2_20_0[5];
+ felem z2_50_0[5];
+ felem z2_100_0[5];
+ felem t0[5];
+ felem t1[5];
+ int i;
+
+ /* 2 */ fsquare(z2,z);
+ /* 4 */ fsquare(t1,z2);
+ /* 8 */ fsquare(t0,t1);
+ /* 9 */ fmul(z9,t0,z);
+ /* 11 */ fmul(z11,z9,z2);
+ /* 22 */ fsquare(t0,z11);
+ /* 2^5 - 2^0 = 31 */ fmul(z2_5_0,t0,z9);
+
+ /* 2^6 - 2^1 */ fsquare(t0,z2_5_0);
+ /* 2^7 - 2^2 */ fsquare(t1,t0);
+ /* 2^8 - 2^3 */ fsquare(t0,t1);
+ /* 2^9 - 2^4 */ fsquare(t1,t0);
+ /* 2^10 - 2^5 */ fsquare(t0,t1);
+ /* 2^10 - 2^0 */ fmul(z2_10_0,t0,z2_5_0);
+
+ /* 2^11 - 2^1 */ fsquare(t0,z2_10_0);
+ /* 2^12 - 2^2 */ fsquare(t1,t0);
+ /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+ /* 2^20 - 2^0 */ fmul(z2_20_0,t1,z2_10_0);
+
+ /* 2^21 - 2^1 */ fsquare(t0,z2_20_0);
+ /* 2^22 - 2^2 */ fsquare(t1,t0);
+ /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+ /* 2^40 - 2^0 */ fmul(t0,t1,z2_20_0);
+
+ /* 2^41 - 2^1 */ fsquare(t1,t0);
+ /* 2^42 - 2^2 */ fsquare(t0,t1);
+ /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
+ /* 2^50 - 2^0 */ fmul(z2_50_0,t0,z2_10_0);
+
+ /* 2^51 - 2^1 */ fsquare(t0,z2_50_0);
+ /* 2^52 - 2^2 */ fsquare(t1,t0);
+ /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+ /* 2^100 - 2^0 */ fmul(z2_100_0,t1,z2_50_0);
+
+ /* 2^101 - 2^1 */ fsquare(t1,z2_100_0);
+ /* 2^102 - 2^2 */ fsquare(t0,t1);
+ /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
+ /* 2^200 - 2^0 */ fmul(t1,t0,z2_100_0);
+
+ /* 2^201 - 2^1 */ fsquare(t0,t1);
+ /* 2^202 - 2^2 */ fsquare(t1,t0);
+ /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+ /* 2^250 - 2^0 */ fmul(t0,t1,z2_50_0);
+
+ /* 2^251 - 2^1 */ fsquare(t1,t0);
+ /* 2^252 - 2^2 */ fsquare(t0,t1);
+ /* 2^253 - 2^3 */ fsquare(t1,t0);
+ /* 2^254 - 2^4 */ fsquare(t0,t1);
+ /* 2^255 - 2^5 */ fsquare(t1,t0);
+ /* 2^255 - 21 */ fmul(out,t1,z11);
+}
+
+int
+crypto_scalarmult_curve25519(u8 *mypublic, const u8 *secret, const u8 *basepoint) {
+ felem bp[5], x[5], z[5], zmone[5];
+ unsigned char e[32];
+ int i;
+ for (i = 0;i < 32;++i) e[i] = secret[i];
+ e[0] &= 248;
+ e[31] &= 127;
+ e[31] |= 64;
+ fexpand(bp, basepoint);
+ cmult(x, z, e, bp);
+ crecip(zmone, z);
+ fmul(z, x, zmone);
+ fcontract(mypublic, z);
+ return 0;
+}
+
+static const unsigned char basepoint[32] = {9};
+
+int crypto_scalarmult_curve25519_base(unsigned char *q,const unsigned char *n)
+{
+ return crypto_scalarmult_curve25519(q, n, basepoint);
+}
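+
+/* Usage sketch (the names sk, pk, peer_pk and shared are placeholders):
+ *
+ *   unsigned char sk[32], pk[32], shared[32];
+ *   randombytes(sk, 32);
+ *   crypto_scalarmult_curve25519_base(pk, sk);           (public key)
+ *   crypto_scalarmult_curve25519(shared, sk, peer_pk);   (shared secret)
+ *
+ * The scalar is clamped inside crypto_scalarmult_curve25519 (e[0] &= 248,
+ * e[31] = (e[31] & 127) | 64), so sk only needs to be 32 random bytes. */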
diff --git a/sdar/lib/nacl/generic/poly1305_auth.c b/sdar/lib/nacl/generic/poly1305_auth.c
new file mode 100644
index 0000000..9ac7b9c
--- /dev/null
+++ b/sdar/lib/nacl/generic/poly1305_auth.c
@@ -0,0 +1,1616 @@
+/*
+20080910
+D. J. Bernstein
+Public domain.
+*/
+
+#include <nacl.h>
+
+typedef unsigned char uchar;
+typedef int int32;
+typedef unsigned int uint32;
+typedef long long int64;
+typedef unsigned long long uint64;
+
+static const double poly1305_53_constants[] = {
+ 0.00000000558793544769287109375 /* alpham80 = 3 2^(-29) */
+, 24.0 /* alpham48 = 3 2^3 */
+, 103079215104.0 /* alpham16 = 3 2^35 */
+, 6755399441055744.0 /* alpha0 = 3 2^51 */
+, 1770887431076116955136.0 /* alpha18 = 3 2^69 */
+, 29014219670751100192948224.0 /* alpha32 = 3 2^83 */
+, 7605903601369376408980219232256.0 /* alpha50 = 3 2^101 */
+, 124615124604835863084731911901282304.0 /* alpha64 = 3 2^115 */
+, 32667107224410092492483962313449748299776.0 /* alpha82 = 3 2^133 */
+, 535217884764734955396857238543560676143529984.0 /* alpha96 = 3 2^147 */
+, 35076039295941670036888435985190792471742381031424.0 /* alpha112 = 3 2^163 */
+, 9194973245195333150150082162901855101712434733101613056.0 /* alpha130 = 3 2^181 */
+, 0.0000000000000000000000000000000000000036734198463196484624023016788195177431833298649127735047148490821200539357960224151611328125 /* scale = 5 2^(-130) */
+, 6755408030990331.0 /* offset0 = alpha0 + 2^33 - 5 */
+, 29014256564239239022116864.0 /* offset1 = alpha32 + 2^65 - 2^33 */
+, 124615283061160854719918951570079744.0 /* offset2 = alpha64 + 2^97 - 2^65 */
+, 535219245894202480694386063513315216128475136.0 /* offset3 = alpha96 + 2^130 - 2^97 */
+} ;
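+
+/* The alpha constants drive the double-precision rounding trick: for inputs
+ * of suitable magnitude, (x + alpha_k) - alpha_k rounds x to a multiple of
+ * 2^k, because alpha_k = 3*2^(51+k) pushes everything below bit k out of the
+ * 53-bit mantissa.  The code uses this to split the accumulator into limbs at
+ * bit positions 0, 32, 64 and 96, and `scale` = 5*2^(-130) folds the part at
+ * or above 2^130 back down, applying the reduction 2^130 = 5 mod (2^130 - 5). */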
+
+int crypto_onetimeauth_poly1305(unsigned char *out,const unsigned char *m,unsigned long long l,const unsigned char *k)
+{
+ register const unsigned char *r = k;
+ register const unsigned char *s = k + 16;
+ double r0high_stack;
+ double r1high_stack;
+ double r1low_stack;
+ double sr1high_stack;
+ double r2low_stack;
+ double sr2high_stack;
+ double r0low_stack;
+ double sr1low_stack;
+ double r2high_stack;
+ double sr2low_stack;
+ double r3high_stack;
+ double sr3high_stack;
+ double r3low_stack;
+ double sr3low_stack;
+ int64 d0;
+ int64 d1;
+ int64 d2;
+ int64 d3;
+ register double scale;
+ register double alpha0;
+ register double alpha32;
+ register double alpha64;
+ register double alpha96;
+ register double alpha130;
+ register double h0;
+ register double h1;
+ register double h2;
+ register double h3;
+ register double h4;
+ register double h5;
+ register double h6;
+ register double h7;
+ register double y7;
+ register double y6;
+ register double y1;
+ register double y0;
+ register double y5;
+ register double y4;
+ register double x7;
+ register double x6;
+ register double x1;
+ register double x0;
+ register double y3;
+ register double y2;
+ register double r3low;
+ register double r0low;
+ register double r3high;
+ register double r0high;
+ register double sr1low;
+ register double x5;
+ register double r3lowx0;
+ register double sr1high;
+ register double x4;
+ register double r0lowx6;
+ register double r1low;
+ register double x3;
+ register double r3highx0;
+ register double r1high;
+ register double x2;
+ register double r0highx6;
+ register double sr2low;
+ register double r0lowx0;
+ register double sr2high;
+ register double sr1lowx6;
+ register double r2low;
+ register double r0highx0;
+ register double r2high;
+ register double sr1highx6;
+ register double sr3low;
+ register double r1lowx0;
+ register double sr3high;
+ register double sr2lowx6;
+ register double r1highx0;
+ register double sr2highx6;
+ register double r2lowx0;
+ register double sr3lowx6;
+ register double r2highx0;
+ register double sr3highx6;
+ register double r1highx4;
+ register double r1lowx4;
+ register double r0highx4;
+ register double r0lowx4;
+ register double sr3highx4;
+ register double sr3lowx4;
+ register double sr2highx4;
+ register double sr2lowx4;
+ register double r0lowx2;
+ register double r0highx2;
+ register double r1lowx2;
+ register double r1highx2;
+ register double r2lowx2;
+ register double r2highx2;
+ register double sr3lowx2;
+ register double sr3highx2;
+ register double z0;
+ register double z1;
+ register double z2;
+ register double z3;
+ register int64 r0;
+ register int64 r1;
+ register int64 r2;
+ register int64 r3;
+ register uint32 r00;
+ register uint32 r01;
+ register uint32 r02;
+ register uint32 r03;
+ register uint32 r10;
+ register uint32 r11;
+ register uint32 r12;
+ register uint32 r13;
+ register uint32 r20;
+ register uint32 r21;
+ register uint32 r22;
+ register uint32 r23;
+ register uint32 r30;
+ register uint32 r31;
+ register uint32 r32;
+ register uint32 r33;
+ register int64 m0;
+ register int64 m1;
+ register int64 m2;
+ register int64 m3;
+ register uint32 m00;
+ register uint32 m01;
+ register uint32 m02;
+ register uint32 m03;
+ register uint32 m10;
+ register uint32 m11;
+ register uint32 m12;
+ register uint32 m13;
+ register uint32 m20;
+ register uint32 m21;
+ register uint32 m22;
+ register uint32 m23;
+ register uint32 m30;
+ register uint32 m31;
+ register uint32 m32;
+ register uint64 m33;
+ register char *constants;
+ register int32 lbelow2;
+ register int32 lbelow3;
+ register int32 lbelow4;
+ register int32 lbelow5;
+ register int32 lbelow6;
+ register int32 lbelow7;
+ register int32 lbelow8;
+ register int32 lbelow9;
+ register int32 lbelow10;
+ register int32 lbelow11;
+ register int32 lbelow12;
+ register int32 lbelow13;
+ register int32 lbelow14;
+ register int32 lbelow15;
+ register double alpham80;
+ register double alpham48;
+ register double alpham16;
+ register double alpha18;
+ register double alpha50;
+ register double alpha82;
+ register double alpha112;
+ register double offset0;
+ register double offset1;
+ register double offset2;
+ register double offset3;
+ register uint32 s00;
+ register uint32 s01;
+ register uint32 s02;
+ register uint32 s03;
+ register uint32 s10;
+ register uint32 s11;
+ register uint32 s12;
+ register uint32 s13;
+ register uint32 s20;
+ register uint32 s21;
+ register uint32 s22;
+ register uint32 s23;
+ register uint32 s30;
+ register uint32 s31;
+ register uint32 s32;
+ register uint32 s33;
+ register uint64 bits32;
+ register uint64 f;
+ register uint64 f0;
+ register uint64 f1;
+ register uint64 f2;
+ register uint64 f3;
+ register uint64 f4;
+ register uint64 g;
+ register uint64 g0;
+ register uint64 g1;
+ register uint64 g2;
+ register uint64 g3;
+ register uint64 g4;
+
+ r00 = *(uchar *) (r + 0);
+ constants = (char *) &poly1305_53_constants;
+
+ r01 = *(uchar *) (r + 1);
+
+ r02 = *(uchar *) (r + 2);
+ r0 = 2151;
+
+ r03 = *(uchar *) (r + 3); r03 &= 15;
+ r0 <<= 51;
+
+ r10 = *(uchar *) (r + 4); r10 &= 252;
+ r01 <<= 8;
+ r0 += r00;
+
+ r11 = *(uchar *) (r + 5);
+ r02 <<= 16;
+ r0 += r01;
+
+ r12 = *(uchar *) (r + 6);
+ r03 <<= 24;
+ r0 += r02;
+
+ r13 = *(uchar *) (r + 7); r13 &= 15;
+ r1 = 2215;
+ r0 += r03;
+
+ d0 = r0;
+ r1 <<= 51;
+ r2 = 2279;
+
+ r20 = *(uchar *) (r + 8); r20 &= 252;
+ r11 <<= 8;
+ r1 += r10;
+
+ r21 = *(uchar *) (r + 9);
+ r12 <<= 16;
+ r1 += r11;
+
+ r22 = *(uchar *) (r + 10);
+ r13 <<= 24;
+ r1 += r12;
+
+ r23 = *(uchar *) (r + 11); r23 &= 15;
+ r2 <<= 51;
+ r1 += r13;
+
+ d1 = r1;
+ r21 <<= 8;
+ r2 += r20;
+
+ r30 = *(uchar *) (r + 12); r30 &= 252;
+ r22 <<= 16;
+ r2 += r21;
+
+ r31 = *(uchar *) (r + 13);
+ r23 <<= 24;
+ r2 += r22;
+
+ r32 = *(uchar *) (r + 14);
+ r2 += r23;
+ r3 = 2343;
+
+ d2 = r2;
+ r3 <<= 51;
+ alpha32 = *(double *) (constants + 40);
+
+ r33 = *(uchar *) (r + 15); r33 &= 15;
+ r31 <<= 8;
+ r3 += r30;
+
+ r32 <<= 16;
+ r3 += r31;
+
+ r33 <<= 24;
+ r3 += r32;
+
+ r3 += r33;
+ h0 = alpha32 - alpha32;
+
+ d3 = r3;
+ h1 = alpha32 - alpha32;
+
+ alpha0 = *(double *) (constants + 24);
+ h2 = alpha32 - alpha32;
+
+ alpha64 = *(double *) (constants + 56);
+ h3 = alpha32 - alpha32;
+
+ alpha18 = *(double *) (constants + 32);
+ h4 = alpha32 - alpha32;
+
+ r0low = *(double *) &d0;
+ h5 = alpha32 - alpha32;
+
+ r1low = *(double *) &d1;
+ h6 = alpha32 - alpha32;
+
+ r2low = *(double *) &d2;
+ h7 = alpha32 - alpha32;
+
+ alpha50 = *(double *) (constants + 48);
+ r0low -= alpha0;
+
+ alpha82 = *(double *) (constants + 64);
+ r1low -= alpha32;
+
+ scale = *(double *) (constants + 96);
+ r2low -= alpha64;
+
+ alpha96 = *(double *) (constants + 72);
+ r0high = r0low + alpha18;
+
+ r3low = *(double *) &d3;
+
+ alpham80 = *(double *) (constants + 0);
+ r1high = r1low + alpha50;
+ sr1low = scale * r1low;
+
+ alpham48 = *(double *) (constants + 8);
+ r2high = r2low + alpha82;
+ sr2low = scale * r2low;
+
+ r0high -= alpha18;
+ r0high_stack = r0high;
+
+ r3low -= alpha96;
+
+ r1high -= alpha50;
+ r1high_stack = r1high;
+
+ sr1high = sr1low + alpham80;
+
+ alpha112 = *(double *) (constants + 80);
+ r0low -= r0high;
+
+ alpham16 = *(double *) (constants + 16);
+ r2high -= alpha82;
+ sr3low = scale * r3low;
+
+ alpha130 = *(double *) (constants + 88);
+ sr2high = sr2low + alpham48;
+
+ r1low -= r1high;
+ r1low_stack = r1low;
+
+ sr1high -= alpham80;
+ sr1high_stack = sr1high;
+
+ r2low -= r2high;
+ r2low_stack = r2low;
+
+ sr2high -= alpham48;
+ sr2high_stack = sr2high;
+
+ r3high = r3low + alpha112;
+ r0low_stack = r0low;
+
+ sr1low -= sr1high;
+ sr1low_stack = sr1low;
+
+ sr3high = sr3low + alpham16;
+ r2high_stack = r2high;
+
+ sr2low -= sr2high;
+ sr2low_stack = sr2low;
+
+ r3high -= alpha112;
+ r3high_stack = r3high;
+
+
+ sr3high -= alpham16;
+ sr3high_stack = sr3high;
+
+
+ r3low -= r3high;
+ r3low_stack = r3low;
+
+
+ sr3low -= sr3high;
+ sr3low_stack = sr3low;
+
+if (l < 16) goto addatmost15bytes;
+
+ m00 = *(uchar *) (m + 0);
+ m0 = 2151;
+
+ m0 <<= 51;
+ m1 = 2215;
+ m01 = *(uchar *) (m + 1);
+
+ m1 <<= 51;
+ m2 = 2279;
+ m02 = *(uchar *) (m + 2);
+
+ m2 <<= 51;
+ m3 = 2343;
+ m03 = *(uchar *) (m + 3);
+
+ m10 = *(uchar *) (m + 4);
+ m01 <<= 8;
+ m0 += m00;
+
+ m11 = *(uchar *) (m + 5);
+ m02 <<= 16;
+ m0 += m01;
+
+ m12 = *(uchar *) (m + 6);
+ m03 <<= 24;
+ m0 += m02;
+
+ m13 = *(uchar *) (m + 7);
+ m3 <<= 51;
+ m0 += m03;
+
+ m20 = *(uchar *) (m + 8);
+ m11 <<= 8;
+ m1 += m10;
+
+ m21 = *(uchar *) (m + 9);
+ m12 <<= 16;
+ m1 += m11;
+
+ m22 = *(uchar *) (m + 10);
+ m13 <<= 24;
+ m1 += m12;
+
+ m23 = *(uchar *) (m + 11);
+ m1 += m13;
+
+ m30 = *(uchar *) (m + 12);
+ m21 <<= 8;
+ m2 += m20;
+
+ m31 = *(uchar *) (m + 13);
+ m22 <<= 16;
+ m2 += m21;
+
+ m32 = *(uchar *) (m + 14);
+ m23 <<= 24;
+ m2 += m22;
+
+ m33 = *(uchar *) (m + 15);
+ m2 += m23;
+
+ d0 = m0;
+ m31 <<= 8;
+ m3 += m30;
+
+ d1 = m1;
+ m32 <<= 16;
+ m3 += m31;
+
+ d2 = m2;
+ m33 += 256;
+
+ m33 <<= 24;
+ m3 += m32;
+
+ m3 += m33;
+ d3 = m3;
+
+ m += 16;
+ l -= 16;
+
+ z0 = *(double *) &d0;
+
+ z1 = *(double *) &d1;
+
+ z2 = *(double *) &d2;
+
+ z3 = *(double *) &d3;
+
+ z0 -= alpha0;
+
+ z1 -= alpha32;
+
+ z2 -= alpha64;
+
+ z3 -= alpha96;
+
+ h0 += z0;
+
+ h1 += z1;
+
+ h3 += z2;
+
+ h5 += z3;
+
+if (l < 16) goto multiplyaddatmost15bytes;
+
+multiplyaddatleast16bytes:;
+
+ m2 = 2279;
+ m20 = *(uchar *) (m + 8);
+ y7 = h7 + alpha130;
+
+ m2 <<= 51;
+ m3 = 2343;
+ m21 = *(uchar *) (m + 9);
+ y6 = h6 + alpha130;
+
+ m3 <<= 51;
+ m0 = 2151;
+ m22 = *(uchar *) (m + 10);
+ y1 = h1 + alpha32;
+
+ m0 <<= 51;
+ m1 = 2215;
+ m23 = *(uchar *) (m + 11);
+ y0 = h0 + alpha32;
+
+ m1 <<= 51;
+ m30 = *(uchar *) (m + 12);
+ y7 -= alpha130;
+
+ m21 <<= 8;
+ m2 += m20;
+ m31 = *(uchar *) (m + 13);
+ y6 -= alpha130;
+
+ m22 <<= 16;
+ m2 += m21;
+ m32 = *(uchar *) (m + 14);
+ y1 -= alpha32;
+
+ m23 <<= 24;
+ m2 += m22;
+ m33 = *(uchar *) (m + 15);
+ y0 -= alpha32;
+
+ m2 += m23;
+ m00 = *(uchar *) (m + 0);
+ y5 = h5 + alpha96;
+
+ m31 <<= 8;
+ m3 += m30;
+ m01 = *(uchar *) (m + 1);
+ y4 = h4 + alpha96;
+
+ m32 <<= 16;
+ m02 = *(uchar *) (m + 2);
+ x7 = h7 - y7;
+ y7 *= scale;
+
+ m33 += 256;
+ m03 = *(uchar *) (m + 3);
+ x6 = h6 - y6;
+ y6 *= scale;
+
+ m33 <<= 24;
+ m3 += m31;
+ m10 = *(uchar *) (m + 4);
+ x1 = h1 - y1;
+
+ m01 <<= 8;
+ m3 += m32;
+ m11 = *(uchar *) (m + 5);
+ x0 = h0 - y0;
+
+ m3 += m33;
+ m0 += m00;
+ m12 = *(uchar *) (m + 6);
+ y5 -= alpha96;
+
+ m02 <<= 16;
+ m0 += m01;
+ m13 = *(uchar *) (m + 7);
+ y4 -= alpha96;
+
+ m03 <<= 24;
+ m0 += m02;
+ d2 = m2;
+ x1 += y7;
+
+ m0 += m03;
+ d3 = m3;
+ x0 += y6;
+
+ m11 <<= 8;
+ m1 += m10;
+ d0 = m0;
+ x7 += y5;
+
+ m12 <<= 16;
+ m1 += m11;
+ x6 += y4;
+
+ m13 <<= 24;
+ m1 += m12;
+ y3 = h3 + alpha64;
+
+ m1 += m13;
+ d1 = m1;
+ y2 = h2 + alpha64;
+
+ x0 += x1;
+
+ x6 += x7;
+
+ y3 -= alpha64;
+ r3low = r3low_stack;
+
+ y2 -= alpha64;
+ r0low = r0low_stack;
+
+ x5 = h5 - y5;
+ r3lowx0 = r3low * x0;
+ r3high = r3high_stack;
+
+ x4 = h4 - y4;
+ r0lowx6 = r0low * x6;
+ r0high = r0high_stack;
+
+ x3 = h3 - y3;
+ r3highx0 = r3high * x0;
+ sr1low = sr1low_stack;
+
+ x2 = h2 - y2;
+ r0highx6 = r0high * x6;
+ sr1high = sr1high_stack;
+
+ x5 += y3;
+ r0lowx0 = r0low * x0;
+ r1low = r1low_stack;
+
+ h6 = r3lowx0 + r0lowx6;
+ sr1lowx6 = sr1low * x6;
+ r1high = r1high_stack;
+
+ x4 += y2;
+ r0highx0 = r0high * x0;
+ sr2low = sr2low_stack;
+
+ h7 = r3highx0 + r0highx6;
+ sr1highx6 = sr1high * x6;
+ sr2high = sr2high_stack;
+
+ x3 += y1;
+ r1lowx0 = r1low * x0;
+ r2low = r2low_stack;
+
+ h0 = r0lowx0 + sr1lowx6;
+ sr2lowx6 = sr2low * x6;
+ r2high = r2high_stack;
+
+ x2 += y0;
+ r1highx0 = r1high * x0;
+ sr3low = sr3low_stack;
+
+ h1 = r0highx0 + sr1highx6;
+ sr2highx6 = sr2high * x6;
+ sr3high = sr3high_stack;
+
+ x4 += x5;
+ r2lowx0 = r2low * x0;
+ z2 = *(double *) &d2;
+
+ h2 = r1lowx0 + sr2lowx6;
+ sr3lowx6 = sr3low * x6;
+
+ x2 += x3;
+ r2highx0 = r2high * x0;
+ z3 = *(double *) &d3;
+
+ h3 = r1highx0 + sr2highx6;
+ sr3highx6 = sr3high * x6;
+
+ r1highx4 = r1high * x4;
+ z2 -= alpha64;
+
+ h4 = r2lowx0 + sr3lowx6;
+ r1lowx4 = r1low * x4;
+
+ r0highx4 = r0high * x4;
+ z3 -= alpha96;
+
+ h5 = r2highx0 + sr3highx6;
+ r0lowx4 = r0low * x4;
+
+ h7 += r1highx4;
+ sr3highx4 = sr3high * x4;
+
+ h6 += r1lowx4;
+ sr3lowx4 = sr3low * x4;
+
+ h5 += r0highx4;
+ sr2highx4 = sr2high * x4;
+
+ h4 += r0lowx4;
+ sr2lowx4 = sr2low * x4;
+
+ h3 += sr3highx4;
+ r0lowx2 = r0low * x2;
+
+ h2 += sr3lowx4;
+ r0highx2 = r0high * x2;
+
+ h1 += sr2highx4;
+ r1lowx2 = r1low * x2;
+
+ h0 += sr2lowx4;
+ r1highx2 = r1high * x2;
+
+ h2 += r0lowx2;
+ r2lowx2 = r2low * x2;
+
+ h3 += r0highx2;
+ r2highx2 = r2high * x2;
+
+ h4 += r1lowx2;
+ sr3lowx2 = sr3low * x2;
+
+ h5 += r1highx2;
+ sr3highx2 = sr3high * x2;
+ alpha0 = *(double *) (constants + 24);
+
+ m += 16;
+ h6 += r2lowx2;
+
+ l -= 16;
+ h7 += r2highx2;
+
+ z1 = *(double *) &d1;
+ h0 += sr3lowx2;
+
+ z0 = *(double *) &d0;
+ h1 += sr3highx2;
+
+ z1 -= alpha32;
+
+ z0 -= alpha0;
+
+ h5 += z3;
+
+ h3 += z2;
+
+ h1 += z1;
+
+ h0 += z0;
+
+if (l >= 16) goto multiplyaddatleast16bytes;
+
+multiplyaddatmost15bytes:;
+
+ y7 = h7 + alpha130;
+
+ y6 = h6 + alpha130;
+
+ y1 = h1 + alpha32;
+
+ y0 = h0 + alpha32;
+
+ y7 -= alpha130;
+
+ y6 -= alpha130;
+
+ y1 -= alpha32;
+
+ y0 -= alpha32;
+
+ y5 = h5 + alpha96;
+
+ y4 = h4 + alpha96;
+
+ x7 = h7 - y7;
+ y7 *= scale;
+
+ x6 = h6 - y6;
+ y6 *= scale;
+
+ x1 = h1 - y1;
+
+ x0 = h0 - y0;
+
+ y5 -= alpha96;
+
+ y4 -= alpha96;
+
+ x1 += y7;
+
+ x0 += y6;
+
+ x7 += y5;
+
+ x6 += y4;
+
+ y3 = h3 + alpha64;
+
+ y2 = h2 + alpha64;
+
+ x0 += x1;
+
+ x6 += x7;
+
+ y3 -= alpha64;
+ r3low = r3low_stack;
+
+ y2 -= alpha64;
+ r0low = r0low_stack;
+
+ x5 = h5 - y5;
+ r3lowx0 = r3low * x0;
+ r3high = r3high_stack;
+
+ x4 = h4 - y4;
+ r0lowx6 = r0low * x6;
+ r0high = r0high_stack;
+
+ x3 = h3 - y3;
+ r3highx0 = r3high * x0;
+ sr1low = sr1low_stack;
+
+ x2 = h2 - y2;
+ r0highx6 = r0high * x6;
+ sr1high = sr1high_stack;
+
+ x5 += y3;
+ r0lowx0 = r0low * x0;
+ r1low = r1low_stack;
+
+ h6 = r3lowx0 + r0lowx6;
+ sr1lowx6 = sr1low * x6;
+ r1high = r1high_stack;
+
+ x4 += y2;
+ r0highx0 = r0high * x0;
+ sr2low = sr2low_stack;
+
+ h7 = r3highx0 + r0highx6;
+ sr1highx6 = sr1high * x6;
+ sr2high = sr2high_stack;
+
+ x3 += y1;
+ r1lowx0 = r1low * x0;
+ r2low = r2low_stack;
+
+ h0 = r0lowx0 + sr1lowx6;
+ sr2lowx6 = sr2low * x6;
+ r2high = r2high_stack;
+
+ x2 += y0;
+ r1highx0 = r1high * x0;
+ sr3low = sr3low_stack;
+
+ h1 = r0highx0 + sr1highx6;
+ sr2highx6 = sr2high * x6;
+ sr3high = sr3high_stack;
+
+ x4 += x5;
+ r2lowx0 = r2low * x0;
+
+ h2 = r1lowx0 + sr2lowx6;
+ sr3lowx6 = sr3low * x6;
+
+ x2 += x3;
+ r2highx0 = r2high * x0;
+
+ h3 = r1highx0 + sr2highx6;
+ sr3highx6 = sr3high * x6;
+
+ r1highx4 = r1high * x4;
+
+ h4 = r2lowx0 + sr3lowx6;
+ r1lowx4 = r1low * x4;
+
+ r0highx4 = r0high * x4;
+
+ h5 = r2highx0 + sr3highx6;
+ r0lowx4 = r0low * x4;
+
+ h7 += r1highx4;
+ sr3highx4 = sr3high * x4;
+
+ h6 += r1lowx4;
+ sr3lowx4 = sr3low * x4;
+
+ h5 += r0highx4;
+ sr2highx4 = sr2high * x4;
+
+ h4 += r0lowx4;
+ sr2lowx4 = sr2low * x4;
+
+ h3 += sr3highx4;
+ r0lowx2 = r0low * x2;
+
+ h2 += sr3lowx4;
+ r0highx2 = r0high * x2;
+
+ h1 += sr2highx4;
+ r1lowx2 = r1low * x2;
+
+ h0 += sr2lowx4;
+ r1highx2 = r1high * x2;
+
+ h2 += r0lowx2;
+ r2lowx2 = r2low * x2;
+
+ h3 += r0highx2;
+ r2highx2 = r2high * x2;
+
+ h4 += r1lowx2;
+ sr3lowx2 = sr3low * x2;
+
+ h5 += r1highx2;
+ sr3highx2 = sr3high * x2;
+
+ h6 += r2lowx2;
+
+ h7 += r2highx2;
+
+ h0 += sr3lowx2;
+
+ h1 += sr3highx2;
+
+addatmost15bytes:;
+
+if (l == 0) goto nomorebytes;
+
+ lbelow2 = l - 2;
+
+ lbelow3 = l - 3;
+
+ lbelow2 >>= 31;
+ lbelow4 = l - 4;
+
+ m00 = *(uchar *) (m + 0);
+ lbelow3 >>= 31;
+ m += lbelow2;
+
+ m01 = *(uchar *) (m + 1);
+ lbelow4 >>= 31;
+ m += lbelow3;
+
+ m02 = *(uchar *) (m + 2);
+ m += lbelow4;
+ m0 = 2151;
+
+ m03 = *(uchar *) (m + 3);
+ m0 <<= 51;
+ m1 = 2215;
+
+ m0 += m00;
+ m01 &= ~lbelow2;
+
+ m02 &= ~lbelow3;
+ m01 -= lbelow2;
+
+ m01 <<= 8;
+ m03 &= ~lbelow4;
+
+ m0 += m01;
+ lbelow2 -= lbelow3;
+
+ m02 += lbelow2;
+ lbelow3 -= lbelow4;
+
+ m02 <<= 16;
+ m03 += lbelow3;
+
+ m03 <<= 24;
+ m0 += m02;
+
+ m0 += m03;
+ lbelow5 = l - 5;
+
+ lbelow6 = l - 6;
+ lbelow7 = l - 7;
+
+ lbelow5 >>= 31;
+ lbelow8 = l - 8;
+
+ lbelow6 >>= 31;
+ m += lbelow5;
+
+ m10 = *(uchar *) (m + 4);
+ lbelow7 >>= 31;
+ m += lbelow6;
+
+ m11 = *(uchar *) (m + 5);
+ lbelow8 >>= 31;
+ m += lbelow7;
+
+ m12 = *(uchar *) (m + 6);
+ m1 <<= 51;
+ m += lbelow8;
+
+ m13 = *(uchar *) (m + 7);
+ m10 &= ~lbelow5;
+ lbelow4 -= lbelow5;
+
+ m10 += lbelow4;
+ lbelow5 -= lbelow6;
+
+ m11 &= ~lbelow6;
+ m11 += lbelow5;
+
+ m11 <<= 8;
+ m1 += m10;
+
+ m1 += m11;
+ m12 &= ~lbelow7;
+
+ lbelow6 -= lbelow7;
+ m13 &= ~lbelow8;
+
+ m12 += lbelow6;
+ lbelow7 -= lbelow8;
+
+ m12 <<= 16;
+ m13 += lbelow7;
+
+ m13 <<= 24;
+ m1 += m12;
+
+ m1 += m13;
+ m2 = 2279;
+
+ lbelow9 = l - 9;
+ m3 = 2343;
+
+ lbelow10 = l - 10;
+ lbelow11 = l - 11;
+
+ lbelow9 >>= 31;
+ lbelow12 = l - 12;
+
+ lbelow10 >>= 31;
+ m += lbelow9;
+
+ m20 = *(uchar *) (m + 8);
+ lbelow11 >>= 31;
+ m += lbelow10;
+
+ m21 = *(uchar *) (m + 9);
+ lbelow12 >>= 31;
+ m += lbelow11;
+
+ m22 = *(uchar *) (m + 10);
+ m2 <<= 51;
+ m += lbelow12;
+
+ m23 = *(uchar *) (m + 11);
+ m20 &= ~lbelow9;
+ lbelow8 -= lbelow9;
+
+ m20 += lbelow8;
+ lbelow9 -= lbelow10;
+
+ m21 &= ~lbelow10;
+ m21 += lbelow9;
+
+ m21 <<= 8;
+ m2 += m20;
+
+ m2 += m21;
+ m22 &= ~lbelow11;
+
+ lbelow10 -= lbelow11;
+ m23 &= ~lbelow12;
+
+ m22 += lbelow10;
+ lbelow11 -= lbelow12;
+
+ m22 <<= 16;
+ m23 += lbelow11;
+
+ m23 <<= 24;
+ m2 += m22;
+
+ m3 <<= 51;
+ lbelow13 = l - 13;
+
+ lbelow13 >>= 31;
+ lbelow14 = l - 14;
+
+ lbelow14 >>= 31;
+ m += lbelow13;
+ lbelow15 = l - 15;
+
+ m30 = *(uchar *) (m + 12);
+ lbelow15 >>= 31;
+ m += lbelow14;
+
+ m31 = *(uchar *) (m + 13);
+ m += lbelow15;
+ m2 += m23;
+
+ m32 = *(uchar *) (m + 14);
+ m30 &= ~lbelow13;
+ lbelow12 -= lbelow13;
+
+ m30 += lbelow12;
+ lbelow13 -= lbelow14;
+
+ m3 += m30;
+ m31 &= ~lbelow14;
+
+ m31 += lbelow13;
+ m32 &= ~lbelow15;
+
+ m31 <<= 8;
+ lbelow14 -= lbelow15;
+
+ m3 += m31;
+ m32 += lbelow14;
+ d0 = m0;
+
+ m32 <<= 16;
+ m33 = lbelow15 + 1;
+ d1 = m1;
+
+ m33 <<= 24;
+ m3 += m32;
+ d2 = m2;
+
+ m3 += m33;
+ d3 = m3;
+
+ alpha0 = *(double *) (constants + 24);
+
+ z3 = *(double *) &d3;
+
+ z2 = *(double *) &d2;
+
+ z1 = *(double *) &d1;
+
+ z0 = *(double *) &d0;
+
+ z3 -= alpha96;
+
+ z2 -= alpha64;
+
+ z1 -= alpha32;
+
+ z0 -= alpha0;
+
+ h5 += z3;
+
+ h3 += z2;
+
+ h1 += z1;
+
+ h0 += z0;
+
+ y7 = h7 + alpha130;
+
+ y6 = h6 + alpha130;
+
+ y1 = h1 + alpha32;
+
+ y0 = h0 + alpha32;
+
+ y7 -= alpha130;
+
+ y6 -= alpha130;
+
+ y1 -= alpha32;
+
+ y0 -= alpha32;
+
+ y5 = h5 + alpha96;
+
+ y4 = h4 + alpha96;
+
+ x7 = h7 - y7;
+ y7 *= scale;
+
+ x6 = h6 - y6;
+ y6 *= scale;
+
+ x1 = h1 - y1;
+
+ x0 = h0 - y0;
+
+ y5 -= alpha96;
+
+ y4 -= alpha96;
+
+ x1 += y7;
+
+ x0 += y6;
+
+ x7 += y5;
+
+ x6 += y4;
+
+ y3 = h3 + alpha64;
+
+ y2 = h2 + alpha64;
+
+ x0 += x1;
+
+ x6 += x7;
+
+ y3 -= alpha64;
+ r3low = r3low_stack;
+
+ y2 -= alpha64;
+ r0low = r0low_stack;
+
+ x5 = h5 - y5;
+ r3lowx0 = r3low * x0;
+ r3high = r3high_stack;
+
+ x4 = h4 - y4;
+ r0lowx6 = r0low * x6;
+ r0high = r0high_stack;
+
+ x3 = h3 - y3;
+ r3highx0 = r3high * x0;
+ sr1low = sr1low_stack;
+
+ x2 = h2 - y2;
+ r0highx6 = r0high * x6;
+ sr1high = sr1high_stack;
+
+ x5 += y3;
+ r0lowx0 = r0low * x0;
+ r1low = r1low_stack;
+
+ h6 = r3lowx0 + r0lowx6;
+ sr1lowx6 = sr1low * x6;
+ r1high = r1high_stack;
+
+ x4 += y2;
+ r0highx0 = r0high * x0;
+ sr2low = sr2low_stack;
+
+ h7 = r3highx0 + r0highx6;
+ sr1highx6 = sr1high * x6;
+ sr2high = sr2high_stack;
+
+ x3 += y1;
+ r1lowx0 = r1low * x0;
+ r2low = r2low_stack;
+
+ h0 = r0lowx0 + sr1lowx6;
+ sr2lowx6 = sr2low * x6;
+ r2high = r2high_stack;
+
+ x2 += y0;
+ r1highx0 = r1high * x0;
+ sr3low = sr3low_stack;
+
+ h1 = r0highx0 + sr1highx6;
+ sr2highx6 = sr2high * x6;
+ sr3high = sr3high_stack;
+
+ x4 += x5;
+ r2lowx0 = r2low * x0;
+
+ h2 = r1lowx0 + sr2lowx6;
+ sr3lowx6 = sr3low * x6;
+
+ x2 += x3;
+ r2highx0 = r2high * x0;
+
+ h3 = r1highx0 + sr2highx6;
+ sr3highx6 = sr3high * x6;
+
+ r1highx4 = r1high * x4;
+
+ h4 = r2lowx0 + sr3lowx6;
+ r1lowx4 = r1low * x4;
+
+ r0highx4 = r0high * x4;
+
+ h5 = r2highx0 + sr3highx6;
+ r0lowx4 = r0low * x4;
+
+ h7 += r1highx4;
+ sr3highx4 = sr3high * x4;
+
+ h6 += r1lowx4;
+ sr3lowx4 = sr3low * x4;
+
+ h5 += r0highx4;
+ sr2highx4 = sr2high * x4;
+
+ h4 += r0lowx4;
+ sr2lowx4 = sr2low * x4;
+
+ h3 += sr3highx4;
+ r0lowx2 = r0low * x2;
+
+ h2 += sr3lowx4;
+ r0highx2 = r0high * x2;
+
+ h1 += sr2highx4;
+ r1lowx2 = r1low * x2;
+
+ h0 += sr2lowx4;
+ r1highx2 = r1high * x2;
+
+ h2 += r0lowx2;
+ r2lowx2 = r2low * x2;
+
+ h3 += r0highx2;
+ r2highx2 = r2high * x2;
+
+ h4 += r1lowx2;
+ sr3lowx2 = sr3low * x2;
+
+ h5 += r1highx2;
+ sr3highx2 = sr3high * x2;
+
+ h6 += r2lowx2;
+
+ h7 += r2highx2;
+
+ h0 += sr3lowx2;
+
+ h1 += sr3highx2;
+
+
+nomorebytes:;
+
+ offset0 = *(double *) (constants + 104);
+ y7 = h7 + alpha130;
+
+ offset1 = *(double *) (constants + 112);
+ y0 = h0 + alpha32;
+
+ offset2 = *(double *) (constants + 120);
+ y1 = h1 + alpha32;
+
+ offset3 = *(double *) (constants + 128);
+ y2 = h2 + alpha64;
+
+ y7 -= alpha130;
+
+ y3 = h3 + alpha64;
+
+ y4 = h4 + alpha96;
+
+ y5 = h5 + alpha96;
+
+ x7 = h7 - y7;
+ y7 *= scale;
+
+ y0 -= alpha32;
+
+ y1 -= alpha32;
+
+ y2 -= alpha64;
+
+ h6 += x7;
+
+ y3 -= alpha64;
+
+ y4 -= alpha96;
+
+ y5 -= alpha96;
+
+ y6 = h6 + alpha130;
+
+ x0 = h0 - y0;
+
+ x1 = h1 - y1;
+
+ x2 = h2 - y2;
+
+ y6 -= alpha130;
+
+ x0 += y7;
+
+ x3 = h3 - y3;
+
+ x4 = h4 - y4;
+
+ x5 = h5 - y5;
+
+ x6 = h6 - y6;
+
+ y6 *= scale;
+
+ x2 += y0;
+
+ x3 += y1;
+
+ x4 += y2;
+
+ x0 += y6;
+
+ x5 += y3;
+
+ x6 += y4;
+
+ x2 += x3;
+
+ x0 += x1;
+
+ x4 += x5;
+
+ x6 += y5;
+
+ x2 += offset1;
+ *(double *) &d1 = x2;
+
+ x0 += offset0;
+ *(double *) &d0 = x0;
+
+ x4 += offset2;
+ *(double *) &d2 = x4;
+
+ x6 += offset3;
+ *(double *) &d3 = x6;
+
+
+
+
+ f0 = d0;
+
+ f1 = d1;
+ bits32 = -1;
+
+ f2 = d2;
+ bits32 >>= 32;
+
+ f3 = d3;
+ f = f0 >> 32;
+
+ f0 &= bits32;
+ f &= 255;
+
+ f1 += f;
+ g0 = f0 + 5;
+
+ g = g0 >> 32;
+ g0 &= bits32;
+
+ f = f1 >> 32;
+ f1 &= bits32;
+
+ f &= 255;
+ g1 = f1 + g;
+
+ g = g1 >> 32;
+ f2 += f;
+
+ f = f2 >> 32;
+ g1 &= bits32;
+
+ f2 &= bits32;
+ f &= 255;
+
+ f3 += f;
+ g2 = f2 + g;
+
+ g = g2 >> 32;
+ g2 &= bits32;
+
+ f4 = f3 >> 32;
+ f3 &= bits32;
+
+ f4 &= 255;
+ g3 = f3 + g;
+
+ g = g3 >> 32;
+ g3 &= bits32;
+
+ g4 = f4 + g;
+
+ g4 = g4 - 4;
+ s00 = *(uchar *) (s + 0);
+
+ f = (int64) g4 >> 63;
+ s01 = *(uchar *) (s + 1);
+
+ f0 &= f;
+ g0 &= ~f;
+ s02 = *(uchar *) (s + 2);
+
+ f1 &= f;
+ f0 |= g0;
+ s03 = *(uchar *) (s + 3);
+
+ g1 &= ~f;
+ f2 &= f;
+ s10 = *(uchar *) (s + 4);
+
+ f3 &= f;
+ g2 &= ~f;
+ s11 = *(uchar *) (s + 5);
+
+ g3 &= ~f;
+ f1 |= g1;
+ s12 = *(uchar *) (s + 6);
+
+ f2 |= g2;
+ f3 |= g3;
+ s13 = *(uchar *) (s + 7);
+
+ s01 <<= 8;
+ f0 += s00;
+ s20 = *(uchar *) (s + 8);
+
+ s02 <<= 16;
+ f0 += s01;
+ s21 = *(uchar *) (s + 9);
+
+ s03 <<= 24;
+ f0 += s02;
+ s22 = *(uchar *) (s + 10);
+
+ s11 <<= 8;
+ f1 += s10;
+ s23 = *(uchar *) (s + 11);
+
+ s12 <<= 16;
+ f1 += s11;
+ s30 = *(uchar *) (s + 12);
+
+ s13 <<= 24;
+ f1 += s12;
+ s31 = *(uchar *) (s + 13);
+
+ f0 += s03;
+ f1 += s13;
+ s32 = *(uchar *) (s + 14);
+
+ s21 <<= 8;
+ f2 += s20;
+ s33 = *(uchar *) (s + 15);
+
+ s22 <<= 16;
+ f2 += s21;
+
+ s23 <<= 24;
+ f2 += s22;
+
+ s31 <<= 8;
+ f3 += s30;
+
+ s32 <<= 16;
+ f3 += s31;
+
+ s33 <<= 24;
+ f3 += s32;
+
+ f2 += s23;
+ f3 += s33;
+
+ *(uchar *) (out + 0) = f0;
+ f0 >>= 8;
+ *(uchar *) (out + 1) = f0;
+ f0 >>= 8;
+ *(uchar *) (out + 2) = f0;
+ f0 >>= 8;
+ *(uchar *) (out + 3) = f0;
+ f0 >>= 8;
+ f1 += f0;
+
+ *(uchar *) (out + 4) = f1;
+ f1 >>= 8;
+ *(uchar *) (out + 5) = f1;
+ f1 >>= 8;
+ *(uchar *) (out + 6) = f1;
+ f1 >>= 8;
+ *(uchar *) (out + 7) = f1;
+ f1 >>= 8;
+ f2 += f1;
+
+ *(uchar *) (out + 8) = f2;
+ f2 >>= 8;
+ *(uchar *) (out + 9) = f2;
+ f2 >>= 8;
+ *(uchar *) (out + 10) = f2;
+ f2 >>= 8;
+ *(uchar *) (out + 11) = f2;
+ f2 >>= 8;
+ f3 += f2;
+
+ *(uchar *) (out + 12) = f3;
+ f3 >>= 8;
+ *(uchar *) (out + 13) = f3;
+ f3 >>= 8;
+ *(uchar *) (out + 14) = f3;
+ f3 >>= 8;
+ *(uchar *) (out + 15) = f3;
+
+ return 0;
+}
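+
+/* Shape of the function above: full 16-byte blocks are absorbed by the
+ * multiplyaddatleast16bytes loop; a trailing 1..15 byte block goes through
+ * addatmost15bytes, where the lbelowN masks load exactly l bytes branch-free
+ * and insert the implicit 1 padding byte; the code after nomorebytes turns
+ * the floating-point accumulator back into integers, reduces it modulo
+ * 2^130 - 5 and adds the pad s = k + 16 to produce the 16-byte tag. */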
diff --git a/sdar/lib/nacl/generic/salsa20.c b/sdar/lib/nacl/generic/salsa20.c
new file mode 100644
index 0000000..5376ff6
--- /dev/null
+++ b/sdar/lib/nacl/generic/salsa20.c
@@ -0,0 +1,134 @@
+/*
+version 20080912
+D. J. Bernstein
+Public domain.
+*/
+
+#include <nacl.h>
+
+#define ROUNDS 20
+
+typedef unsigned int uint32;
+
+static uint32 rotate(uint32 u,int c)
+{
+ return (u << c) | (u >> (32 - c));
+}
+
+static uint32 load_littleendian(const unsigned char *x)
+{
+ return
+ (uint32) (x[0]) \
+ | (((uint32) (x[1])) << 8) \
+ | (((uint32) (x[2])) << 16) \
+ | (((uint32) (x[3])) << 24)
+ ;
+}
+
+static void store_littleendian(unsigned char *x,uint32 u)
+{
+ x[0] = u; u >>= 8;
+ x[1] = u; u >>= 8;
+ x[2] = u; u >>= 8;
+ x[3] = u;
+}
+
+int crypto_core_salsa20(
+ unsigned char *out,
+ const unsigned char *in,
+ const unsigned char *k,
+ const unsigned char *c
+)
+{
+ uint32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+ uint32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+ int i;
+
+ j0 = x0 = load_littleendian(c + 0);
+ j1 = x1 = load_littleendian(k + 0);
+ j2 = x2 = load_littleendian(k + 4);
+ j3 = x3 = load_littleendian(k + 8);
+ j4 = x4 = load_littleendian(k + 12);
+ j5 = x5 = load_littleendian(c + 4);
+ j6 = x6 = load_littleendian(in + 0);
+ j7 = x7 = load_littleendian(in + 4);
+ j8 = x8 = load_littleendian(in + 8);
+ j9 = x9 = load_littleendian(in + 12);
+ j10 = x10 = load_littleendian(c + 8);
+ j11 = x11 = load_littleendian(k + 16);
+ j12 = x12 = load_littleendian(k + 20);
+ j13 = x13 = load_littleendian(k + 24);
+ j14 = x14 = load_littleendian(k + 28);
+ j15 = x15 = load_littleendian(c + 12);
+
+ for (i = ROUNDS;i > 0;i -= 2) {
+ x4 ^= rotate( x0+x12, 7);
+ x8 ^= rotate( x4+ x0, 9);
+ x12 ^= rotate( x8+ x4,13);
+ x0 ^= rotate(x12+ x8,18);
+ x9 ^= rotate( x5+ x1, 7);
+ x13 ^= rotate( x9+ x5, 9);
+ x1 ^= rotate(x13+ x9,13);
+ x5 ^= rotate( x1+x13,18);
+ x14 ^= rotate(x10+ x6, 7);
+ x2 ^= rotate(x14+x10, 9);
+ x6 ^= rotate( x2+x14,13);
+ x10 ^= rotate( x6+ x2,18);
+ x3 ^= rotate(x15+x11, 7);
+ x7 ^= rotate( x3+x15, 9);
+ x11 ^= rotate( x7+ x3,13);
+ x15 ^= rotate(x11+ x7,18);
+ x1 ^= rotate( x0+ x3, 7);
+ x2 ^= rotate( x1+ x0, 9);
+ x3 ^= rotate( x2+ x1,13);
+ x0 ^= rotate( x3+ x2,18);
+ x6 ^= rotate( x5+ x4, 7);
+ x7 ^= rotate( x6+ x5, 9);
+ x4 ^= rotate( x7+ x6,13);
+ x5 ^= rotate( x4+ x7,18);
+ x11 ^= rotate(x10+ x9, 7);
+ x8 ^= rotate(x11+x10, 9);
+ x9 ^= rotate( x8+x11,13);
+ x10 ^= rotate( x9+ x8,18);
+ x12 ^= rotate(x15+x14, 7);
+ x13 ^= rotate(x12+x15, 9);
+ x14 ^= rotate(x13+x12,13);
+ x15 ^= rotate(x14+x13,18);
+ }
+
+ x0 += j0;
+ x1 += j1;
+ x2 += j2;
+ x3 += j3;
+ x4 += j4;
+ x5 += j5;
+ x6 += j6;
+ x7 += j7;
+ x8 += j8;
+ x9 += j9;
+ x10 += j10;
+ x11 += j11;
+ x12 += j12;
+ x13 += j13;
+ x14 += j14;
+ x15 += j15;
+
+ store_littleendian(out + 0,x0);
+ store_littleendian(out + 4,x1);
+ store_littleendian(out + 8,x2);
+ store_littleendian(out + 12,x3);
+ store_littleendian(out + 16,x4);
+ store_littleendian(out + 20,x5);
+ store_littleendian(out + 24,x6);
+ store_littleendian(out + 28,x7);
+ store_littleendian(out + 32,x8);
+ store_littleendian(out + 36,x9);
+ store_littleendian(out + 40,x10);
+ store_littleendian(out + 44,x11);
+ store_littleendian(out + 48,x12);
+ store_littleendian(out + 52,x13);
+ store_littleendian(out + 56,x14);
+ store_littleendian(out + 60,x15);
+
+ return 0;
+}
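+
+/* Each pass of the loop above is one Salsa20 double round: four
+ * quarter-rounds down the columns of the 4x4 word state followed by four
+ * across the rows, ROUNDS/2 = 10 times in all.  Adding the initial words
+ * j0..j15 back afterwards (the feedforward) is what makes the 64-byte core
+ * output a one-way function of the key rather than a public permutation. */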
diff --git a/sdar/lib/nacl/generic/salsa20_stream.c b/sdar/lib/nacl/generic/salsa20_stream.c
new file mode 100644
index 0000000..88e5f1e
--- /dev/null
+++ b/sdar/lib/nacl/generic/salsa20_stream.c
@@ -0,0 +1,88 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include <nacl.h>
+
+typedef unsigned int uint32;
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream_salsa20(
+ unsigned char *c,unsigned long long clen,
+ const unsigned char *n,
+ const unsigned char *k
+)
+{
+ unsigned char in[16];
+ unsigned char block[64];
+ int i;
+ unsigned int u;
+
+ if (!clen) return 0;
+
+ for (i = 0;i < 8;++i) in[i] = n[i];
+ for (i = 8;i < 16;++i) in[i] = 0;
+
+ while (clen >= 64) {
+ crypto_core_salsa20(c,in,k,sigma);
+
+ u = 1;
+ for (i = 8;i < 16;++i) {
+ u += (unsigned int) in[i];
+ in[i] = u;
+ u >>= 8;
+ }
+
+ clen -= 64;
+ c += 64;
+ }
+
+ if (clen) {
+ crypto_core_salsa20(block,in,k,sigma);
+ for (i = 0;i < clen;++i) c[i] = block[i];
+ }
+ return 0;
+}
+
+int crypto_stream_salsa20_xor(
+ unsigned char *c,
+ const unsigned char *m,unsigned long long mlen,
+ const unsigned char *n,
+ const unsigned char *k
+)
+{
+ unsigned char in[16];
+ unsigned char block[64];
+ int i;
+ unsigned int u;
+
+ if (!mlen) return 0;
+
+ for (i = 0;i < 8;++i) in[i] = n[i];
+ for (i = 8;i < 16;++i) in[i] = 0;
+
+ while (mlen >= 64) {
+ crypto_core_salsa20(block,in,k,sigma);
+ for (i = 0;i < 64;++i) c[i] = m[i] ^ block[i];
+
+ u = 1;
+ for (i = 8;i < 16;++i) {
+ u += (unsigned int) in[i];
+ in[i] = u;
+ u >>= 8;
+ }
+
+ mlen -= 64;
+ c += 64;
+ m += 64;
+ }
+
+ if (mlen) {
+ crypto_core_salsa20(block,in,k,sigma);
+ for (i = 0;i < mlen;++i) c[i] = m[i] ^ block[i];
+ }
+ return 0;
+}
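+
+/* Both routines run the core in counter mode: in[0..7] holds the caller's
+ * 8-byte nonce and in[8..15] a little-endian block counter incremented after
+ * every 64-byte block.  Reusing a (key, nonce) pair would reuse keystream, so
+ * nonces must be unique per key. */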
diff --git a/sdar/lib/nacl/hsalsa20.c b/sdar/lib/nacl/hsalsa20.c
new file mode 100644
index 0000000..42a831a
--- /dev/null
+++ b/sdar/lib/nacl/hsalsa20.c
@@ -0,0 +1,135 @@
+/*
+version 20080912
+D. J. Bernstein
+Public domain.
+*/
+
+#include <nacl.h>
+
+#define ROUNDS 20
+
+typedef unsigned int uint32;
+
+static uint32 rotate(uint32 u,int c)
+{
+ return (u << c) | (u >> (32 - c));
+}
+
+static uint32 load_littleendian(const unsigned char *x)
+{
+ return
+ (uint32) (x[0]) \
+ | (((uint32) (x[1])) << 8) \
+ | (((uint32) (x[2])) << 16) \
+ | (((uint32) (x[3])) << 24)
+ ;
+}
+
+static void store_littleendian(unsigned char *x,uint32 u)
+{
+ x[0] = u; u >>= 8;
+ x[1] = u; u >>= 8;
+ x[2] = u; u >>= 8;
+ x[3] = u;
+}
+
+int crypto_core_hsalsa20(
+ unsigned char *out,
+ const unsigned char *in,
+ const unsigned char *k,
+ const unsigned char *c
+)
+{
+ uint32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+ uint32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+ int i;
+
+ j0 = x0 = load_littleendian(c + 0);
+ j1 = x1 = load_littleendian(k + 0);
+ j2 = x2 = load_littleendian(k + 4);
+ j3 = x3 = load_littleendian(k + 8);
+ j4 = x4 = load_littleendian(k + 12);
+ j5 = x5 = load_littleendian(c + 4);
+ j6 = x6 = load_littleendian(in + 0);
+ j7 = x7 = load_littleendian(in + 4);
+ j8 = x8 = load_littleendian(in + 8);
+ j9 = x9 = load_littleendian(in + 12);
+ j10 = x10 = load_littleendian(c + 8);
+ j11 = x11 = load_littleendian(k + 16);
+ j12 = x12 = load_littleendian(k + 20);
+ j13 = x13 = load_littleendian(k + 24);
+ j14 = x14 = load_littleendian(k + 28);
+ j15 = x15 = load_littleendian(c + 12);
+
+ for (i = ROUNDS;i > 0;i -= 2) {
+ x4 ^= rotate( x0+x12, 7);
+ x8 ^= rotate( x4+ x0, 9);
+ x12 ^= rotate( x8+ x4,13);
+ x0 ^= rotate(x12+ x8,18);
+ x9 ^= rotate( x5+ x1, 7);
+ x13 ^= rotate( x9+ x5, 9);
+ x1 ^= rotate(x13+ x9,13);
+ x5 ^= rotate( x1+x13,18);
+ x14 ^= rotate(x10+ x6, 7);
+ x2 ^= rotate(x14+x10, 9);
+ x6 ^= rotate( x2+x14,13);
+ x10 ^= rotate( x6+ x2,18);
+ x3 ^= rotate(x15+x11, 7);
+ x7 ^= rotate( x3+x15, 9);
+ x11 ^= rotate( x7+ x3,13);
+ x15 ^= rotate(x11+ x7,18);
+ x1 ^= rotate( x0+ x3, 7);
+ x2 ^= rotate( x1+ x0, 9);
+ x3 ^= rotate( x2+ x1,13);
+ x0 ^= rotate( x3+ x2,18);
+ x6 ^= rotate( x5+ x4, 7);
+ x7 ^= rotate( x6+ x5, 9);
+ x4 ^= rotate( x7+ x6,13);
+ x5 ^= rotate( x4+ x7,18);
+ x11 ^= rotate(x10+ x9, 7);
+ x8 ^= rotate(x11+x10, 9);
+ x9 ^= rotate( x8+x11,13);
+ x10 ^= rotate( x9+ x8,18);
+ x12 ^= rotate(x15+x14, 7);
+ x13 ^= rotate(x12+x15, 9);
+ x14 ^= rotate(x13+x12,13);
+ x15 ^= rotate(x14+x13,18);
+ }
+
+ x0 += j0;
+ x1 += j1;
+ x2 += j2;
+ x3 += j3;
+ x4 += j4;
+ x5 += j5;
+ x6 += j6;
+ x7 += j7;
+ x8 += j8;
+ x9 += j9;
+ x10 += j10;
+ x11 += j11;
+ x12 += j12;
+ x13 += j13;
+ x14 += j14;
+ x15 += j15;
+
+ x0 -= load_littleendian(c + 0);
+ x5 -= load_littleendian(c + 4);
+ x10 -= load_littleendian(c + 8);
+ x15 -= load_littleendian(c + 12);
+ x6 -= load_littleendian(in + 0);
+ x7 -= load_littleendian(in + 4);
+ x8 -= load_littleendian(in + 8);
+ x9 -= load_littleendian(in + 12);
+
+ store_littleendian(out + 0,x0);
+ store_littleendian(out + 4,x5);
+ store_littleendian(out + 8,x10);
+ store_littleendian(out + 12,x15);
+ store_littleendian(out + 16,x6);
+ store_littleendian(out + 20,x7);
+ store_littleendian(out + 24,x8);
+ store_littleendian(out + 28,x9);
+
+ return 0;
+}
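+
+/* The only difference from crypto_core_salsa20 is the output stage: adding
+ * the initial state back and then subtracting the constant and input words
+ * again leaves the raw permutation output in words 0, 5, 10, 15 and 6..9.
+ * Those 32 bytes are the HSalsa20 result that xsalsa20.c uses as a subkey. */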
diff --git a/sdar/lib/nacl/nacl.h b/sdar/lib/nacl/nacl.h
new file mode 100644
index 0000000..c46b8fb
--- /dev/null
+++ b/sdar/lib/nacl/nacl.h
@@ -0,0 +1,70 @@
+#ifndef nacl_H
+#define nacl_H
+
+#define crypto_verify_16_BYTES 16
+
+#define crypto_core_salsa20_OUTPUTBYTES 64
+#define crypto_core_salsa20_INPUTBYTES 16
+#define crypto_core_salsa20_KEYBYTES 32
+#define crypto_core_salsa20_CONSTBYTES 16
+
+#define crypto_core_hsalsa20_OUTPUTBYTES 32
+#define crypto_core_hsalsa20_INPUTBYTES 16
+#define crypto_core_hsalsa20_KEYBYTES 32
+#define crypto_core_hsalsa20_CONSTBYTES 16
+
+#define crypto_stream_xsalsa20_KEYBYTES 32
+#define crypto_stream_xsalsa20_NONCEBYTES 24
+
+#define crypto_stream_salsa20_KEYBYTES 32
+#define crypto_stream_salsa20_NONCEBYTES 8
+
+#define crypto_onetimeauth_poly1305_BYTES 16
+#define crypto_onetimeauth_poly1305_KEYBYTES 32
+
+#define crypto_scalarmult_curve25519_BYTES 32
+#define crypto_scalarmult_curve25519_SCALARBYTES 32
+
+#define crypto_secretbox_KEYBYTES 32
+#define crypto_secretbox_NONCEBYTES 24
+#define crypto_secretbox_ZEROBYTES 32
+#define crypto_secretbox_BOXZEROBYTES 16
+
+#define crypto_box_PUBLICKEYBYTES 32
+#define crypto_box_SECRETKEYBYTES 32
+#define crypto_box_BEFORENMBYTES 32
+#define crypto_box_NONCEBYTES 24
+#define crypto_box_ZEROBYTES 32
+#define crypto_box_BOXZEROBYTES 16
+
+void randombytes(unsigned char *,unsigned long long);
+
+int crypto_verify_16(const unsigned char *,const unsigned char *);
+
+int crypto_core_salsa20(unsigned char *,const unsigned char *,const unsigned char *,const unsigned char *);
+
+int crypto_core_hsalsa20(unsigned char *,const unsigned char *,const unsigned char *,const unsigned char *);
+
+int crypto_stream_xsalsa20(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+int crypto_stream_xsalsa20_xor(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+
+int crypto_stream_salsa20(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+int crypto_stream_salsa20_xor(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+
+int crypto_onetimeauth_poly1305(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *);
+int crypto_onetimeauth_poly1305_verify(const unsigned char *,const unsigned char *,unsigned long long,const unsigned char *);
+
+int crypto_scalarmult_curve25519(unsigned char *,const unsigned char *,const unsigned char *);
+int crypto_scalarmult_curve25519_base(unsigned char *,const unsigned char *);
+
+int crypto_secretbox(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+int crypto_secretbox_open(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+
+int crypto_box(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *,const unsigned char *);
+int crypto_box_open(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *,const unsigned char *);
+int crypto_box_keypair(unsigned char *,unsigned char *);
+int crypto_box_beforenm(unsigned char *,const unsigned char *,const unsigned char *);
+int crypto_box_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+int crypto_box_open_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+
+#endif
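+
+/* Calling convention implied by the *_ZEROBYTES constants above (a sketch;
+ * the buffer names are placeholders):
+ *
+ *   unsigned char m[crypto_secretbox_ZEROBYTES + len];
+ *   unsigned char c[crypto_secretbox_ZEROBYTES + len];
+ *   memset(m, 0, crypto_secretbox_ZEROBYTES);
+ *   memcpy(m + crypto_secretbox_ZEROBYTES, msg, len);
+ *   crypto_secretbox(c, m, crypto_secretbox_ZEROBYTES + len, nonce, key);
+ *
+ * On success c starts with crypto_secretbox_BOXZEROBYTES zero bytes, then the
+ * 16-byte authenticator, then the ciphertext; crypto_secretbox_open expects
+ * that same layout and writes crypto_secretbox_ZEROBYTES zero bytes ahead of
+ * the recovered plaintext. */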
diff --git a/sdar/lib/nacl/poly1305_verify.c b/sdar/lib/nacl/poly1305_verify.c
new file mode 100644
index 0000000..db2bf0a
--- /dev/null
+++ b/sdar/lib/nacl/poly1305_verify.c
@@ -0,0 +1,8 @@
+#include <nacl.h>
+
+int crypto_onetimeauth_poly1305_verify(const unsigned char *h,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
+{
+ unsigned char correct[16];
+ crypto_onetimeauth_poly1305(correct,in,inlen,k);
+ return crypto_verify_16(h,correct);
+}
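+
+/* Verification recomputes the tag over (in, inlen) with the same key and
+ * compares it to h via crypto_verify_16, so the result is 0 on match and -1
+ * otherwise, with no timing dependence on where the tags differ. */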
diff --git a/sdar/lib/nacl/randombytes.c b/sdar/lib/nacl/randombytes.c
new file mode 100644
index 0000000..f5f7ef1
--- /dev/null
+++ b/sdar/lib/nacl/randombytes.c
@@ -0,0 +1,33 @@
+#include <nacl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static int fd = -1;
+
+void randombytes(unsigned char *x,unsigned long long xlen)
+{
+ int i;
+
+ if (fd == -1) {
+ for (;;) {
+ fd = open("/dev/urandom",O_RDONLY);
+ if (fd != -1) break;
+ sleep(1);
+ }
+ }
+
+ while (xlen > 0) {
+ if (xlen < 1048576) i = xlen; else i = 1048576;
+
+ i = read(fd,x,i);
+ if (i < 1) {
+ sleep(1);
+ continue;
+ }
+
+ x += i;
+ xlen -= i;
+ }
+}
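+
+/* randombytes blocks until the buffer is filled: it retries open() on
+ * /dev/urandom once a second until it succeeds, reads at most 1 MiB per
+ * read() call, and retries short or failed reads the same way.  There is no
+ * error return, so callers may assume the buffer is always fully filled. */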
diff --git a/sdar/lib/nacl/secretbox.c b/sdar/lib/nacl/secretbox.c
new file mode 100644
index 0000000..ab1e526
--- /dev/null
+++ b/sdar/lib/nacl/secretbox.c
@@ -0,0 +1,33 @@
+#include <nacl.h>
+
+int crypto_secretbox(
+ unsigned char *c,
+ const unsigned char *m,unsigned long long mlen,
+ const unsigned char *n,
+ const unsigned char *k
+)
+{
+ int i;
+ if (mlen < 32) return -1;
+ crypto_stream_xsalsa20_xor(c,m,mlen,n,k);
+ crypto_onetimeauth_poly1305(c + 16,c + 32,mlen - 32,c);
+ for (i = 0;i < 16;++i) c[i] = 0;
+ return 0;
+}
+
+int crypto_secretbox_open(
+ unsigned char *m,
+ const unsigned char *c,unsigned long long clen,
+ const unsigned char *n,
+ const unsigned char *k
+)
+{
+ int i;
+ unsigned char subkey[32];
+ if (clen < 32) return -1;
+ crypto_stream_xsalsa20(subkey,32,n,k);
+ if (crypto_onetimeauth_poly1305_verify(c + 16,c + 32,clen - 32,subkey) != 0) return -1;
+ crypto_stream_xsalsa20_xor(m,c,clen,n,k);
+ for (i = 0;i < 32;++i) m[i] = 0;
+ return 0;
+}
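+
+/* Buffer layout used by both functions (mlen and clen include the padding):
+ *   m = 32 zero bytes || plaintext
+ *   c = 16 zero bytes || 16-byte Poly1305 tag || ciphertext
+ * The first 32 bytes of the XSalsa20 keystream, which encrypt the zero
+ * padding, double as the one-time Poly1305 key; crypto_secretbox_open
+ * regenerates them into subkey before checking the tag. */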
diff --git a/sdar/lib/nacl/verify_16.c b/sdar/lib/nacl/verify_16.c
new file mode 100644
index 0000000..09db9f5
--- /dev/null
+++ b/sdar/lib/nacl/verify_16.c
@@ -0,0 +1,24 @@
+#include <nacl.h>
+
+int crypto_verify_16(const unsigned char *x,const unsigned char *y)
+{
+ unsigned int differentbits = 0;
+#define F(i) differentbits |= x[i] ^ y[i];
+ F(0)
+ F(1)
+ F(2)
+ F(3)
+ F(4)
+ F(5)
+ F(6)
+ F(7)
+ F(8)
+ F(9)
+ F(10)
+ F(11)
+ F(12)
+ F(13)
+ F(14)
+ F(15)
+ return (1 & ((differentbits - 1) >> 8)) - 1;
+}
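+
+/* differentbits is 0 iff all sixteen bytes match.  (differentbits - 1) >> 8
+ * then has its low bit set only in the equal case, so the function returns 0
+ * for equal inputs and -1 otherwise, without any data-dependent branch. */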
diff --git a/sdar/lib/nacl/xsalsa20.c b/sdar/lib/nacl/xsalsa20.c
new file mode 100644
index 0000000..541fe04
--- /dev/null
+++ b/sdar/lib/nacl/xsalsa20.c
@@ -0,0 +1,32 @@
+/*
+version 20080914
+D. J. Bernstein
+Public domain.
+*/
+
+#include <nacl.h>
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream_xsalsa20(
+ unsigned char *c,unsigned long long clen,
+ const unsigned char *n,
+ const unsigned char *k
+)
+{
+ unsigned char subkey[32];
+ crypto_core_hsalsa20(subkey,n,k,sigma);
+ return crypto_stream_salsa20(c,clen,n + 16,subkey);
+}
+
+int crypto_stream_xsalsa20_xor(
+ unsigned char *c,
+ const unsigned char *m,unsigned long long mlen,
+ const unsigned char *n,
+ const unsigned char *k
+)
+{
+ unsigned char subkey[32];
+ crypto_core_hsalsa20(subkey,n,k,sigma);
+ return crypto_stream_salsa20_xor(c,m,mlen,n + 16,subkey);
+}
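+
+/* XSalsa20 extends the 8-byte Salsa20 nonce to 24 bytes: HSalsa20 mixes the
+ * key with the first 16 nonce bytes into a 32-byte subkey, and the remaining
+ * 8 bytes (n + 16) serve as the ordinary Salsa20 nonce under that subkey. */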
diff --git a/sdar/slice.c b/sdar/slice.c
index 5dc7c2c..6febae9 100644
--- a/sdar/slice.c
+++ b/sdar/slice.c
@@ -4,7 +4,7 @@
#include <stdlib.h>
#include <string.h>
#include <blake3.h>
-#include <crypto_box.h>
+#include <nacl.h>
#include <lz4.h>
/* module wrapping buffer allocation, crypto,
diff --git a/sdar/stash.c b/sdar/stash.c
index 6216566..a5d27b9 100644
--- a/sdar/stash.c
+++ b/sdar/stash.c
@@ -5,7 +5,7 @@
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
-#include <crypto_box.h>
+#include <nacl.h>
MAKESURE(Segidsz_is_ok, Segidsz <= Keysz);