; AES/Rijndael Implementation for Z80
;
; Copyright (c) 2013, Michael Borisov
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met: 
; 
; 1. Redistributions of source code must retain the above copyright notice, this
;    list of conditions and the following disclaimer. 
; 2. Redistributions in binary form must reproduce the above copyright notice,
;    this list of conditions and the following disclaimer in the documentation
;    and/or other materials provided with the distribution. 
; 
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
; 
; -----------------------------------------------------------
;
; Usage:
;
; For encryption:
; 1) copy your key into the first 16 bytes of key[] buffer
; 2) call key_expansion (no parameters)
; 3) copy your block of plaintext (16 bytes) into the state[] buffer
; 4) call aes_encrypt (no parameters). The ciphertext will
;    replace plaintext in the state[] buffer
; 5) repeat steps 3-4 as necessary. The expanded key will be preserved in key[] buffer
;
; For decryption:
; 1) copy your key into the first 16 bytes of key[] buffer
; 2) call key_expansion (no parameters)
; 3) copy your block of ciphertext (16 bytes) into the state[] buffer
; 4) call aes_decrypt (no parameters). The decrypted plaintext
;    will replace ciphertext in the state[] buffer
; 5) repeat steps 3-4 as necessary. The expanded key will be preserved in key[] buffer
;
; Encryption and decryption routines can be called in an arbitrary order
; if the same key is used. No state except the expanded key is maintained by the library.
;
; Environment:
; - The library uses very little stack (calls nest only a few levels deep)
; - Interrupts are permitted. The library never disables interrupts
; - BASIC-compatible: IY register is never used or modified by the library
; - BASIC-compatible: HL' register is never used or modified by the library
; - s_box and si_box must each start on a 256-byte page boundary (each table is
;   256 bytes long and is indexed through the low address byte only)
; - state[] and key[] buffers must not cross a 256-byte page boundary
; - no other restrictions on code locations. There is no self-modifying code
;   (ROM-compatible)
;
; Code size reduction opportunities:
; - si_box can be easily computed from s_box. This way, ROM/file size can be saved.
; - If decryption is not used, si_box can be removed, as well as decryption-specific
;   subroutines.
; - If encryption is not used, s_box must be preserved for key expansion. However,
;   a slower key-expansion routine can be written that uses si_box instead of s_box.
;   encryption-specific subroutines can be removed.
;
; Timing budget:
; key_expansion              11614  t-states
; aes_encrypt                ~32150 t-states (slightly depends on data and key due
;                                             to the xtime subroutine)
; aes_decrypt                ~39818 t-states (slightly depends on data and key due
;                                             to the xtime subroutine)




	device	zxspectrum48	; assembler target/memory model (DEVICE looks like an sjasmplus directive - TODO confirm the intended assembler)
	org	#8000		; the origin has a zero low byte, so s_box (emitted first) starts on a 256-byte page boundary


; Forward Rijndael S-box: 256 entries, entry n is the substitute for byte n.
; byte_sub and key_expansion address it as (high s_box):(data byte), i.e. they
; never add the low byte of the table address, so the table has to start on a
; 256-byte page boundary.
s_box:
 db 063h ,07ch ,077h ,07bh ,0f2h ,06bh ,06fh ,0c5h ,030h ,001h ,067h ,02bh ,0feh ,0d7h ,0abh ,076h
 db 0cah ,082h ,0c9h ,07dh ,0fah ,059h ,047h ,0f0h ,0adh ,0d4h ,0a2h ,0afh ,09ch ,0a4h ,072h ,0c0h
 db 0b7h ,0fdh ,093h ,026h ,036h ,03fh ,0f7h ,0cch ,034h ,0a5h ,0e5h ,0f1h ,071h ,0d8h ,031h ,015h
 db 004h ,0c7h ,023h ,0c3h ,018h ,096h ,005h ,09ah ,007h ,012h ,080h ,0e2h ,0ebh ,027h ,0b2h ,075h
 db 009h ,083h ,02ch ,01ah ,01bh ,06eh ,05ah ,0a0h ,052h ,03bh ,0d6h ,0b3h ,029h ,0e3h ,02fh ,084h
 db 053h ,0d1h ,000h ,0edh ,020h ,0fch ,0b1h ,05bh ,06ah ,0cbh ,0beh ,039h ,04ah ,04ch ,058h ,0cfh
 db 0d0h ,0efh ,0aah ,0fbh ,043h ,04dh ,033h ,085h ,045h ,0f9h ,002h ,07fh ,050h ,03ch ,09fh ,0a8h
 db 051h ,0a3h ,040h ,08fh ,092h ,09dh ,038h ,0f5h ,0bch ,0b6h ,0dah ,021h ,010h ,0ffh ,0f3h ,0d2h
 db 0cdh ,00ch ,013h ,0ech ,05fh ,097h ,044h ,017h ,0c4h ,0a7h ,07eh ,03dh ,064h ,05dh ,019h ,073h
 db 060h ,081h ,04fh ,0dch ,022h ,02ah ,090h ,088h ,046h ,0eeh ,0b8h ,014h ,0deh ,05eh ,00bh ,0dbh
 db 0e0h ,032h ,03ah ,00ah ,049h ,006h ,024h ,05ch ,0c2h ,0d3h ,0ach ,062h ,091h ,095h ,0e4h ,079h
 db 0e7h ,0c8h ,037h ,06dh ,08dh ,0d5h ,04eh ,0a9h ,06ch ,056h ,0f4h ,0eah ,065h ,07ah ,0aeh ,008h
 db 0bah ,078h ,025h ,02eh ,01ch ,0a6h ,0b4h ,0c6h ,0e8h ,0ddh ,074h ,01fh ,04bh ,0bdh ,08bh ,08ah
 db 070h ,03eh ,0b5h ,066h ,048h ,003h ,0f6h ,00eh ,061h ,035h ,057h ,0b9h ,086h ,0c1h ,01dh ,09eh
 db 0e1h ,0f8h ,098h ,011h ,069h ,0d9h ,08eh ,094h ,09bh ,01eh ,087h ,0e9h ,0ceh ,055h ,028h ,0dfh
 db 08ch ,0a1h ,089h ,00dh ,0bfh ,0e6h ,042h ,068h ,041h ,099h ,02dh ,00fh ,0b0h ,054h ,0bbh ,016h

; Inverse Rijndael S-box: 256 entries, entry n is the byte whose forward
; substitute is n. inv_byte_sub addresses it as (high si_box):(data byte), so
; this table also has to start on a 256-byte page boundary.
si_box:
 db 052h ,009h ,06ah ,0d5h ,030h ,036h ,0a5h ,038h ,0bfh ,040h ,0a3h ,09eh ,081h ,0f3h ,0d7h ,0fbh
 db 07ch ,0e3h ,039h ,082h ,09bh ,02fh ,0ffh ,087h ,034h ,08eh ,043h ,044h ,0c4h ,0deh ,0e9h ,0cbh
 db 054h ,07bh ,094h ,032h ,0a6h ,0c2h ,023h ,03dh ,0eeh ,04ch ,095h ,00bh ,042h ,0fah ,0c3h ,04eh
 db 008h ,02eh ,0a1h ,066h ,028h ,0d9h ,024h ,0b2h ,076h ,05bh ,0a2h ,049h ,06dh ,08bh ,0d1h ,025h
 db 072h ,0f8h ,0f6h ,064h ,086h ,068h ,098h ,016h ,0d4h ,0a4h ,05ch ,0cch ,05dh ,065h ,0b6h ,092h
 db 06ch ,070h ,048h ,050h ,0fdh ,0edh ,0b9h ,0dah ,05eh ,015h ,046h ,057h ,0a7h ,08dh ,09dh ,084h
 db 090h ,0d8h ,0abh ,000h ,08ch ,0bch ,0d3h ,00ah ,0f7h ,0e4h ,058h ,005h ,0b8h ,0b3h ,045h ,006h
 db 0d0h ,02ch ,01eh ,08fh ,0cah ,03fh ,00fh ,002h ,0c1h ,0afh ,0bdh ,003h ,001h ,013h ,08ah ,06bh
 db 03ah ,091h ,011h ,041h ,04fh ,067h ,0dch ,0eah ,097h ,0f2h ,0cfh ,0ceh ,0f0h ,0b4h ,0e6h ,073h
 db 096h ,0ach ,074h ,022h ,0e7h ,0adh ,035h ,085h ,0e2h ,0f9h ,037h ,0e8h ,01ch ,075h ,0dfh ,06eh
 db 047h ,0f1h ,01ah ,071h ,01dh ,029h ,0c5h ,089h ,06fh ,0b7h ,062h ,00eh ,0aah ,018h ,0beh ,01bh
 db 0fch ,056h ,03eh ,04bh ,0c6h ,0d2h ,079h ,020h ,09ah ,0dbh ,0c0h ,0feh ,078h ,0cdh ,05ah ,0f4h
 db 01fh ,0ddh ,0a8h ,033h ,088h ,007h ,0c7h ,031h ,0b1h ,012h ,010h ,059h ,027h ,080h ,0ech ,05fh
 db 060h ,051h ,07fh ,0a9h ,019h ,0b5h ,04ah ,00dh ,02dh ,0e5h ,07ah ,09fh ,093h ,0c9h ,09ch ,0efh
 db 0a0h ,0e0h ,03bh ,04dh ,0aeh ,02ah ,0f5h ,0b0h ,0c8h ,0ebh ,0bbh ,03ch ,083h ,053h ,099h ,061h
 db 017h ,02bh ,004h ,07eh ,0bah ,077h ,0d6h ,026h ,0e1h ,069h ,014h ,063h ,055h ,021h ,00ch ,07dh

;---------------------------
; Data segment
; state: the 16-byte block that aes_encrypt / aes_decrypt transform in place
; key:   176 bytes = 11 round keys of 16 bytes; the caller supplies the first
;        16 bytes and key_expansion fills in the remaining 160
state:	ds	16
key:	ds	176

; Build-time layout checks. The routines step only the low byte of their
; pointers and form table addresses as (high table):(data byte), so a bad
; placement would silently produce wrong results instead of failing to build.
	assert	low s_box == 0			;s_box starts on a page boundary
	assert	low si_box == 0			;si_box starts on a page boundary
	assert	high state == high (state+15)	;state[] stays within one page
	assert	high key == high (key+175)	;key[] stays within one page




; ---- SubBytes: substitute every byte of the state through s_box
; in/out: state buffer (16 bytes, replaced in place)
; uses hl, de, b, a
byte_sub:
	ld	d,high s_box	;d = page of the forward table
; Shared body, entered with d = page of the 256-byte table to apply
bsub_apply:
	ld	hl,state
	ld	b,16		;one pass per state byte
bsub_next:
	ld	e,(hl)		;de = table page : current state byte
	ld	a,(de)		;fetch the replacement
	ld	(hl),a		;and write it back over the original
	inc	l		;only l is stepped: state stays within one page
	djnz	bsub_next
	ret

; ---- InvSubBytes: substitute every byte of the state through si_box
; in/out: state buffer (16 bytes, replaced in place)
; uses hl, de, b, a
inv_byte_sub:
	ld	d,high si_box	;d = page of the inverse table
	jr	bsub_apply	;the rest is identical to byte_sub


; ---- shift row (AES ShiftRows)
; operates on state buffer. The block is stored column by column, so row r
; consists of bytes state[r], state[r+4], state[r+8], state[r+12].
; uses a, b
shift_row:
; row 1 (bytes 1,5,9,13): rotate left by one position
	ld	a,(state+1)
	ld	b,a		;keep s[1], it wraps round to s[13]
	ld	a,(state+5)
	ld	(state+1),a
	ld	a,(state+9)
	ld	(state+5),a
	ld	a,(state+13)
	ld	(state+9),a
	ld	a,b
	ld	(state+13),a
; row 3 (bytes 3,7,11,15): rotate left by three positions (= right by one)
	ld	a,(state+15)
	ld	b,a		;keep s[15], it wraps round to s[3]
	ld	a,(state+11)
	ld	(state+15),a
	ld	a,(state+7)
	ld	(state+11),a
	ld	a,(state+3)
	ld	(state+7),a
	ld	a,b
	ld	(state+3),a
	jr	row_2		;row 2 is rotated by two either way, so that code is shared

; ---- inverse shift row (AES InvShiftRows)
; operates on state buffer. The block is stored column by column, so row r
; consists of bytes state[r], state[r+4], state[r+8], state[r+12].
; There is no ret here: execution continues into row_2 right below.
; uses a, b
inv_shift_row:
; row 1 (bytes 1,5,9,13): rotate right by one position
	ld	a,(state+13)
	ld	b,a		;keep s[13], it wraps round to s[1]
	ld	a,(state+9)
	ld	(state+13),a
	ld	a,(state+5)
	ld	(state+9),a
	ld	a,(state+1)
	ld	(state+5),a
	ld	a,b
	ld	(state+1),a
; row 3 (bytes 3,7,11,15): rotate right by three positions (= left by one)
	ld	a,(state+3)
	ld	b,a		;keep s[3], it wraps round to s[15]
	ld	a,(state+7)
	ld	(state+3),a
	ld	a,(state+11)
	ld	(state+7),a
	ld	a,(state+15)
	ld	(state+11),a
	ld	a,b
	ld	(state+15),a
; ---- shift/inverse shift row 2 (common for shift_row and inv_shift_row)
; Row 2 (bytes 2,6,10,14) is rotated by two positions, which is the same in
; both directions and amounts to two byte swaps.
; Reached by a jump from shift_row and by falling through from inv_shift_row;
; its ret returns to the caller of whichever of the two was called.
; uses a, b
row_2:
; row 2: exchange s[2] and s[10]
	ld	a,(state+10)
	ld	b,a
	ld	a,(state+2)
	ld	(state+10),a
	ld	a,b
	ld	(state+2),a
; row 2: exchange s[6] and s[14]
	ld	a,(state+14)
	ld	b,a
	ld	a,(state+6)
	ld	(state+14),a
	ld	a,b
	ld	(state+6),a
	ret

;----- xtime
; Doubles a byte: shift left by one and, when bit 7 was set, fold the result
; back with the constant 1Bh.
;argument in A, result in A
; Changes only A and the flags. The second xor is skipped when bit 7 was
; clear, so the run time depends on the argument.
xtime:	rlca			;rotate left: old bit 7 goes to both carry and bit 0
	ret	nc		;bit 7 was clear: the rotate is already the plain shift
	xor	1ah ;must xor by 1B, but bit 0 is already set due to rlca
	ret


;----- key expansion
; Expands the 128-bit key in the first 16 bytes of the key buffer into the
; round keys for 10 rounds: 160 bytes are generated into key[16..175], which
; fills the whole 176-byte buffer.
; Each pass of kex0 builds one 16-byte round key ("next") from the one right
; before it ("prev" = next-16). Only the low address byte is stepped, which
; relies on key[] not crossing a 256-byte page boundary.
; uses hl, de, bc, a, xh (xh holds the round constant)
key_expansion:
	ld	hl,key+16	;hl = next: the first round key to generate
	ld	xh,01h		;round constant for the first pass
kex0:	ld	bc,16
	ld	d,h
	ld	a,l
	sub	c
	ld	e,a		;de = next-16 = prev
	ex	de,hl		;hl = prev, de = next
	ldir			;next[0..15] = prev[0..15]; leaves hl = next, de = next+16
	ld	e,l		;d=h anyway, so now de = next
	dec	l
	dec	l
	dec	l		;after this hl = next-3 = prev+13
	ld	b,high s_box	;bc = s_box page : byte in c
	ld	c,(hl)          ;prev[13]
	ld	a,(bc)		;s-box substitution
	xor	xh		;rcon
	ex	de,hl		;hl = next, de = prev+13
	xor	(hl)
	ld	(hl),a		;next[0] = prev[0] ^ s_box[prev[13]] ^ rcon
	inc	l
	inc	e
	ex	de,hl		;hl = prev+14, de = next+1
	ld	c,(hl)		;prev[14]
	ld	a,(bc)		;s-box substitution
	ex	de,hl		;hl = next+1, de = prev+14
	xor	(hl)
	ld	(hl),a		;next[1] = prev[1] ^ s_box[prev[14]]
	inc	l
	inc	e
	ex	de,hl		;hl = prev+15, de = next+2
	ld	c,(hl)		;prev[15]
	ld	a,(bc)		;s-box substitution
	ex	de,hl		;hl = next+2, de = prev+15
	xor	(hl)
	ld	(hl),a		;next[2] = prev[2] ^ s_box[prev[15]]
	inc	l
	dec	e
	dec	e
	dec	e		;de = prev+12
	ex	de,hl		;hl = prev+12, de = next+3
	ld	c,(hl)		;prev[12]
	ld	a,(bc)		;s-box substitution
	ex	de,hl		;hl = next+3, de = prev+12
	xor	(hl)
	ld	(hl),a		;next[3] = prev[3] ^ s_box[prev[12]]

	inc	l		;hl = next+4
	ld	a,l
	sub	4
	ld	e,a		;de = next
	ld	b,12		;remaining 12 bytes: next[i+4] ^= next[i]
kex1:	ld	a,(de)
	xor	(hl)
	ld	(hl),a
	inc	e
	inc	l
	djnz	kex1
; after the iteration, hl = next+16, i.e. the following round key
	ld	a,xh
	call	xtime		;next round constant = previous one doubled
	ld	xh,a
	cp	6ch		;01h doubled ten times gives 6ch: all 10 round keys are done
	jr	nz,kex0
	ret





;----- mix_column
; operates on state buffer, treating it as four consecutive 4-byte columns
; a[0..3] and replacing each column in place with b[0..3], where
;   b[i] = a[i] ^ xtime(a[i] ^ a[(i+1) mod 4]) ^ temp
; Only l is stepped, so state must not cross a 256-byte page boundary.
; uses hl, de, bc, a
mix_column:
	ld	hl,state
	ld	b,4		;column counter
; temp - c
; tm - e
; tm2 - d
; temp = a[0] ^ a[1] ^ a[2] ^ a[3]
; tm2 = a[0]
mic0:	ld	d,(hl)		;save tm2 = a[0]
	ld	a,d		;a[0]
	inc	l
	xor	(hl)		;a[0] ^ a[1]
	ld	e,a		;a[0] ^ a[1]
	inc	l
	xor	(hl)		;a[0] ^ a[1] ^ a[2]
	inc	l
	xor	(hl)
	ld	c,a		;temp = a[0] ^ a[1] ^ a[2] ^ a[3]
; restore hl pointing to a[0]
	dec	l
	dec	l
	dec	l
;b[0] = a[0] ^ xtime(a[0] ^ a[1]) ^ temp
	ld	a,e		;a[0] ^ a[1]
	call	xtime
	xor	c		;xtime(tm) ^ temp
	xor	d		;d = saved a[0]
	ld	(hl),a		;set b[0]; a[0] itself is still available in d
	inc	l
;b[1] = a[1] ^ xtime(a[1] ^ a[2]) ^ temp
	ld	a,(hl)		;a[1]
	inc	l
	xor	(hl)		;a[1] ^ a[2]
	call	xtime
	xor	c		;xtime(tm) ^ temp
	dec	l
	xor	(hl)
	ld	(hl),a		;set b[1]
	inc	l
;b[2] = a[2] ^ xtime(a[2] ^ a[3]) ^ temp
	ld	a,(hl)		;a[2]
	inc	l
	xor	(hl)		;a[2] ^ a[3]
	call	xtime
	xor	c		;xtime(tm) ^ temp
	dec	l
	xor	(hl)
	ld	(hl),a		;set b[2]
	inc	l
; b[3] = a[3] ^ xtime(a[3] ^ a[0]) ^ temp
	ld	a,(hl)		;a[3]
	xor	d		;a[3] ^ a[0], d - saved a[0]
	call	xtime
	xor	c		;xtime(tm) ^ temp
	xor	(hl)
	ld	(hl),a		;set b[3]
; loop for all 4 columns
	inc	l		;hl = first byte of the next column
	djnz	mic0
	ret

;----- inverse mix_column
; operates on state buffer, treating it as four consecutive 4-byte columns
; a[0..3] and replacing each column in place with b[0..3].
; In the formulas below: tmp1 = e, tmp2 = d, tmp3 = c (final value).
; Only l is stepped, so state must not cross a 256-byte page boundary.
; uses hl, de, bc, xl, a (xl is the column counter)
inv_mix_column:
	ld	hl,state
	ld	xl,4
; c = a[0] ^ a[1] ^ a[2] ^ a[3]
; e = xtime(a[0] ^ a[2])
; d = xtime(a[1] ^ a[3])
imc0:	ld	e,(hl)		;e = a[0]
	ld	b,e		;save for b[3] computation
	inc	l
	ld	d,(hl)		;d = a[1]
	inc	l
	ld	a,(hl)		;a[2]
	xor	e
	ld	c,a		;c = a[0] ^ a[2]
	call	xtime
	ld	e,a		;e = xtime(a[0] ^ a[2])
	inc	l
	ld	a,(hl)		;a[3]
	xor	d
	ld	d,a		;d = a[1] ^ a[3]
	xor	c
	ld	c,a		;c = a[0] ^ a[1] ^ a[2] ^ a[3]
	ld	a,d
	call	xtime
	ld	d,a		;d = xtime(a[1] ^ a[3])
; c = xtime(xtime(e ^ d)) ^ c
	xor	e		;a = d ^ e
	call	xtime
	call	xtime
	xor	c
	ld	c,a		;c = tmp3

	dec	l
	dec	l		;hl was at a[3], now points to a[1]

; b[0] = a[0] ^ xtime(a[0] ^ a[1] ^ tmp1) ^ tmp3
	ld	a,(hl)		;a[1]
	xor	b		;a[0] ^ a[1]
	xor	e
	call	xtime
	xor	c
	dec	l
	xor	(hl)
	ld	(hl),a		;set b[0]; a[0] itself is still available in b
; b[1] = a[1] ^ xtime(a[1]^a[2]^tmp2) ^ tmp3
	inc	l
	ld	a,(hl)		;a[1]
	inc	l
	xor	(hl)		;a[2]
	xor	d		;tmp2
	call	xtime
	xor	c
	dec	l
	xor	(hl)
	ld	(hl),a		;set b[1]
; b[2] = a[2] ^ xtime(a[2]^a[3]^tmp1) ^ tmp3
	inc	l
	ld	a,(hl)		;a[2]
	inc	l
	xor	(hl)		;a[3]
	xor	e
	call	xtime
	xor	c
	dec	l
	xor	(hl)
	ld	(hl),a		;set b[2]
; b[3] = a[3] ^ xtime(a[3]^a[0]^tmp2) ^ tmp3
	inc	l
	ld	a,(hl)		;a[3]
	xor	b		;a[0]
	xor	d
	call	xtime
	xor	c
	xor	(hl)
	ld	(hl),a		;set b[3]
; loop for all 4 columns
	inc	l		;hl = first byte of the next column
	dec	xl
	jr	nz,imc0
	ret





;------ AES Rijndael Encryption
; Encrypts one 16-byte block in place.
; in/out: state (1 block - 16 bytes)
; in: expanded key (176 bytes). Use key_expansion to prepare the key
; uses bc,de,hl,af,af',ix
; A' carries the low address byte of the next round key from one
; add_round_key call to the next; xh counts the nine full rounds.
aes_encrypt:
	ld	a,low key
	ex	af,af'		;A' = low byte of round key 0
	ld	xh,9		;nine full rounds follow the initial key addition
	call	add_round_key	;initial key addition
encry_round:
	call	byte_sub
	call	shift_row
	call	mix_column
	call	add_round_key
	dec	xh
	jr	nz,encry_round
; final round has no mix_column; its key addition is the routine that
; follows, entered by falling through, whose ret ends aes_encrypt
	call	byte_sub
	call	shift_row
; ---- Add round key
; XORs the 16 state bytes with the 16 key bytes that start at the position
; held in A', walking upwards, then leaves A' at the following round key.
; operates on state buffer
; uses de,hl, b, updates af' (AF itself is swapped out on entry and swapped
; back before returning)
; state and key buffers must not cross 256-byte boundary
add_round_key:
	ld	de,state
	ld	b,16		;bytes per round key
	ex	af,af'		;a = low byte of the round key to apply
	ld	l,a
	ld	h,high key	;hl = first byte of that round key
ark_next:
	ld	a,(de)
	xor	(hl)
	ld	(de),a		;state[i] ^= round_key[i]
	inc	e
	inc	l
	djnz	ark_next
	ld	a,l		;hl has moved on to the next round key
	ex	af,af'		;remember its low byte in A'
	ret


;----- AES Rijndael Decryption
; Decrypts one 16-byte block in place.
;in/out: state (1 block - 16 bytes)
;in: expanded key (176 bytes). Use key_expansion to prepare the key
;uses bc,de,hl,af,af',ix
; The round keys are consumed from the end of the key buffer towards its
; start: A' carries the low address byte of the last byte of the next round
; key to apply; xh counts the nine full rounds.
aes_decrypt:
	ld	a,low key+175
	ex	af,af'		;A' = low byte of the last byte of the expanded key
	ld	xh,9		;nine full rounds follow the first, shorter one
	call	add_round_key_back
	call	inv_shift_row
	call	inv_byte_sub
decry_round:
	call	add_round_key_back
	call	inv_mix_column
	call	inv_shift_row
	call	inv_byte_sub
	dec	xh
	jr	nz,decry_round
; the last key addition is the routine that follows, entered by falling
; through, whose ret ends aes_decrypt
; ---- Add round key backwards
; XORs the 16 state bytes with 16 key bytes, walking downwards from
; state[15] and from the key byte whose position is held in A', then leaves
; A' at the last byte of the preceding round key.
; operates on state buffer
; uses de,hl,b, updates af' (AF itself is swapped out on entry and swapped
; back before returning)
add_round_key_back:
	ld	de,state+15
	ld	b,16		;bytes per round key
	ex	af,af'		;a = low byte of the last byte of the round key
	ld	l,a
	ld	h,high key	;hl = last byte of that round key
arkb_next:
	ld	a,(de)
	xor	(hl)
	ld	(de),a		;state[i] ^= round_key[i], i = 15 down to 0
	dec	e
	dec	l
	djnz	arkb_next
	ld	a,l		;hl has moved back to the preceding round key
	ex	af,af'		;remember its low byte in A'
	ret


	; Write the assembled image to aes.bin, starting at s_box.
	; NOTE(review): the third operand is presumably a byte count fixed at
	; 0x800 rather than the real size of the image, so the file may carry
	; padding after the code, or be cut short if tables, buffers and code
	; ever grow past 2 KiB - confirm this is intended.
	savebin "aes.bin",s_box,0x800
