view modules/x86.tp @ 350:a3b06d53bcb9

Make il and x86 modules cope with dict hash instead of dict linear for the program definition
author Michael Pavone <pavone@retrodev.com>
date Fri, 10 Apr 2015 01:19:46 -0700
parents a840e9a068a2
children 0b4d4f06bf91
line wrap: on
line source
{
	regnames <- #["rax" "rcx" "rdx" "rbx" "rsp" "rbp" "rsi" "rdi" "r8" "r9" "r10" "r11" "r12" "r13" "r14" "r15"]
	uppernames <- #["ah" "ch" "dh" "bh"]
	ireg <- :regnum {
		#{
			num <- { regnum }
			reg <- { regnum and 7u8}
			string <- { regnames get: regnum }
			rm <- :tail { reg or 0xC0u8 | tail }
			validforSize? <- :size { true }
			isInteger? <- { false }
			isString? <- { false }
			register? <- { true }
			label? <- { false }
			upper? <- { false }
			needsRex? <- { regnum >= 8u8 }
			rexBitReg <- {
				if: needsRex? {
					4u8
				} else: {
					0u8
				}
			}
			rexBitRM <- {
				if: needsRex? {
					1u8
				} else: {
					0u8
				}
			}
			= <- :other {
				(not: (other isInteger?)) && (other register?) && (not: (other upper?)) && regnum = (other num)
			}
		}
	}

	upper <- :regnum {
		#{
			num <- { regnum }
			reg <- { regnum }
			string <- { uppernames get: regnum - 4 }
			rm <- :tail { regnum or 0xC0u8 | tail }
			validforSize? <- :size {
				size = byte
			}
			isInteger? <- { false }
			register? <- { true }
			label? <- { false }
			upper? <- { true }
			needsRex? <- { false }
			= <- :other {
				(not: (other isInteger?)) && (other register?) && (other upper?) && regnum = (other num)
			}
		}
	}
	fakesrc <- #{
		needsRex? <- { false }
		rexBitReg <- { 0u8 }
		rexBitRM <- { 0u8 }
	}
	_size <- :s {
		#{
			num <- { s }
			= <- :other {
				s = (other num)
			}
			> <- :other {
				s > (other num)
			}
			>= <- :other {
				s >= (other num)
			}
			< <- :other {
				s < (other num)
			}
			<= <- :other {
				s <= (other num)
			}
			needsRex? <- { s = 3 }
			rexBit <- {
				if: needsRex? {
					0x08u8
				} else: {
					0u8
				}
			}
		}
	}
	byte <- _size: 0
	word <- _size: 1
	dword <- _size: 2
	qword <- _size: 3

	condition <- :num {
		#{
			cc <- { num }
		}
	}
	_o <- condition: 0u8
	_no <- condition: 1u8
	_c <- condition: 2u8
	_nc <- condition: 3u8
	_z <- condition: 4u8
	_nz <- condition: 5u8
	_be <- condition: 6u8
	_nbe <- condition: 7u8
	_s <- condition: 8u8
	_ns <- condition: 9u8
	_p <- condition: 10u8
	_np <- condition: 11u8
	_l <- condition: 12u8
	_nl <- condition: 13u8
	_le <- condition: 14u8
	_nle <- condition: 15u8


	size_bit <- :opcode size {
		if: size = byte {
			opcode
		} else: {
			opcode or 1u8
		}
	}
	opex <- :val {
		#{
			reg <- { val }
			string <- { "opex " . val}
		}
	}

	mod_rm:withTail <- :register regmem :end {
		list <- regmem rm: end
		(list value) or ( lshift: (register reg) by: 3u8) | (list tail)
	}

	mod_rm <- :reg rm {
		mod_rm: reg rm withTail: []
	}

	int_op:withTail <- :value size :tail {
		if: size >= dword {
			tail <- (uint8: (rshift: value by: 16u64)) | (uint8: (rshift: value by: 24u64)) | tail
		}
		if: size >= word {
			tail <- (uint8: (rshift: value by: 8u64)) | tail
		}
		(uint8: value) | tail
	}
	int_op <- :value size {
		int_op: value size withTail: []
	}
	//used for mov instructions that support 64-bit immediate operands/offsets
	int_op64 <- :value size {
		tail <- []
		value <- uint64: value
		if: size = qword {
			tail <- (uint8: (rshift: value by: 32u64)) | (uint8: (rshift: value by: 40u64)) | (uint8: (rshift: value by: 48u64)) | (uint8: (rshift: value by: 56u64)) | tail
		}
		int_op: value size withTail: tail
	}

	prefix:withInstruction <- :reg rm size :inst {
		if: size = word {
			inst <- 0x66u8 | inst
		}
		if: (size needsRex?) || (reg needsRex?) || (rm needsRex?) {
			rex <- 0x40u8 or (size rexBit) or (reg rexBitReg) or (rm rexBitRM)
			inst <- rex | inst
		}
		inst
	}

	_rax <- ireg: 0u8
	_rcx <- ireg: 1u8
	_rdx <- ireg: 2u8
	_rbx <- ireg: 3u8
	_rsp <- ireg: 4u8
	_rbp <- ireg: 5u8
	_rsi <- ireg: 6u8
	_rdi <- ireg: 7u8
	_r8 <- ireg: 8u8
	_r9 <- ireg: 9u8
	_r10 <- ireg: 10u8
	_r11 <- ireg: 11u8
	_r12 <- ireg: 12u8
	_r13 <- ireg: 13u8
	_r14 <- ireg: 14u8
	_r15 <- ireg: 15u8
	_ah <- upper: 4u8
	_ch <- upper: 5u8
	_dh <- upper: 6u8
	_bh <- upper: 7u8

	//AMD64 convention
	_argregs <- #[
		_rdi
		_rsi
		_rdx
		_rcx
		_r8
		_r9
	]
	_calleesave <- #[
		_rbx
		_rbp
		_r12
		_r13
		_r14
		_r15
	]
	_tempregs <- #[
		_r10
		_r11
		//TODO: Add rax back in once there's logic in il to properly
		//allocate it for the instances in which it's live
		//_rax
	]


	inst <- :ilist {
		#{
			length <- { ilist length }
			flattenTo:at <- :dest :idx {
				ilist fold: idx with: :idx byte {
					dest set: idx byte
					idx + 1
				}
			}
			string <- { (ilist map: :el { hex: el}) join: " "}
		}
	}
	multiInst <- :instarr {
		#{
			length <- {
				instarr fold: 0 with: :acc inst {
					acc + (inst length)
				}
			}
			flattenTo:at <- :dest :idx {
				instarr fold: idx with: :idx inst {
					inst flattenTo: dest at: idx
				}
			}
			string <- {
				(instarr map: :inst {
					(inst map: :el { hex: el}) join: " "
				}) join: "\n" 
			}
		}
	}

	op:withCode:withImmed:withOpEx <- :src dst size :normal :immed :myopex {
		reg <- src
		rm <- dst
		base <- if: (src isInteger?) {
			reg <- fakesrc
			(size_bit: immed size) | (mod_rm: (opex: myopex) dst withTail: (int_op: src size))
		} else: {
			if: (src register?) {
				(size_bit: normal size) | (mod_rm: src dst)
			} else: {
				reg <- dst
				rm <- src
				(size_bit: normal or 0x02u8 size) | (mod_rm: dst src)
			}
		}
		inst: (prefix: reg rm size withInstruction: base)
	}

	op:withCode:withImmed:withImmedRax:withOpEx:withByteExtend <- :src dst size :normal :immed :immedRax :myopex :byteExt {
		reg <- src
		rm <- dst
		if: (src isInteger?) {
			reg <- fakesrc
			base <- if: size > byte && (((src signed?) && src < 128 && src >= -128) || ((not: (src signed?)) && src < 256)) {
				byteExt | (mod_rm: (opex: myopex) dst withTail: [(uint8: src)])
			} else: {
				if: dst = _rax {
					(size_bit: immedRax size) | (int_op: src size)
				} else: {
					(size_bit: immed size) | (mod_rm: (opex: myopex) dst withTail: (int_op: src size))
				}
			}
			inst: (prefix: reg rm size withInstruction: base)
		} else: {
			op: src dst size withCode: normal withImmed: immed withOpEx: myopex
		}
	}

	shiftRot:withOpEx <- :amount dst size :myopex {
		opcode <- 0u8
		tail <- []
		pre <- #[]
		post <- #[]
		base <- if: (amount isInteger?) {
			if: amount = 1 {
				opcode <- 0xD0u8
			} else: {
				opcode <- 0xC0u8
				tail <- [uint8: amount]
			}
		} else: {
			opcode <- 0xD2u8
			if: (not: _rcx = amount) {
				pre <- #[
					x86 push: _rcx
					x86 mov: amount _rcx byte
				]
				post <- #[
					x86 pop: _rcx
				]
			}
		}
		bytes <- prefix: fakesrc dst withInstruction: (size_bit: 0xC0u8 size) | (mod_rm: (opex: myopex) dst withTail: tail)
		myinst <- inst: bytes
		if: (pre length) > 0 {
			pre append: myinst
			foreach: post :_ inst {
				pre append: inst
			}
			multiInst: pre
		} else: {
			myinst
		}
	}

	_jmprel <- :op jmpDest {
	}

	#{
		rax <- { _rax }
		rcx <- { _rcx }
		rdx <- { _rdx }
		rbx <- { _rbx }
		rsp <- { _rsp }
		rbp <- { _rbp }
		rsi <- { _rsi }
		rdi <- { _rdi }
		r8 <- { _r8 }
		r9 <- { _r9 }
		r10 <- { _r10 }
		r11 <- { _r11 }
		r12 <- { _r12 }
		r13 <- { _r13 }
		r14 <- { _r14 }
		r15 <- { _r15 }
		ah <- { _ah }
		ch <- { _ch }
		dh <- { _dh }
		bh <- { _bh }

		b <- { byte }
		w <- { word }
		d <- { dword }
		q <- { qword }

		o <- { _o }
		no <- { _no }
		c <- { _c }
		nc <- { _nc }
		ae <- { _nc }
		z <- { _z }
		e <- { _z }
		nz <- { _nz }
		ne <- { _nz }
		be <- { _be }
		nbe <- { _nbe }
		a <- { _nbe }
		s <- { _s }
		ns <- { _ns }
		p <- { _p }
		pe <- { _p }
		np <- { _np }
		po <- { _np }
		l <- { _l }
		nl <- { _nl }
		ge <- { _nl }
		le <- { _le }
		nle <- { _nle }
		g <- { _nle }

		add <- :src dst size {
			op: src dst size withCode: 0u8 withImmed: 0x80u8 withImmedRax: 0x04u8 withOpEx: 0u8 withByteExtend: 0x83u8
		}

		sub <- :src dst size {
			op: src dst size withCode: 0x28u8 withImmed: 0x80u8 withImmedRax: 0x2Cu8 withOpEx: 5u8 withByteExtend: 0x83u8
		}

		cmp <- :src dst size {
			op: src dst size withCode: 0x38u8 withImmed: 0x80u8 withImmedRax: 0x3Cu8 withOpEx: 7u8 withByteExtend: 0x83u8
		}

		and <- :src dst size {
			op: src dst size withCode: 0x20u8 withImmed: 0x80u8 withImmedRax: 0x24u8 withOpEx: 4u8 withByteExtend: 0x83u8
		}

		or <- :src dst size {
			op: src dst size withCode: 0x08u8 withImmed: 0x80u8 withImmedRax: 0x0Cu8 withOpEx: 1u8 withByteExtend: 0x83u8
		}

		xor <- :src dst size {
			op: src dst size withCode: 0x30u8 withImmed: 0x80u8 withImmedRax: 0x34u8 withOpEx: 6u8 withByteExtend: 0x83u8
		}

		mov <- :src dst size {
			rm <- dst
			if: (src isInteger?) && (dst register?) {
				opval <- if: size = byte { 0xB0u8 } else: { 0xB8u8 }
				base <- opval or (dst reg) | (int_op64: src size)
				inst: (prefix: fakesrc rm size withInstruction: base)
			} else: {
				op: src dst size withCode: 0x88u8 withImmed: 0xC6u8 withOpEx: 0u8
			}
		}

		shl <- :shift dst size {
			shiftRot: shift dst size withOpEx: 4u8
		}

		shr <- :shift dst size {
			shiftRot: shift dst size withOpEx: 5u8
		}

		sar <- :shift dst size {
			shiftRot: shift dst size withOpEx: 7u8
		}

		rol <- :shift dst size {
			shiftRot: shift dst size withOpEx: 0u8
		}

		ror <- :shift dst size {
			shiftRot: shift dst size withOpEx: 1u8
		}

		ret <- { inst: [ 0xC3u8 ] }

		label <- {
			_offset <- -1
			_forwardRefs <- #[]
			#{
				length <- { 0 }
				hasOffset? <- { _offset >= 0 }
				offset <- { _offset }
				register? <- { false }
				label? <- { true }
				flattenTo:at <- :dest :idx {
					if: (not: hasOffset?) {
						_offset <- idx
						foreach: _forwardRefs :idx fun {
							fun: _offset
						}
						_forwardRefs <- #[]
					}
					idx
				}
				string <- { "label: " . _offset }
				withOffset:else <- :fun :elsefun {
					if: hasOffset? {
						fun: _offset
					} else: {
						_forwardRefs append: fun
						elsefun:
					}
				}
			}
		}

		jmp <- :jmpDest {
			if: (jmpDest label?) {
				_size <- -1
				#{
					length <- { if: _size < 0 { 5 } else: { _size } }
					flattenTo:at <- :dest :idx {
						jmpDest withOffset: :off {
							if: _size < 0 {
								rel <- off - (idx + 2)
								if: rel < 128 && rel >= -128 {
									_size <- 2
								} else: {
									rel <- rel - 2
									if: rel < 32768 && rel >= -32768 {
										_size <- 4
									} else: {
										_size <- 5
									}
								}
							}
							rel <- off - (idx + _size)
							if: _size = 2 {
								dest set: idx 0xEBu8
								dest set: (idx + 1) (uint8: rel)
							} else: {
								if: _size = 4 {
									dest set: idx 0x66u8
									dest set: (idx + 1) 0xE9u8
									dest set: (idx + 2) (uint8: rel)
									dest set: (idx + 3) (uint8: (rshift: rel by: 8))
								} else: {
									dest set: idx 0xE9u8
									dest set: (idx + 1) (uint8: rel)
									dest set: (idx + 2) (uint8: (rshift: rel by: 8))
									dest set: (idx + 3) (uint8: (rshift: rel by: 16))
									dest set: (idx + 4) (uint8: (rshift: rel by: 24))
								}
							}
						} else: {
							_size <- 5
						}
						idx + _size
					}
					string <- {
						"jmp " . jmpDest
					}
				}
			} else: {
				inst: 0xFFu8 | (mod_rm: (opex: 5u8) jmpDest)
			}
		}

		jcc <- :cond jmpDest {
			_size <- -1
			#{
				length <- { if: _size < 0 { 5 } else: { _size } }
				flattenTo:at <- :dest :idx {
					jmpDest withOffset: :off {
						if: _size < 0 {
							rel <- off - (idx + 2)
							if: rel < 128 && rel >= -128 {
								_size <- 2
							} else: {
								_size <- 6
							}
						}
						rel <- off - (idx + _size)
						if: _size = 2 {
							dest set: idx 0x70u8 or (cond cc)
							dest set: (idx + 1) (uint8: rel)
						} else: {
							dest set: idx 0x0Fu8
							dest set: (idx + 1) 0x80u8 or (cond cc)
							dest set: (idx + 2) (uint8: rel)
							dest set: (idx + 3) (uint8: (rshift: rel by: 8))
							dest set: (idx + 4) (uint8: (rshift: rel by: 16))
							dest set: (idx + 5) (uint8: (rshift: rel by: 24))
						}
					} else: {
						_size <- 6
					}
					idx + _size
				}
				string <- {
					"jcc " . jmpDest
				}
			}
		}

		call <- :callDest {
			if: (callDest label?) {
				#{
					length <- { 5 }
					flattenTo:at <- :dest :idx {
						dest set: idx 0xE8u8
						callDest withOffset: :off {
							rel <- off - (idx + 5)
							dest set: (idx + 1) (uint8: rel)
							dest set: (idx + 2) (uint8: (rshift: rel by: 8))
							dest set: (idx + 3) (uint8: (rshift: rel by: 16))
							dest set: (idx + 4) (uint8: (rshift: rel by: 24))
						} else: {
						}
						idx + 5
					}
					string <- {
						"call " . callDest
					}
				}
			} else: {
				inst: 0xFFu8 | (mod_rm: (opex: 2u8) callDest)
			}
		}

		push <- :src {
			if: (src isInteger?) {
				if: src < 128 && src > -128 {
					inst: 0x6Au8 | (uint8: src)
				} else: {
					inst: 0x68u8 | (uint8: src) | (uint8: (rshift: src by: 8)) | (uint8: (rshift: src by: 16)) | (uint8: (rshift: src by: 24))
				}
			} else: {
				base <- if: (src register?) {
					[0x50u8 or (src reg)]
				} else: {
					0xFFu8 | (mod_rm: (opex: 6u8) src)
				}
				inst: (prefix: fakesrc src d withInstruction: base)
			}
		}

		pop <- :dst {
			base <- if: (dst register?) {
				[0x58u8 or (dst reg)]
			} else: {
				0x8Fu8 | (mod_rm: (opex: 0u8) dst)
			}
			inst: (prefix: fakesrc dst d withInstruction: base)
		}

		bnot <- :dst size {
			base <- (size_bit: 0xF6u8 size) | (mod_rm: (opex: 2u8) dst)
			inst: (prefix: fakesrc dst size withInstruction: base)
		}

		//TODO: support multiple calling conventions
		regSource <- {
			_used <- 0
			_usedAllTime <- 0
			_nextStackOff <- 0
			_findUnused <- :size reglists{
				found <- -1
				foundlist <- -1
				curlist <- 0
				ll <- reglists length
				while: { found < 0 && curlist < ll } do: {
					cur <- 0
					regs <- reglists get: curlist
					len <- regs length
					while: { found < 0 && cur < len } do: {
						bit <- lshift: 1 by: ((regs get: cur) num)
						if: (_used and bit) = 0 {
							found <- cur
							foundlist <- regs
							_used <- _used or bit
							_usedAllTime <- _usedAllTime or bit
						}
						cur <- cur + 1
					}
					curlist <- curlist + 1
				}
				if: found >= 0 {
					foundlist get: found
				} else: {
					myoff <- _nextStackOff
					_nextStackOff <- _nextStackOff + size
					il base: _rsp offset: myoff
				}
			}
			#{
				alloc <- :size {
					_findUnused: size #[
						_calleesave
						_tempregs
						_argregs
					]
				}
				//used to allocate a register
				//that will be returned before a call
				allocTemp <- :size {
					_findUnused: size #[
						_tempregs
						_argregs
						_calleesave
					]
				}
				//allocated the return register
				allocRet <- {
					bit <- (lshift: 1 by: (_rax num))
					_used <- _used or bit
					_usedAllTime <- _usedAllTime or bit
					_rax
				}
				allocArg <- :argnum {
					if: argnum < (_argregs length) {
						reg <- _argregs get: argnum
						bit <- (lshift: 1 by: (reg num))
						_used <- _used or bit
						_usedAllTime <- _usedAllTime or bit
						reg
					} else: {
						il base: _rsp offset: _nextStackOff + 8 * (argnum - (_argregs length))
					}
				}
				allocSpecific <- :reg {
					if: (reg register?) {
						bit <- (lshift: 1 by: (reg num))
						_used <- _used or bit
					}
				}
				stackSize <- { _nextStackOff }
				return <- :reg {
					_used <- _used and (0xF xor (lshift: 1 by: (reg num)))
				}
				returnAll <- { _used <- 0 }
				needSaveProlog <- {
					retval <- #[]
					foreach: _calleesave :idx reg {
						bit <- lshift: 1 by: (reg num)
						if: (_usedAllTime and bit) != 0 {
							retval append: reg
						}
					}
					retval
				}
				needSaveForCall <- {
					retval <- #[]
					foreach: #[(_tempregs) (_argregs)] :_ regs {
						foreach: regs :_ reg {
							if: (_used and (lshift: 1 by: (reg num))) != 0 {
								retval append: reg
							}
						}
					}
					retval
				}
			}
		}

		adjustIL <- :ilfun {
			il to2Op: (il allocRegs: ilfun withSource: regSource)
		}

		convertIL:to:withLabels:withSaved <- :inst :outarr :labels :saved {
			mapSize <- :ilsize {
				if: (ilsize bytes) > 2 {
					if: (ilsize bytes) = 8 { q } else: { d }
				} else: {
					if: (ilsize bytes) = 1 { b } else: { w }
				}
			}
			mapcond <- :ilcond {
				ccmap <- #[
					e
					ne
					ge
					le
					g
					l
					ae
					be
					a
					c
				]
				ccmap get: (ilcond cc)
			}
			opmap <- #[
				{ outarr append: (add: (inst in) (inst out) (mapSize: (inst size))) }
				{ outarr append: (and: (inst in) (inst out) (mapSize: (inst size))) }
				{ outarr append: (or: (inst in) (inst out) (mapSize: (inst size))) }
				{ outarr append: (xor: (inst in) (inst out) (mapSize: (inst size))) }
				{ } //muls
				{ } //mulu
				{ } //divs
				{ } //divu
				{ outarr append: (sub: (inst in) (inst out) (mapSize: (inst size))) }
				{ outarr append: (cmp: (inst in) (inst out) (mapSize: (inst size))) }
				{ outarr append: (bnot: (inst arg) (mapSize: (inst size))) }
				{ outarr append: (shl: (inst in) (inst out) (mapSize: (inst size))) } //sl
				{ outarr append: (sar: (inst in) (inst out) (mapSize: (inst size))) } //asr
				{ outarr append: (shr: (inst in) (inst out) (mapSize: (inst size))) } //lsr
				{ outarr append: (rol: (inst in) (inst out) (mapSize: (inst size))) }
				{ outarr append: (ror: (inst in) (inst out) (mapSize: (inst size))) }
				{ outarr append: (mov: (inst in) (inst out) (mapSize: (inst size))) }
				{
					//call
					arguments <- inst args
					arguments foldr: (arguments length) - 1 with: :cur src {
						if: cur < (_argregs length) {
							dst <- _argregs get: cur
							if: (not: dst = src) {
								//TODO: Handle edge case in which src is a caller saved
								//reg that has been pusehd onto the stack to preserve
								//it across this call
								outarr append: (mov: src dst q)
							}
						} else: {
							outarr append: (push: src)
						}
						cur - 1
					}
					
					toCall <- inst target
					if: (toCall isString?) {
						//TODO: Handle call to undefined label
						toCall <- labels get: toCall else: { 
							print: "Could not find label " . toCall . "\nDefined labels:\n"
							foreach: labels :key _ {
								print: "\t" . key . "\n"
							}
							false 
						}
					}
					outarr append: (call: toCall)
				}
				{
					//return
					if: (not: _rax = (inst arg)) {
						outarr append: (mov: (inst arg) _rax q)
					}
					foreach: saved :_ reg {
						outarr append: (pop: reg)
					}
					outarr append: (ret: )
				}
				{
					//skipIf
					endlab <- label:
					outarr append: (jcc: (mapcond: (inst cond)) endlab)
					foreach: (inst toskip) :_ inst {
						convertIL: inst to: outarr withLabels: labels withSaved: saved
					}
					outarr append: endlab
				}
				//skipIf:else
				{ }
				{
					//save
					newsave <- []
					foreach: (inst tosave) :_ reg {
						outarr append: (push: reg)
						newsave <- reg | newsave
					}
					foreach: (inst scope) :_ inst {
						convertIL: inst to: outarr withLabels: labels withSaved: newsave
					}
					if: ((inst scope) length) = 0 || (((inst scope) get: ((inst scope) length) - 1) opcode) != 18 {
						foreach: newsave :_ reg {
							outarr append: (pop: reg)
						}
					}
				}
				//bool
				{ }
			]
			print: "Opcode: " . (inst opcode) . "\n"
			fun <- opmap get: (inst opcode)
			fun:
			outarr
		}

		convertIL:to:withLabels <- :inst :outarr :labels {
			convertIL: inst to: outarr withLabels: labels withSaved: []
		}

		main <- {
			fib <- label:
			notbase <- label:
			prog <- #[
				fib
				sub: 2 rdi q
				jcc: ge notbase
				mov: 1 rax q
				ret:

				notbase
				push: rdi
				call: fib
				pop: rdi
				push: rax
				add: 1 rdi q
				call: fib
				pop: rdi
				add: rdi rax q
				ret:
			]

			ba <- bytearray executableFromBytes: prog
			res <- ba runWithArg: 30u64
			print: (string: res) . "\n"
			0
		}
	}
}