Mercurial > repos > blastem
comparison m68k_core_x86.c @ 1282:c5821f9de325
Cycle accurate implementation of divs
author | Michael Pavone <pavone@retrodev.com> |
---|---|
date | Wed, 15 Mar 2017 19:05:27 -0700 |
parents | 2d8b9d40f5ea |
children | 82838d4c84d9 |
comparison
equal
deleted
inserted
replaced
1281:34113230fd88 | 1282:c5821f9de325 |
---|---|
1717 context->current_cycle += cycles * context->options->gen.clock_divider; | 1717 context->current_cycle += cycles * context->options->gen.clock_divider; |
1718 quotient = quotient << 1 | bit; | 1718 quotient = quotient << 1 | bit; |
1719 return dividend | quotient; | 1719 return dividend | quotient; |
1720 } | 1720 } |
1721 | 1721 |
1722 void translate_m68k_divu(m68k_options *opts, m68kinst *inst, host_ea *src_op, host_ea *dst_op) | 1722 static uint32_t divs(uint32_t dividend, m68k_context *context, uint32_t divisor_shift) |
1723 { | |
1724 uint32_t orig_divisor = divisor_shift, orig_dividend = dividend; | |
1725 if (divisor_shift & 0x80000000) { | |
1726 divisor_shift = 0 - divisor_shift; | |
1727 } | |
1728 | |
1729 uint32_t cycles = 12; | |
1730 if (dividend & 0x80000000) { | |
1731 //dvs10 | |
1732 dividend = 0 - dividend; | |
1733 cycles += 2; | |
1734 } | |
1735 if (divisor_shift <= dividend) { | |
1736 context->flags[FLAG_V] = 1; | |
1737 context->flags[FLAG_N] = 1; | |
1738 context->flags[FLAG_Z] = 0; | |
1739 //TODO: FIXME - this cycle count probably changes based on whether the dividend is negative | |
1740 context->current_cycle += 16 * context->options->gen.clock_divider; | |
1741 return orig_dividend; | |
1742 } | |
1743 uint16_t quotient = 0; | |
1744 uint16_t bit = 0; | |
1745 for (int i = 0; i < 15; i++) | |
1746 { | |
1747 quotient = quotient << 1 | bit; | |
1748 dividend = dividend << 1; | |
1749 | |
1750 if (dividend >= divisor_shift) { | |
1751 dividend -= divisor_shift; | |
1752 cycles += 6; | |
1753 bit = 1; | |
1754 } else { | |
1755 bit = 0; | |
1756 cycles += 8; | |
1757 } | |
1758 } | |
1759 quotient = quotient << 1 | bit; | |
1760 dividend = dividend << 1; | |
1761 if (dividend >= divisor_shift) { | |
1762 dividend -= divisor_shift; | |
1763 quotient = quotient << 1 | 1; | |
1764 } else { | |
1765 quotient = quotient << 1; | |
1766 } | |
1767 cycles += 4; | |
1768 | |
1769 context->flags[FLAG_V] = 0; | |
1770 if (orig_divisor & 0x80000000) { | |
1771 cycles += 16; //was 10 | |
1772 if (orig_dividend & 0x80000000) { | |
1773 if (quotient & 0x8000) { | |
1774 context->flags[FLAG_V] = 1; | |
1775 context->flags[FLAG_N] = 1; | |
1776 context->flags[FLAG_Z] = 0; | |
1777 context->current_cycle += cycles * context->options->gen.clock_divider; | |
1778 return orig_dividend; | |
1779 } else { | |
1780 dividend = -dividend; | |
1781 } | |
1782 } else { | |
1783 quotient = -quotient; | |
1784 if (quotient && !(quotient & 0x8000)) { | |
1785 context->flags[FLAG_V] = 1; | |
1786 } | |
1787 } | |
1788 } else if (orig_dividend & 0x80000000) { | |
1789 cycles += 18; // was 12 | |
1790 quotient = -quotient; | |
1791 if (quotient && !(quotient & 0x8000)) { | |
1792 context->flags[FLAG_V] = 1; | |
1793 } else { | |
1794 dividend = -dividend; | |
1795 } | |
1796 } else { | |
1797 cycles += 14; //was 10 | |
1798 if (quotient & 0x8000) { | |
1799 context->flags[FLAG_V] = 1; | |
1800 } | |
1801 } | |
1802 if (context->flags[FLAG_V]) { | |
1803 context->flags[FLAG_N] = 1; | |
1804 context->flags[FLAG_Z] = 0; | |
1805 context->current_cycle += cycles * context->options->gen.clock_divider; | |
1806 return orig_dividend; | |
1807 } | |
1808 context->flags[FLAG_N] = (quotient & 0x8000) ? 1 : 0; | |
1809 context->flags[FLAG_Z] = quotient == 0; | |
1810 //V was cleared above, C is cleared by the generated machine code | |
1811 context->current_cycle += cycles * context->options->gen.clock_divider; | |
1812 return dividend | quotient; | |
1813 } | |
1814 | |
1815 void translate_m68k_div(m68k_options *opts, m68kinst *inst, host_ea *src_op, host_ea *dst_op) | |
1723 { | 1816 { |
1724 code_info *code = &opts->gen.code; | 1817 code_info *code = &opts->gen.code; |
1725 check_alloc_code(code, MAX_NATIVE_SIZE); | 1818 check_alloc_code(code, MAX_NATIVE_SIZE); |
1726 set_flag(opts, 0, FLAG_C); | 1819 set_flag(opts, 0, FLAG_C); |
1727 if (dst_op->mode == MODE_REG_DIRECT) { | 1820 if (dst_op->mode == MODE_REG_DIRECT) { |
1764 mov_ir(code, VECTOR_INT_DIV_ZERO, opts->gen.scratch2, SZ_D); | 1857 mov_ir(code, VECTOR_INT_DIV_ZERO, opts->gen.scratch2, SZ_D); |
1765 mov_ir(code, inst->address+isize, opts->gen.scratch1, SZ_D); | 1858 mov_ir(code, inst->address+isize, opts->gen.scratch1, SZ_D); |
1766 jmp(code, opts->trap); | 1859 jmp(code, opts->trap); |
1767 | 1860 |
1768 *not_zero = code->cur - (not_zero + 1); | 1861 *not_zero = code->cur - (not_zero + 1); |
1769 cmp_rr(code, opts->gen.scratch1, opts->gen.scratch2, SZ_D); | 1862 code_ptr end = NULL; |
1770 code_ptr not_overflow = code->cur+1; | 1863 if (inst->op == M68K_DIVU) { |
1771 jcc(code, CC_C, not_overflow); | 1864 //initial overflow check needs to be done in the C code for divs |
1772 | 1865 //but can be done before dumping state to mem in divu as an optimization |
1773 //overflow seems to always set the N and clear Z | 1866 cmp_rr(code, opts->gen.scratch1, opts->gen.scratch2, SZ_D); |
1774 update_flags(opts, N1|Z0|V1); | 1867 code_ptr not_overflow = code->cur+1; |
1775 cycles(&opts->gen, 10); | 1868 jcc(code, CC_C, not_overflow); |
1776 code_ptr end = code->cur+1; | 1869 |
1777 jmp(code, end); | 1870 //overflow seems to always set the N and clear Z |
1778 | 1871 update_flags(opts, N1|Z0|V1); |
1779 *not_overflow = code->cur - (not_overflow + 1); | 1872 cycles(&opts->gen, 10); |
1873 end = code->cur+1; | |
1874 jmp(code, end); | |
1875 | |
1876 *not_overflow = code->cur - (not_overflow + 1); | |
1877 } | |
1780 call(code, opts->gen.save_context); | 1878 call(code, opts->gen.save_context); |
1781 push_r(code, opts->gen.context_reg); | 1879 push_r(code, opts->gen.context_reg); |
1782 //TODO: inline the functionality of divu so we don't need to dump context to memory | 1880 //TODO: inline the functionality of divu/divs so we don't need to dump context to memory |
1783 call_args(code, (code_ptr)divu, 3, opts->gen.scratch2, opts->gen.context_reg, opts->gen.scratch1); | 1881 call_args(code, (code_ptr)(inst->op == M68K_DIVU ? divu : divs), 3, opts->gen.scratch2, opts->gen.context_reg, opts->gen.scratch1); |
1784 pop_r(code, opts->gen.context_reg); | 1882 pop_r(code, opts->gen.context_reg); |
1785 mov_rr(code, RAX, opts->gen.scratch1, SZ_D); | 1883 mov_rr(code, RAX, opts->gen.scratch1, SZ_D); |
1786 | 1884 |
1787 call(code, opts->gen.load_context); | 1885 call(code, opts->gen.load_context); |
1788 | 1886 |
1789 cmp_ir(code, 0, opts->gen.scratch1, SZ_W); | 1887 if (inst->op == M68K_DIVU) { |
1790 update_flags(opts, V0|Z|N); | 1888 cmp_ir(code, 0, opts->gen.scratch1, SZ_W); |
1889 update_flags(opts, V0|Z|N); | |
1890 } | |
1791 | 1891 |
1792 if (dst_op->mode == MODE_REG_DIRECT) { | 1892 if (dst_op->mode == MODE_REG_DIRECT) { |
1793 mov_rr(code, opts->gen.scratch1, dst_op->base, SZ_D); | 1893 mov_rr(code, opts->gen.scratch1, dst_op->base, SZ_D); |
1794 } else { | 1894 } else { |
1795 mov_rrdisp(code, opts->gen.scratch1, dst_op->base, dst_op->disp, SZ_D); | 1895 mov_rrdisp(code, opts->gen.scratch1, dst_op->base, dst_op->disp, SZ_D); |
1796 } | 1896 } |
1797 | 1897 if (end) { |
1798 *end = code->cur - (end + 1); | 1898 *end = code->cur - (end + 1); |
1799 } | 1899 } |
1800 | |
1801 void translate_m68k_div(m68k_options *opts, m68kinst *inst, host_ea *src_op, host_ea *dst_op) | |
1802 { | |
1803 code_info *code = &opts->gen.code; | |
1804 check_alloc_code(code, MAX_NATIVE_SIZE); | |
1805 //TODO: cycle exact division | |
1806 cycles(&opts->gen, inst->op == M68K_DIVS ? 158 : 140); | |
1807 set_flag(opts, 0, FLAG_C); | |
1808 push_r(code, RDX); | |
1809 push_r(code, RAX); | |
1810 uint32_t tmp_stack_off = code->stack_off; | |
1811 if (dst_op->mode == MODE_REG_DIRECT) { | |
1812 mov_rr(code, dst_op->base, RAX, SZ_D); | |
1813 } else { | |
1814 mov_rdispr(code, dst_op->base, dst_op->disp, RAX, SZ_D); | |
1815 } | |
1816 if (src_op->mode == MODE_IMMED) { | |
1817 mov_ir(code, (src_op->disp & 0x8000) && inst->op == M68K_DIVS ? src_op->disp | 0xFFFF0000 : src_op->disp, opts->gen.scratch2, SZ_D); | |
1818 } else if (src_op->mode == MODE_REG_DIRECT) { | |
1819 if (inst->op == M68K_DIVS) { | |
1820 movsx_rr(code, src_op->base, opts->gen.scratch2, SZ_W, SZ_D); | |
1821 } else { | |
1822 movzx_rr(code, src_op->base, opts->gen.scratch2, SZ_W, SZ_D); | |
1823 } | |
1824 } else if (src_op->mode == MODE_REG_DISPLACE8) { | |
1825 if (inst->op == M68K_DIVS) { | |
1826 movsx_rdispr(code, src_op->base, src_op->disp, opts->gen.scratch2, SZ_W, SZ_D); | |
1827 } else { | |
1828 movzx_rdispr(code, src_op->base, src_op->disp, opts->gen.scratch2, SZ_W, SZ_D); | |
1829 } | |
1830 } | |
1831 uint32_t isize = 2; | |
1832 switch(inst->src.addr_mode) | |
1833 { | |
1834 case MODE_AREG_DISPLACE: | |
1835 case MODE_AREG_INDEX_DISP8: | |
1836 case MODE_ABSOLUTE_SHORT: | |
1837 case MODE_PC_INDEX_DISP8: | |
1838 case MODE_IMMEDIATE: | |
1839 isize = 4; | |
1840 break; | |
1841 case MODE_ABSOLUTE: | |
1842 isize = 6; | |
1843 break; | |
1844 } | |
1845 cmp_ir(code, 0, opts->gen.scratch2, SZ_D); | |
1846 check_alloc_code(code, 6*MAX_INST_LEN); | |
1847 code_ptr not_zero = code->cur + 1; | |
1848 jcc(code, CC_NZ, code->cur + 2); | |
1849 pop_r(code, RAX); | |
1850 pop_r(code, RDX); | |
1851 mov_ir(code, VECTOR_INT_DIV_ZERO, opts->gen.scratch2, SZ_D); | |
1852 mov_ir(code, inst->address+isize, opts->gen.scratch1, SZ_D); | |
1853 jmp(code, opts->trap); | |
1854 | |
1855 code->stack_off = tmp_stack_off; | |
1856 *not_zero = code->cur - (not_zero+1); | |
1857 if (inst->op == M68K_DIVS) { | |
1858 cdq(code); | |
1859 } else { | |
1860 xor_rr(code, RDX, RDX, SZ_D); | |
1861 } | |
1862 if (inst->op == M68K_DIVS) { | |
1863 idiv_r(code, opts->gen.scratch2, SZ_D); | |
1864 } else { | |
1865 div_r(code, opts->gen.scratch2, SZ_D); | |
1866 } | |
1867 code_ptr skip_sec_check, norm_off; | |
1868 if (inst->op == M68K_DIVS) { | |
1869 cmp_ir(code, 0x8000, RAX, SZ_D); | |
1870 skip_sec_check = code->cur + 1; | |
1871 jcc(code, CC_GE, code->cur + 2); | |
1872 cmp_ir(code, -0x8000, RAX, SZ_D); | |
1873 norm_off = code->cur + 1; | |
1874 jcc(code, CC_L, code->cur + 2); | |
1875 } else { | |
1876 cmp_ir(code, 0x10000, RAX, SZ_D); | |
1877 norm_off = code->cur + 1; | |
1878 jcc(code, CC_NC, code->cur + 2); | |
1879 } | |
1880 if (dst_op->mode == MODE_REG_DIRECT) { | |
1881 mov_rr(code, RDX, dst_op->base, SZ_W); | |
1882 shl_ir(code, 16, dst_op->base, SZ_D); | |
1883 mov_rr(code, RAX, dst_op->base, SZ_W); | |
1884 } else { | |
1885 mov_rrdisp(code, RDX, dst_op->base, dst_op->disp, SZ_W); | |
1886 shl_irdisp(code, 16, dst_op->base, dst_op->disp, SZ_D); | |
1887 mov_rrdisp(code, RAX, dst_op->base, dst_op->disp, SZ_W); | |
1888 } | |
1889 cmp_ir(code, 0, RAX, SZ_W); | |
1890 pop_r(code, RAX); | |
1891 if (dst_op->base == RDX) { | |
1892 update_flags(opts, V0|Z|N); | |
1893 add_ir(code, sizeof(void *), RSP, SZ_D); | |
1894 } else { | |
1895 pop_r(code, RDX); | |
1896 update_flags(opts, V0|Z|N); | |
1897 } | |
1898 code_ptr end_off = code->cur + 1; | |
1899 jmp(code, code->cur + 2); | |
1900 code->stack_off = tmp_stack_off; | |
1901 *norm_off = code->cur - (norm_off + 1); | |
1902 if (inst->op == M68K_DIVS) { | |
1903 *skip_sec_check = code->cur - (skip_sec_check+1); | |
1904 } | |
1905 pop_r(code, RAX); | |
1906 pop_r(code, RDX); | |
1907 set_flag(opts, 1, FLAG_V); | |
1908 *end_off = code->cur - (end_off + 1); | |
1909 } | 1900 } |
1910 | 1901 |
1911 void translate_m68k_exg(m68k_options *opts, m68kinst *inst, host_ea *src_op, host_ea *dst_op) | 1902 void translate_m68k_exg(m68k_options *opts, m68kinst *inst, host_ea *src_op, host_ea *dst_op) |
1912 { | 1903 { |
1913 code_info *code = &opts->gen.code; | 1904 code_info *code = &opts->gen.code; |