comparison m68k_core_x86.c @ 1282:c5821f9de325

Cycle accurate implementation of divs
author Michael Pavone <pavone@retrodev.com>
date Wed, 15 Mar 2017 19:05:27 -0700
parents 2d8b9d40f5ea
children 82838d4c84d9
comparison
equal deleted inserted replaced
1281:34113230fd88 1282:c5821f9de325
1717 context->current_cycle += cycles * context->options->gen.clock_divider; 1717 context->current_cycle += cycles * context->options->gen.clock_divider;
1718 quotient = quotient << 1 | bit; 1718 quotient = quotient << 1 | bit;
1719 return dividend | quotient; 1719 return dividend | quotient;
1720 } 1720 }
1721 1721
1722 void translate_m68k_divu(m68k_options *opts, m68kinst *inst, host_ea *src_op, host_ea *dst_op) 1722 static uint32_t divs(uint32_t dividend, m68k_context *context, uint32_t divisor_shift)
1723 {
1724 uint32_t orig_divisor = divisor_shift, orig_dividend = dividend;
1725 if (divisor_shift & 0x80000000) {
1726 divisor_shift = 0 - divisor_shift;
1727 }
1728
1729 uint32_t cycles = 12;
1730 if (dividend & 0x80000000) {
1731 //dvs10
1732 dividend = 0 - dividend;
1733 cycles += 2;
1734 }
1735 if (divisor_shift <= dividend) {
1736 context->flags[FLAG_V] = 1;
1737 context->flags[FLAG_N] = 1;
1738 context->flags[FLAG_Z] = 0;
1739 //TODO: FIXME - this cycle count probably changes based on whether the dividend is negative
1740 context->current_cycle += 16 * context->options->gen.clock_divider;
1741 return orig_dividend;
1742 }
1743 uint16_t quotient = 0;
1744 uint16_t bit = 0;
1745 for (int i = 0; i < 15; i++)
1746 {
1747 quotient = quotient << 1 | bit;
1748 dividend = dividend << 1;
1749
1750 if (dividend >= divisor_shift) {
1751 dividend -= divisor_shift;
1752 cycles += 6;
1753 bit = 1;
1754 } else {
1755 bit = 0;
1756 cycles += 8;
1757 }
1758 }
1759 quotient = quotient << 1 | bit;
1760 dividend = dividend << 1;
1761 if (dividend >= divisor_shift) {
1762 dividend -= divisor_shift;
1763 quotient = quotient << 1 | 1;
1764 } else {
1765 quotient = quotient << 1;
1766 }
1767 cycles += 4;
1768
1769 context->flags[FLAG_V] = 0;
1770 if (orig_divisor & 0x80000000) {
1771 cycles += 16; //was 10
1772 if (orig_dividend & 0x80000000) {
1773 if (quotient & 0x8000) {
1774 context->flags[FLAG_V] = 1;
1775 context->flags[FLAG_N] = 1;
1776 context->flags[FLAG_Z] = 0;
1777 context->current_cycle += cycles * context->options->gen.clock_divider;
1778 return orig_dividend;
1779 } else {
1780 dividend = -dividend;
1781 }
1782 } else {
1783 quotient = -quotient;
1784 if (quotient && !(quotient & 0x8000)) {
1785 context->flags[FLAG_V] = 1;
1786 }
1787 }
1788 } else if (orig_dividend & 0x80000000) {
1789 cycles += 18; // was 12
1790 quotient = -quotient;
1791 if (quotient && !(quotient & 0x8000)) {
1792 context->flags[FLAG_V] = 1;
1793 } else {
1794 dividend = -dividend;
1795 }
1796 } else {
1797 cycles += 14; //was 10
1798 if (quotient & 0x8000) {
1799 context->flags[FLAG_V] = 1;
1800 }
1801 }
1802 if (context->flags[FLAG_V]) {
1803 context->flags[FLAG_N] = 1;
1804 context->flags[FLAG_Z] = 0;
1805 context->current_cycle += cycles * context->options->gen.clock_divider;
1806 return orig_dividend;
1807 }
1808 context->flags[FLAG_N] = (quotient & 0x8000) ? 1 : 0;
1809 context->flags[FLAG_Z] = quotient == 0;
1810 //V was cleared above, C is cleared by the generated machine code
1811 context->current_cycle += cycles * context->options->gen.clock_divider;
1812 return dividend | quotient;
1813 }
1814
1815 void translate_m68k_div(m68k_options *opts, m68kinst *inst, host_ea *src_op, host_ea *dst_op)
1723 { 1816 {
1724 code_info *code = &opts->gen.code; 1817 code_info *code = &opts->gen.code;
1725 check_alloc_code(code, MAX_NATIVE_SIZE); 1818 check_alloc_code(code, MAX_NATIVE_SIZE);
1726 set_flag(opts, 0, FLAG_C); 1819 set_flag(opts, 0, FLAG_C);
1727 if (dst_op->mode == MODE_REG_DIRECT) { 1820 if (dst_op->mode == MODE_REG_DIRECT) {
1764 mov_ir(code, VECTOR_INT_DIV_ZERO, opts->gen.scratch2, SZ_D); 1857 mov_ir(code, VECTOR_INT_DIV_ZERO, opts->gen.scratch2, SZ_D);
1765 mov_ir(code, inst->address+isize, opts->gen.scratch1, SZ_D); 1858 mov_ir(code, inst->address+isize, opts->gen.scratch1, SZ_D);
1766 jmp(code, opts->trap); 1859 jmp(code, opts->trap);
1767 1860
1768 *not_zero = code->cur - (not_zero + 1); 1861 *not_zero = code->cur - (not_zero + 1);
1769 cmp_rr(code, opts->gen.scratch1, opts->gen.scratch2, SZ_D); 1862 code_ptr end = NULL;
1770 code_ptr not_overflow = code->cur+1; 1863 if (inst->op == M68K_DIVU) {
1771 jcc(code, CC_C, not_overflow); 1864 //initial overflow check needs to be done in the C code for divs
1772 1865 //but can be done before dumping state to mem in divu as an optimization
1773 //overflow seems to always set the N and clear Z 1866 cmp_rr(code, opts->gen.scratch1, opts->gen.scratch2, SZ_D);
1774 update_flags(opts, N1|Z0|V1); 1867 code_ptr not_overflow = code->cur+1;
1775 cycles(&opts->gen, 10); 1868 jcc(code, CC_C, not_overflow);
1776 code_ptr end = code->cur+1; 1869
1777 jmp(code, end); 1870 //overflow seems to always set the N and clear Z
1778 1871 update_flags(opts, N1|Z0|V1);
1779 *not_overflow = code->cur - (not_overflow + 1); 1872 cycles(&opts->gen, 10);
1873 end = code->cur+1;
1874 jmp(code, end);
1875
1876 *not_overflow = code->cur - (not_overflow + 1);
1877 }
1780 call(code, opts->gen.save_context); 1878 call(code, opts->gen.save_context);
1781 push_r(code, opts->gen.context_reg); 1879 push_r(code, opts->gen.context_reg);
1782 //TODO: inline the functionality of divu so we don't need to dump context to memory 1880 //TODO: inline the functionality of divu/divs so we don't need to dump context to memory
1783 call_args(code, (code_ptr)divu, 3, opts->gen.scratch2, opts->gen.context_reg, opts->gen.scratch1); 1881 call_args(code, (code_ptr)(inst->op == M68K_DIVU ? divu : divs), 3, opts->gen.scratch2, opts->gen.context_reg, opts->gen.scratch1);
1784 pop_r(code, opts->gen.context_reg); 1882 pop_r(code, opts->gen.context_reg);
1785 mov_rr(code, RAX, opts->gen.scratch1, SZ_D); 1883 mov_rr(code, RAX, opts->gen.scratch1, SZ_D);
1786 1884
1787 call(code, opts->gen.load_context); 1885 call(code, opts->gen.load_context);
1788 1886
1789 cmp_ir(code, 0, opts->gen.scratch1, SZ_W); 1887 if (inst->op == M68K_DIVU) {
1790 update_flags(opts, V0|Z|N); 1888 cmp_ir(code, 0, opts->gen.scratch1, SZ_W);
1889 update_flags(opts, V0|Z|N);
1890 }
1791 1891
1792 if (dst_op->mode == MODE_REG_DIRECT) { 1892 if (dst_op->mode == MODE_REG_DIRECT) {
1793 mov_rr(code, opts->gen.scratch1, dst_op->base, SZ_D); 1893 mov_rr(code, opts->gen.scratch1, dst_op->base, SZ_D);
1794 } else { 1894 } else {
1795 mov_rrdisp(code, opts->gen.scratch1, dst_op->base, dst_op->disp, SZ_D); 1895 mov_rrdisp(code, opts->gen.scratch1, dst_op->base, dst_op->disp, SZ_D);
1796 } 1896 }
1797 1897 if (end) {
1798 *end = code->cur - (end + 1); 1898 *end = code->cur - (end + 1);
1799 } 1899 }
1800
1801 void translate_m68k_div(m68k_options *opts, m68kinst *inst, host_ea *src_op, host_ea *dst_op)
1802 {
1803 code_info *code = &opts->gen.code;
1804 check_alloc_code(code, MAX_NATIVE_SIZE);
1805 //TODO: cycle exact division
1806 cycles(&opts->gen, inst->op == M68K_DIVS ? 158 : 140);
1807 set_flag(opts, 0, FLAG_C);
1808 push_r(code, RDX);
1809 push_r(code, RAX);
1810 uint32_t tmp_stack_off = code->stack_off;
1811 if (dst_op->mode == MODE_REG_DIRECT) {
1812 mov_rr(code, dst_op->base, RAX, SZ_D);
1813 } else {
1814 mov_rdispr(code, dst_op->base, dst_op->disp, RAX, SZ_D);
1815 }
1816 if (src_op->mode == MODE_IMMED) {
1817 mov_ir(code, (src_op->disp & 0x8000) && inst->op == M68K_DIVS ? src_op->disp | 0xFFFF0000 : src_op->disp, opts->gen.scratch2, SZ_D);
1818 } else if (src_op->mode == MODE_REG_DIRECT) {
1819 if (inst->op == M68K_DIVS) {
1820 movsx_rr(code, src_op->base, opts->gen.scratch2, SZ_W, SZ_D);
1821 } else {
1822 movzx_rr(code, src_op->base, opts->gen.scratch2, SZ_W, SZ_D);
1823 }
1824 } else if (src_op->mode == MODE_REG_DISPLACE8) {
1825 if (inst->op == M68K_DIVS) {
1826 movsx_rdispr(code, src_op->base, src_op->disp, opts->gen.scratch2, SZ_W, SZ_D);
1827 } else {
1828 movzx_rdispr(code, src_op->base, src_op->disp, opts->gen.scratch2, SZ_W, SZ_D);
1829 }
1830 }
1831 uint32_t isize = 2;
1832 switch(inst->src.addr_mode)
1833 {
1834 case MODE_AREG_DISPLACE:
1835 case MODE_AREG_INDEX_DISP8:
1836 case MODE_ABSOLUTE_SHORT:
1837 case MODE_PC_INDEX_DISP8:
1838 case MODE_IMMEDIATE:
1839 isize = 4;
1840 break;
1841 case MODE_ABSOLUTE:
1842 isize = 6;
1843 break;
1844 }
1845 cmp_ir(code, 0, opts->gen.scratch2, SZ_D);
1846 check_alloc_code(code, 6*MAX_INST_LEN);
1847 code_ptr not_zero = code->cur + 1;
1848 jcc(code, CC_NZ, code->cur + 2);
1849 pop_r(code, RAX);
1850 pop_r(code, RDX);
1851 mov_ir(code, VECTOR_INT_DIV_ZERO, opts->gen.scratch2, SZ_D);
1852 mov_ir(code, inst->address+isize, opts->gen.scratch1, SZ_D);
1853 jmp(code, opts->trap);
1854
1855 code->stack_off = tmp_stack_off;
1856 *not_zero = code->cur - (not_zero+1);
1857 if (inst->op == M68K_DIVS) {
1858 cdq(code);
1859 } else {
1860 xor_rr(code, RDX, RDX, SZ_D);
1861 }
1862 if (inst->op == M68K_DIVS) {
1863 idiv_r(code, opts->gen.scratch2, SZ_D);
1864 } else {
1865 div_r(code, opts->gen.scratch2, SZ_D);
1866 }
1867 code_ptr skip_sec_check, norm_off;
1868 if (inst->op == M68K_DIVS) {
1869 cmp_ir(code, 0x8000, RAX, SZ_D);
1870 skip_sec_check = code->cur + 1;
1871 jcc(code, CC_GE, code->cur + 2);
1872 cmp_ir(code, -0x8000, RAX, SZ_D);
1873 norm_off = code->cur + 1;
1874 jcc(code, CC_L, code->cur + 2);
1875 } else {
1876 cmp_ir(code, 0x10000, RAX, SZ_D);
1877 norm_off = code->cur + 1;
1878 jcc(code, CC_NC, code->cur + 2);
1879 }
1880 if (dst_op->mode == MODE_REG_DIRECT) {
1881 mov_rr(code, RDX, dst_op->base, SZ_W);
1882 shl_ir(code, 16, dst_op->base, SZ_D);
1883 mov_rr(code, RAX, dst_op->base, SZ_W);
1884 } else {
1885 mov_rrdisp(code, RDX, dst_op->base, dst_op->disp, SZ_W);
1886 shl_irdisp(code, 16, dst_op->base, dst_op->disp, SZ_D);
1887 mov_rrdisp(code, RAX, dst_op->base, dst_op->disp, SZ_W);
1888 }
1889 cmp_ir(code, 0, RAX, SZ_W);
1890 pop_r(code, RAX);
1891 if (dst_op->base == RDX) {
1892 update_flags(opts, V0|Z|N);
1893 add_ir(code, sizeof(void *), RSP, SZ_D);
1894 } else {
1895 pop_r(code, RDX);
1896 update_flags(opts, V0|Z|N);
1897 }
1898 code_ptr end_off = code->cur + 1;
1899 jmp(code, code->cur + 2);
1900 code->stack_off = tmp_stack_off;
1901 *norm_off = code->cur - (norm_off + 1);
1902 if (inst->op == M68K_DIVS) {
1903 *skip_sec_check = code->cur - (skip_sec_check+1);
1904 }
1905 pop_r(code, RAX);
1906 pop_r(code, RDX);
1907 set_flag(opts, 1, FLAG_V);
1908 *end_off = code->cur - (end_off + 1);
1909 } 1900 }
1910 1901
1911 void translate_m68k_exg(m68k_options *opts, m68kinst *inst, host_ea *src_op, host_ea *dst_op) 1902 void translate_m68k_exg(m68k_options *opts, m68kinst *inst, host_ea *src_op, host_ea *dst_op)
1912 { 1903 {
1913 code_info *code = &opts->gen.code; 1904 code_info *code = &opts->gen.code;