diff --git a/src/crt/ldivu.src b/src/crt/ldivu.src index a5aa9cc0c..408a25ce7 100644 --- a/src/crt/ldivu.src +++ b/src/crt/ldivu.src @@ -10,6 +10,7 @@ .else +; REMEMBER to keep ldivu.src, lremu.src, and ldiv.src in sync with this choice .if 1 __ldivu: @@ -17,7 +18,7 @@ __ldivu: ; O: euhl=EUHL/AUBC push bc - call __ldvrmu + call __ldivu_lremu_common ld a, b pop bc @@ -26,6 +27,8 @@ __ldivu: ei ret + .extern __ldivu_lremu_common + .else __ldivu: @@ -41,8 +44,9 @@ __ldivu: pop hl ret -.endif .extern __ldvrmu .endif + +.endif diff --git a/src/crt/ldivu_lremu_common.src b/src/crt/ldivu_lremu_common.src new file mode 100644 index 000000000..a5861533d --- /dev/null +++ b/src/crt/ldivu_lremu_common.src @@ -0,0 +1,86 @@ + .assume adl=1 + + .section .text + + .global __ldivu_lremu_common + .type __ldivu_lremu_common, @function + +;;; struct u32div_t { +;;; uint32_t rem; +;;; uint32_t quot; +;;; }; +;;; u32div_t _ldivu_lremu_common(uint32_t dividend, uint32_t divisor) { + +__ldivu_lremu_common: +; Similar to __ldvrmu but faster and with a different calling convention. +; Note: Uses shadow registers and disables interrupts. Interrupts are still disabled on return; zf=!IEF2 (sampled before the di) lets the caller decide whether to ei. 
+; I: EUHL=dividend, AUBC=divisor +; O: a[uhl']=EUHL%AUBC, bcu=0, b=A, c=?, euhl=EUHL/AUBC, eubc'=AUBC, zf=!IEF2 + +;;; u32div_t result; +;;; result.quot = dividend; + ; euhl : result.quot + + push bc + + ld c, a ; c = A + ld a, i ; a = I + ; pf = IEF2 + di + + ld a, c ; a = A + exx + pop bc + ld e, a ; eubc' : divisor + + push af ; keep A and pf for the closing pop bc + +;;; result.rem = 0; + xor a, a + sbc hl, hl ; auhl' : result.rem + +;;; int i = 32; + exx + ld b, 32 ; b : i + +;;; do { +.L.c.loop: + +;;; bool dividendBit = result.quot >> 31; +;;; result.quot <<= 1; + add hl, hl + rl e +;;; result.rem = (result.rem << 1) + dividendBit; + exx + adc hl, hl + adc a, a + +;;; bool quotBit = result.rem >= divisor; +;;; result.rem -= divisor; + sbc hl, bc + sbc a, e + +;;; if (!quotBit) { + jr nc, .L.c.restore_skip +;;; result.rem += divisor; + add hl, bc + adc a, e +;;; } +.L.c.restore_skip: + +;;; if (quotBit) { + exx + jr c, .L.c.skip +;;; result.quot++; + inc l +;;; } +.L.c.skip: + +;;; } while (--i != 0); + djnz .L.c.loop + +;;; return result; + pop bc ; b = A, c = flags saved by push af + bit 2, c ; zf = !pf = !IEF2 + ret +;;; } diff --git a/src/crt/ldvrmu.src b/src/crt/ldvrmu.src index 9901a72c4..f69a48a22 100644 --- a/src/crt/ldvrmu.src +++ b/src/crt/ldvrmu.src @@ -1,11 +1,7 @@ .assume adl=1 .section .text -;;; struct u32div_t { -;;; uint32_t rem; -;;; uint32_t quot; -;;; }; -;;; u32div_t _ldvrmu(uint32_t dividend, uint32_t divisor) { + .global __ldvrmu .type __ldvrmu, @function @@ -15,83 +11,6 @@ .else -; REMEMBER to also modify ldivu.src, lremu.src, and ldiv.src if you change this -.if 1 - -__ldvrmu: -; I: EUHL=dividend, AUBC=divisor -; O: a[uhl']=EUHL%AUBC, bcu=0, b=A, c=?, euhl=EUHL/AUBC, eubc'=AUBC, zf=!IEF2 - -;;; u32div_t result; -;;; result.quot = dividend; - ; euhl : result.quot - - push bc - - ld c, a ; c = A - ld a, i ; a = I - ; pf = IEF2 - di - - ld a, c ; a = A - exx - pop bc - ld e, a ; eubc' : divisor - - push af - -;;; result.rem = 0; - xor a, a - sbc hl, hl ; auhl' : result.rem - -;;; int i = 32; - exx - ld b, 32 ; b : i - -;;; do { 
-.L.c.loop: - -;;; bool dividendBit = result.quot >> 31; -;;; result.quot <<= 1; - add hl, hl - rl e -;;; result.rem = (result.rem << 1) + dividendBit; - exx - adc hl, hl - adc a, a - -;;; bool quotBit = result.rem >= divisor; -;;; result.rem -= divisor; - sbc hl, bc - sbc a, e - -;;; if (!quotBit) { - jr nc, .L.c.restore_skip -;;; result.rem += divisor; - add hl, bc - adc a, e -;;; } -.L.c.restore_skip: - -;;; if (quotBit) { - exx - jr c, .L.c.skip -;;; result.quot++; - inc l -;;; } -.L.c.skip: - -;;; } while (--i != 0); - djnz .L.c.loop - -;;; return result; - pop bc - bit 2, c - ret -;;; } - -.else - __ldvrmu: ; I: EUHL=dividend, AUBC=divisor ; O: auhl=EUHL%AUBC, euix=EUHL/AUBC, iyh=A, iyl=0 @@ -128,5 +47,3 @@ __ldvrmu: ret .endif - -.endif diff --git a/src/crt/lremu.src b/src/crt/lremu.src index a98c9cf1f..baf49d8da 100644 --- a/src/crt/lremu.src +++ b/src/crt/lremu.src @@ -10,12 +10,13 @@ .else +; REMEMBER to keep ldivu.src, lremu.src, and ldiv.src in sync with this choice .if 1 __lremu: ; I: EUHL=dividend, AUBC=divisor ; O: euhl=EUHL%AUBC - call __ldvrmu + call __ldivu_lremu_common ld e, a push de exx @@ -26,6 +27,8 @@ __lremu: ei ret + .extern __ldivu_lremu_common + .else __lremu: @@ -41,8 +44,9 @@ __lremu: pop iy pop ix ret -.endif .extern __ldvrmu .endif + +.endif diff --git a/src/libc/ldiv.src b/src/libc/ldiv.src index 8ce0fc77d..e4e6ea42f 100644 --- a/src/libc/ldiv.src +++ b/src/libc/ldiv.src @@ -4,6 +4,7 @@ .global _ldiv .type _ldiv, @function +; REMEMBER to keep ldivu.src, lremu.src, and ldiv.src in sync with this choice .if 1 _ldiv: @@ -27,7 +28,7 @@ _ldiv: bit 7, e call __ldivs_lrems_common - call __ldvrmu + call __ldivu_lremu_common exx ld e, a @@ -53,6 +54,8 @@ _ldiv: .L.ei_skip: jp (hl) + .extern __ldivu_lremu_common + .else _ldiv: @@ -111,8 +114,9 @@ _ldiv: push de jp (hl) + .extern __ldvrmu + .endif .extern __ldivs_lrems_common - .extern __ldvrmu .extern __lneg