diff options
-rw-r--r-- | src/factor-ng.c | 124 |
1 files changed, 66 insertions, 58 deletions
diff --git a/src/factor-ng.c b/src/factor-ng.c index 776aaa653..36be5e6fe 100644 --- a/src/factor-ng.c +++ b/src/factor-ng.c @@ -1743,8 +1743,10 @@ is_square (uintmax_t x) return 0; } -static const unsigned short invtab[] = +/* invtab[i] = floor(0x10000 / (0x100 + i) */ +static const unsigned short invtab[0x81] = { + 0x200, 0x1fc, 0x1f8, 0x1f4, 0x1f0, 0x1ec, 0x1e9, 0x1e5, 0x1e1, 0x1de, 0x1da, 0x1d7, 0x1d4, 0x1d0, 0x1cd, 0x1ca, 0x1c7, 0x1c3, 0x1c0, 0x1bd, 0x1ba, 0x1b7, 0x1b4, 0x1b2, 0x1af, @@ -1767,17 +1769,22 @@ static const unsigned short invtab[] = that q < 0x40; here it instead uses a table of (Euclidian) inverses. */ #define div_smallq(q, r, u, d) \ do { \ - if (0 && (u) / 0x40 < (d)) \ + if ((u) / 0x40 < (d)) \ { \ int _cnt; \ uintmax_t _dinv, _mask, _q, _r; \ count_leading_zeros (_cnt, (d)); \ - \ - _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) \ - - (1 << (8 - 1))]; \ - \ _r = (u); \ - _q = _r * _dinv >> (W_TYPE_SIZE + 8 - _cnt); \ + if (UNLIKELY (_cnt > (W_TYPE_SIZE - 8))) \ + { \ + _dinv = invtab[((d) << (_cnt + 8 - W_TYPE_SIZE)) - 0x80]; \ + _q = _dinv * _r >> (8 + W_TYPE_SIZE - _cnt); \ + } \ + else \ + { \ + _dinv = invtab[((d) >> (W_TYPE_SIZE - 8 - _cnt)) - 0x7f]; \ + _q = _dinv * (_r >> (W_TYPE_SIZE - 3 - _cnt)) >> 11; \ + } \ _r -= _q*(d); \ \ _mask = -(uintmax_t) (_r >= (d)); \ @@ -1837,8 +1844,9 @@ static unsigned int q_freq[Q_FREQ_SIZE + 1]; # define MIN(a,b) ((a) < (b) ? (a) : (b)) #endif - -static void +/* Return true on success. Expected to fail only for numbers + >= 2^{2*W_TYPE_SIZE - 2}, or close to that limit. */ +static bool factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors) { /* Uses algorithm and notation from @@ -1858,35 +1866,41 @@ factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors) 1155, 15, 231, 35, 3, 55, 7, 11, 0 }; - uintmax_t S; const unsigned int *m; struct { uintmax_t Q; uintmax_t P; } queue[QUEUE_SIZE]; - S = isqrt2 (n1, n0); + if (n1 >= ((uintmax_t) 1 << (W_TYPE_SIZE - 2))) + return false; + + uintmax_t sqrt_n = isqrt2 (n1, n0); - if (n0 == S * S) + if (n0 == sqrt_n * sqrt_n) { uintmax_t p1, p0; - umul_ppmm (p1, p0, S, S); + umul_ppmm (p1, p0, sqrt_n, sqrt_n); assert (p0 == n0); if (n1 == p1) { - if (prime_p (S)) - factor_insert_multiplicity (factors, S, 2); + if (prime_p (sqrt_n)) + factor_insert_multiplicity (factors, sqrt_n, 2); else { struct factors f; f.nfactors = 0; - factor_using_squfof (0, S, &f); + if (!factor_using_squfof (0, sqrt_n, &f)) + { + /* Try pollard rho instead */ + factor_using_pollard_rho (sqrt_n, 1, &f); + } /* Duplicate the new factors */ for (unsigned int i = 0; i < f.nfactors; i++) factor_insert_multiplicity (factors, f.p[i], 2*f.e[i]); } - return; + return true; } } @@ -1925,6 +1939,7 @@ factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors) Dh += n1 * mu; assert (Dl % 4 != 1); + assert (Dh < (uintmax_t) 1 << (W_TYPE_SIZE - 2)); S = isqrt2 (Dh, Dl); @@ -1943,10 +1958,10 @@ factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors) for (i = 0; i <= B; i++) { - uintmax_t q, P1, t, r; + uintmax_t q, P1, t, rem; - div_smallq (q, r, S+P, Q); - P1 = S - r; /* P1 = q*Q - P */ + div_smallq (q, rem, S+P, Q); + P1 = S - rem; /* P1 = q*Q - P */ #if STAT_SQUFOF assert (q > 0); @@ -2025,29 +2040,25 @@ factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors) for the case D = 2N. */ /* Compute Q = (D - P*P) / Q1, but we need double precision. */ - { - uintmax_t hi, lo, rem; - umul_ppmm (hi, lo, P, P); - sub_ddmmss (hi, lo, Dh, Dl, hi, lo); - udiv_qrnnd (Q, rem, hi, lo, Q1); - assert (rem == 0); - } + uintmax_t hi, lo; + umul_ppmm (hi, lo, P, P); + sub_ddmmss (hi, lo, Dh, Dl, hi, lo); + udiv_qrnnd (Q, rem, hi, lo, Q1); + assert (rem == 0); for (;;) { - uintmax_t r; - /* Note: There appears to by a typo in the paper, Step 4a in the algorithm description says q <-- floor([S+P]/\hat Q), but looking at the equations in Sec. 3.1, it should be q <-- floor([S+P] / Q). (In this code, \hat Q is Q1). */ - div_smallq (q, r, S+P, Q); - P1 = S - r; /* P1 = q*Q - P */ + div_smallq (q, rem, S+P, Q); + P1 = S - rem; /* P1 = q*Q - P */ #if STAT_SQUFOF q_freq[0]++; - q_freq[MIN(q, Q_FREQ_SIZE)]++; + q_freq[MIN (q, Q_FREQ_SIZE)]++; #endif if (P == P1) break; @@ -2065,8 +2076,8 @@ factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors) if (prime_p (Q)) factor_insert (factors, Q); - else - factor_using_squfof (0, Q, factors); + else if (!factor_using_squfof (0, Q, factors)) + factor_using_pollard_rho (Q, 2, factors); divexact_21 (n1, n0, n1, n0, Q); @@ -2074,13 +2085,16 @@ factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors) factor_insert_large (factors, n1, n0); else { - if (n1 == 0) - factor_using_pollard_rho (n0, 1, factors); - else - factor_using_squfof (n1, n0, factors); + if (!factor_using_squfof (n1, n0, factors)) + { + if (n1 == 0) + factor_using_pollard_rho (n0, 1, factors); + else + factor_using_pollard_rho2 (n1, n0, 1, factors); + } } - return; + return true; } } next_i: @@ -2089,8 +2103,7 @@ factor_using_squfof (uintmax_t n1, uintmax_t n0, struct factors *factors) next_multiplier: ; } - fprintf (stderr, "squfof failed.\n"); - exit (EXIT_FAILURE); + return false; } static void @@ -2104,26 +2117,21 @@ factor (uintmax_t t1, uintmax_t t0, struct factors *factors) t0 = factor_using_division (&t1, t1, t0, factors); - if (t1 == 0) - { - if (t0 != 1) - { - if (prime_p (t0)) - factor_insert (factors, t0); - else if (alg == ALG_POLLARD_RHO) - factor_using_pollard_rho (t0, 1, factors); - else - factor_using_squfof (0, t0, factors); - } - } + if (t1 == 0 && t0 < 2) + return; + + if (prime2_p (t1, t0)) + factor_insert_large (factors, t1, t0); else { - if (prime2_p (t1, t0)) - factor_insert_large (factors, t1, t0); - else if (alg == ALG_POLLARD_RHO) - factor_using_pollard_rho2 (t1, t0, 1, factors); + if (alg == ALG_SQUFOF) + if (factor_using_squfof (t1, t0, factors)) + return; + + if (t1 == 0) + factor_using_pollard_rho (t0, 1, factors); else - factor_using_squfof (t1, t0, factors); + factor_using_pollard_rho2 (t1, t0, 1, factors); } } |