1 /* $XConsortium: arith.c,v 1.4 94/03/22 19:08:54 gildea Exp $ */
2 /* Copyright International Business Machines, Corp. 1991
4 * Copyright Lexmark International, Inc. 1991
7 * License to use, copy, modify, and distribute this software and its
8 * documentation for any purpose and without fee is hereby granted,
9 * provided that the above copyright notice appear in all copies and that
10 * both that copyright notice and this permission notice appear in
11 * supporting documentation, and that the name of IBM or Lexmark not be
12 * used in advertising or publicity pertaining to distribution of the
13 * software without specific, written prior permission.
15 * IBM AND LEXMARK PROVIDE THIS SOFTWARE "AS IS", WITHOUT ANY WARRANTIES OF
16 * ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO ANY
17 * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,
18 * AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. THE ENTIRE RISK AS TO THE
19 * QUALITY AND PERFORMANCE OF THE SOFTWARE, INCLUDING ANY DUTY TO SUPPORT
20 * OR MAINTAIN, BELONGS TO THE LICENSEE. SHOULD ANY PORTION OF THE
21 * SOFTWARE PROVE DEFECTIVE, THE LICENSEE (NOT IBM OR LEXMARK) ASSUMES THE
22 * ENTIRE COST OF ALL SERVICING, REPAIR AND CORRECTION. IN NO EVENT SHALL
23 * IBM OR LEXMARK BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
24 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
25 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
26 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
29 /* ARITH CWEB V0006 ******** */
31 :h1.ARITH Module - Portable Module for Multiple Precision Fixed Point Arithmetic
33 This module provides division and multiplication of 64-bit fixed point
34 numbers. (To be more precise, the module works on numbers that take
35 two 'longs' to store. That is almost always equivalent to saying 64-bit
38 Note: it is frequently easy and desirable to recode these functions in
39 assembly language for the particular processor being used, because
40 assembly language, unlike C, will have 64-bit multiply products and
41 64-bit dividends. This module is offered as a portable version.
43 &author. Jeffrey B. Lotspiech (lotspiech@almaden.ibm.com) and Sten F. Andler
48 The included files are:
58 /*SHARED LINE(S) ORIGINATED HERE*/
60 Reference for all algorithms: Donald E. Knuth, "The Art of Computer
61 Programming, Volume 2, Semi-Numerical Algorithms," Addison-Wesley Co.,
62 Massachusetts, 1969, pp. 229-279.
64 Knuth talks about a 'digit' being an arbitrary sized unit and a number
65 being a sequence of digits. We'll take a digit to be a 'short'.
66 The following assumption must be valid for these algorithms to work:
68 :li.A 'long' is two 'short's.
70 The following code is INDEPENDENT of:
72 :li.The actual size of a short.
73 :li.Whether shorts and longs are stored most significant byte
74 first or least significant byte first.
77 SHORTSIZE is the number of bits in a short; LONGSIZE is the number of
78 bits in a long; MAXSHORT is the maximum unsigned short:
80 /*SHARED LINE(S) ORIGINATED HERE*/
82 ASSEMBLE concatenates two shorts to form a long:
84 #define ASSEMBLE(hi,lo) ((((unsigned long)hi)<<SHORTSIZE)+(lo))
86 HIGHDIGIT extracts the most significant short from a long; LOWDIGIT
87 extracts the least significant short from a long:
89 #define HIGHDIGIT(u) ((u)>>SHORTSIZE)
90 #define LOWDIGIT(u) ((u)&MAXSHORT)
93 SIGNBITON tests the high order bit of a long 'w':
95 #define SIGNBITON(w) (((long)w)<0)
97 /*SHARED LINE(S) ORIGINATED HERE*/
100 :h2.Double Long Arithmetic
102 :h3.DLmult() - Multiply Two Longs to Yield a Double Long
104 The two multiplicands must be positive.
107 void DLmult(product, u, v)
108 register doublelong *product;
109 register unsigned long u;
110 register unsigned long v;
113 /* printf("DLmult(? ?, %lx, %lx)\n", u, v); */
115 /* printf("DLmult returns %lx\n", *product); */
117 register unsigned long u1, u2; /* the digits of u */
118 register unsigned long v1, v2; /* the digits of v */
119 register unsigned int w1, w2, w3, w4; /* the digits of w */
120 register unsigned long t; /* temporary variable */
121 /* printf("DLmult(? ?, %x, %x)\n", u, v); */
127 if (v2 == 0) w4 = w3 = w2 = 0;
132 t = u1 * v2 + HIGHDIGIT(t);
142 t = u1 * v1 + w2 + HIGHDIGIT(t);
147 product->high = ASSEMBLE(w1, w2);
148 product->low = ASSEMBLE(w3, w4);
149 #endif /* LONG64 else */
153 :h2.DLdiv() - Divide Two Longs by One Long, Yielding Two Longs
155 Both the dividend and the divisor must be positive.
158 void DLdiv(quotient, divisor)
159 doublelong *quotient; /* also where dividend is, originally */
160 unsigned long divisor;
163 /* printf("DLdiv(%lx %lx)\n", quotient, divisor); */
164 *quotient /= divisor;
165 /* printf("DLdiv returns %lx\n", *quotient); */
167 register unsigned long u1u2 = quotient->high;
168 register unsigned long u3u4 = quotient->low;
169 register long u3; /* single digit of dividend */
170 register int v1,v2; /* divisor in registers */
171 register long t; /* signed copy of u1u2 */
172 register int qhat; /* guess at the quotient digit */
173 register unsigned long q3q4; /* low two digits of quotient */
174 register int shift; /* holds the shift value for normalizing */
175 register int j; /* loop variable */
177 /* printf("DLdiv(%x %x, %x)\n", quotient->high, quotient->low, divisor); */
179 * Knuth's algorithm works if the dividend is smaller than the
180 * divisor. We can get to that state quickly:
182 if (u1u2 >= divisor) {
183 quotient->high = u1u2 / divisor;
189 if (divisor <= MAXSHORT) {
192 * This is the case where the divisor is contained in one
193 * 'short'. It is worthwhile making this fast:
195 u1u2 = ASSEMBLE(u1u2, HIGHDIGIT(u3u4));
196 q3q4 = u1u2 / divisor;
198 u1u2 = ASSEMBLE(u1u2, LOWDIGIT(u3u4));
199 quotient->low = ASSEMBLE(q3q4, u1u2 / divisor);
205 * At this point the divisor is a true 'long' so we must use
208 * Step D1: Normalize divisor and dividend (this makes our 'qhat'
209 * guesses more accurate):
211 for (shift=0; !SIGNBITON(divisor); shift++, divisor <<= 1) { ; }
215 if ((u1u2 >> (LONGSIZE - shift)) != 0 && shift != 0)
216 abort("DLdiv: dividend too large");
217 u1u2 = (u1u2 << shift) + ((shift == 0) ? 0 : u3u4 >> (LONGSIZE - shift));
221 * Step D2: Begin Loop through digits, dividing u1,u2,u3 by v1,v2,
222 * then shifting U left by 1 digit:
224 v1 = HIGHDIGIT(divisor);
225 v2 = LOWDIGIT(divisor);
227 u3 = HIGHDIGIT(u3u4);
229 for (j=0; j < 2; j++) {
232 * Step D3: make a guess (qhat) at the next quotient denominator:
234 qhat = (HIGHDIGIT(u1u2) == v1) ? MAXSHORT : u1u2 / v1;
236 * At this point Knuth would have us further refine our
237 * guess, since we know qhat is too big if
239 * v2 * qhat > ASSEMBLE(u1u2 % v, u3)
241 * That would make sense if u1u2 % v was easy to find, as it
242 * would be in assembly language. I ignore this step, and
243 * repeat step D6 if qhat is too big.
247 * Step D4: Multiply v1,v2 times qhat and subtract it from
252 * The high digit of u3 now contains the "borrow" for the
253 * rest of the substraction from u1,u2.
254 * Sometimes we can lose the sign bit with the above.
255 * If so, we have to force the high digit negative:
259 t |= -1 << SHORTSIZE;
260 t += u1u2 - qhat * v1;
261 /* printf("..>divide step qhat=%x t=%x u3=%x u1u2=%x v1=%x v2=%x\n",
262 qhat, t, u3, u1u2, v1, v2); */
263 while (t < 0) { /* Test is Step D5. */
266 * D6: Oops, qhat was too big. Add back in v1,v2 and
267 * decrease qhat by 1:
269 u3 = LOWDIGIT(u3) + v2;
270 t += HIGHDIGIT(u3) + v1;
272 /* printf("..>>qhat correction t=%x u3=%x qhat=%x\n", t, u3, qhat); */
275 * Step D7: shift U left one digit and loop:
278 if (HIGHDIGIT(u1u2) != 0)
279 abort("divide algorithm error");
280 u1u2 = ASSEMBLE(u1u2, LOWDIGIT(u3));
282 q3q4 = ASSEMBLE(q3q4, qhat);
284 quotient->low = q3q4;
285 /* printf("DLdiv returns %x %x\n", quotient->high, quotient->low); */
291 :h3.DLadd() - Add Two Double Longs
293 In this case, the doublelongs may be signed. The algorithm takes the
294 piecewise sum of the high and low longs, with the possibility that the
295 high should be incremented if there is a carry out of the low. How to
296 tell if there is a carry? Alex Harbury suggested that if the sum of
297 the lows is less than the max of the lows, there must have been a
298 carry. Conversely, if there was a carry, the sum of the lows must be
299 less than the max of the lows. So, the test is "if and only if".
303 doublelong *u; /* u = u + v */
307 /* printf("DLadd(%lx %lx)\n", *u, *v); */
309 /* printf("DLadd returns %lx\n", *u); */
311 register unsigned long lowmax = MAX(u->low, v->low);
313 /* printf("DLadd(%x %x, %x %x)\n", u->high, u->low, v->high, v->low); */
321 :h3.DLsub() - Subtract Two Double Longs
323 Testing for a borrow is even easier. If the v.low is greater than
324 u.low, there must be a borrow.
328 doublelong *u; /* u = u - v */
332 /* printf("DLsub(%lx %lx)\n", *u, *v); */
334 /* printf("DLsub returns %lx\n", *u); */
336 /* printf("DLsub(%x %x, %x %x)\n", u->high, u->low, v->high, v->low);*/
344 :h3.DLrightshift() - Macro to Shift Double Long Right by N
347 /*SHARED LINE(S) ORIGINATED HERE*/
350 :h2.Fractional Pel Arithmetic
353 :h3.FPmult() - Multiply Two Fractional Pel Values
355 This funtion first calculates w = u * v to "doublelong" precision.
356 It then shifts w right by FRACTBITS bits, and checks that no
357 overflow will occur when the resulting value is passed back as
361 fractpel FPmult(u, v)
362 register fractpel u,v;
365 register int negative = FALSE; /* sign flag */
367 register fractpel ret;
370 if ((u == 0) || (v == 0)) return (0);
373 if (u < 0) {u = -u; negative = TRUE;}
374 if (v < 0) {v = -v; negative = !negative;}
376 if (u == TOFRACTPEL(1)) return ((negative) ? -v : v);
377 if (v == TOFRACTPEL(1)) return ((negative) ? -u : u);
380 DLrightshift(w, FRACTBITS);
382 if (w.high != 0 || SIGNBITON(w.low)) {
383 IfTrace2(TRUE,"FPmult: overflow, %px%p\n", u, v);
384 w.low = TOFRACTPEL(MAXSHORT);
387 return ((negative) ? -w.low : w.low);
389 if (w & 0xffffffff80000000L ) {
390 IfTrace2(TRUE,"FPmult: overflow, %px%p\n", u, v);
391 ret = TOFRACTPEL(MAXSHORT);
396 return ((negative) ? -ret : ret);
401 :h3.FPdiv() - Divide Two Fractional Pel Values
403 These values may be signed. The function returns the quotient.
406 fractpel FPdiv(dividend, divisor)
407 register fractpel dividend;
408 register fractpel divisor;
410 doublelong w; /* result will be built here */
411 int negative = FALSE; /* flag for sign bit */
413 register fractpel ret;
417 dividend = -dividend;
422 negative = !negative;
425 w.low = dividend << FRACTBITS;
426 w.high = dividend >> (LONGSIZE - FRACTBITS);
428 if (w.high != 0 || SIGNBITON(w.low)) {
429 IfTrace2(TRUE,"FPdiv: overflow, %p/%p\n", dividend, divisor);
430 w.low = TOFRACTPEL(MAXSHORT);
432 return( (negative) ? -w.low : w.low);
434 w = ((long)dividend) << FRACTBITS;
436 if (w & 0xffffffff80000000L ) {
437 IfTrace2(TRUE,"FPdiv: overflow, %p/%p\n", dividend, divisor);
438 ret = TOFRACTPEL(MAXSHORT);
442 return( (negative) ? -ret : ret);
447 :h3.FPstarslash() - Multiply then Divide
449 Borrowing a chapter from the language Forth, it is useful to define
450 an operator that first multiplies by one constant then divides by
451 another, keeping the intermediate result in extended precision.
454 fractpel FPstarslash(a, b, c)
455 register fractpel a,b,c; /* result = a * b / c */
457 doublelong w; /* result will be built here */
458 int negative = FALSE;
460 register fractpel ret;
463 if (a < 0) { a = -a; negative = TRUE; }
464 if (b < 0) { b = -b; negative = !negative; }
465 if (c < 0) { c = -c; negative = !negative; }
470 if (w.high != 0 || SIGNBITON(w.low)) {
471 IfTrace3(TRUE,"FPstarslash: overflow, %p*%p/%p\n", a, b, c);
472 w.low = TOFRACTPEL(MAXSHORT);
474 return((negative) ? -w.low : w.low);
476 if (w & 0xffffffff80000000L ) {
477 IfTrace3(TRUE,"FPstarslash: overflow, %p*%p/%p\n", a, b, c);
478 ret = TOFRACTPEL(MAXSHORT);
482 return( (negative) ? -ret : ret);