git.sesse.net Git - rdpsrv/blob - Xserver/programs/Xserver/mfb/maskbits.h

   1 /* Combined Purdue/PurduePlus patches, level 2.1, 1/24/89 */
   2 /***********************************************************
   3 Copyright 1987 by Digital Equipment Corporation, Maynard, Massachusetts.
   4
   5                         All Rights Reserved
   6
   7 Permission to use, copy, modify, and distribute this software and its
   8 documentation for any purpose and without fee is hereby granted,
   9 provided that the above copyright notice appear in all copies and that
  10 both that copyright notice and this permission notice appear in
  11 supporting documentation, and that the name of Digital not be
  12 used in advertising or publicity pertaining to distribution of the
  13 software without specific, written prior permission.
  14
  15 DIGITAL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
  16 ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
  17 DIGITAL BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
  18 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
  19 WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
  20 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
  21 SOFTWARE.
  22
  23 ******************************************************************/
  24 /* $XConsortium: maskbits.h,v 1.33 94/04/17 20:28:13 dpw Exp $ */
  25 /* $XFree86: xc/programs/Xserver/mfb/maskbits.h,v 3.3 1996/12/09 11:56:33 dawes Exp $ */
  26 #include "X.h"
  27 #include "Xmd.h"
  28 #include "servermd.h"
  29
  30
  31 /* the following notes use the following conventions:
  32 SCREEN LEFT                             SCREEN RIGHT
  33 in this file and maskbits.c, left and right refer to screen coordinates,
  34 NOT bit numbering in registers.
  35
  36 starttab[n]
  37         bits[0,n-1] = 0 bits[n,PLST] = 1
  38 endtab[n] =
  39         bits[0,n-1] = 1 bits[n,PLST] = 0
  40
  41 startpartial[], endpartial[]
  42         these are used as accelerators for doing putbits and masking out
  43 bits that are all contained between longword boundaries.  the extra
  44 256 bytes of data seems a small price to pay -- code is smaller,
  45 and narrow things (e.g. window borders) go faster.
  46
  47 the names may seem misleading; they are derived not from which end
  48 of the word the bits are turned on, but at which end of a scanline
  49 the table tends to be used.
  50
  51 look at the tables and macros to understand boundary conditions.
  52 (careful readers will note that starttab[n] = ~endtab[n] for n != 0)
  53
  54 -----------------------------------------------------------------------
  55 these two macros depend on the screen's bit ordering.
  56 in both of them x is a screen position.  they are used to
  57 combine bits collected from multiple longwords into a
  58 single destination longword, and to unpack a single
  59 source longword into multiple destinations.
  60
  61 SCRLEFT(dst, x)
  62         takes dst[x, PPW] and moves them to dst[0, PPW-x]
  63         the contents of the rest of dst are 0.
  64         this is a right shift on LSBFirst (forward-thinking)
  65         machines like the VAX, and left shift on MSBFirst
  66         (backwards) machines like the 680x0 and pc/rt.
  67
  68 SCRRIGHT(dst, x)
  69         takes dst[0,x] and moves them to dst[PPW-x, PPW]
  70         the contents of the rest of dst are 0.
  71         this is a left shift on LSBFirst, right shift
  72         on MSBFirst.
  73
  74
  75 the remaining macros are cpu-independent; all bit order dependencies
  76 are built into the tables and the two macros above.
  77
  78 maskbits(x, w, startmask, endmask, nlw)
  79         for a span of width w starting at position x, returns
  80 a mask for ragged bits at start, mask for ragged bits at end,
  81 and the number of whole longwords between the ends.
  82
  83 maskpartialbits(x, w, mask)
  84         works like maskbits(), except all the bits are in the
  85         same longword (i.e. ((x&PIM) + w) <= PPW)
  86
  87 maskPPWbits(x, w, startmask, endmask)
  88         as maskbits, but does not calculate nlw.  it is used by
  89         mfbGlyphBlt to put down glyphs <= PPW bits wide.
  90
  91 -------------------------------------------------------------------
  92
  93 NOTE
  94         any pointers passed to the following 4 macros are
  95         guaranteed to be PPW-bit aligned.
  96         The only non-PPW-bit-aligned references ever made are
  97         to font glyphs, and those are made with getleftbits()
  98         and getshiftedleftbits (qq.v.)
  99
 100         For 64-bit server, it is assumed that we will never have font padding
 101         of more than 4 bytes. The code uses int's to access the fonts
 102         instead of longs.
 103
 104 getbits(psrc, x, w, dst)
 105         starting at position x in psrc (x < PPW), collect w
 106         bits and put them in the screen left portion of dst.
 107         psrc is a longword pointer.  this may span longword boundaries.
 108         it special-cases fetching all w bits from one longword.
 109
 110         +--------+--------+             +--------+
 111         |    | m |n|      |     ==>     | m |n|  |
 112         +--------+--------+             +--------+
 113             x      x+w                  0     w
 114         psrc     psrc+1                 dst
 115                         m = PPW - x
 116                         n = w - m
 117
 118         implementation:
 119         get m bits, move to screen-left of dst, zeroing rest of dst;
 120         get n bits from next word, move screen-right by m, zeroing
 121                  lower m bits of word.
 122         OR the two things together.
 123
 124 putbits(src, x, w, pdst)
 125         starting at position x in pdst, put down the screen-leftmost
 126         w bits of src.  pdst is a longword pointer.  this may
 127         span longword boundaries.
 128         it special-cases putting all w bits into the same longword.
 129
 130         +--------+                      +--------+--------+
 131         | m |n|  |              ==>     |    | m |n|      |
 132         +--------+                      +--------+--------+
 133         0     w                              x     x+w
 134         dst                             pdst     pdst+1
 135                         m = PPW - x
 136                         n = w - m
 137
 138         implementation:
 139         get m bits, shift screen-right by x, zero screen-leftmost x
 140                 bits; zero rightmost m bits of *pdst and OR in stuff
 141                 from before the semicolon.
 142         shift src screen-left by m, zero bits n-PPW;
 143                 zero leftmost n bits of *(pdst+1) and OR in the
 144                 stuff from before the semicolon.
 145
 146 putbitsrop(src, x, w, pdst, ROP)
 147         like putbits but calls DoRop with the rasterop ROP (see mfb.h for
 148         DoRop)
 149
 150 putbitsrrop(src, x, w, pdst, ROP)
 151         like putbits but calls DoRRop with the reduced rasterop ROP
 152         (see mfb.h for DoRRop)
 153
 154 -----------------------------------------------------------------------
 155         The two macros below are used only for getting bits from glyphs
 156 in fonts, and glyphs in fonts are gotten only with the following two
 157 macros.
 158         You should tune these macros to your font format and cpu
 159 byte ordering.
 160
 161 NOTE
 162 getleftbits(psrc, w, dst)
 163         get the leftmost w (w<=32) bits from *psrc and put them
 164         in dst.  this is used by the mfbGlyphBlt code for glyphs
 165         <=PPW bits wide.
 166         psrc is declared (unsigned char *)
 167
 168         psrc is NOT guaranteed to be PPW-bit aligned.  on  many
 169         machines this will cause problems, so there are several
 170         versions of this macro.
 171
 172         this macro is called ONLY for getting bits from font glyphs,
 173         and depends on the server-natural font padding.
 174
 175         for blazing text performance, you want this macro
 176         to touch memory as infrequently as possible (e.g.
 177         fetch longwords) and as efficiently as possible
 178         (e.g. don't fetch misaligned longwords)
 179
 180 getshiftedleftbits(psrc, offset, w, dst)
 181         used by the font code; like getleftbits, but shifts the
 182         bits SCRLEFT by offset.
 183         this is implemented portably, calling getleftbits()
 184         and SCRLEFT().
 185         psrc is declared (unsigned char *).
 186 */
 187
 188 /* to match CFB and allow algorithm sharing ...
 189  * name    mfb32  mfb64  explanation
 190  * ----    ------ -----  -----------
 191  * PGSZ    32      64    pixel group size (in bits; same as PPW for mfb)
 192  * PGSZB    4      8     pixel group size (in bytes)
 193  * PPW     32     64     pixels per word (pixels per pixel group)
 194  * PLST    31     63     index of last pixel in a word (should be PPW-1)
 195  * PIM     0x1f   0x3f   pixel index mask (index within a pixel group)
 196  * PWSH    5       6     pixel-to-word shift (should be log2(PPW))
 197  *
 198  * The MFB_ versions are here so that cfb can include maskbits.h to get
 199  * the bitmap constants without conflicting with its own P* constants.
 200  */
 201
 202 /* warning: PixelType definition duplicated in mfb.h */
 203 #ifndef PixelType
 204 #define PixelType unsigned long
 205 #endif /* PixelType */
 206
 207 #ifdef LONG64
 208 #define MFB_PGSZB 8
 209 #else
 210 #define MFB_PGSZB 4
 211 #endif /* LONG64 */
 212 #define MFB_PPW         (MFB_PGSZB<<3) /* assuming 8 bits per byte */
 213 #define MFB_PGSZ        MFB_PPW
 214 #define MFB_PLST        (MFB_PPW-1)
 215 #define MFB_PIM         MFB_PLST
 216
 217 /* set PWSH = log2(PPW) using brute force */
 218
 219 #if MFB_PPW == 32
 220 #define MFB_PWSH 5
 221 #else
 222 #if MFB_PPW == 64
 223 #define MFB_PWSH 6
 224 #endif /* MFB_PPW == 64 */
 225 #endif /* MFB_PPW == 32 */
 226
 227 extern PixelType starttab[];
 228 extern PixelType endtab[];
 229 extern PixelType partmasks[MFB_PPW][MFB_PPW];
 230 extern PixelType rmask[];
 231 extern PixelType mask[];
 232
 233 #ifndef MFB_CONSTS_ONLY
 234
 235 #define PGSZB   MFB_PGSZB
 236 #define PPW     MFB_PPW
 237 #define PGSZ    MFB_PGSZ
 238 #define PLST    MFB_PLST
 239 #define PIM     MFB_PIM
 240 #define PWSH    MFB_PWSH
 241
 242 #define BitLeft(b,s)    SCRLEFT(b,s)
 243 #define BitRight(b,s)   SCRRIGHT(b,s)
 244
 245 #if (BITMAP_BIT_ORDER == IMAGE_BYTE_ORDER)
 246 #define LONG2CHARS(x) ((unsigned long)(x))
 247 #else
 248 /*
 249  *  the unsigned case below is for compilers like
 250  *  the Danbury C and i386cc
 251  */
 252 #if PPW == 32
 253 #define LONG2CHARS( x ) ( ( ( ( x ) & (unsigned long)0x000000FF ) << 0x18 ) \
 254                         | ( ( ( x ) & (unsigned long)0x0000FF00 ) << 0x08 ) \
 255                         | ( ( ( x ) & (unsigned long)0x00FF0000 ) >> 0x08 ) \
 256                         | ( ( ( x ) & (unsigned long)0xFF000000 ) >> 0x18 ) )
 257 #else /* PPW == 64 */
 258 #if defined( __alpha__)
 259 #define LONG2CHARS( x ) \
 260       ( ( ( ( x ) & 0x000000FFUL) << 0x38 ) \
 261       | ( ( ( x ) & 0x0000FF00UL) << 0x28 ) \
 262       | ( ( ( x ) & 0x00FF0000UL) << 0x18 ) \
 263       | ( ( ( x ) & 0xFF000000UL) << 0x08 ) \
 264       | ( ( ( x ) & 0x000000FF00000000UL) >> 0x08 ) \
 265       | ( ( ( x ) & 0x0000FF0000000000UL) >> 0x18 ) \
 266       | ( ( ( x ) & 0x00FF000000000000UL) >> 0x28 ) \
 267       | ( ( ( x ) & 0xFF00000000000000UL) >> 0x38 ) )
 268 #else /* __alpha__ */
 269 #define LONG2CHARS( x ) ( ( ( ( x ) & 0x000000FF000000FFUL) << 0x18 ) \
 270                         | ( ( ( x ) & 0x0000FF000000FF00UL) << 0x08 ) \
 271                         | ( ( ( x ) & 0x00FF000000FF0000UL) >> 0x08 ) \
 272                         | ( ( ( x ) & 0xFF000000FF000000UL) >> 0x18 ) )
 273 #endif /* __alpha__ */
 274 #endif /* PPW */
 275 #endif /* BITMAP_BIT_ORDER */
 276
 277 #ifdef STRICT_ANSI_SHIFT
 278 #define SHL(x,y)    ((y) >= PPW ? 0 : LONG2CHARS(LONG2CHARS(x) << (y)))
 279 #define SHR(x,y)    ((y) >= PPW ? 0 : LONG2CHARS(LONG2CHARS(x) >> (y)))
 280 #else
 281 #define SHL(x,y)    LONG2CHARS(LONG2CHARS(x) << (y))
 282 #define SHR(x,y)    LONG2CHARS(LONG2CHARS(x) >> (y))
 283 #endif
 284
 285 #if (BITMAP_BIT_ORDER == MSBFirst)      /* pc/rt, 680x0 */
 286 #define SCRLEFT(lw, n)  SHL((PixelType)(lw),(n))
 287 #define SCRRIGHT(lw, n) SHR((PixelType)(lw),(n))
 288 #else                                   /* vax, intel */
 289 #define SCRLEFT(lw, n)  SHR((PixelType)(lw),(n))
 290 #define SCRRIGHT(lw, n) SHL((PixelType)(lw),(n))
 291 #endif
 292
 293 #define DoRRop(alu, src, dst) \
 294 (((alu) == RROP_BLACK) ? ((dst) & ~(src)) : \
 295  ((alu) == RROP_WHITE) ? ((dst) | (src)) : \
 296  ((alu) == RROP_INVERT) ? ((dst) ^ (src)) : \
 297   (dst))
 298
 299 #if PPW == 32
 300 /* A generalized form of a x4 Duff's Device */
 301 #define Duff(counter, block) { \
 302   while (counter >= 4) {\
 303      { block; } \
 304      { block; } \
 305      { block; } \
 306      { block; } \
 307      counter -= 4; \
 308   } \
 309      switch (counter & 3) { \
 310      case 3:    { block; } \
 311      case 2:    { block; } \
 312      case 1:    { block; } \
 313      case 0: \
 314      counter = 0; \
 315    } \
 316 }
 317 #else /* PPW == 64 */
 318 /* A generalized form of a x8 Duff's Device */
 319 #define Duff(counter, block) { \
 320   while (counter >= 8) {\
 321      { block; } \
 322      { block; } \
 323      { block; } \
 324      { block; } \
 325      { block; } \
 326      { block; } \
 327      { block; } \
 328      { block; } \
 329      counter -= 8; \
 330   } \
 331      switch (counter & 7) { \
 332      case 7:    { block; } \
 333      case 6:    { block; } \
 334      case 5:    { block; } \
 335      case 4:    { block; } \
 336      case 3:    { block; } \
 337      case 2:    { block; } \
 338      case 1:    { block; } \
 339      case 0: \
 340      counter = 0; \
 341    } \
 342 }
 343 #endif /* PPW */
 344
 345
 346 #define maskbits(x, w, startmask, endmask, nlw) \
 347     startmask = starttab[(x) & PIM]; \
 348     endmask = endtab[((x)+(w)) & PIM]; \
 349     if (startmask) \
 350         nlw = (((w) - (PPW - ((x) & PIM))) >> PWSH); \
 351     else \
 352         nlw = (w) >> PWSH;
 353
 354 #define maskpartialbits(x, w, mask) \
 355     mask = partmasks[(x) & PIM][(w) & PIM];
 356
 357 #define maskPPWbits(x, w, startmask, endmask) \
 358     startmask = starttab[(x) & PIM]; \
 359     endmask = endtab[((x)+(w)) & PIM];
 360
/*
 * Bitfield-instruction versions of getbits/putbits, available only when
 * compiling with GNU C (inline asm) for vax or mc68020.  When neither
 * applies, FASTGETBITS/FASTPUTBITS stay undefined and the portable
 * fallbacks later in this file are used.
 */
#ifdef __GNUC__ /* XXX don't want for Alpha? */
#ifdef vax
/* vax: extract a w-bit field at bit x of the memory at psrc into dst */
#define FASTGETBITS(psrc,x,w,dst) \
    __asm ("extzv %1,%2,%3,%0" \
         : "=g" (dst) \
         : "g" (x), "g" (w), "m" (*(char *)(psrc)))
#define getbits(psrc,x,w,dst) FASTGETBITS(psrc,x,w,dst)

/* vax: insert src as a w-bit field at bit x of the memory at pdst */
#define FASTPUTBITS(src, x, w, pdst) \
    __asm ("insv %3,%1,%2,%0" \
         : "=m" (*(char *)(pdst)) \
         : "g" (x), "g" (w), "g" (src))
#define putbits(src, x, w, pdst) FASTPUTBITS(src, x, w, pdst)
#endif /* vax */
#ifdef mc68020
/* 68020: bfextu extracts the field {x:w} of the memory at psrc into dst */
#define FASTGETBITS(psrc, x, w, dst) \
    __asm ("bfextu %3{%1:%2},%0" \
    : "=d" (dst) : "di" (x), "di" (w), "o" (*(char *)(psrc)))

/* getbits = raw extract followed by a left shift of dst by 32-w */
#define getbits(psrc,x,w,dst) \
{ \
    FASTGETBITS(psrc, x, w, dst);\
    dst = SHL(dst,(32-(w))); \
}

/* 68020: bfins inserts src into the field {x:w} of the memory at pdst;
 * the "0" input operand ties the old contents of *pdst to the output */
#define FASTPUTBITS(src, x, w, pdst) \
    __asm ("bfins %3,%0{%1:%2}" \
         : "=o" (*(char *)(pdst)) \
         : "di" (x), "di" (w), "d" (src), "0" (*(char *) (pdst)))

/* putbits = right shift of src by 32-w, then the raw insert */
#define putbits(src, x, w, pdst) FASTPUTBITS(SHR((src),32-(w)), x, w, pdst)

#endif /* mc68020 */
#endif /* __GNUC__ */
 395
 396 /*  The following flag is used to override a bugfix for sun 3/60+CG4 machines,
 397  */
 398
 399 /*  We don't need to be careful about this unless we're dealing with sun3's
 400  *  We will default its usage for those who do not know anything, but will
 401  *  override its effect if the machine doesn't look like a sun3
 402  */
 403 #if !defined(mc68020) || !defined(sun)
 404 #define NO_3_60_CG4
 405 #endif
 406
 407 /* This is gross.  We want to #define u_putbits as something which can be used
 408  * in the case of the 3/60+CG4, but if we use /bin/cc or are on another
 409  * machine type, we want nothing to do with u_putbits.  What a hassle.  Here
 410  * I used slo_putbits as something which either u_putbits or putbits could be
 411  * defined as.
 412  *
 413  * putbits gets it iff it is not already defined with FASTPUTBITS above.
 414  * u_putbits gets it if we have FASTPUTBITS (putbits) from above and have not
 415  *      overridden the NO_3_60_CG4 flag.
 416  */
 417
 418 #define slo_putbits(src, x, w, pdst) \
 419 { \
 420     register int n = (x)+(w)-PPW; \
 421     \
 422     if (n <= 0) \
 423     { \
 424         register PixelType tmpmask; \
 425         maskpartialbits((x), (w), tmpmask); \
 426         *(pdst) = (*(pdst) & ~tmpmask) | \
 427                 (SCRRIGHT(src, x) & tmpmask); \
 428     } \
 429     else \
 430     { \
 431         *(pdst) = (*(pdst) & endtab[x]) | (SCRRIGHT((src), x)); \
 432         (pdst)[1] = ((pdst)[1] & starttab[n]) | \
 433                 (SCRLEFT(src, PPW-(x)) & endtab[n]); \
 434     } \
 435 }
 436
 437 #if defined(putbits) && !defined(NO_3_60_CG4)
 438 #define u_putbits(src, x, w, pdst) slo_putbits(src, x, w, pdst)
 439 #else
 440 #define u_putbits(src, x, w, pdst) putbits(src, x, w, pdst)
 441 #endif
 442
 443 #if !defined(putbits)
 444 #define putbits(src, x, w, pdst) slo_putbits(src, x, w, pdst)
 445 #endif
 446
 447 /* Now if we have not gotten any really good bitfield macros, try some
 448  * moderately fast macros.  Alas, I don't know how to do asm instructions
 449  * without gcc.
 450  */
 451
 452 #ifndef getbits
 453 #define getbits(psrc, x, w, dst) \
 454 { \
 455     dst = SCRLEFT(*(psrc), (x)); \
 456     if ( ((x) + (w)) > PPW) \
 457         dst |= (SCRRIGHT(*((psrc)+1), PPW-(x))); \
 458 }
 459 #endif
 460
 461 /*  We have to special-case putbitsrop because of 3/60+CG4 combos
 462  */
 463
 464 #define u_putbitsrop(src, x, w, pdst, rop) \
 465 {\
 466         register PixelType t1, t2; \
 467         register int n = (x)+(w)-PPW; \
 468         \
 469         t1 = SCRRIGHT((src), (x)); \
 470         DoRop(t2, rop, t1, *(pdst)); \
 471         \
 472     if (n <= 0) \
 473     { \
 474         register PixelType tmpmask; \
 475         \
 476         maskpartialbits((x), (w), tmpmask); \
 477         *(pdst) = (*(pdst) & ~tmpmask) | (t2 & tmpmask); \
 478     } \
 479     else \
 480     { \
 481         int m = PPW-(x); \
 482         *(pdst) = (*(pdst) & endtab[x]) | (t2 & starttab[x]); \
 483         t1 = SCRLEFT((src), m); \
 484         DoRop(t2, rop, t1, (pdst)[1]); \
 485         (pdst)[1] = ((pdst)[1] & starttab[n]) | (t2 & endtab[n]); \
 486     } \
 487 }
 488
 489 /* If our getbits and putbits are FAST enough,
 490  * do this brute force, it's faster
 491  */
 492
 493 #if defined(FASTPUTBITS) && defined(FASTGETBITS) && defined(NO_3_60_CG4)
 494 #if (BITMAP_BIT_ORDER == MSBFirst)
 495 #define putbitsrop(src, x, w, pdst, rop) \
 496 { \
 497   register PixelType _tmp, _tmp2; \
 498   FASTGETBITS(pdst, x, w, _tmp); \
 499   _tmp2 = SCRRIGHT(src, PPW-(w)); \
 500   DoRop(_tmp, rop, _tmp2, _tmp) \
 501   FASTPUTBITS(_tmp, x, w, pdst); \
 502 }
 503 #define putbitsrrop(src, x, w, pdst, rop) \
 504 { \
 505   register PixelType _tmp, _tmp2; \
 506  \
 507   FASTGETBITS(pdst, x, w, _tmp); \
 508   _tmp2 = SCRRIGHT(src, PPW-(w)); \
 509   _tmp= DoRRop(rop, _tmp2, _tmp); \
 510   FASTPUTBITS(_tmp, x, w, pdst); \
 511 }
 512 #undef u_putbitsrop
 513 #else
 514 #define putbitsrop(src, x, w, pdst, rop) \
 515 { \
 516   register PixelType _tmp; \
 517   FASTGETBITS(pdst, x, w, _tmp); \
 518   DoRop(_tmp, rop, src, _tmp) \
 519   FASTPUTBITS(_tmp, x, w, pdst); \
 520 }
 521 #define putbitsrrop(src, x, w, pdst, rop) \
 522 { \
 523   register PixelType _tmp; \
 524  \
 525   FASTGETBITS(pdst, x, w, _tmp); \
 526   _tmp= DoRRop(rop, src, _tmp); \
 527   FASTPUTBITS(_tmp, x, w, pdst); \
 528 }
 529 #undef u_putbitsrop
 530 #endif
 531 #endif
 532
 533 #ifndef putbitsrop
 534 #define putbitsrop(src, x, w, pdst, rop)  u_putbitsrop(src, x, w, pdst, rop)
 535 #endif
 536
 537 #ifndef putbitsrrop
 538 #define putbitsrrop(src, x, w, pdst, rop) \
 539 {\
 540         register PixelType t1, t2; \
 541         register int n = (x)+(w)-PPW; \
 542         \
 543         t1 = SCRRIGHT((src), (x)); \
 544         t2 = DoRRop(rop, t1, *(pdst)); \
 545         \
 546     if (n <= 0) \
 547     { \
 548         register PixelType tmpmask; \
 549         \
 550         maskpartialbits((x), (w), tmpmask); \
 551         *(pdst) = (*(pdst) & ~tmpmask) | (t2 & tmpmask); \
 552     } \
 553     else \
 554     { \
 555         int m = PPW-(x); \
 556         *(pdst) = (*(pdst) & endtab[x]) | (t2 & starttab[x]); \
 557         t1 = SCRLEFT((src), m); \
 558         t2 = DoRRop(rop, t1, (pdst)[1]); \
 559         (pdst)[1] = ((pdst)[1] & starttab[n]) | (t2 & endtab[n]); \
 560     } \
 561 }
 562 #endif
 563
 564 #if GETLEFTBITS_ALIGNMENT == 1
 565 #define getleftbits(psrc, w, dst)       dst = *((CARD32 *) psrc)
 566 #endif /* GETLEFTBITS_ALIGNMENT == 1 */
 567
 568 #if GETLEFTBITS_ALIGNMENT == 2
 569 #define getleftbits(psrc, w, dst) \
 570     { \
 571         if ( ((int)(psrc)) & 0x01 ) \
 572                 getbits( ((CARD32 *)(((char *)(psrc))-1)), 8, (w), (dst) ); \
 573         else \
 574                 getbits(psrc, 0, w, dst); \
 575     }
 576 #endif /* GETLEFTBITS_ALIGNMENT == 2 */
 577
 578 #if GETLEFTBITS_ALIGNMENT == 4
 579 #define getleftbits(psrc, w, dst) \
 580     { \
 581         int off, off_b; \
 582         off_b = (off = ( ((int)(psrc)) & 0x03)) << 3; \
 583         getbits( \
 584                 (CARD32 *)( ((char *)(psrc)) - off), \
 585                 (off_b), (w), (dst) \
 586                ); \
 587     }
 588 #endif /* GETLEFTBITS_ALIGNMENT == 4 */
 589
 590
 591 #define getshiftedleftbits(psrc, offset, w, dst) \
 592         getleftbits((psrc), (w), (dst)); \
 593         dst = SCRLEFT((dst), (offset));
 594
 595 /* FASTGETBITS and FASTPUTBITS are not necessarily correct implementations of
 596  * getbits and putbits, but they work if used together.
 597  *
 598  * On a MSBFirst machine, a cpu bitfield extract instruction (like bfextu)
 599  * could normally assign its result to a long word register in the screen
 600  * right position.  This saves canceling register shifts by not fighting the
 601  * natural cpu byte order.
 602  *
 603  * Unfortunately, these fail on a 3/60+CG4 and cannot be used unmodified. Sigh.
 604  */
 605 #if defined(FASTGETBITS) && defined(FASTPUTBITS)
 606 #ifdef NO_3_60_CG4
 607 #define u_FASTPUT(aa, bb, cc, dd)  FASTPUTBITS(aa, bb, cc, dd)
 608 #else
 609 #define u_FASTPUT(aa, bb, cc, dd)  u_putbits(SCRLEFT(aa, PPW-(cc)), bb, cc, dd)
 610 #endif
 611
 612 #define getandputbits(psrc, srcbit, dstbit, width, pdst) \
 613 { \
 614     register PixelType _tmpbits; \
 615     FASTGETBITS(psrc, srcbit, width, _tmpbits); \
 616     u_FASTPUT(_tmpbits, dstbit, width, pdst); \
 617 }
 618
 619 #define getandputrop(psrc, srcbit, dstbit, width, pdst, rop) \
 620 { \
 621   register PixelType _tmpsrc, _tmpdst; \
 622   FASTGETBITS(pdst, dstbit, width, _tmpdst); \
 623   FASTGETBITS(psrc, srcbit, width, _tmpsrc); \
 624   DoRop(_tmpdst, rop, _tmpsrc, _tmpdst); \
 625   u_FASTPUT(_tmpdst, dstbit, width, pdst); \
 626 }
 627
 628 #define getandputrrop(psrc, srcbit, dstbit, width, pdst, rop) \
 629 { \
 630   register PixelType _tmpsrc, _tmpdst; \
 631   FASTGETBITS(pdst, dstbit, width, _tmpdst); \
 632   FASTGETBITS(psrc, srcbit, width, _tmpsrc); \
 633   _tmpdst = DoRRop(rop, _tmpsrc, _tmpdst); \
 634   u_FASTPUT(_tmpdst, dstbit, width, pdst); \
 635 }
 636
 637 #define getandputbits0(psrc, srcbit, width, pdst) \
 638         getandputbits(psrc, srcbit, 0, width, pdst)
 639
 640 #define getandputrop0(psrc, srcbit, width, pdst, rop) \
 641         getandputrop(psrc, srcbit, 0, width, pdst, rop)
 642
 643 #define getandputrrop0(psrc, srcbit, width, pdst, rop) \
 644         getandputrrop(psrc, srcbit, 0, width, pdst, rop)
 645
 646
 647 #else /* Slow poke */
 648
 649 /* pairs of getbits/putbits happen frequently. Some of the code can
 650  * be shared or avoided in a few specific instances.  It gets us a
 651  * small advantage, so we do it.  The getandput...0 macros are the only ones
 652  * which speed things here.  The others are here for compatibility w/the above
 653  * FAST ones
 654  */
 655
 656 #define getandputbits(psrc, srcbit, dstbit, width, pdst) \
 657 { \
 658     register PixelType _tmpbits; \
 659     getbits(psrc, srcbit, width, _tmpbits); \
 660     putbits(_tmpbits, dstbit, width, pdst); \
 661 }
 662
 663 #define getandputrop(psrc, srcbit, dstbit, width, pdst, rop) \
 664 { \
 665     register PixelType _tmpbits; \
 666     getbits(psrc, srcbit, width, _tmpbits) \
 667     putbitsrop(_tmpbits, dstbit, width, pdst, rop) \
 668 }
 669
 670 #define getandputrrop(psrc, srcbit, dstbit, width, pdst, rop) \
 671 { \
 672     register PixelType _tmpbits; \
 673     getbits(psrc, srcbit, width, _tmpbits) \
 674     putbitsrrop(_tmpbits, dstbit, width, pdst, rop) \
 675 }
 676
 677
 678 #define getandputbits0(psrc, sbindex, width, pdst) \
 679 {                       /* unroll the whole damn thing to see how it * behaves */ \
 680     register int          _flag = PPW - (sbindex); \
 681     register PixelType _src; \
 682  \
 683     _src = SCRLEFT (*(psrc), (sbindex)); \
 684     if ((width) > _flag) \
 685         _src |=  SCRRIGHT (*((psrc) + 1), _flag); \
 686  \
 687     *(pdst) = (*(pdst) & starttab[(width)]) | (_src & endtab[(width)]); \
 688 }
 689
 690
 691 #define getandputrop0(psrc, sbindex, width, pdst, rop) \
 692 {                       \
 693     register int          _flag = PPW - (sbindex); \
 694     register PixelType _src; \
 695  \
 696     _src = SCRLEFT (*(psrc), (sbindex)); \
 697     if ((width) > _flag) \
 698         _src |=  SCRRIGHT (*((psrc) + 1), _flag); \
 699     DoRop(_src, rop, _src, *(pdst)); \
 700  \
 701     *(pdst) = (*(pdst) & starttab[(width)]) | (_src & endtab[(width)]); \
 702 }
 703
 704 #define getandputrrop0(psrc, sbindex, width, pdst, rop) \
 705 { \
 706     int             _flag = PPW - (sbindex); \
 707     register PixelType _src; \
 708  \
 709     _src = SCRLEFT (*(psrc), (sbindex)); \
 710     if ((width) > _flag) \
 711         _src |=  SCRRIGHT (*((psrc) + 1), _flag); \
 712     _src = DoRRop(rop, _src, *(pdst)); \
 713  \
 714     *(pdst) = (*(pdst) & starttab[(width)]) | (_src & endtab[(width)]); \
 715 }
 716
 717 #endif  /* FASTGETBITS && FASTPUTBITS */
 718
 719 #endif /* MFB_CONSTS_ONLY */