git.sesse.net Git - rdpsrv/blob - Xserver/programs/Xserver/cfb/cfbmskbits.h

   1 /************************************************************
   2 Copyright 1987 by Sun Microsystems, Inc. Mountain View, CA.
   3
   4                     All Rights Reserved
   5
   6 Permission  to  use,  copy,  modify,  and  distribute   this
   7 software  and  its documentation for any purpose and without
   8 fee is hereby granted, provided that the above copyright no-
   9 tice  appear  in all copies and that both that copyright no-
  10 tice and this permission notice appear in  supporting  docu-
  11 mentation,  and  that the names of Sun or X Consortium
  12 not be used in advertising or publicity pertaining to
  13 distribution  of  the software  without specific prior
  14 written permission. Sun and X Consortium make no
  15 representations about the suitability of this software for
  16 any purpose. It is provided "as is" without any express or
  17 implied warranty.
  18
  19 SUN DISCLAIMS ALL WARRANTIES WITH REGARD TO  THIS  SOFTWARE,
  20 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FIT-
  21 NESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL SUN BE  LI-
  22 ABLE  FOR  ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
  23 ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,  DATA  OR
  24 PROFITS,  WHETHER  IN  AN  ACTION OF CONTRACT, NEGLIGENCE OR
  25 OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION  WITH
  26 THE USE OR PERFORMANCE OF THIS SOFTWARE.
  27
  28 ********************************************************/
  29
  30 /* $XConsortium: cfbmskbits.h,v 4.25 94/04/17 20:28:55 dpw Exp $ */
  31 /* $XFree86: xc/programs/Xserver/cfb/cfbmskbits.h,v 3.3.2.1 1997/05/27 06:28:09 dawes Exp $ */
  32 /* Optimizations for PSZ == 32 added by Kyle Marvin (marvin@vitec.com) */
  33
  34 #include        "X.h"
  35 #include        "Xmd.h"
  36 #include        "servermd.h"
  37 #ifdef XFREE86
  38 #define NO_COMPILER_H_EXTRAS
  39 #include        "compiler.h"
  40 #endif
  41
  42 /*
  43  * ==========================================================================
  44  * Converted from mfb to support memory-mapped color framebuffer by smarks@sun,
  45  * April-May 1987.
  46  *
  47  * The way I did the conversion was to consider each longword as an
  48  * array of four bytes instead of an array of 32 one-bit pixels.  So
  49  * getbits() and putbits() retain much the same calling sequence, but
  50  * they move bytes around instead of bits.  Of course, this entails the
  51  * removal of all of the one-bit-pixel dependencies from the other
  52  * files, but the major bit-hacking stuff should be covered here.
  53  *
  54  * I've created some new macros that make it easier to understand what's
  55  * going on in the pixel calculations, and that make it easier to change the
  56  * pixel size.
  57  *
  58  * name     explanation
  59  * ----     -----------
  60  * PSZ      pixel size (in bits)
  61  * PGSZ     pixel group size (in bits)
  62  * PGSZB    pixel group size (in bytes)
  63  * PGSZBMSK mask with lowest PGSZB bits set to 1
  64  * PPW      pixels per word (pixels per pixel group)
  65  * PPWMSK   mask with lowest PPW bits set to 1
  66  * PLST     index of last pixel in a word (should be PPW-1)
  67  * PIM      pixel index mask (index within a pixel group)
  68  * PWSH     pixel-to-word shift (should be log2(PPW))
  69  * PMSK     mask with lowest PSZ bits set to 1
  70  *
  71  *
  72  * Here are some sample values.  In the notation cfbA,B: A is PSZ, and
  73  * B is PGSZB.  All the other values are derived from these
  74  * two.  This table does not show all combinations!
  75  *
  76  * name     cfb8,4    cfb24,4      cfb32,4    cfb8,8    cfb24,8    cfb32,8
  77  * ----     ------    -------      ------     ------    ------     -------
  78  * PSZ        8         24           32          8        24         32
  79  * PGSZ      32         32           32         64        64         64
  80  * PGSZB      4          4            4          8         8          8
  81  * PGSZBMSK 0xF        0xF?         0xF        0xFF      0xFF       0xFF
  82  * PPW        4          1            1          8         2          2
  83  * PPWMSK   0xF        0x1          0x1        0xFF       0x3?       0x3
  84  * PLST       3          0            0          7         1          1
  85  * PIM      0x3        0x0          0x0        0x7       0x1?        0x1
  86  * PWSH       2          0            0          3         1          1
  87  * PMSK     0xFF      0xFFFFFF     0xFFFFFFFF 0xFF      0xFFFFFF   0xFFFFFFFF
  88  *
  89  *
  90  * I have also added a new macro, PFILL, that takes one pixel and
  91  * replicates it throughout a word.  This macro definition is dependent
  92  * upon pixel and word size; it doesn't use macros like PPW and so
  93  * forth.  Examples: for monochrome, PFILL(1) => 0xffffffff, PFILL(0) =>
  94  * 0x00000000.  For 8-bit color, PFILL(0x5d) => 0x5d5d5d5d.  This macro
  95  * is used primarily for replicating a plane mask into a word.
  96  *
  97  * Color framebuffers operations also support the notion of a plane
  98  * mask.  This mask determines which planes of the framebuffer can be
  99  * altered; the others are left unchanged.  I have added another
 100  * parameter to the putbits and putbitsrop macros that is the plane
 101  * mask.
 102  * ==========================================================================
 103  */
 104
 105 /*
 106  *  PSZ needs to be defined before we get here.  Usually it comes from a
 107  *  -DPSZ=foo on the compilation command line.
 108  */
 109
 110 #ifndef PSZ
 111 #define PSZ 8
 112 #endif
 113
 114 /*
 115  *  PixelGroup is the data type used to operate on groups of pixels.
 116  *  We typedef it here to unsigned long with the assumption that you
 117  *  want to manipulate as many pixels at a time as you can.  If unsigned
 118  *  long is not appropriate for your server, define it to something else
 119  *  before including this file.  In this case you will also have to define
 120  *  PGSZB to the size in bytes of PixelGroup.
 121  */
 122 #ifndef PixelGroup
 123 typedef unsigned long PixelGroup;
 124 #ifdef LONG64
 125 #define PGSZB 8
 126 #else
 127 #define PGSZB 4
 128 #endif /* LONG64 */
 129 #endif /* PixelGroup */
 130
 131 #define PGSZ    (PGSZB << 3)
 132 #define PPW     (PGSZ/PSZ)
 133 #define PLST    (PPW-1)
 134 #define PIM     PLST
 135 #define PMSK    (((PixelGroup)1 << PSZ) - 1)
 136 #define PPWMSK  (((PixelGroup)1 << PPW) - 1) /* instead of BITMSK */
 137 #define PGSZBMSK (((PixelGroup)1 << PGSZB) - 1)
 138
 139 /*  set PWSH = log2(PPW) using brute force */
 140
 141 #if PPW == 1
 142 #define PWSH 0
 143 #else
 144 #if PPW == 2
 145 #define PWSH 1
 146 #else
 147 #if PPW == 4
 148 #define PWSH 2
 149 #else
 150 #if PPW == 8
 151 #define PWSH 3
 152 #else
 153 #if PPW == 16
 154 #define PWSH 4
 155 #endif /* PPW == 16 */
 156 #endif /* PPW == 8 */
 157 #endif /* PPW == 4 */
 158 #endif /* PPW == 2 */
 159 #endif /* PPW == 1 */
 160
 161 /*  Defining PIXEL_ADDR means that individual pixels are addressable by this
 162  *  machine (as type PixelType).  A possible CFB architecture which supported
 163  *  8-bits-per-pixel on a non byte-addressable machine would not have this
 164  *  defined.
 165  *
 166  *  Defining FOUR_BIT_CODE means that cfb knows how to stipple on this machine;
 167  *  eventually, stippling code for 16 and 32 bit devices should be written
 168  *  which would allow them to also use FOUR_BIT_CODE.  There isn't that
 169  *  much to do in those cases, but it would make them quite a bit faster.
 170  */
 171
 172 #if PSZ == 8
 173 #define PIXEL_ADDR
 174 typedef CARD8 PixelType;
 175 #define FOUR_BIT_CODE
 176 #endif
 177
 178 #if PSZ == 16
 179 #define PIXEL_ADDR
 180 typedef CARD16 PixelType;
 181 #endif
 182
 183 #if PSZ == 24
 184 #undef PMSK
 185 #define PMSK    0xFFFFFF
 186 /*#undef PIM
 187 #define PIM 3*/
 188 #define PIXEL_ADDR
 189 typedef CARD32 PixelType;
 190 #endif
 191
 192 #if PSZ == 32
 193 #undef PMSK
 194 #define PMSK    0xFFFFFFFF
 195 #define PIXEL_ADDR
 196 typedef CARD32 PixelType;
 197 #endif
 198
 199
 200 /* the following notes use the following conventions:
 201 SCREEN LEFT                             SCREEN RIGHT
 202 in this file and maskbits.c, left and right refer to screen coordinates,
 203 NOT bit numbering in registers.
 204
 205 cfbstarttab[n]
 206         pixels[0,n-1] = 0's     pixels[n,PPW-1] = 1's
 207 cfbendtab[n] =
 208         pixels[0,n-1] = 1's     pixels[n,PPW-1] = 0's
 209
 210 cfbstartpartial[], cfbendpartial[]
 211         these are used as accelerators for doing putbits and masking out
 212 bits that are all contained between longword boundaries.  the extra
 213 256 bytes of data seems a small price to pay -- code is smaller,
 214 and narrow things (e.g. window borders) go faster.
 215
 216 the names may seem misleading; they are derived not from which end
 217 of the word the bits are turned on, but at which end of a scanline
 218 the table tends to be used.
 219
 220 look at the tables and macros to understand boundary conditions.
 221 (careful readers will note that starttab[n] = ~endtab[n] for n != 0)
 222
 223 -----------------------------------------------------------------------
 224 these two macros depend on the screen's bit ordering.
 225 in both of them x is a screen position.  they are used to
 226 combine bits collected from multiple longwords into a
 227 single destination longword, and to unpack a single
 228 source longword into multiple destinations.
 229
 230 SCRLEFT(dst, x)
 231         takes dst[x, PPW] and moves them to dst[0, PPW-x]
 232         the contents of the rest of dst are 0 ONLY IF
 233         dst is UNSIGNED.
 234         is cast as an unsigned.
 235         this is a right shift on the VAX, left shift on
 236         Sun and pc-rt.
 237
 238 SCRRIGHT(dst, x)
 239         takes dst[0,x] and moves them to dst[PPW-x, PPW]
 240         the contents of the rest of dst are 0 ONLY IF
 241         dst is UNSIGNED.
 242         this is a left shift on the VAX, right shift on
 243         Sun and pc-rt.
 244
 245
 246 the remaining macros are cpu-independent; all bit order dependencies
 247 are built into the tables and the two macros above.
 248
 249 maskbits(x, w, startmask, endmask, nlw)
 250         for a span of width w starting at position x, returns
 251 a mask for ragged pixels at start, mask for ragged pixels at end,
 252 and the number of whole longwords between the ends.
 253
 254 maskpartialbits(x, w, mask)
 255         works like maskbits(), except all the pixels are in the
 256         same longword (i.e. ((x & PIM) + w) <= PPW)
 257
 258 mask32bits(x, w, startmask, endmask, nlw)
 259         as maskbits, but does not calculate nlw.  it is used by
 260         cfbGlyphBlt to put down glyphs <= PPW bits wide.
 261
 262 getbits(psrc, x, w, dst)
 263         starting at position x in psrc (x < PPW), collect w
 264         pixels and put them in the screen left portion of dst.
 265         psrc is a longword pointer.  this may span longword boundaries.
 266         it special-cases fetching all w bits from one longword.
 267
 268         +--------+--------+             +--------+
 269         |    | m |n|      |     ==>     | m |n|  |
 270         +--------+--------+             +--------+
 271             x      x+w                  0     w
 272         psrc     psrc+1                 dst
 273                         m = PPW - x
 274                         n = w - m
 275
 276         implementation:
 277         get m pixels, move to screen-left of dst, zeroing rest of dst;
 278         get n pixels from next word, move screen-right by m, zeroing
 279                  lower m pixels of word.
 280         OR the two things together.
 281
 282 putbits(src, x, w, pdst, planemask)
 283         starting at position x in pdst, put down the screen-leftmost
 284         w bits of src.  pdst is a longword pointer.  this may
 285         span longword boundaries.
 286         it special-cases putting all w bits into the same longword.
 287
 288         +--------+                      +--------+--------+
 289         | m |n|  |              ==>     |    | m |n|      |
 290         +--------+                      +--------+--------+
 291         0     w                              x     x+w
 292         dst                             pdst     pdst+1
 293                         m = PPW - x
 294                         n = w - m
 295
 296         implementation:
 297         get m pixels, shift screen-right by x, zero screen-leftmost x
 298                 pixels; zero rightmost m bits of *pdst and OR in stuff
 299                 from before the semicolon.
 300         shift src screen-left by m, zero bits n-32;
 301                 zero leftmost n pixels of *(pdst+1) and OR in the
 302                 stuff from before the semicolon.
 303
 304 putbitsrop(src, x, w, pdst, planemask, ROP)
 305         like putbits but calls DoRop with the rasterop ROP (see cfb.h for
 306         DoRop)
 307
 308 getleftbits(psrc, w, dst)
 309         get the leftmost w (w<=PPW) bits from *psrc and put them
 310         in dst.  this is used by the cfbGlyphBlt code for glyphs
 311         <=PPW bits wide.
 312 */
 313
 314 #if     (BITMAP_BIT_ORDER == MSBFirst)
 315 #define BitRight(lw,n)  ((lw) >> (n))
 316 #define BitLeft(lw,n)   ((lw) << (n))
 317 #else   /* (BITMAP_BIT_ORDER == LSBFirst) */
 318 #define BitRight(lw,n)  ((lw) << (n))
 319 #define BitLeft(lw,n)   ((lw) >> (n))
 320 #endif  /* (BITMAP_BIT_ORDER == MSBFirst) */
 321
 322 #define SCRLEFT(lw, n)  BitLeft (lw, (n) * PSZ)
 323 #define SCRRIGHT(lw, n) BitRight(lw, (n) * PSZ)
 324
 325 /*
 326  * Note that the shift direction is independent of the byte ordering of the
 327  * machine.  The following is portable code.
 328  */
 329 #if PPW == 16
 330 #define PFILL(p) ( ((p)&PMSK)          | \
 331                    ((p)&PMSK) <<   PSZ | \
 332                    ((p)&PMSK) << 2*PSZ | \
 333                    ((p)&PMSK) << 3*PSZ | \
 334                    ((p)&PMSK) << 4*PSZ | \
 335                    ((p)&PMSK) << 5*PSZ | \
 336                    ((p)&PMSK) << 6*PSZ | \
 337                    ((p)&PMSK) << 7*PSZ | \
 338                    ((p)&PMSK) << 8*PSZ | \
 339                    ((p)&PMSK) << 9*PSZ | \
 340                    ((p)&PMSK) << 10*PSZ | \
 341                    ((p)&PMSK) << 11*PSZ | \
 342                    ((p)&PMSK) << 12*PSZ | \
 343                    ((p)&PMSK) << 13*PSZ | \
 344                    ((p)&PMSK) << 14*PSZ | \
 345                    ((p)&PMSK) << 15*PSZ )
 346 #define PFILL2(p, pf) { \
 347     pf = (p) & PMSK; \
 348     pf |= (pf << PSZ); \
 349     pf |= (pf << 2*PSZ); \
 350     pf |= (pf << 4*PSZ); \
 351     pf |= (pf << 8*PSZ); \
 352 }
 353 #endif /* PPW == 16 */
 354 #if PPW == 8
 355 #define PFILL(p) ( ((p)&PMSK)          | \
 356                    ((p)&PMSK) <<   PSZ | \
 357                    ((p)&PMSK) << 2*PSZ | \
 358                    ((p)&PMSK) << 3*PSZ | \
 359                    ((p)&PMSK) << 4*PSZ | \
 360                    ((p)&PMSK) << 5*PSZ | \
 361                    ((p)&PMSK) << 6*PSZ | \
 362                    ((p)&PMSK) << 7*PSZ )
 363 #define PFILL2(p, pf) { \
 364     pf = (p) & PMSK; \
 365     pf |= (pf << PSZ); \
 366     pf |= (pf << 2*PSZ); \
 367     pf |= (pf << 4*PSZ); \
 368 }
 369 #endif
 370 #if PPW == 4
 371 #define PFILL(p) ( ((p)&PMSK)          | \
 372                    ((p)&PMSK) <<   PSZ | \
 373                    ((p)&PMSK) << 2*PSZ | \
 374                    ((p)&PMSK) << 3*PSZ )
 375 #define PFILL2(p, pf) { \
 376     pf = (p) & PMSK; \
 377     pf |= (pf << PSZ); \
 378     pf |= (pf << 2*PSZ); \
 379 }
 380 #endif
 381 #if PPW == 2
 382 #define PFILL(p) ( ((p)&PMSK)          | \
 383                    ((p)&PMSK) <<   PSZ )
 384 #define PFILL2(p, pf) { \
 385     pf = (p) & PMSK; \
 386     pf |= (pf << PSZ); \
 387 }
 388 #endif
 389 #if PPW == 1
 390 #define PFILL(p)        (p)
 391 #define PFILL2(p,pf)    (pf = (p))
 392 #endif
 393
 394 /*
 395  * Reduced raster op - using precomputed values, perform the above
 396  * in three instructions
 397  */
 398
 399 #define DoRRop(dst, and, xor)   (((dst) & (and)) ^ (xor))
 400
 401 #define DoMaskRRop(dst, and, xor, mask) \
 402     (((dst) & ((and) | ~(mask))) ^ (xor & mask))
 403
 404 #if PSZ != 32 || PPW != 1
 405
 406 # if (PSZ == 24 && PPW == 1)
 407 #define maskbits(x, w, startmask, endmask, nlw) {\
 408     startmask = cfbstarttab[(x)&3]; \
 409     endmask = cfbendtab[((x)+(w)) & 3]; \
 410     nlw = ((((x)+(w))*3)>>2) - (((x)*3 +3)>>2); \
 411 }
 412
 413 #define mask32bits(x, w, startmask, endmask) \
 414     startmask = cfbstarttab[(x)&3]; \
 415     endmask = cfbendtab[((x)+(w)) & 3];
 416
 417 #define maskpartialbits(x, w, mask) \
 418     mask = cfbstartpartial[(x) & 3] & cfbendpartial[((x)+(w)) & 3];
 419
 420 #define maskbits24(x, w, startmask, endmask, nlw) \
 421     startmask = cfbstarttab24[(x) & 3]; \
 422     endmask = cfbendtab24[((x)+(w)) & 3]; \
 423     if (startmask){ \
 424         nlw = (((w) - (4 - ((x) & 3))) >> 2); \
 425     } else { \
 426         nlw = (w) >> 2; \
 427     }
 428
 429 #define getbits24(psrc, dst, index) {\
 430     register int idx; \
 431     switch(idx = ((index)&3)<<1){ \
 432         case 0: \
 433                 dst = (*(psrc) &cfbmask[idx]); \
 434                 break; \
 435         case 6: \
 436                 dst = BitLeft((*(psrc) &cfbmask[idx]), cfb24Shift[idx]); \
 437                 break; \
 438         default: \
 439                 dst = BitLeft((*(psrc) &cfbmask[idx]), cfb24Shift[idx]) | \
 440                 BitRight(((*((psrc)+1)) &cfbmask[idx+1]), cfb24Shift[idx+1]); \
 441         }; \
 442 }
 443
 444 #define putbits24(src, x, w, pdst, planemask, index) {\
 445     register PixelGroup dstpixel; \
 446     register unsigned int idx; \
 447     switch(idx = ((index)&3)<<1){ \
 448         case 0: \
 449                 dstpixel = (*(pdst) &cfbmask[idx]); \
 450                 break; \
 451         case 6: \
 452                 dstpixel = BitLeft((*(pdst) &cfbmask[idx]), cfb24Shift[idx]); \
 453                 break; \
 454         default: \
 455                 dstpixel = BitLeft((*(pdst) &cfbmask[idx]), cfb24Shift[idx])| \
 456                 BitRight(((*((pdst)+1)) &cfbmask[idx+1]), cfb24Shift[idx+1]); \
 457         }; \
 458     dstpixel &= ~(planemask); \
 459     dstpixel |= (src & planemask); \
 460     *(pdst) &= cfbrmask[idx]; \
 461     switch(idx){ \
 462         case 0: \
 463                 *(pdst) |=  (dstpixel & cfbmask[idx]); \
 464                 break; \
 465         case 2: \
 466         case 4: \
 467                 pdst++;idx++; \
 468                 *(pdst) = ((*(pdst))  & cfbrmask[idx]) | \
 469                                 (BitLeft(dstpixel, cfb24Shift[idx]) & cfbmask[idx]); \
 470                 pdst--;idx--; \
 471         case 6: \
 472                 *(pdst) |=  (BitRight(dstpixel, cfb24Shift[idx]) & cfbmask[idx]); \
 473                 break; \
 474         }; \
 475 }
 476
 477 #define putbitsrop24(src, x, pdst, planemask, rop) \
 478 { \
 479     register PixelGroup t1, dstpixel; \
 480     register unsigned int idx; \
 481     switch(idx = (x)<<1){ \
 482         case 0: \
 483                 dstpixel = (*(pdst) &cfbmask[idx]); \
 484                 break; \
 485         case 6: \
 486                 dstpixel = BitLeft((*(pdst) &cfbmask[idx]), cfb24Shift[idx]); \
 487                 break; \
 488         default: \
 489                 dstpixel = BitLeft((*(pdst) &cfbmask[idx]), cfb24Shift[idx])| \
 490                 BitRight(((*((pdst)+1)) &cfbmask[idx+1]), cfb24Shift[idx+1]); \
 491         }; \
 492     DoRop(t1, rop, (src), dstpixel); \
 493     dstpixel &= ~planemask; \
 494     dstpixel |= (t1 & planemask); \
 495     *(pdst) &= cfbrmask[idx]; \
 496     switch(idx){ \
 497         case 0: \
 498                 *(pdst) |= (dstpixel & cfbmask[idx]); \
 499                 break; \
 500         case 2: \
 501         case 4: \
 502                 *((pdst)+1) = ((*((pdst)+1))  & cfbrmask[idx+1]) | \
 503                                 (BitLeft(dstpixel, cfb24Shift[idx+1]) & (cfbmask[idx+1])); \
 504         case 6: \
 505                 *(pdst) |= (BitRight(dstpixel, cfb24Shift[idx]) & cfbmask[idx]); \
 506         }; \
 507 }
 508 # else  /* PSZ == 24 && PPW == 1 */
 509 #define maskbits(x, w, startmask, endmask, nlw) \
 510     startmask = cfbstarttab[(x)&PIM]; \
 511     endmask = cfbendtab[((x)+(w)) & PIM]; \
 512     if (startmask) \
 513         nlw = (((w) - (PPW - ((x)&PIM))) >> PWSH); \
 514     else \
 515         nlw = (w) >> PWSH;
 516
 517 #define maskpartialbits(x, w, mask) \
 518     mask = cfbstartpartial[(x) & PIM] & cfbendpartial[((x) + (w)) & PIM];
 519
 520 #define mask32bits(x, w, startmask, endmask) \
 521     startmask = cfbstarttab[(x)&PIM]; \
 522     endmask = cfbendtab[((x)+(w)) & PIM];
 523
 524 /* FIXME */
 525 #define maskbits24(x, w, startmask, endmask, nlw) \
 526     abort()
 527 #define getbits24(psrc, dst, index) \
 528     abort()
 529 #define putbits24(src, x, w, pdst, planemask, index) \
 530     abort()
 531 #define putbitsrop24(src, x, pdst, planemask, rop) \
 532     abort()
 533
 534 #endif /* PSZ == 24 && PPW == 1 */
 535
 536 #define getbits(psrc, x, w, dst) \
 537 if ( ((x) + (w)) <= PPW) \
 538 { \
 539     dst = SCRLEFT(*(psrc), (x)); \
 540 } \
 541 else \
 542 { \
 543     int m; \
 544     m = PPW-(x); \
 545     dst = (SCRLEFT(*(psrc), (x)) & cfbendtab[m]) | \
 546           (SCRRIGHT(*((psrc)+1), m) & cfbstarttab[m]); \
 547 }
 548
 549
 550 #define putbits(src, x, w, pdst, planemask) \
 551 if ( ((x)+(w)) <= PPW) \
 552 { \
 553     PixelGroup tmpmask; \
 554     maskpartialbits((x), (w), tmpmask); \
 555     tmpmask &= PFILL(planemask); \
 556     *(pdst) = (*(pdst) & ~tmpmask) | (SCRRIGHT(src, x) & tmpmask); \
 557 } \
 558 else \
 559 { \
 560     unsigned long m; \
 561     unsigned long n; \
 562     PixelGroup pm = PFILL(planemask); \
 563     m = PPW-(x); \
 564     n = (w) - m; \
 565     *(pdst) = (*(pdst) & (cfbendtab[x] | ~pm)) | \
 566         (SCRRIGHT(src, x) & (cfbstarttab[x] & pm)); \
 567     *((pdst)+1) = (*((pdst)+1) & (cfbstarttab[n] | ~pm)) | \
 568         (SCRLEFT(src, m) & (cfbendtab[n] & pm)); \
 569 }
 570 #if defined(__GNUC__) && defined(mc68020)
 571 #undef getbits
 572 #define FASTGETBITS(psrc, x, w, dst) \
 573     asm ("bfextu %3{%1:%2},%0" \
 574          : "=d" (dst) : "di" (x), "di" (w), "o" (*(char *)(psrc)))
 575
 576 #define getbits(psrc,x,w,dst) \
 577 { \
 578     FASTGETBITS(psrc, (x) * PSZ, (w) * PSZ, dst); \
 579     dst = SCRLEFT(dst,PPW-(w)); \
 580 }
 581
 582 #define FASTPUTBITS(src, x, w, pdst) \
 583     asm ("bfins %3,%0{%1:%2}" \
 584          : "=o" (*(char *)(pdst)) \
 585          : "di" (x), "di" (w), "d" (src), "0" (*(char *) (pdst)))
 586
 587 #undef putbits
 588 #define putbits(src, x, w, pdst, planemask) \
 589 { \
 590     if (planemask != PMSK) { \
 591         PixelGroup _m, _pm; \
 592         FASTGETBITS(pdst, (x) * PSZ , (w) * PSZ, _m); \
 593         PFILL2(planemask, _pm); \
 594         _m &= (~_pm); \
 595         _m |= (SCRRIGHT(src, PPW-(w)) & _pm); \
 596         FASTPUTBITS(_m, (x) * PSZ, (w) * PSZ, pdst); \
 597     } else { \
 598         FASTPUTBITS(SCRRIGHT(src, PPW-(w)), (x) * PSZ, (w) * PSZ, pdst); \
 599     } \
 600 }
 601
 602
 603 #endif /* mc68020 */
 604
 605 #define putbitsrop(src, x, w, pdst, planemask, rop) \
 606 if ( ((x)+(w)) <= PPW) \
 607 { \
 608     PixelGroup tmpmask; \
 609     PixelGroup t1, t2; \
 610     maskpartialbits((x), (w), tmpmask); \
 611     PFILL2(planemask, t1); \
 612     tmpmask &= t1; \
 613     t1 = SCRRIGHT((src), (x)); \
 614     DoRop(t2, rop, t1, *(pdst)); \
 615     *(pdst) = (*(pdst) & ~tmpmask) | (t2 & tmpmask); \
 616 } \
 617 else \
 618 { \
 619     unsigned long m; \
 620     unsigned long n; \
 621     PixelGroup t1, t2; \
 622     PixelGroup pm; \
 623     PFILL2(planemask, pm); \
 624     m = PPW-(x); \
 625     n = (w) - m; \
 626     t1 = SCRRIGHT((src), (x)); \
 627     DoRop(t2, rop, t1, *(pdst)); \
 628     *(pdst) = (*(pdst) & (cfbendtab[x] | ~pm)) | (t2 & (cfbstarttab[x] & pm));\
 629     t1 = SCRLEFT((src), m); \
 630     DoRop(t2, rop, t1, *((pdst) + 1)); \
 631     *((pdst)+1) = (*((pdst)+1) & (cfbstarttab[n] | ~pm)) | \
 632         (t2 & (cfbendtab[n] & pm)); \
 633 }
 634
 635 #else /* PSZ == 32 && PPW == 1*/
 636
 637 /*
 638  * These macros can be optimized for 32-bit pixels since there is no
 639  * need to worry about left/right edge masking.  These macros were
 640  * derived from the above using the following reductions:
 641  *
 642  *      - x & PIM = 0   [since PIM = 0]
 643  *      - all masking tables are only indexed by 0  [ due to above ]
 644  *      - cfbstarttab[0] and cfbendtab[0] = 0    [ no left/right edge masks]
 645  *    - cfbstartpartial[0] and cfbendpartial[0] = ~0 [no partial pixel mask]
 646  *
 647  * Macro reduction based upon constants cannot be performed automatically
 648  *       by the compiler since it does not know the contents of the masking
 649  *       arrays in cfbmskbits.c.
 650  */
 651 #define maskbits(x, w, startmask, endmask, nlw) \
 652     startmask = endmask = 0; \
 653     nlw = (w);
 654
 655 #define maskpartialbits(x, w, mask) \
 656     mask = 0xFFFFFFFF;
 657
 658 #define mask32bits(x, w, startmask, endmask) \
 659     startmask = endmask = 0;
 660
 661 /*
 662  * For 32-bit operations, getbits(), putbits(), and putbitsrop()
 663  * will only be invoked with x = 0 and w = PPW (1).  The getbits()
 664  * macro is only called within left/right edge logic, which doesn't
 665  * happen for 32-bit pixels.
 666  */
 667 #define getbits(psrc, x, w, dst) (dst) = *(psrc)
 668
 669 #define putbits(src, x, w, pdst, planemask) \
 670     *(pdst) = (*(pdst) & ~planemask) | (src & planemask);
 671
 672 #define putbitsrop(src, x, w, pdst, planemask, rop) \
 673 { \
 674     PixelGroup t1; \
 675     DoRop(t1, rop, (src), *(pdst)); \
 676     *(pdst) = (*(pdst) & ~planemask) | (t1 & planemask); \
 677 }
 678
 679 #endif /* PSZ != 32 */
 680
 681 /*
 682  * Use these macros only when you're using the MergeRop stuff
 683  * in ../mfb/mergerop.h
 684  */
 685
 686 /* useful only when not spanning destination longwords */
 687 #if PSZ == 24
 688 #define putbitsmropshort24(src,x,w,pdst,index) {\
 689     PixelGroup   _tmpmask; \
 690     PixelGroup   _t1; \
 691     maskpartialbits ((x), (w), _tmpmask); \
 692     _t1 = SCRRIGHT((src), (x)); \
 693     DoMaskMergeRop24(_t1, pdst, _tmpmask, index); \
 694 }
 695 #endif
 696 #define putbitsmropshort(src,x,w,pdst) {\
 697     PixelGroup   _tmpmask; \
 698     PixelGroup   _t1; \
 699     maskpartialbits ((x), (w), _tmpmask); \
 700     _t1 = SCRRIGHT((src), (x)); \
 701     *pdst = DoMaskMergeRop(_t1, *pdst, _tmpmask); \
 702 }
 703
 704 /* useful only when spanning destination longwords */
 705 #define putbitsmroplong(src,x,w,pdst) { \
 706     PixelGroup   _startmask, _endmask; \
 707     int             _m; \
 708     PixelGroup   _t1; \
 709     _m = PPW - (x); \
 710     _startmask = cfbstarttab[x]; \
 711     _endmask = cfbendtab[(w) - _m]; \
 712     _t1 = SCRRIGHT((src), (x)); \
 713     pdst[0] = DoMaskMergeRop(_t1,pdst[0],_startmask); \
 714     _t1 = SCRLEFT ((src),_m); \
 715     pdst[1] = DoMaskMergeRop(_t1,pdst[1],_endmask); \
 716 }
 717
 718 #define putbitsmrop(src,x,w,pdst) \
 719 if ((x) + (w) <= PPW) {\
 720     putbitsmropshort(src,x,w,pdst); \
 721 } else { \
 722     putbitsmroplong(src,x,w,pdst); \
 723 }
 724
 725 #if GETLEFTBITS_ALIGNMENT == 1
 726 #define getleftbits(psrc, w, dst)       dst = *((unsigned int *) psrc)
 727 #define getleftbits24(psrc, w, dst, idx){       \
 728         regiseter int index; \
 729         switch(index = ((idx)&3)<<1){ \
 730         case 0: \
 731         dst = (*((unsigned int *) psrc))&cfbmask[index]; \
 732         break; \
 733         case 2: \
 734         case 4: \
 735         dst = BitLeft(((*((unsigned int *) psrc))&cfbmask[index]), cfb24Shift[index]); \
 736         dst |= BitRight(((*((unsigned int *) psrc)+1)&cfbmask[index]), cfb4Shift[index]); \
 737         break; \
 738         case 6: \
 739         dst = BitLeft((*((unsigned int *) psrc)),cfb24Shift[index]); \
 740         break; \
 741         }; \
 742 }
 743 #endif /* GETLEFTBITS_ALIGNMENT == 1 */
 744
 745 #define getglyphbits(psrc, x, w, dst) \
 746 { \
 747     dst = BitLeft((unsigned) *(psrc), (x)); \
 748     if ( ((x) + (w)) > 32) \
 749         dst |= (BitRight((unsigned) *((psrc)+1), 32-(x))); \
 750 }
 751 #if GETLEFTBITS_ALIGNMENT == 2
 752 #define getleftbits(psrc, w, dst) \
 753     { \
 754         if ( ((int)(psrc)) & 0x01 ) \
 755                 getglyphbits( ((unsigned int *)(((char *)(psrc))-1)), 8, (w), (dst) ); \
 756         else \
 757                 dst = *((unsigned int *) psrc); \
 758     }
 759 #endif /* GETLEFTBITS_ALIGNMENT == 2 */
 760
 761 #if GETLEFTBITS_ALIGNMENT == 4
 762 #define getleftbits(psrc, w, dst) \
 763     { \
 764         int off, off_b; \
 765         off_b = (off = ( ((int)(psrc)) & 0x03)) << 3; \
 766         getglyphbits( \
 767                 (unsigned int *)( ((char *)(psrc)) - off), \
 768                 (off_b), (w), (dst) \
 769                ); \
 770     }
 771 #endif /* GETLEFTBITS_ALIGNMENT == 4 */
 772
 773 /*
 774  * getstipplepixels( psrcstip, x, w, ones, psrcpix, destpix )
 775  *
 776  * Converts bits to pixels in a reasonable way.  Takes w (1 <= w <= PPW)
 777  * bits from *psrcstip, starting at bit x; call this a quartet of bits.
 778  * Then, takes the pixels from *psrcpix corresponding to the one-bits (if
 779  * ones is TRUE) or the zero-bits (if ones is FALSE) of the quartet
 780  * and puts these pixels into destpix.
 781  *
 782  * Example:
 783  *
 784  *      getstipplepixels( &(0x08192A3B), 17, 4, 1, &(0x4C5D6E7F), dest )
 785  *
 786  * 0x08192A3B = 0000 1000 0001 1001 0010 1010 0011 1011
 787  *
 788  * This will take 4 bits starting at bit 17, so the quartet is 0x5 = 0101.
 789  * It will take pixels from 0x4C5D6E7F corresponding to the one-bits in this
 790  * quartet, so dest = 0x005D007F.
 791  *
 792  * XXX Works with both byte order.
 793  * XXX This works for all values of x and w within a doubleword.
 794  */
 795 #if (BITMAP_BIT_ORDER == MSBFirst)
 796 #define getstipplepixels( psrcstip, x, w, ones, psrcpix, destpix ) \
 797 { \
 798     PixelGroup q; \
 799     int m; \
 800     if ((m = ((x) - ((PPW*PSZ)-PPW))) > 0) { \
 801         q = (*(psrcstip)) << m; \
 802         if ( (x)+(w) > (PPW*PSZ) ) \
 803             q |= *((psrcstip)+1) >> ((PPW*PSZ)-m); \
 804     } \
 805     else \
 806         q = (*(psrcstip)) >> -m; \
 807     q = QuartetBitsTable[(w)] & ((ones) ? q : ~q); \
 808     *(destpix) = (*(psrcpix)) & QuartetPixelMaskTable[q]; \
 809 }
 810 #else /* BITMAP_BIT_ORDER == LSB */
 811
 812 /*================================================================
 813    BEGIN ORL VNC modification
 814    Only use ldq_u on XFREE86 platforms */
 815
 816 #ifdef XFREE86
 817 #define getstipplepixels( psrcstip, xt, w, ones, psrcpix, destpix ) \
 818 { \
 819     PixelGroup q; \
 820     q = ldq_u(psrcstip) >> (xt); \
 821     if ( ((xt)+(w)) > (PPW*PSZ) ) \
 822         q |= (ldq_u((psrcstip)+1)) << ((PPW*PSZ)-(xt)); \
 823     q = QuartetBitsTable[(w)] & ((ones) ? q : ~q); \
 824     *(destpix) = (*(psrcpix)) & QuartetPixelMaskTable[q]; \
 825 }
 826 #else /* XFREE86 */
 827 #define getstipplepixels( psrcstip, xt, w, ones, psrcpix, destpix ) \
 828 { \
 829     PixelGroup q; \
 830     q = *(psrcstip) >> (xt); \
 831     if ( ((xt)+(w)) > (PPW*PSZ) ) \
 832         q |= (*((psrcstip)+1)) << ((PPW*PSZ)-(xt)); \
 833     q = QuartetBitsTable[(w)] & ((ones) ? q : ~q); \
 834     *(destpix) = (*(psrcpix)) & QuartetPixelMaskTable[q]; \
 835 }
 836 #endif /* XFREE86 */
 837
 838 /* END ORL VNC modification
 839 ================================================================*/
 840
 841 #if PSZ == 24
 842 # if 0
 843 #define getstipplepixels24( psrcstip,xt,w,ones,psrcpix,destpix,stipindex,srcindex,dstindex) \
 844 { \
 845     PixelGroup q, srcpix, srcstip; \
 846     unsigned long src; \
 847     register unsigned int sidx; \
 848     register unsigned int didx; \
 849     register unsigned int stipidx; \
 850     sidx = ((srcindex) & 3)<<1; \
 851     didx = ((dstindex) & 3)<<1; \
 852     q = *(psrcstip) >> (xt); \
 853 /*    if((srcindex)!=0)*/ \
 854 /*    src = (((*(psrcpix)) << cfb24Shift[sidx]) & (cfbmask[sidx])) |*/ \
 855 /*      (((*((psrcpix)+1)) << cfb24Shift[sidx+1]) & (cfbmask[sidx+1])); */\
 856 /*    else */\
 857         src = (*(psrcpix))&0xFFFFFF; \
 858     if ( ((xt)+(w)) > PGSZ ) \
 859         q |= (*((psrcstip)+1)) << (PGSZ -(xt)); \
 860     q = QuartetBitsTable[(w)] & ((ones) ? q : ~q); \
 861     src &= QuartetPixelMaskTable[q]; \
 862     *(destpix) &= cfbrmask[didx]; \
 863     switch(didx) {\
 864         case 0: \
 865                 *(destpix) |= (src &cfbmask[didx]); \
 866                 break; \
 867         case 2: \
 868         case 4: \
 869                 destpix++;didx++; \
 870                 *(destpix) = ((*(destpix)) & (cfbrmask[didx]))| \
 871                         (BitLeft(src, cfb24Shift[didx]) & (cfbmask[didx])); \
 872                 destpix--; didx--;\
 873         case 6: \
 874                 *(destpix) |= (BitRight(src, cfb24Shift[didx]) & cfbmask[didx]); \
 875                 break; \
 876         }; \
 877 }
 878 # else
 879 #define getstipplepixels24(psrcstip,xt,ones,psrcpix,destpix,stipindex) \
 880 { \
 881     PixelGroup q, srcpix, srcstip; \
 882     unsigned long src; \
 883     register unsigned int stipidx; \
 884     q = *(psrcstip) >> (xt); \
 885     q = ((ones) ? q : ~q) & 1; \
 886     *(destpix) = (*(psrcpix)) & QuartetPixelMaskTable[q]; \
 887 }
 888 # endif
 889 #endif /* PSZ == 24 */
 890 #endif
 891
 892 extern PixelGroup cfbstarttab[];
 893 extern PixelGroup cfbendtab[];
 894 extern PixelGroup cfbstartpartial[];
 895 extern PixelGroup cfbendpartial[];
 896 extern PixelGroup cfbrmask[];
 897 extern PixelGroup cfbmask[];
 898 extern PixelGroup QuartetBitsTable[];
 899 extern PixelGroup QuartetPixelMaskTable[];
 900 #if PSZ == 24
 901 extern int cfb24Shift[];
 902 #endif