Initial commit
This commit is contained in:
541
graphics/blit/blit-alpha.cpp
Normal file
541
graphics/blit/blit-alpha.cpp
Normal file
@@ -0,0 +1,541 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "common/system.h"
|
||||
#include "graphics/blit.h"
|
||||
#include "graphics/pixelformat.h"
|
||||
|
||||
namespace Graphics {
|
||||
|
||||
namespace {
|
||||
|
||||
template<typename Color, int Size>
|
||||
static inline uint32 READ_PIXEL(const byte *src) {
|
||||
if (Size == sizeof(Color)) {
|
||||
return *(const Color *)src;
|
||||
} else {
|
||||
uint32 color;
|
||||
uint8 *col = (uint8 *)&color;
|
||||
#ifdef SCUMM_BIG_ENDIAN
|
||||
if (Size == 3)
|
||||
col++;
|
||||
#endif
|
||||
memcpy(col, src, Size);
|
||||
return color;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Color, int Size>
|
||||
static inline void WRITE_PIXEL(byte *dst, const uint32 color) {
|
||||
if (Size == sizeof(Color)) {
|
||||
*(Color *)dst = color;
|
||||
} else {
|
||||
const uint8 *col = (const uint8 *)&color;
|
||||
#ifdef SCUMM_BIG_ENDIAN
|
||||
if (Size == 3)
|
||||
col++;
|
||||
#endif
|
||||
memcpy(dst, col, Size);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename SrcColor, int SrcSize, typename DstColor, int DstSize, bool hasKey, bool hasMask, bool hasMap>
|
||||
static inline void alphaBlitLogic(byte *dst, const byte *src, const byte *mask, const uint w, const uint h,
|
||||
const PixelFormat &srcFmt, const PixelFormat &dstFmt, const uint32 *map,
|
||||
const int srcDelta, const int dstDelta, const int maskDelta,
|
||||
const int srcInc, const int dstInc, const int maskInc,
|
||||
const uint32 key, const byte flip, const byte aMod) {
|
||||
const uint32 alphaMask = srcFmt.ARGBToColor(255, 0, 0, 0);
|
||||
const bool convert = hasMap ? false : ((SrcSize != DstSize) ? true : srcFmt == dstFmt);
|
||||
|
||||
for (uint y = 0; y < h; ++y) {
|
||||
for (uint x = 0; x < w; ++x) {
|
||||
const uint32 srcColor = hasMap ? map[*src]
|
||||
: READ_PIXEL<SrcColor, SrcSize>(src);
|
||||
|
||||
const bool isOpaque = hasMask ? (*mask == 0xff)
|
||||
: (hasKey ? (READ_PIXEL<SrcColor, SrcSize>(src) != key)
|
||||
: !alphaMask || ((srcColor & alphaMask) == alphaMask));
|
||||
const bool isTransparent = hasMask ? (*mask == 0x00)
|
||||
: (hasKey ? (READ_PIXEL<SrcColor, SrcSize>(src) == key)
|
||||
: alphaMask && ((srcColor & alphaMask) == 0));
|
||||
|
||||
if (isOpaque && aMod == 0xff) {
|
||||
if (convert) {
|
||||
byte sR, sG, sB;
|
||||
srcFmt.colorToRGB(srcColor, sR, sG, sB);
|
||||
WRITE_PIXEL<DstColor, DstSize>(dst, dstFmt.RGBToColor(sR, sG, sB));
|
||||
} else {
|
||||
WRITE_PIXEL<DstColor, DstSize>(dst, srcColor);
|
||||
}
|
||||
} else if (!isTransparent) {
|
||||
// TODO: Optimise for matching formats?
|
||||
const uint32 dstColor = READ_PIXEL<DstColor, DstSize>(dst);
|
||||
|
||||
byte sA, sR, sG, sB;
|
||||
srcFmt.colorToARGB(srcColor, sA, sR, sG, sB);
|
||||
|
||||
byte dR, dG, dB;
|
||||
dstFmt.colorToRGB(dstColor, dR, dG, dB);
|
||||
|
||||
if (hasKey)
|
||||
sA = aMod;
|
||||
else if (hasMask)
|
||||
sA = ((*mask * aMod) >> 8);
|
||||
else
|
||||
sA = ((sA * aMod) >> 8);
|
||||
|
||||
dR = (dR * (255-sA) + sR * sA) >> 8;
|
||||
dG = (dG * (255-sA) + sG * sA) >> 8;
|
||||
dB = (dB * (255-sA) + sB * sA) >> 8;
|
||||
|
||||
const uint32 outColor = dstFmt.RGBToColor(dR, dG, dB);
|
||||
WRITE_PIXEL<DstColor, DstSize>(dst, outColor);
|
||||
}
|
||||
|
||||
src += srcInc;
|
||||
dst += dstInc;
|
||||
if (hasMask)
|
||||
mask += maskInc;
|
||||
}
|
||||
|
||||
src += srcDelta;
|
||||
dst += dstDelta;
|
||||
if (hasMask)
|
||||
mask += maskDelta;
|
||||
}
|
||||
}
|
||||
|
||||
template<bool hasKey, bool hasMask>
|
||||
static inline bool alphaBlitHelper(byte *dst, const byte *src, const byte *mask, const uint w, const uint h,
|
||||
const PixelFormat &srcFmt, const PixelFormat &dstFmt,
|
||||
const uint srcPitch, const uint dstPitch, const uint maskPitch,
|
||||
const uint32 key, const byte flip, const byte aMod) {
|
||||
const bool hasMap = false;
|
||||
const bool flipx = flip & FLIP_H;
|
||||
const bool flipy = flip & FLIP_V;
|
||||
|
||||
// Faster, but larger, to provide optimized handling for each case.
|
||||
int dstDelta = (dstPitch - w * dstFmt.bytesPerPixel);
|
||||
const int srcDelta = (srcPitch - w * srcFmt.bytesPerPixel);
|
||||
const int maskDelta = hasMask ? (maskPitch - w) : 0;
|
||||
|
||||
const int dstInc = flipx ? -dstFmt.bytesPerPixel : dstFmt.bytesPerPixel;
|
||||
const int srcInc = srcFmt.bytesPerPixel;
|
||||
const int maskInc = 1;
|
||||
|
||||
if (flipx)
|
||||
dst += (w - 1) * dstFmt.bytesPerPixel;
|
||||
|
||||
if (flipy)
|
||||
dst += (h - 1) * dstPitch;
|
||||
|
||||
if (flipy && flipx)
|
||||
dstDelta = -dstDelta;
|
||||
else if (flipy)
|
||||
dstDelta = -((dstPitch * 2) - dstDelta);
|
||||
else if (flipx)
|
||||
dstDelta = (dstPitch * 2) - dstDelta;
|
||||
|
||||
if (aMod == 0)
|
||||
return true;
|
||||
|
||||
// TODO: optimized cases for dstDelta of 0
|
||||
if (dstFmt.bytesPerPixel == 2) {
|
||||
if (srcFmt.bytesPerPixel == 2) {
|
||||
alphaBlitLogic<uint16, 2, uint16, 2, hasKey, hasMask, hasMap>(dst, src, mask, w, h, srcFmt, dstFmt, nullptr, srcDelta, dstDelta, maskDelta, srcInc, dstInc, maskInc, key, flip, aMod);
|
||||
} else if (srcFmt.bytesPerPixel == 3) {
|
||||
alphaBlitLogic<uint8, 3, uint16, 2, hasKey, hasMask, hasMap>(dst, src, mask, w, h, srcFmt, dstFmt, nullptr, srcDelta, dstDelta, maskDelta, srcInc, dstInc, maskInc, key, flip, aMod);
|
||||
} else {
|
||||
alphaBlitLogic<uint32, 4, uint16, 2, hasKey, hasMask, hasMap>(dst, src, mask, w, h, srcFmt, dstFmt, nullptr, srcDelta, dstDelta, maskDelta, srcInc, dstInc, maskInc, key, flip, aMod);
|
||||
}
|
||||
} else if (dstFmt.bytesPerPixel == 4) {
|
||||
if (srcFmt.bytesPerPixel == 2) {
|
||||
alphaBlitLogic<uint16, 2, uint32, 4, hasKey, hasMask, hasMap>(dst, src, mask, w, h, srcFmt, dstFmt, nullptr, srcDelta, dstDelta, maskDelta, srcInc, dstInc, maskInc, key, flip, aMod);
|
||||
} else if (srcFmt.bytesPerPixel == 3) {
|
||||
alphaBlitLogic<uint8, 3, uint32, 4, hasKey, hasMask, hasMap>(dst, src, mask, w, h, srcFmt, dstFmt, nullptr, srcDelta, dstDelta, maskDelta, srcInc, dstInc, maskInc, key, flip, aMod);
|
||||
} else {
|
||||
alphaBlitLogic<uint32, 4, uint32, 4, hasKey, hasMask, hasMap>(dst, src, mask, w, h, srcFmt, dstFmt, nullptr, srcDelta, dstDelta, maskDelta, srcInc, dstInc, maskInc, key, flip, aMod);
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template<bool hasKey, bool hasMask>
|
||||
static inline bool alphaBlitMapHelper(byte *dst, const byte *src, const byte *mask, const uint w, const uint h,
|
||||
const PixelFormat &dstFmt, const uint32 *map,
|
||||
const uint srcPitch, const uint dstPitch, const uint maskPitch,
|
||||
const uint32 key, const byte flip, const byte aMod) {
|
||||
const Graphics::PixelFormat &srcFmt = dstFmt;
|
||||
const bool hasMap = true;
|
||||
const bool flipx = flip & FLIP_H;
|
||||
const bool flipy = flip & FLIP_V;
|
||||
|
||||
// Faster, but larger, to provide optimized handling for each case.
|
||||
int dstDelta = (dstPitch - w * dstFmt.bytesPerPixel);
|
||||
const int srcDelta = (srcPitch - w);
|
||||
const int maskDelta = hasMask ? (maskPitch - w) : 0;
|
||||
|
||||
const int dstInc = flipx ? -dstFmt.bytesPerPixel : dstFmt.bytesPerPixel;
|
||||
const int srcInc = 1;
|
||||
const int maskInc = 1;
|
||||
|
||||
if (flipx)
|
||||
dst += (w - 1) * dstFmt.bytesPerPixel;
|
||||
|
||||
if (flipy)
|
||||
dst += (h - 1) * dstPitch;
|
||||
|
||||
if (flipy && flipx)
|
||||
dstDelta = -dstDelta;
|
||||
else if (flipy)
|
||||
dstDelta = -((dstPitch * 2) - dstDelta);
|
||||
else if (flipx)
|
||||
dstDelta = (dstPitch * 2) - dstDelta;
|
||||
|
||||
// TODO: optimized cases for dstDelta of 0
|
||||
if (dstFmt.bytesPerPixel == 2) {
|
||||
alphaBlitLogic<uint8, 1, uint16, 2, hasKey, hasMask, hasMap>(dst, src, mask, w, h, srcFmt, dstFmt, map, srcDelta, dstDelta, maskDelta, srcInc, dstInc, maskInc, key, flip, aMod);
|
||||
} else if (dstFmt.bytesPerPixel == 4) {
|
||||
alphaBlitLogic<uint8, 1, uint32, 4, hasKey, hasMask, hasMap>(dst, src, mask, w, h, srcFmt, dstFmt, map, srcDelta, dstDelta, maskDelta, srcInc, dstInc, maskInc, key, flip, aMod);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // End of anonymous namespace
|
||||
|
||||
/**
 * Blits src onto dst using the alpha carried by the source pixels.
 *
 * @return false when either format is 0 or 1 bytes per pixel, otherwise the
 *         result of alphaBlitHelper.
 */
bool alphaBlit(byte *dst, const byte *src,
               const uint dstPitch, const uint srcPitch,
               const uint w, const uint h,
               const Graphics::PixelFormat &dstFmt, const Graphics::PixelFormat &srcFmt,
               const byte flip, const byte aMod) {
	// Formats of 0 or 1 bytes per pixel cannot be handled here.
	const bool srcUnusable = (!srcFmt.bytesPerPixel) || (srcFmt.bytesPerPixel == 1);
	const bool dstUnusable = (!dstFmt.bytesPerPixel) || (dstFmt.bytesPerPixel == 1);
	if (srcUnusable || dstUnusable)
		return false;

	// No key and no mask: both template flags off, unused arguments zeroed.
	return alphaBlitHelper<false, false>(dst, src, nullptr, w, h, srcFmt, dstFmt, srcPitch, dstPitch, 0, 0, flip, aMod);
}
|
||||
|
||||
/**
 * Blits src onto dst, treating source pixels equal to key as transparent.
 *
 * @return false when either format is 0 or 1 bytes per pixel, otherwise the
 *         result of alphaBlitHelper.
 */
bool alphaKeyBlit(byte *dst, const byte *src,
                  const uint dstPitch, const uint srcPitch,
                  const uint w, const uint h,
                  const Graphics::PixelFormat &dstFmt, const Graphics::PixelFormat &srcFmt,
                  const uint32 key, const byte flip, const byte aMod) {
	// Formats of 0 or 1 bytes per pixel cannot be handled here.
	const bool srcUnusable = (!srcFmt.bytesPerPixel) || (srcFmt.bytesPerPixel == 1);
	const bool dstUnusable = (!dstFmt.bytesPerPixel) || (dstFmt.bytesPerPixel == 1);
	if (srcUnusable || dstUnusable)
		return false;

	// Keyed, unmasked: no mask pointer and a mask pitch of 0.
	return alphaBlitHelper<true, false>(dst, src, nullptr, w, h, srcFmt, dstFmt, srcPitch, dstPitch, 0, key, flip, aMod);
}
|
||||
|
||||
/**
 * Blits src onto dst, taking per-pixel opacity from a separate mask buffer
 * with one byte per pixel.
 *
 * @return false when either format is 0 or 1 bytes per pixel, otherwise the
 *         result of alphaBlitHelper.
 */
bool alphaMaskBlit(byte *dst, const byte *src, const byte *mask,
                   const uint dstPitch, const uint srcPitch, const uint maskPitch,
                   const uint w, const uint h,
                   const Graphics::PixelFormat &dstFmt, const Graphics::PixelFormat &srcFmt,
                   const byte flip, const byte aMod) {
	// Formats of 0 or 1 bytes per pixel cannot be handled here.
	const bool srcUnusable = (!srcFmt.bytesPerPixel) || (srcFmt.bytesPerPixel == 1);
	const bool dstUnusable = (!dstFmt.bytesPerPixel) || (dstFmt.bytesPerPixel == 1);
	if (srcUnusable || dstUnusable)
		return false;

	// Masked, unkeyed: the key argument is unused and passed as 0.
	return alphaBlitHelper<false, true>(dst, src, mask, w, h, srcFmt, dstFmt, srcPitch, dstPitch, maskPitch, 0, flip, aMod);
}
|
||||
|
||||
/**
 * Blits a 1-byte-per-pixel source onto dst, translating every source byte
 * through map before blending.
 *
 * @return false when the destination is 0 or 1 bytes per pixel, otherwise
 *         the result of alphaBlitMapHelper.
 */
bool alphaBlitMap(byte *dst, const byte *src,
                  const uint dstPitch, const uint srcPitch,
                  const uint w, const uint h,
                  const Graphics::PixelFormat &dstFmt, const uint32 *map,
                  const byte flip, const byte aMod) {
	// Destinations of 0 or 1 bytes per pixel cannot be handled here.
	const bool dstUnusable = (!dstFmt.bytesPerPixel) || (dstFmt.bytesPerPixel == 1);
	if (dstUnusable)
		return false;

	// No key and no mask: both template flags off, unused arguments zeroed.
	return alphaBlitMapHelper<false, false>(dst, src, nullptr, w, h, dstFmt, map, srcPitch, dstPitch, 0, 0, flip, aMod);
}
|
||||
|
||||
/**
 * Blits a 1-byte-per-pixel source onto dst through map, treating source
 * bytes equal to key as transparent.
 *
 * @return false when the destination is 0 or 1 bytes per pixel, otherwise
 *         the result of alphaBlitMapHelper.
 */
bool alphaKeyBlitMap(byte *dst, const byte *src,
                     const uint dstPitch, const uint srcPitch,
                     const uint w, const uint h,
                     const Graphics::PixelFormat &dstFmt, const uint32 *map,
                     const uint32 key, const byte flip, const byte aMod) {
	// Destinations of 0 or 1 bytes per pixel cannot be handled here.
	const bool dstUnusable = (!dstFmt.bytesPerPixel) || (dstFmt.bytesPerPixel == 1);
	if (dstUnusable)
		return false;

	// Keyed, unmasked: no mask pointer and a mask pitch of 0.
	return alphaBlitMapHelper<true, false>(dst, src, nullptr, w, h, dstFmt, map, srcPitch, dstPitch, 0, key, flip, aMod);
}
|
||||
|
||||
/**
 * Blits a 1-byte-per-pixel source onto dst through map, taking per-pixel
 * opacity from a separate mask buffer with one byte per pixel.
 *
 * @return false when the destination is 0 or 1 bytes per pixel, otherwise
 *         the result of alphaBlitMapHelper.
 */
bool alphaMaskBlitMap(byte *dst, const byte *src, const byte *mask,
                      const uint dstPitch, const uint srcPitch, const uint maskPitch,
                      const uint w, const uint h,
                      const Graphics::PixelFormat &dstFmt, const uint32 *map,
                      const byte flip, const byte aMod) {
	// Destinations of 0 or 1 bytes per pixel cannot be handled here.
	const bool dstUnusable = (!dstFmt.bytesPerPixel) || (dstFmt.bytesPerPixel == 1);
	if (dstUnusable)
		return false;

	// Masked, unkeyed: the key argument is unused and passed as 0.
	return alphaBlitMapHelper<false, true>(dst, src, mask, w, h, dstFmt, map, srcPitch, dstPitch, maskPitch, 0, flip, aMod);
}
|
||||
|
||||
namespace {
|
||||
|
||||
template<typename Size, bool overwriteAlpha>
|
||||
inline bool applyColorKeyLogic(byte *dst, const byte *src, const uint w, const uint h,
|
||||
const uint srcDelta, const uint dstDelta,
|
||||
const Graphics::PixelFormat &format,
|
||||
const uint8 rKey, const uint8 gKey, const uint8 bKey,
|
||||
const uint8 rNew, const uint8 gNew, const uint8 bNew) {
|
||||
|
||||
const uint32 keyPix = format.ARGBToColor(0, rKey, gKey, bKey);
|
||||
const uint32 newPix = format.ARGBToColor(0, rNew, gNew, bNew);
|
||||
const uint32 rgbMask = format.ARGBToColor(0, 255, 255, 255);
|
||||
const uint32 alphaMask = format.ARGBToColor(255, 0, 0, 0);
|
||||
bool applied = false;
|
||||
|
||||
for (uint y = 0; y < h; ++y) {
|
||||
for (uint x = 0; x < w; ++x) {
|
||||
uint32 pix = *(const Size *)src;
|
||||
|
||||
if ((pix & rgbMask) == keyPix) {
|
||||
*(Size *)dst = newPix;
|
||||
applied = true;
|
||||
} else if (overwriteAlpha) {
|
||||
*(Size *)dst = pix | alphaMask;
|
||||
}
|
||||
|
||||
src += sizeof(Size);
|
||||
dst += sizeof(Size);
|
||||
}
|
||||
|
||||
src += srcDelta;
|
||||
dst += dstDelta;
|
||||
}
|
||||
|
||||
return applied;
|
||||
}
|
||||
|
||||
template<typename Size, bool skipTransparent>
|
||||
inline void setAlphaLogic(byte *dst, const byte *src, const uint w, const uint h,
|
||||
const uint srcDelta, const uint dstDelta,
|
||||
const Graphics::PixelFormat &format, const uint8 alpha) {
|
||||
|
||||
const uint32 newAlpha = format.ARGBToColor(alpha, 0, 0, 0);
|
||||
const uint32 rgbMask = format.ARGBToColor(0, 255, 255, 255);
|
||||
const uint32 alphaMask = format.ARGBToColor(255, 0, 0, 0);
|
||||
|
||||
for (uint y = 0; y < h; ++y) {
|
||||
for (uint x = 0; x < w; ++x) {
|
||||
uint32 pix = *(const Size *)src;
|
||||
|
||||
if (!skipTransparent || (pix & alphaMask))
|
||||
*(Size *)dst = (pix & rgbMask) | newAlpha;
|
||||
else
|
||||
*(Size *)dst = pix;
|
||||
|
||||
src += sizeof(Size);
|
||||
dst += sizeof(Size);
|
||||
}
|
||||
|
||||
src += srcDelta;
|
||||
dst += dstDelta;
|
||||
}
|
||||
}
|
||||
|
||||
} // End of anonymous namespace
|
||||
|
||||
// Function to merge a transparent color key with the alpha channel
|
||||
bool applyColorKey(byte *dst, const byte *src,
|
||||
const uint dstPitch, const uint srcPitch,
|
||||
const uint w, const uint h,
|
||||
const Graphics::PixelFormat &format, const bool overwriteAlpha,
|
||||
const uint8 rKey, const uint8 gKey, const uint8 bKey,
|
||||
const uint8 rNew, const uint8 gNew, const uint8 bNew) {
|
||||
|
||||
// Faster, but larger, to provide optimized handling for each case.
|
||||
const uint srcDelta = (srcPitch - w * format.bytesPerPixel);
|
||||
const uint dstDelta = (dstPitch - w * format.bytesPerPixel);
|
||||
|
||||
if (format.aBits() == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (overwriteAlpha) {
|
||||
if (format.bytesPerPixel == 1) {
|
||||
return applyColorKeyLogic<uint8, true>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
|
||||
} else if (format.bytesPerPixel == 2) {
|
||||
return applyColorKeyLogic<uint16, true>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
|
||||
} else if (format.bytesPerPixel == 4) {
|
||||
return applyColorKeyLogic<uint32, true>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (format.bytesPerPixel == 1) {
|
||||
return applyColorKeyLogic<uint8, false>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
|
||||
} else if (format.bytesPerPixel == 2) {
|
||||
return applyColorKeyLogic<uint16, false>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
|
||||
} else if (format.bytesPerPixel == 4) {
|
||||
return applyColorKeyLogic<uint32, false>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Function to set the alpha channel for all pixels to the specified value
|
||||
bool setAlpha(byte *dst, const byte *src,
|
||||
const uint dstPitch, const uint srcPitch,
|
||||
const uint w, const uint h,
|
||||
const Graphics::PixelFormat &format,
|
||||
const bool skipTransparent, const uint8 alpha) {
|
||||
|
||||
// Faster, but larger, to provide optimized handling for each case.
|
||||
const uint srcDelta = (srcPitch - w * format.bytesPerPixel);
|
||||
const uint dstDelta = (dstPitch - w * format.bytesPerPixel);
|
||||
|
||||
if (format.aBits() == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (skipTransparent) {
|
||||
if (format.bytesPerPixel == 1) {
|
||||
setAlphaLogic<uint8, true>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
|
||||
} else if (format.bytesPerPixel == 2) {
|
||||
setAlphaLogic<uint16, true>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
|
||||
} else if (format.bytesPerPixel == 4) {
|
||||
setAlphaLogic<uint32, true>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (format.bytesPerPixel == 1) {
|
||||
setAlphaLogic<uint8, false>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
|
||||
} else if (format.bytesPerPixel == 2) {
|
||||
setAlphaLogic<uint16, false>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
|
||||
} else if (format.bytesPerPixel == 4) {
|
||||
setAlphaLogic<uint32, false>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Collects the parameters of one blit and derives the starting pointers and
 * step sizes from them.
 *
 * Pixels are addressed with a fixed stride of 4 bytes. A horizontal flip
 * negates the per-pixel input step and starts at the last column; a vertical
 * flip negates the per-row input step and starts at the last row. When
 * either scale differs from SCALE_THRESHOLD the flipped start coordinate is
 * scaled accordingly.
 */
BlendBlit::Args::Args(byte *dst, const byte *src,
	const uint _dstPitch, const uint _srcPitch,
	const int posX, const int posY,
	const uint _width, const uint _height,
	const int _scaleX, const int _scaleY,
	const int scaleXsrcOff, const int scaleYsrcOff,
	const uint32 colorMod, const uint _flipping) :
		xp(0), yp(0), dstPitch(_dstPitch),
		width(_width), height(_height), color(colorMod),
		scaleX(_scaleX), scaleY(_scaleY), flipping(_flipping),
		scaleXoff(scaleXsrcOff), scaleYoff(scaleYsrcOff) {
	const bool isScaled = !(scaleX == SCALE_THRESHOLD && scaleY == SCALE_THRESHOLD);

	// A modulation is active when the respective bits are not all set.
	rgbmod = ((colorMod & kRGBModMask) != kRGBModMask);
	alphamod = ((colorMod & kAModMask) != kAModMask);

	// Horizontal walking direction and start column.
	inStep = 4;
	if (flipping & FLIP_H) {
		inStep = -inStep;
		xp = width - 1;
		if (isScaled)
			xp = xp * scaleX / SCALE_THRESHOLD;
	}

	// Vertical walking direction and start row.
	inoStep = _srcPitch;
	if (flipping & FLIP_V) {
		inoStep = -inoStep;
		yp = height - 1;
		if (isScaled)
			yp = yp * scaleY / SCALE_THRESHOLD;
	}

	ino = src + yp * _srcPitch + xp * 4;
	outo = dst + posY * _dstPitch + posX * 4;
}
|
||||
|
||||
// Initialize these to nullptr at the start
|
||||
BlendBlit::BlitFunc BlendBlit::blitFunc = nullptr;
|
||||
BlendBlit::FillFunc BlendBlit::fillFunc = nullptr;
|
||||
|
||||
// Only blits to and from 32bpp images
|
||||
// So this function is just here to jump to whatever function is in
|
||||
// BlendBlit::blitFunc. This way, we can detect at runtime whether or not
|
||||
// the cpu has certain SIMD feature enabled or not.
|
||||
void BlendBlit::blit(byte *dst, const byte *src,
|
||||
const uint dstPitch, const uint srcPitch,
|
||||
const int posX, const int posY,
|
||||
const uint width, const uint height,
|
||||
const int scaleX, const int scaleY,
|
||||
const int scaleXsrcOff, const int scaleYsrcOff,
|
||||
const uint32 colorMod, const uint flipping,
|
||||
const TSpriteBlendMode blendMode,
|
||||
const AlphaType alphaType) {
|
||||
if (width == 0 || height == 0) return;
|
||||
|
||||
// If no function has been selected yet, detect and select
|
||||
if (!blitFunc) {
|
||||
// Get the correct blit function
|
||||
blitFunc = blitGeneric;
|
||||
#ifdef SCUMMVM_NEON
|
||||
if (g_system->hasFeature(OSystem::kFeatureCpuNEON)) blitFunc = blitNEON;
|
||||
#endif
|
||||
#ifdef SCUMMVM_SSE2
|
||||
if (g_system->hasFeature(OSystem::kFeatureCpuSSE2)) blitFunc = blitSSE2;
|
||||
#endif
|
||||
#ifdef SCUMMVM_AVX2
|
||||
if (g_system->hasFeature(OSystem::kFeatureCpuAVX2)) blitFunc = blitAVX2;
|
||||
#endif
|
||||
}
|
||||
|
||||
Args args(dst, src, dstPitch, srcPitch, posX, posY, width, height, scaleX, scaleY, scaleXsrcOff, scaleYsrcOff, colorMod, flipping);
|
||||
blitFunc(args, blendMode, alphaType);
|
||||
}
|
||||
|
||||
// Only fills 32bpp images
|
||||
// So this function is just here to jump to whatever function is in
|
||||
// BlendBlit::fillFunc. This way, we can detect at runtime whether or not
|
||||
// the cpu has certain SIMD feature enabled or not.
|
||||
void BlendBlit::fill(byte *dst, const uint dstPitch,
|
||||
const uint width, const uint height,
|
||||
const uint32 colorMod,
|
||||
const TSpriteBlendMode blendMode) {
|
||||
if (width == 0 || height == 0) return;
|
||||
|
||||
// If no function has been selected yet, detect and select
|
||||
if (!fillFunc) {
|
||||
// Get the correct blit function
|
||||
// TODO: Add SIMD variants
|
||||
fillFunc = fillGeneric;
|
||||
}
|
||||
|
||||
Args args(dst, nullptr, dstPitch, 0, 0, 0, width, height, 0, 0, 0, 0, colorMod, 0);
|
||||
fillFunc(args, blendMode);
|
||||
}
|
||||
|
||||
} // End of namespace Graphics
|
||||
501
graphics/blit/blit-alpha.h
Normal file
501
graphics/blit/blit-alpha.h
Normal file
@@ -0,0 +1,501 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "graphics/blit.h"
|
||||
|
||||
namespace Graphics {
|
||||
|
||||
class BlendBlitImpl_Base {
|
||||
friend class BlendBlit;
|
||||
protected:
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct BaseBlend {
|
||||
public:
|
||||
constexpr BaseBlend(const uint32 color) :
|
||||
ca(alphamod ? ((color >> BlendBlit::kAModShift) & 0xFF) : 255),
|
||||
cr(rgbmod ? ((color >> BlendBlit::kRModShift) & 0xFF) : 255),
|
||||
cg(rgbmod ? ((color >> BlendBlit::kGModShift) & 0xFF) : 255),
|
||||
cb(rgbmod ? ((color >> BlendBlit::kBModShift) & 0xFF) : 255) {}
|
||||
|
||||
protected:
|
||||
const byte ca, cr, cg, cb;
|
||||
};
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct AlphaBlend : public BaseBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr AlphaBlend(const uint32 color) : BaseBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline void normal(const byte *in, byte *out) const {
|
||||
uint32 ina;
|
||||
|
||||
if (alphamod) {
|
||||
ina = in[BlendBlit::kAIndex] * this->ca >> 8;
|
||||
} else {
|
||||
ina = in[BlendBlit::kAIndex];
|
||||
}
|
||||
|
||||
if (ina == 255) {
|
||||
if (rgbmod) {
|
||||
out[BlendBlit::kAIndex] = 255;
|
||||
out[BlendBlit::kBIndex] = (in[BlendBlit::kBIndex] * this->cb >> 8);
|
||||
out[BlendBlit::kGIndex] = (in[BlendBlit::kGIndex] * this->cg >> 8);
|
||||
out[BlendBlit::kRIndex] = (in[BlendBlit::kRIndex] * this->cr >> 8);
|
||||
} else {
|
||||
out[BlendBlit::kAIndex] = 255;
|
||||
out[BlendBlit::kBIndex] = in[BlendBlit::kBIndex];
|
||||
out[BlendBlit::kGIndex] = in[BlendBlit::kGIndex];
|
||||
out[BlendBlit::kRIndex] = in[BlendBlit::kRIndex];
|
||||
}
|
||||
} else if (ina != 0) {
|
||||
if (rgbmod) {
|
||||
const uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
|
||||
const uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
|
||||
const uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
|
||||
|
||||
out[BlendBlit::kAIndex] = 255;
|
||||
out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * this->cb >> 16);
|
||||
out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * this->cg >> 16);
|
||||
out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * this->cr >> 16);
|
||||
} else {
|
||||
out[BlendBlit::kAIndex] = 255;
|
||||
out[BlendBlit::kBIndex] = (out[BlendBlit::kBIndex] * (255 - ina) + in[BlendBlit::kBIndex] * ina) >> 8;
|
||||
out[BlendBlit::kGIndex] = (out[BlendBlit::kGIndex] * (255 - ina) + in[BlendBlit::kGIndex] * ina) >> 8;
|
||||
out[BlendBlit::kRIndex] = (out[BlendBlit::kRIndex] * (255 - ina) + in[BlendBlit::kRIndex] * ina) >> 8;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
inline void fill(byte *out) const {
|
||||
uint32 ina = this->ca;
|
||||
|
||||
/* if (ina == 255) {
|
||||
if (rgbmod) {
|
||||
out[BlendBlit::kAIndex] = 255;
|
||||
out[BlendBlit::kBIndex] = this->cb;
|
||||
out[BlendBlit::kGIndex] = this->cg;
|
||||
out[BlendBlit::kRIndex] = this->cr;
|
||||
} else {
|
||||
out[BlendBlit::kAIndex] = 255;
|
||||
out[BlendBlit::kBIndex] = 255;
|
||||
out[BlendBlit::kGIndex] = 255;
|
||||
out[BlendBlit::kRIndex] = 255;
|
||||
}
|
||||
} else if (ina != 0) */ {
|
||||
if (rgbmod) {
|
||||
const uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
|
||||
const uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
|
||||
const uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
|
||||
|
||||
out[BlendBlit::kAIndex] = 255;
|
||||
out[BlendBlit::kBIndex] = outb + (255 * ina * this->cb >> 16);
|
||||
out[BlendBlit::kGIndex] = outg + (255 * ina * this->cg >> 16);
|
||||
out[BlendBlit::kRIndex] = outr + (255 * ina * this->cr >> 16);
|
||||
} else {
|
||||
out[BlendBlit::kAIndex] = 255;
|
||||
out[BlendBlit::kBIndex] = (out[BlendBlit::kBIndex] * (255 - ina) + 255 * ina) >> 8;
|
||||
out[BlendBlit::kGIndex] = (out[BlendBlit::kGIndex] * (255 - ina) + 255 * ina) >> 8;
|
||||
out[BlendBlit::kRIndex] = (out[BlendBlit::kRIndex] * (255 - ina) + 255 * ina) >> 8;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct MultiplyBlend : public BaseBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr MultiplyBlend(const uint32 color) : BaseBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline void normal(const byte *in, byte *out) const {
|
||||
uint32 ina;
|
||||
|
||||
if (alphamod) {
|
||||
ina = in[BlendBlit::kAIndex] * this->ca >> 8;
|
||||
} else {
|
||||
ina = in[BlendBlit::kAIndex];
|
||||
}
|
||||
|
||||
if (ina == 255) {
|
||||
if (rgbmod) {
|
||||
out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * this->cb) >> 8) >> 8;
|
||||
out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * this->cg) >> 8) >> 8;
|
||||
out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * this->cr) >> 8) >> 8;
|
||||
} else {
|
||||
out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * in[BlendBlit::kBIndex] >> 8;
|
||||
out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * in[BlendBlit::kGIndex] >> 8;
|
||||
out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * in[BlendBlit::kRIndex] >> 8;
|
||||
}
|
||||
} else if (ina != 0) {
|
||||
if (rgbmod) {
|
||||
out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * this->cb * ina) >> 16) >> 8;
|
||||
out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * this->cg * ina) >> 16) >> 8;
|
||||
out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * this->cr * ina) >> 16) >> 8;
|
||||
} else {
|
||||
out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * ina) >> 8) >> 8;
|
||||
out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * ina) >> 8) >> 8;
|
||||
out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * ina) >> 8) >> 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void fill(byte *out) const {
|
||||
uint32 ina = this->ca;
|
||||
|
||||
if (ina == 255) {
|
||||
if (rgbmod) {
|
||||
out[BlendBlit::kBIndex] = (out[BlendBlit::kBIndex] * this->cb) >> 8;
|
||||
out[BlendBlit::kGIndex] = (out[BlendBlit::kGIndex] * this->cg) >> 8;
|
||||
out[BlendBlit::kRIndex] = (out[BlendBlit::kRIndex] * this->cr) >> 8;
|
||||
}
|
||||
} else if (ina != 0) {
|
||||
if (rgbmod) {
|
||||
out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((this->cb * ina) >> 8) >> 8;
|
||||
out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((this->cg * ina) >> 8) >> 8;
|
||||
out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((this->cr * ina) >> 8) >> 8;
|
||||
} else {
|
||||
out[BlendBlit::kBIndex] = (out[BlendBlit::kBIndex] * ina) >> 8;
|
||||
out[BlendBlit::kGIndex] = (out[BlendBlit::kGIndex] * ina) >> 8;
|
||||
out[BlendBlit::kRIndex] = (out[BlendBlit::kRIndex] * ina) >> 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct OpaqueBlend : public BaseBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr OpaqueBlend(const uint32 color) : BaseBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline void normal(const byte *in, byte *out) const {
|
||||
*(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
|
||||
}
|
||||
};
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct BinaryBlend : public BaseBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr BinaryBlend(const uint32 color) : BaseBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline void normal(const byte *in, byte *out) const {
|
||||
uint32 pix = *(const uint32 *)in;
|
||||
uint32 a = pix & BlendBlit::kAModMask;
|
||||
|
||||
if (a != 0) { // Full opacity (Any value not exactly 0 is Opaque here)
|
||||
*(uint32 *)out = pix | BlendBlit::kAModMask;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct AdditiveBlend : public BaseBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr AdditiveBlend(const uint32 color) : BaseBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline void normal(const byte *in, byte *out) const {
|
||||
uint32 ina;
|
||||
|
||||
if (alphamod) {
|
||||
ina = in[BlendBlit::kAIndex] * this->ca >> 8;
|
||||
} else {
|
||||
ina = in[BlendBlit::kAIndex];
|
||||
}
|
||||
|
||||
if (ina == 255) {
|
||||
if (rgbmod) {
|
||||
out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * this->cb) >> 8);
|
||||
out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * this->cg) >> 8);
|
||||
out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * this->cr) >> 8);
|
||||
} else {
|
||||
out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + in[BlendBlit::kBIndex];
|
||||
out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + in[BlendBlit::kGIndex];
|
||||
out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + in[BlendBlit::kRIndex];
|
||||
}
|
||||
} else if (ina != 0) {
|
||||
if (rgbmod) {
|
||||
out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * this->cb * ina) >> 16);
|
||||
out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * this->cg * ina) >> 16);
|
||||
out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * this->cr * ina) >> 16);
|
||||
} else {
|
||||
out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * ina) >> 8);
|
||||
out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * ina) >> 8);
|
||||
out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * ina) >> 8);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void fill(byte *out) const {
|
||||
uint32 ina = this->ca;
|
||||
|
||||
if (ina == 255) {
|
||||
if (rgbmod) {
|
||||
out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + this->cb;
|
||||
out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + this->cg;
|
||||
out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + this->cr;
|
||||
} else {
|
||||
out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + 255;
|
||||
out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + 255;
|
||||
out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + 255;
|
||||
}
|
||||
} else if (ina != 0) {
|
||||
if (rgbmod) {
|
||||
out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((this->cb * ina) >> 8);
|
||||
out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((this->cg * ina) >> 8);
|
||||
out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((this->cr * ina) >> 8);
|
||||
} else {
|
||||
out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ina;
|
||||
out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ina;
|
||||
out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ina;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct SubtractiveBlend : public BaseBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr SubtractiveBlend(const uint32 color) : BaseBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline void normal(const byte *in, byte *out) const {
|
||||
uint32 ina = in[BlendBlit::kAIndex];
|
||||
out[BlendBlit::kAIndex] = 255;
|
||||
|
||||
if (ina == 255) {
|
||||
if (rgbmod) {
|
||||
out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * this->cb * (out[BlendBlit::kBIndex])) >> 16), 0);
|
||||
out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * this->cg * (out[BlendBlit::kGIndex])) >> 16), 0);
|
||||
out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * this->cr * (out[BlendBlit::kRIndex])) >> 16), 0);
|
||||
} else {
|
||||
out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * (out[BlendBlit::kBIndex])) >> 8), 0);
|
||||
out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * (out[BlendBlit::kGIndex])) >> 8), 0);
|
||||
out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * (out[BlendBlit::kRIndex])) >> 8), 0);
|
||||
}
|
||||
} else if (ina != 0) {
|
||||
if (rgbmod) {
|
||||
out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * this->cb * (out[BlendBlit::kBIndex]) * ina) >> 24), 0);
|
||||
out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * this->cg * (out[BlendBlit::kGIndex]) * ina) >> 24), 0);
|
||||
out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * this->cr * (out[BlendBlit::kRIndex]) * ina) >> 24), 0);
|
||||
} else {
|
||||
out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * (out[BlendBlit::kBIndex]) * ina) >> 16), 0);
|
||||
out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * (out[BlendBlit::kGIndex]) * ina) >> 16), 0);
|
||||
out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * (out[BlendBlit::kRIndex]) * ina) >> 16), 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void fill(byte *out) const {
|
||||
out[BlendBlit::kAIndex] = 255;
|
||||
|
||||
if (rgbmod) {
|
||||
out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((this->cb * out[BlendBlit::kBIndex]) >> 8), 0);
|
||||
out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((this->cg * out[BlendBlit::kGIndex]) >> 8), 0);
|
||||
out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((this->cr * out[BlendBlit::kRIndex]) >> 8), 0);
|
||||
} else {
|
||||
out[BlendBlit::kBIndex] = 0;
|
||||
out[BlendBlit::kGIndex] = 0;
|
||||
out[BlendBlit::kRIndex] = 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}; // End of class BlendBlitImpl_Base
|
||||
|
||||
template<class T>
|
||||
void BlendBlit::blitT(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
|
||||
bool rgbmod = ((args.color & kRGBModMask) != kRGBModMask);
|
||||
bool alphamod = ((args.color & kAModMask) != kAModMask);
|
||||
if (args.scaleX == SCALE_THRESHOLD && args.scaleY == SCALE_THRESHOLD) {
|
||||
if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
|
||||
T::template blitInnerLoop<T::template OpaqueBlend, false, false, false>(args);
|
||||
} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
|
||||
T::template blitInnerLoop<T::template BinaryBlend, false, false, false>(args);
|
||||
} else {
|
||||
if (blendMode == BLEND_ADDITIVE) {
|
||||
if (rgbmod) {
|
||||
if (alphamod) {
|
||||
T::template blitInnerLoop<T::template AdditiveBlend, false, true, true>(args);
|
||||
} else {
|
||||
T::template blitInnerLoop<T::template AdditiveBlend, false, true, false>(args);
|
||||
}
|
||||
} else {
|
||||
if (alphamod) {
|
||||
T::template blitInnerLoop<T::template AdditiveBlend, false, false, true>(args);
|
||||
} else {
|
||||
T::template blitInnerLoop<T::template AdditiveBlend, false, false, false>(args);
|
||||
}
|
||||
}
|
||||
} else if (blendMode == BLEND_SUBTRACTIVE) {
|
||||
if (rgbmod) {
|
||||
T::template blitInnerLoop<T::template SubtractiveBlend, false, true, false>(args);
|
||||
} else {
|
||||
T::template blitInnerLoop<T::template SubtractiveBlend, false, false, false>(args);
|
||||
}
|
||||
} else if (blendMode == BLEND_MULTIPLY) {
|
||||
if (rgbmod) {
|
||||
if (alphamod) {
|
||||
T::template blitInnerLoop<T::template MultiplyBlend, false, true, true>(args);
|
||||
} else {
|
||||
T::template blitInnerLoop<T::template MultiplyBlend, false, true, false>(args);
|
||||
}
|
||||
} else {
|
||||
if (alphamod) {
|
||||
T::template blitInnerLoop<T::template MultiplyBlend, false, false, true>(args);
|
||||
} else {
|
||||
T::template blitInnerLoop<T::template MultiplyBlend, false, false, false>(args);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
assert(blendMode == BLEND_NORMAL);
|
||||
if (rgbmod) {
|
||||
if (alphamod) {
|
||||
T::template blitInnerLoop<T::template AlphaBlend, false, true, true>(args);
|
||||
} else {
|
||||
T::template blitInnerLoop<T::template AlphaBlend, false, true, false>(args);
|
||||
}
|
||||
} else {
|
||||
if (alphamod) {
|
||||
T::template blitInnerLoop<T::template AlphaBlend, false, false, true>(args);
|
||||
} else {
|
||||
T::template blitInnerLoop<T::template AlphaBlend, false, false, false>(args);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
|
||||
T::template blitInnerLoop<T::template OpaqueBlend, true, false, false>(args);
|
||||
} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
|
||||
T::template blitInnerLoop<T::template BinaryBlend, true, false, false>(args);
|
||||
} else {
|
||||
if (blendMode == BLEND_ADDITIVE) {
|
||||
if (rgbmod) {
|
||||
if (alphamod) {
|
||||
T::template blitInnerLoop<T::template AdditiveBlend, true, true, true>(args);
|
||||
} else {
|
||||
T::template blitInnerLoop<T::template AdditiveBlend, true, true, false>(args);
|
||||
}
|
||||
} else {
|
||||
if (alphamod) {
|
||||
T::template blitInnerLoop<T::template AdditiveBlend, true, false, true>(args);
|
||||
} else {
|
||||
T::template blitInnerLoop<T::template AdditiveBlend, true, false, false>(args);
|
||||
}
|
||||
}
|
||||
} else if (blendMode == BLEND_SUBTRACTIVE) {
|
||||
if (rgbmod) {
|
||||
T::template blitInnerLoop<T::template SubtractiveBlend, true, true, false>(args);
|
||||
} else {
|
||||
T::template blitInnerLoop<T::template SubtractiveBlend, true, false, false>(args);
|
||||
}
|
||||
} else if (blendMode == BLEND_MULTIPLY) {
|
||||
if (rgbmod) {
|
||||
if (alphamod) {
|
||||
T::template blitInnerLoop<T::template MultiplyBlend, true, true, true>(args);
|
||||
} else {
|
||||
T::template blitInnerLoop<T::template MultiplyBlend, true, true, false>(args);
|
||||
}
|
||||
} else {
|
||||
if (alphamod) {
|
||||
T::template blitInnerLoop<T::template MultiplyBlend, true, false, true>(args);
|
||||
} else {
|
||||
T::template blitInnerLoop<T::template MultiplyBlend, true, false, false>(args);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
assert(blendMode == BLEND_NORMAL);
|
||||
if (rgbmod) {
|
||||
if (alphamod) {
|
||||
T::template blitInnerLoop<T::template AlphaBlend, true, true, true>(args);
|
||||
} else {
|
||||
T::template blitInnerLoop<T::template AlphaBlend, true, true, false>(args);
|
||||
}
|
||||
} else {
|
||||
if (alphamod) {
|
||||
T::template blitInnerLoop<T::template AlphaBlend, true, false, true>(args);
|
||||
} else {
|
||||
T::template blitInnerLoop<T::template AlphaBlend, true, false, false>(args);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void BlendBlit::fillT(Args &args, const TSpriteBlendMode &blendMode) {
|
||||
bool rgbmod = ((args.color & kRGBModMask) != kRGBModMask);
|
||||
bool alphamod = ((args.color & kAModMask) != kAModMask);
|
||||
|
||||
if (blendMode == BLEND_ADDITIVE) {
|
||||
if (rgbmod) {
|
||||
if (alphamod) {
|
||||
T::template fillInnerLoop<T::template AdditiveBlend, true, true>(args);
|
||||
} else {
|
||||
T::template fillInnerLoop<T::template AdditiveBlend, true, false>(args);
|
||||
}
|
||||
} else {
|
||||
if (alphamod) {
|
||||
T::template fillInnerLoop<T::template AdditiveBlend, false, true>(args);
|
||||
} else {
|
||||
T::template fillInnerLoop<T::template AdditiveBlend, false, false>(args);
|
||||
}
|
||||
}
|
||||
} else if (blendMode == BLEND_SUBTRACTIVE) {
|
||||
if (rgbmod) {
|
||||
T::template fillInnerLoop<T::template SubtractiveBlend, true, false>(args);
|
||||
} else {
|
||||
T::template fillInnerLoop<T::template SubtractiveBlend, false, false>(args);
|
||||
}
|
||||
} else if (blendMode == BLEND_MULTIPLY) {
|
||||
if (rgbmod) {
|
||||
if (alphamod) {
|
||||
T::template fillInnerLoop<T::template MultiplyBlend, true, true>(args);
|
||||
} else {
|
||||
T::template fillInnerLoop<T::template MultiplyBlend, true, false>(args);
|
||||
}
|
||||
} else {
|
||||
if (alphamod) {
|
||||
T::template fillInnerLoop<T::template MultiplyBlend, false, true>(args);
|
||||
} else {
|
||||
T::template fillInnerLoop<T::template MultiplyBlend, false, false>(args);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
assert(blendMode == BLEND_NORMAL);
|
||||
if (rgbmod) {
|
||||
if (alphamod) {
|
||||
T::template fillInnerLoop<T::template AlphaBlend, true, true>(args);
|
||||
} else {
|
||||
T::template fillInnerLoop<T::template AlphaBlend, true, false>(args);
|
||||
}
|
||||
} else {
|
||||
if (alphamod) {
|
||||
T::template fillInnerLoop<T::template AlphaBlend, false, true>(args);
|
||||
} else {
|
||||
T::template fillInnerLoop<T::template AlphaBlend, false, false>(args);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // End of namespace Graphics
|
||||
283
graphics/blit/blit-atari.cpp
Normal file
283
graphics/blit/blit-atari.cpp
Normal file
@@ -0,0 +1,283 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "graphics/blit.h"
|
||||
|
||||
#include <mint/cookie.h>
|
||||
|
||||
#include "backends/graphics/atari/atari-supervidel.h"
|
||||
#include "backends/platform/atari/dlmalloc.h" // MALLOC_ALIGNMENT
|
||||
|
||||
static_assert(MALLOC_ALIGNMENT == 16, "MALLOC_ALIGNMENT must be == 16");
|
||||
|
||||
#ifdef USE_MOVE16
|
||||
static inline bool hasMove16() {
|
||||
long val;
|
||||
static bool hasMove16 = Getcookie(C__CPU, &val) == C_FOUND && val >= 40;
|
||||
return hasMove16;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
constexpr bool isAligned(T val) {
|
||||
return (reinterpret_cast<uintptr>(val) & (MALLOC_ALIGNMENT - 1)) == 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace Graphics {
|
||||
|
||||
// Function to blit a rect with a transparent color key
|
||||
void keyBlitLogicAtari(byte *dst, const byte *src, const uint w, const uint h,
|
||||
const uint srcDelta, const uint dstDelta, const uint32 key) {
|
||||
#ifdef USE_SV_BLITTER
|
||||
if (key == 0 && (uintptr)src >= 0xA0000000 && (uintptr)dst >= 0xA0000000) {
|
||||
if (g_superVidelFwVersion >= 9) {
|
||||
*SV_BLITTER_FIFO = (long)src; // SV_BLITTER_SRC1
|
||||
*SV_BLITTER_FIFO = (long)(g_blitMask ? g_blitMask : src); // SV_BLITTER_SRC2
|
||||
*SV_BLITTER_FIFO = (long)dst; // SV_BLITTER_DST
|
||||
*SV_BLITTER_FIFO = w - 1; // SV_BLITTER_COUNT
|
||||
*SV_BLITTER_FIFO = srcDelta + w; // SV_BLITTER_SRC1_OFFSET
|
||||
*SV_BLITTER_FIFO = srcDelta + w; // SV_BLITTER_SRC2_OFFSET
|
||||
*SV_BLITTER_FIFO = dstDelta + w; // SV_BLITTER_DST_OFFSET
|
||||
*SV_BLITTER_FIFO = h; // SV_BLITTER_MASK_AND_LINES
|
||||
*SV_BLITTER_FIFO = 0x03; // SV_BLITTER_CONTROL
|
||||
} else {
|
||||
// make sure the blitter is idle
|
||||
while (*SV_BLITTER_CONTROL & 1);
|
||||
|
||||
*SV_BLITTER_SRC1 = (long)src;
|
||||
*SV_BLITTER_SRC2 = (long)(g_blitMask ? g_blitMask : src);
|
||||
*SV_BLITTER_DST = (long)dst;
|
||||
*SV_BLITTER_COUNT = w - 1;
|
||||
*SV_BLITTER_SRC1_OFFSET = srcDelta + w;
|
||||
*SV_BLITTER_SRC2_OFFSET = srcDelta + w;
|
||||
*SV_BLITTER_DST_OFFSET = dstDelta + w;
|
||||
*SV_BLITTER_MASK_AND_LINES = h;
|
||||
*SV_BLITTER_CONTROL = 0x03;
|
||||
}
|
||||
|
||||
SyncSuperBlitter();
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
for (uint y = 0; y < h; ++y) {
|
||||
for (uint x = 0; x < w; ++x) {
|
||||
const uint32 color = *src++;
|
||||
if (color != key)
|
||||
*dst++ = color;
|
||||
else
|
||||
dst++;
|
||||
}
|
||||
|
||||
src += srcDelta;
|
||||
dst += dstDelta;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Function to blit a rect (version optimized for Atari Falcon with SuperVidel's SuperBlitter)
|
||||
void copyBlit(byte *dst, const byte *src,
|
||||
const uint dstPitch, const uint srcPitch,
|
||||
const uint w, const uint h,
|
||||
const uint bytesPerPixel) {
|
||||
if (dst == src)
|
||||
return;
|
||||
|
||||
#ifdef USE_SV_BLITTER
|
||||
if ((uintptr)src >= 0xA0000000 && (uintptr)dst >= 0xA0000000) {
|
||||
if (g_superVidelFwVersion >= 9) {
|
||||
*SV_BLITTER_FIFO = (long)src; // SV_BLITTER_SRC1
|
||||
*SV_BLITTER_FIFO = 0x00000000; // SV_BLITTER_SRC2
|
||||
*SV_BLITTER_FIFO = (long)dst; // SV_BLITTER_DST
|
||||
*SV_BLITTER_FIFO = w * bytesPerPixel - 1; // SV_BLITTER_COUNT
|
||||
*SV_BLITTER_FIFO = srcPitch; // SV_BLITTER_SRC1_OFFSET
|
||||
*SV_BLITTER_FIFO = 0x00000000; // SV_BLITTER_SRC2_OFFSET
|
||||
*SV_BLITTER_FIFO = dstPitch; // SV_BLITTER_DST_OFFSET
|
||||
*SV_BLITTER_FIFO = h; // SV_BLITTER_MASK_AND_LINES
|
||||
*SV_BLITTER_FIFO = 0x01; // SV_BLITTER_CONTROL
|
||||
} else {
|
||||
// make sure the blitter is idle
|
||||
while (*SV_BLITTER_CONTROL & 1);
|
||||
|
||||
*SV_BLITTER_SRC1 = (long)src;
|
||||
*SV_BLITTER_SRC2 = 0x00000000;
|
||||
*SV_BLITTER_DST = (long)dst;
|
||||
*SV_BLITTER_COUNT = w * bytesPerPixel - 1;
|
||||
*SV_BLITTER_SRC1_OFFSET = srcPitch;
|
||||
*SV_BLITTER_SRC2_OFFSET = 0x00000000;
|
||||
*SV_BLITTER_DST_OFFSET = dstPitch;
|
||||
*SV_BLITTER_MASK_AND_LINES = h;
|
||||
*SV_BLITTER_CONTROL = 0x01;
|
||||
}
|
||||
|
||||
SyncSuperBlitter();
|
||||
} else
|
||||
#endif
|
||||
if (dstPitch == srcPitch && dstPitch == (w * bytesPerPixel)) {
|
||||
#ifdef USE_MOVE16
|
||||
if (hasMove16() && isAligned(src) && isAligned(dst)) {
|
||||
__asm__ volatile(
|
||||
" move.l %2,%%d0\n"
|
||||
" lsr.l #4,%%d0\n"
|
||||
" beq.b 3f\n"
|
||||
|
||||
" moveq #0x0f,%%d1\n"
|
||||
" and.l %%d0,%%d1\n"
|
||||
" neg.l %%d1\n"
|
||||
" lsr.l #4,%%d0\n"
|
||||
" jmp (2f,%%pc,%%d1.l*4)\n"
|
||||
"1:\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
"2:\n"
|
||||
" dbra %%d0,1b\n"
|
||||
// handle also the case when 'dstPitch' is not
|
||||
// divisible by 16 but 'src' and 'dst' are
|
||||
"3:\n"
|
||||
" moveq #0x0f,%%d0\n"
|
||||
" and.l %2,%%d0\n"
|
||||
" neg.l %%d0\n"
|
||||
" jmp (4f,%%pc,%%d0.l*2)\n"
|
||||
// only 15x move.b as 16 would be handled above
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
"4:\n"
|
||||
: // outputs
|
||||
: "a"(src), "a"(dst), "g"(dstPitch * h) // inputs
|
||||
: "d0", "d1", "cc" AND_MEMORY
|
||||
);
|
||||
} else {
|
||||
#else
|
||||
{
|
||||
#endif
|
||||
memcpy(dst, src, dstPitch * h);
|
||||
}
|
||||
} else {
|
||||
#ifdef USE_MOVE16
|
||||
if (hasMove16() && isAligned(src) && isAligned(dst) && isAligned(srcPitch) && isAligned(dstPitch)) {
|
||||
__asm__ volatile(
|
||||
" move.l %2,%%d0\n"
|
||||
|
||||
" moveq #0x0f,%%d1\n"
|
||||
" and.l %%d0,%%d1\n"
|
||||
" neg.l %%d1\n"
|
||||
" lea (4f,%%pc,%%d1.l*2),%%a0\n"
|
||||
" move.l %%a0,%%a1\n"
|
||||
|
||||
" lsr.l #4,%%d0\n"
|
||||
" beq.b 3f\n"
|
||||
|
||||
" moveq #0x0f,%%d1\n"
|
||||
" and.l %%d0,%%d1\n"
|
||||
" neg.l %%d1\n"
|
||||
" lea (2f,%%pc,%%d1.l*4),%%a0\n"
|
||||
" lsr.l #4,%%d0\n"
|
||||
" move.l %%d0,%%d1\n"
|
||||
"0:\n"
|
||||
" move.l %%d1,%%d0\n"
|
||||
" jmp (%%a0)\n"
|
||||
"1:\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
" move16 (%0)+,(%1)+\n"
|
||||
"2:\n"
|
||||
" dbra %%d0,1b\n"
|
||||
// handle (w * bytesPerPixel) % 16
|
||||
"3:\n"
|
||||
" jmp (%%a1)\n"
|
||||
// only 15x move.b as 16 would be handled above
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
" move.b (%0)+,(%1)+\n"
|
||||
"4:\n"
|
||||
" add.l %4,%1\n"
|
||||
" add.l %5,%0\n"
|
||||
" dbra %3,0b\n"
|
||||
: // outputs
|
||||
: "a"(src), "a"(dst), "g"(w * bytesPerPixel), "d"(h - 1),
|
||||
"g"(dstPitch - w * bytesPerPixel), "g"(srcPitch - w * bytesPerPixel) // inputs
|
||||
: "d0", "d1", "a0", "a1", "cc" AND_MEMORY
|
||||
);
|
||||
} else {
|
||||
#else
|
||||
{
|
||||
#endif
|
||||
for (uint i = 0; i < h; ++i) {
|
||||
memcpy(dst, src, w * bytesPerPixel);
|
||||
dst += dstPitch;
|
||||
src += srcPitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // End of namespace Graphics
|
||||
330
graphics/blit/blit-avx2.cpp
Normal file
330
graphics/blit/blit-avx2.cpp
Normal file
@@ -0,0 +1,330 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "common/scummsys.h"
|
||||
|
||||
#include "graphics/blit/blit-alpha.h"
|
||||
#include "graphics/pixelformat.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
|
||||
#elif defined(__GNUC__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx2")
|
||||
#endif
|
||||
|
||||
namespace Graphics {
|
||||
|
||||
class BlendBlitImpl_AVX2 : public BlendBlitImpl_Base {
|
||||
friend class BlendBlit;
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr AlphaBlend(const uint32 color) : BlendBlitImpl_Base::AlphaBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline __m256i simd(__m256i src, __m256i dst) const {
|
||||
__m256i ina;
|
||||
if (alphamod)
|
||||
ina = _mm256_srli_epi32(_mm256_mullo_epi16(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_set1_epi32(this->ca)), 8);
|
||||
else
|
||||
ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
|
||||
__m256i alphaMask = _mm256_cmpeq_epi32(ina, _mm256_setzero_si256());
|
||||
|
||||
if (rgbmod) {
|
||||
__m256i dstR = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
__m256i dstG = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m256i dstB = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m256i srcR = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
__m256i srcG = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m256i srcB = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
|
||||
dstR = _mm256_slli_epi32(_mm256_mullo_epi16(dstR, _mm256_sub_epi32(_mm256_set1_epi32(255), ina)), BlendBlit::kRModShift - 8);
|
||||
dstG = _mm256_slli_epi32(_mm256_mullo_epi16(dstG, _mm256_sub_epi32(_mm256_set1_epi32(255), ina)), BlendBlit::kGModShift - 8);
|
||||
dstB = _mm256_mullo_epi16(dstB, _mm256_sub_epi32(_mm256_set1_epi32(255), ina));
|
||||
srcR = _mm256_add_epi32(dstR, _mm256_slli_epi32(_mm256_mullo_epi16(_mm256_srli_epi32(_mm256_mullo_epi16(srcR, ina), 8), _mm256_set1_epi32(this->cr)), BlendBlit::kRModShift - 8));
|
||||
srcG = _mm256_add_epi32(dstG, _mm256_slli_epi32(_mm256_mullo_epi16(_mm256_srli_epi32(_mm256_mullo_epi16(srcG, ina), 8), _mm256_set1_epi32(this->cg)), BlendBlit::kGModShift - 8));
|
||||
srcB = _mm256_add_epi32(dstB, _mm256_mullo_epi16(_mm256_srli_epi32(_mm256_mullo_epi16(srcB, ina), 8), _mm256_set1_epi32(this->cb)));
|
||||
src = _mm256_or_si256(_mm256_and_si256(srcB, _mm256_set1_epi32(BlendBlit::kBModMask)), _mm256_set1_epi32(BlendBlit::kAModMask));
|
||||
src = _mm256_or_si256(_mm256_and_si256(srcG, _mm256_set1_epi32(BlendBlit::kGModMask)), src);
|
||||
src = _mm256_or_si256(_mm256_and_si256(srcR, _mm256_set1_epi32(BlendBlit::kRModMask)), src);
|
||||
} else {
|
||||
__m256i dstRB = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m256i srcRB = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m256i dstG = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m256i srcG = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
|
||||
dstRB = _mm256_srli_epi32(_mm256_mullo_epi32(dstRB, _mm256_sub_epi32(_mm256_set1_epi32(255), ina)), 8);
|
||||
dstG = _mm256_srli_epi32(_mm256_mullo_epi16(dstG, _mm256_sub_epi32(_mm256_set1_epi32(255), ina)), 8);
|
||||
srcRB = _mm256_slli_epi32(_mm256_add_epi32(dstRB, _mm256_srli_epi32(_mm256_mullo_epi32(srcRB, ina), 8)), BlendBlit::kBModShift);
|
||||
srcG = _mm256_slli_epi32(_mm256_add_epi32(dstG, _mm256_srli_epi32(_mm256_mullo_epi16(srcG, ina), 8)), BlendBlit::kGModShift);
|
||||
src = _mm256_or_si256(_mm256_and_si256(srcG, _mm256_set1_epi32(BlendBlit::kGModMask)), _mm256_set1_epi32(BlendBlit::kAModMask));
|
||||
src = _mm256_or_si256(_mm256_and_si256(srcRB, _mm256_set1_epi32(BlendBlit::kBModMask | BlendBlit::kRModMask)), src);
|
||||
}
|
||||
|
||||
dst = _mm256_and_si256(alphaMask, dst);
|
||||
src = _mm256_andnot_si256(alphaMask, src);
|
||||
return _mm256_or_si256(dst, src);
|
||||
}
|
||||
};
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr MultiplyBlend(const uint32 color) : BlendBlitImpl_Base::MultiplyBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline __m256i simd(__m256i src, __m256i dst) const {
|
||||
__m256i ina, alphaMask;
|
||||
if (alphamod) {
|
||||
ina = _mm256_srli_epi32(_mm256_mullo_epi16(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_set1_epi32(this->ca)), 8);
|
||||
alphaMask = _mm256_cmpeq_epi32(ina, _mm256_setzero_si256());
|
||||
} else {
|
||||
ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
|
||||
alphaMask = _mm256_set1_epi32(BlendBlit::kAModMask);
|
||||
}
|
||||
|
||||
if (rgbmod) {
|
||||
__m256i srcB = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m256i srcG = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m256i srcR = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
__m256i dstB = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m256i dstG = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m256i dstR = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
|
||||
srcB = _mm256_and_si256(_mm256_slli_epi32(_mm256_mullo_epi32(dstB, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcB, _mm256_set1_epi32(this->cb)), ina), 16)), BlendBlit::kBModShift - 8), _mm256_set1_epi32(BlendBlit::kBModMask));
|
||||
srcG = _mm256_and_si256(_mm256_slli_epi32(_mm256_mullo_epi32(dstG, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcG, _mm256_set1_epi32(this->cg)), ina), 16)), BlendBlit::kGModShift - 8), _mm256_set1_epi32(BlendBlit::kGModMask));
|
||||
srcR = _mm256_and_si256(_mm256_slli_epi32(_mm256_mullo_epi32(dstR, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcR, _mm256_set1_epi32(this->cr)), ina), 16)), BlendBlit::kRModShift - 8), _mm256_set1_epi32(BlendBlit::kRModMask));
|
||||
|
||||
src = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
|
||||
src = _mm256_or_si256(src, _mm256_or_si256(srcB, _mm256_or_si256(srcG, srcR)));
|
||||
} else {
|
||||
constexpr uint32 rbMask = BlendBlit::kRModMask | BlendBlit::kBModMask;
|
||||
__m256i dstRB = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m256i srcRB = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m256i dstG = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m256i srcG = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
|
||||
srcG = _mm256_and_si256(_mm256_slli_epi32(_mm256_mullo_epi16(dstG, _mm256_srli_epi32(_mm256_mullo_epi16(srcG, ina), 8)), 8), _mm256_set1_epi32(BlendBlit::kGModMask));
|
||||
srcRB = _mm256_and_si256(_mm256_mullo_epi16(dstRB, _mm256_srli_epi32(_mm256_and_si256(_mm256_mullo_epi32(srcRB, ina), _mm256_set1_epi32(rbMask)), 8)), _mm256_set1_epi32(rbMask));
|
||||
|
||||
src = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
|
||||
src = _mm256_or_si256(src, _mm256_or_si256(srcRB, srcG));
|
||||
}
|
||||
|
||||
dst = _mm256_and_si256(alphaMask, dst);
|
||||
src = _mm256_andnot_si256(alphaMask, src);
|
||||
return _mm256_or_si256(dst, src);
|
||||
}
|
||||
};
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr OpaqueBlend(const uint32 color) : BlendBlitImpl_Base::OpaqueBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline __m256i simd(__m256i src, __m256i dst) const {
|
||||
return _mm256_or_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
|
||||
}
|
||||
};
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr BinaryBlend(const uint32 color) : BlendBlitImpl_Base::BinaryBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline __m256i simd(__m256i src, __m256i dst) const {
|
||||
__m256i alphaMask = _mm256_cmpeq_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_setzero_si256());
|
||||
dst = _mm256_and_si256(dst, alphaMask);
|
||||
src = _mm256_andnot_si256(alphaMask, _mm256_or_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)));
|
||||
return _mm256_or_si256(src, dst);
|
||||
}
|
||||
};
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr AdditiveBlend(const uint32 color) : BlendBlitImpl_Base::AdditiveBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline __m256i simd(__m256i src, __m256i dst) const {
|
||||
__m256i ina;
|
||||
if (alphamod)
|
||||
ina = _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_set1_epi32(this->ca)), 8);
|
||||
else
|
||||
ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
|
||||
__m256i alphaMask = _mm256_cmpeq_epi32(ina, _mm256_set1_epi32(0));
|
||||
|
||||
if (rgbmod) {
|
||||
__m256i srcb = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kBModMask));
|
||||
__m256i srcg = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m256i srcr = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
__m256i dstb = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kBModMask));
|
||||
__m256i dstg = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m256i dstr = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
|
||||
srcb = _mm256_and_si256(_mm256_add_epi32(dstb, _mm256_srli_epi32(_mm256_mullo_epi32(srcb, _mm256_mullo_epi32(_mm256_set1_epi32(this->cb), ina)), 16)), _mm256_set1_epi32(BlendBlit::kBModMask));
|
||||
srcg = _mm256_and_si256(_mm256_add_epi32(dstg, _mm256_mullo_epi32(srcg, _mm256_mullo_epi32(_mm256_set1_epi32(this->cg), ina))), _mm256_set1_epi32(BlendBlit::kGModMask));
|
||||
srcr = _mm256_and_si256(_mm256_add_epi32(dstr, _mm256_srli_epi32(_mm256_mullo_epi32(srcr, _mm256_mullo_epi32(_mm256_set1_epi32(this->cr), ina)), BlendBlit::kRModShift - 16)), _mm256_set1_epi32(BlendBlit::kRModMask));
|
||||
|
||||
src = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kAModMask));
|
||||
src = _mm256_or_si256(src, _mm256_or_si256(srcb, _mm256_or_si256(srcg, srcb)));
|
||||
} else if (alphamod) {
|
||||
__m256i srcg = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask));
|
||||
__m256i srcrb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m256i dstg = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask));
|
||||
__m256i dstrb = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
|
||||
srcg = _mm256_and_si256(_mm256_add_epi32(dstg, _mm256_srli_epi32(_mm256_mullo_epi32(srcg, ina), 8)), _mm256_set1_epi32(BlendBlit::kGModMask));
|
||||
srcrb = _mm256_and_si256(_mm256_add_epi32(dstrb, _mm256_mullo_epi32(srcrb, ina)), _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask));
|
||||
|
||||
src = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kAModMask));
|
||||
src = _mm256_or_si256(src, _mm256_or_si256(srcrb, srcg));
|
||||
} else {
|
||||
__m256i srcg = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask));
|
||||
__m256i srcrb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m256i dstg = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask));
|
||||
__m256i dstrb = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
|
||||
srcg = _mm256_and_si256(_mm256_add_epi32(dstg, srcg), _mm256_set1_epi32(BlendBlit::kGModMask));
|
||||
srcrb = _mm256_and_si256(_mm256_slli_epi32(_mm256_add_epi32(dstrb, srcrb), 8), _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask));
|
||||
|
||||
src = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kAModMask));
|
||||
src = _mm256_or_si256(src, _mm256_or_si256(srcrb, srcg));
|
||||
}
|
||||
|
||||
dst = _mm256_and_si256(alphaMask, dst);
|
||||
src = _mm256_andnot_si256(alphaMask, src);
|
||||
return _mm256_or_si256(dst, src);
|
||||
}
|
||||
};
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr SubtractiveBlend(const uint32 color) : BlendBlitImpl_Base::SubtractiveBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline __m256i simd(__m256i src, __m256i dst) const {
|
||||
__m256i ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
|
||||
__m256i srcb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m256i srcg = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m256i srcr = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
__m256i dstb = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m256i dstg = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m256i dstr = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
|
||||
srcb = _mm256_and_si256(_mm256_slli_epi32(_mm256_max_epi16(_mm256_sub_epi32(dstb, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi32(srcb, _mm256_set1_epi32(this->cb)), _mm256_mullo_epi32(dstb, ina)), 24)), _mm256_set1_epi32(0)), BlendBlit::kBModShift), _mm256_set1_epi32(BlendBlit::kBModMask));
|
||||
srcg = _mm256_and_si256(_mm256_slli_epi32(_mm256_max_epi16(_mm256_sub_epi32(dstg, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi32(srcg, _mm256_set1_epi32(this->cg)), _mm256_mullo_epi32(dstg, ina)), 24)), _mm256_set1_epi32(0)), BlendBlit::kGModShift), _mm256_set1_epi32(BlendBlit::kGModMask));
|
||||
srcr = _mm256_and_si256(_mm256_slli_epi32(_mm256_max_epi16(_mm256_sub_epi32(dstr, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi32(srcr, _mm256_set1_epi32(this->cr)), _mm256_mullo_epi32(dstr, ina)), 24)), _mm256_set1_epi32(0)), BlendBlit::kRModShift), _mm256_set1_epi32(BlendBlit::kRModMask));
|
||||
|
||||
return _mm256_or_si256(_mm256_set1_epi32(BlendBlit::kAModMask), _mm256_or_si256(srcb, _mm256_or_si256(srcg, srcr)));
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
template<template <bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod>
|
||||
static void blitInnerLoop(BlendBlit::Args &args) {
|
||||
const bool loaddst = true; // TODO: Only set this when necessary
|
||||
|
||||
const byte *in;
|
||||
byte *out;
|
||||
|
||||
const PixelFunc<rgbmod, alphamod> pixelFunc(args.color);
|
||||
|
||||
int scaleXCtr, scaleYCtr = args.scaleYoff;
|
||||
const byte *inBase;
|
||||
|
||||
if (!doscale && (args.flipping & FLIP_H)) args.ino -= 4 * 7;
|
||||
|
||||
for (uint32 i = 0; i < args.height; i++) {
|
||||
if (doscale) {
|
||||
inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
|
||||
scaleXCtr = args.scaleXoff;
|
||||
} else {
|
||||
in = args.ino;
|
||||
}
|
||||
out = args.outo;
|
||||
|
||||
uint32 j = 0;
|
||||
for (; j + 8 <= args.width; j += 8) {
|
||||
__m256i dstPixels, srcPixels;
|
||||
if (loaddst) dstPixels = _mm256_loadu_si256((const __m256i *)out);
|
||||
if (!doscale) {
|
||||
srcPixels = _mm256_loadu_si256((const __m256i *)in);
|
||||
} else {
|
||||
srcPixels = _mm256_setr_epi32(
|
||||
*(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 0) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
|
||||
*(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 1) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
|
||||
*(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 2) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
|
||||
*(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 3) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
|
||||
*(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 4) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
|
||||
*(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 5) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
|
||||
*(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 6) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
|
||||
*(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 7) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep)
|
||||
);
|
||||
scaleXCtr += args.scaleX * 8;
|
||||
}
|
||||
if (!doscale && (args.flipping & FLIP_H)) {
|
||||
srcPixels = _mm256_shuffle_epi32(srcPixels, _MM_SHUFFLE(0, 1, 2, 3));
|
||||
srcPixels = _mm256_permute2x128_si256(srcPixels, srcPixels, 0x01);
|
||||
}
|
||||
{
|
||||
const __m256i res = pixelFunc.simd(srcPixels, dstPixels);
|
||||
_mm256_storeu_si256((__m256i *)out, res);
|
||||
}
|
||||
if (!doscale) in += (ptrdiff_t)args.inStep * 8;
|
||||
out += 4ULL * 8;
|
||||
}
|
||||
if (!doscale && (args.flipping & FLIP_H)) in += 4 * 7;
|
||||
for (; j < args.width; j++) {
|
||||
if (doscale) {
|
||||
in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
|
||||
}
|
||||
|
||||
pixelFunc.normal(in, out);
|
||||
|
||||
if (doscale)
|
||||
scaleXCtr += args.scaleX;
|
||||
else
|
||||
in += args.inStep;
|
||||
out += 4;
|
||||
}
|
||||
if (doscale)
|
||||
scaleYCtr += args.scaleY;
|
||||
else
|
||||
args.ino += args.inoStep;
|
||||
args.outo += args.dstPitch;
|
||||
}
|
||||
}
|
||||
|
||||
}; // end of class BlendBlitImpl_AVX2
|
||||
|
||||
// Entry point for the AVX2 code path: forwards to the shared blit dispatcher
// instantiated with the AVX2 pixel functors.
void BlendBlit::blitAVX2(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
	BlendBlit::blitT<BlendBlitImpl_AVX2>(args, blendMode, alphaType);
}
|
||||
|
||||
} // End of namespace Graphics
|
||||
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute pop
|
||||
#elif defined(__GNUC__)
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
138
graphics/blit/blit-fast.cpp
Normal file
138
graphics/blit/blit-fast.cpp
Normal file
@@ -0,0 +1,138 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "graphics/blit.h"
|
||||
#include "graphics/pixelformat.h"
|
||||
#include "common/endian.h"
|
||||
#include "common/system.h"
|
||||
|
||||
namespace Graphics {
|
||||
|
||||
namespace {
|
||||
|
||||
template<bool bswap, int rotate>
|
||||
static void swapBlit(byte *dst, const byte *src,
|
||||
const uint dstPitch, const uint srcPitch,
|
||||
const uint w, const uint h) {
|
||||
// Faster, but larger, to provide optimized handling for each case.
|
||||
const uint srcDelta = (srcPitch - w * sizeof(uint32));
|
||||
const uint dstDelta = (dstPitch - w * sizeof(uint32));
|
||||
|
||||
for (uint y = 0; y < h; ++y) {
|
||||
for (uint x = 0; x < w; ++x) {
|
||||
uint32 col = *(const uint32 *)src;
|
||||
|
||||
if (bswap)
|
||||
col = SWAP_BYTES_32(col);
|
||||
if (rotate != 0)
|
||||
col = ROTATE_RIGHT_32(col, rotate);
|
||||
|
||||
*(uint32 *)dst = col;
|
||||
|
||||
src += sizeof(uint32);
|
||||
dst += sizeof(uint32);
|
||||
}
|
||||
src += srcDelta;
|
||||
dst += dstDelta;
|
||||
}
|
||||
}
|
||||
|
||||
} // End of anonymous namespace
|
||||
|
||||
// TODO: Add fast 24<->32bpp conversion
|
||||
// TODO: Add fast 16<->16bpp conversion
|
||||
struct FastBlitLookup {
|
||||
FastBlitFunc func;
|
||||
Graphics::PixelFormat srcFmt, dstFmt;
|
||||
};
|
||||
|
||||
// 32bpp -> 32bpp conversions that reduce to a byte swap and/or a bit rotation.
// Each row is { routine, source format, destination format }.
static const FastBlitLookup fastBlitFuncs_4to4[] = {
	// 32-bit byteswap
	{ swapBlit<true, 0>,   PixelFormat(4, 8, 8, 8, 8,  0,  8, 16, 24), PixelFormat(4, 8, 8, 8, 8, 24, 16,  8,  0) }, // ABGR8888 -> RGBA8888
	{ swapBlit<true, 0>,   PixelFormat(4, 8, 8, 8, 8, 24, 16,  8,  0), PixelFormat(4, 8, 8, 8, 8,  0,  8, 16, 24) }, // RGBA8888 -> ABGR8888
	{ swapBlit<true, 0>,   PixelFormat(4, 8, 8, 8, 8, 16,  8,  0, 24), PixelFormat(4, 8, 8, 8, 8,  8, 16, 24,  0) }, // ARGB8888 -> BGRA8888
	{ swapBlit<true, 0>,   PixelFormat(4, 8, 8, 8, 8,  8, 16, 24,  0), PixelFormat(4, 8, 8, 8, 8, 16,  8,  0, 24) }, // BGRA8888 -> ARGB8888

	// 32-bit rotate right
	{ swapBlit<false, 8>,  PixelFormat(4, 8, 8, 8, 8, 24, 16,  8,  0), PixelFormat(4, 8, 8, 8, 8, 16,  8,  0, 24) }, // RGBA8888 -> ARGB8888
	{ swapBlit<false, 8>,  PixelFormat(4, 8, 8, 8, 8,  8, 16, 24,  0), PixelFormat(4, 8, 8, 8, 8,  0,  8, 16, 24) }, // BGRA8888 -> ABGR8888

	// 32-bit rotate left
	{ swapBlit<false, 24>, PixelFormat(4, 8, 8, 8, 8,  0,  8, 16, 24), PixelFormat(4, 8, 8, 8, 8,  8, 16, 24,  0) }, // ABGR8888 -> BGRA8888
	{ swapBlit<false, 24>, PixelFormat(4, 8, 8, 8, 8, 16,  8,  0, 24), PixelFormat(4, 8, 8, 8, 8, 24, 16,  8,  0) }, // ARGB8888 -> RGBA8888

	// 32-bit byteswap and rotate right
	{ swapBlit<true, 8>,   PixelFormat(4, 8, 8, 8, 8,  0,  8, 16, 24), PixelFormat(4, 8, 8, 8, 8, 16,  8,  0, 24) }, // ABGR8888 -> ARGB8888
	{ swapBlit<true, 8>,   PixelFormat(4, 8, 8, 8, 8, 16,  8,  0, 24), PixelFormat(4, 8, 8, 8, 8,  0,  8, 16, 24) }, // ARGB8888 -> ABGR8888

	// 32-bit byteswap and rotate left
	{ swapBlit<true, 24>,  PixelFormat(4, 8, 8, 8, 8, 24, 16,  8,  0), PixelFormat(4, 8, 8, 8, 8,  8, 16, 24,  0) }, // RGBA8888 -> BGRA8888
	{ swapBlit<true, 24>,  PixelFormat(4, 8, 8, 8, 8,  8, 16, 24,  0), PixelFormat(4, 8, 8, 8, 8, 24, 16,  8,  0) }  // BGRA8888 -> RGBA8888
};
|
||||
|
||||
#ifdef SCUMMVM_NEON
|
||||
// 16bpp conversions that have a dedicated NEON routine.
// Each row is { routine, source format, destination format }.
static const FastBlitLookup fastBlitFuncs_NEON[] = {
	// 16-bit with NEON
	{
		fastBlitNEON_XRGB1555_RGB565,
		PixelFormat(2, 5, 5, 5, 0, 10, 5, 0, 0),
		PixelFormat(2, 5, 6, 5, 0, 11, 5, 0, 0)
	}, // XRGB1555 -> RGB565
};
|
||||
#endif
|
||||
|
||||
// Returns a specialised conversion routine for the given destination/source
// format pair, or nullptr when no table entry matches both formats exactly.
FastBlitFunc getFastBlitFunc(const PixelFormat &dstFmt, const PixelFormat &srcFmt) {
	const uint dstBpp = dstFmt.bytesPerPixel;
	const uint srcBpp = srcFmt.bytesPerPixel;

	// Choose the candidate table from the pixel sizes; the two cases are
	// mutually exclusive, so at most one table is ever searched.
	const FastBlitLookup *table = nullptr;
	size_t length = 0;

	if (srcBpp == 4 && dstBpp == 4) {
		table = fastBlitFuncs_4to4;
		length = ARRAYSIZE(fastBlitFuncs_4to4);
	}
#ifdef SCUMMVM_NEON
	else if (srcBpp == 2 && dstBpp == 2 && g_system->hasFeature(OSystem::kFeatureCpuNEON)) {
		table = fastBlitFuncs_NEON;
		length = ARRAYSIZE(fastBlitFuncs_NEON);
	}
#endif

	for (size_t i = 0; i < length; i++) {
		const FastBlitLookup &entry = table[i];
		if (srcFmt != entry.srcFmt || dstFmt != entry.dstFmt)
			continue;

		return entry.func;
	}

	return nullptr;
}
|
||||
|
||||
} // End of namespace Graphics
|
||||
99
graphics/blit/blit-generic.cpp
Normal file
99
graphics/blit/blit-generic.cpp
Normal file
@@ -0,0 +1,99 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "graphics/blit/blit-alpha.h"
|
||||
#include "graphics/pixelformat.h"
|
||||
|
||||
namespace Graphics {
|
||||
|
||||
class BlendBlitImpl_Default : public BlendBlitImpl_Base {
|
||||
friend class BlendBlit;
|
||||
public:
|
||||
|
||||
template<template <bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod>
|
||||
static inline void blitInnerLoop(BlendBlit::Args &args) {
|
||||
const byte *in;
|
||||
byte *out;
|
||||
|
||||
const PixelFunc<rgbmod, alphamod> pixelFunc(args.color);
|
||||
|
||||
int scaleXCtr, scaleYCtr = args.scaleYoff;
|
||||
const byte *inBase;
|
||||
|
||||
for (uint32 i = 0; i < args.height; i++) {
|
||||
if (doscale) {
|
||||
inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
|
||||
scaleXCtr = args.scaleXoff;
|
||||
} else {
|
||||
in = args.ino;
|
||||
}
|
||||
out = args.outo;
|
||||
|
||||
for (uint32 j = 0; j < args.width; j++) {
|
||||
if (doscale) {
|
||||
in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
|
||||
}
|
||||
|
||||
pixelFunc.normal(in, out);
|
||||
|
||||
if (doscale)
|
||||
scaleXCtr += args.scaleX;
|
||||
else
|
||||
in += args.inStep;
|
||||
out += 4;
|
||||
}
|
||||
if (doscale)
|
||||
scaleYCtr += args.scaleY;
|
||||
else
|
||||
args.ino += args.inoStep;
|
||||
args.outo += args.dstPitch;
|
||||
}
|
||||
}
|
||||
|
||||
template<template <bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool rgbmod, bool alphamod>
|
||||
static inline void fillInnerLoop(BlendBlit::Args &args) {
|
||||
byte *out;
|
||||
|
||||
const PixelFunc<rgbmod, alphamod> pixelFunc(args.color);
|
||||
|
||||
for (uint32 i = 0; i < args.height; i++) {
|
||||
out = args.outo;
|
||||
|
||||
for (uint32 j = 0; j < args.width; j++) {
|
||||
pixelFunc.fill(out);
|
||||
|
||||
out += 4;
|
||||
}
|
||||
args.outo += args.dstPitch;
|
||||
}
|
||||
}
|
||||
|
||||
}; // end of class BlendBlitImpl_Default
|
||||
|
||||
// Entry point for the portable blit path: forwards to the shared blit
// dispatcher instantiated with the scalar implementation.
void BlendBlit::blitGeneric(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
	BlendBlit::blitT<BlendBlitImpl_Default>(args, blendMode, alphaType);
}
|
||||
|
||||
// Entry point for the portable fill path: forwards to the shared fill
// dispatcher instantiated with the scalar implementation.
void BlendBlit::fillGeneric(Args &args, const TSpriteBlendMode &blendMode) {
	BlendBlit::fillT<BlendBlitImpl_Default>(args, blendMode);
}
|
||||
|
||||
} // End of namespace Graphics
|
||||
391
graphics/blit/blit-neon.cpp
Normal file
391
graphics/blit/blit-neon.cpp
Normal file
@@ -0,0 +1,391 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "common/scummsys.h"
|
||||
|
||||
#ifdef SCUMMVM_NEON
|
||||
|
||||
#include "graphics/blit/blit-alpha.h"
|
||||
#include "graphics/pixelformat.h"
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#if !defined(__aarch64__) && !defined(__ARM_NEON)
|
||||
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute push (__attribute__((target("neon"))), apply_to=function)
|
||||
#elif defined(__GNUC__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("fpu=neon")
|
||||
#endif
|
||||
|
||||
#endif // !defined(__aarch64__) && !defined(__ARM_NEON)
|
||||
|
||||
namespace Graphics {
|
||||
|
||||
class BlendBlitImpl_NEON : public BlendBlitImpl_Base {
|
||||
friend class BlendBlit;
|
||||
|
||||
// NEON alpha blend for four 32-bit pixels at a time: each colour channel
// becomes dst * (255 - a) + src * a (with an extra per-channel tint factor
// when rgbmod is set), and the alpha bits of the result are forced on. Lanes
// whose effective alpha is zero keep the destination unchanged.
template<bool rgbmod, bool alphamod>
struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<rgbmod, alphamod> {
public:
	constexpr AlphaBlend(const uint32 color) : BlendBlitImpl_Base::AlphaBlend<rgbmod, alphamod>(color) {}

	inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst) const {
		const uint32x4_t maskA = vmovq_n_u32(BlendBlit::kAModMask);

		// Effective alpha: source alpha, scaled by the tint alpha when alphamod is set.
		uint32x4_t ina = vandq_u32(src, maskA);
		if (alphamod)
			ina = vshrq_n_u32(vmulq_u32(ina, vdupq_n_u32(this->ca)), 8);

		// All-ones in lanes that are fully transparent.
		const uint32x4_t transparent = vceqq_u32(ina, vmovq_n_u32(0));
		const uint32x4_t invAlpha = vsubq_u32(vmovq_n_u32(255), ina);

		if (rgbmod) {
			const uint32x4_t maskR = vmovq_n_u32(BlendBlit::kRModMask);
			const uint32x4_t maskG = vmovq_n_u32(BlendBlit::kGModMask);
			const uint32x4_t maskB = vmovq_n_u32(BlendBlit::kBModMask);

			uint32x4_t bgR = vshrq_n_u32(vandq_u32(dst, maskR), 16);
			uint32x4_t fgR = vshrq_n_u32(vandq_u32(src, maskR), 16);
			uint32x4_t bgG = vshrq_n_u32(vandq_u32(dst, maskG), 8);
			uint32x4_t fgG = vshrq_n_u32(vandq_u32(src, maskG), 8);
			uint32x4_t bgB = vandq_u32(dst, maskB);
			uint32x4_t fgB = vandq_u32(src, maskB);

			// Destination contribution, weighted by the inverse alpha.
			bgR = vshrq_n_u32(vmulq_u32(bgR, invAlpha), 8);
			bgG = vshrq_n_u32(vmulq_u32(bgG, invAlpha), 8);
			bgB = vshrq_n_u32(vmulq_u32(bgB, invAlpha), 8);

			// Add the source contribution, weighted by alpha and the channel tint.
			fgR = vaddq_u32(bgR, vshrq_n_u32(vmulq_u32(vmulq_u32(fgR, ina), vmovq_n_u32(this->cr)), 16));
			fgG = vaddq_u32(bgG, vshrq_n_u32(vmulq_u32(vmulq_u32(fgG, ina), vmovq_n_u32(this->cg)), 16));
			fgB = vaddq_u32(bgB, vshrq_n_u32(vmulq_u32(vmulq_u32(fgB, ina), vmovq_n_u32(this->cb)), 16));

			// Reassemble the pixel with full alpha.
			src = vorrq_u32(vandq_u32(fgB, maskB), maskA);
			src = vorrq_u32(vandq_u32(vshlq_n_u32(fgG, 8), maskG), src);
			src = vorrq_u32(vandq_u32(vshlq_n_u32(fgR, 16), maskR), src);
		} else {
			// Red and blue share one register; green is handled on its own.
			const uint32x4_t maskRB = vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask);
			const uint32x4_t maskG = vmovq_n_u32(BlendBlit::kGModMask);

			uint32x4_t bgRB = vshrq_n_u32(vandq_u32(dst, maskRB), 8);
			uint32x4_t fgRB = vshrq_n_u32(vandq_u32(src, maskRB), 8);
			uint32x4_t bgG = vandq_u32(dst, maskG);
			uint32x4_t fgG = vandq_u32(src, maskG);

			bgRB = vmulq_u32(bgRB, invAlpha);
			bgG = vshrq_n_u32(vmulq_u32(bgG, invAlpha), 8);
			fgRB = vaddq_u32(bgRB, vmulq_u32(fgRB, ina));
			fgG = vaddq_u32(bgG, vshrq_n_u32(vmulq_u32(fgG, ina), 8));

			src = vorrq_u32(vandq_u32(fgG, maskG), maskA);
			src = vorrq_u32(vandq_u32(fgRB, maskRB), src);
		}

		// Per lane: destination where transparent, blended result otherwise.
		const uint32x4_t keptDst = vandq_u32(transparent, dst);
		const uint32x4_t blended = vandq_u32(vmvnq_u32(transparent), src);
		return vorrq_u32(keptDst, blended);
	}
};
|
||||
|
||||
// NEON multiply blend for four 32-bit pixels at a time: each destination
// colour channel is multiplied by the alpha-weighted (and, with rgbmod,
// tinted) source channel.
template<bool rgbmod, bool alphamod>
struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<rgbmod, alphamod> {
public:
	constexpr MultiplyBlend(const uint32 color) : BlendBlitImpl_Base::MultiplyBlend<rgbmod, alphamod>(color) {}

	inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst) const {
		const uint32x4_t maskA = vmovq_n_u32(BlendBlit::kAModMask);
		const uint32x4_t srcAlpha = vandq_u32(src, maskA);

		// ina: effective alpha. keepDst: bits of the result taken from dst.
		uint32x4_t ina, keepDst;
		if (alphamod) {
			ina = vshrq_n_u32(vmulq_u32(srcAlpha, vdupq_n_u32(this->ca)), 8);
			keepDst = vceqq_u32(ina, vmovq_n_u32(0));
		} else {
			ina = srcAlpha;
			// Without alphamod only the alpha bits are taken from dst.
			keepDst = maskA;
		}

		uint32x4_t colour;
		if (rgbmod) {
			const uint32x4_t maskB = vmovq_n_u32(BlendBlit::kBModMask);
			const uint32x4_t maskG = vmovq_n_u32(BlendBlit::kGModMask);
			const uint32x4_t maskR = vmovq_n_u32(BlendBlit::kRModMask);

			const uint32x4_t fgB = vshrq_n_u32(vandq_u32(src, maskB), BlendBlit::kBModShift);
			const uint32x4_t fgG = vshrq_n_u32(vandq_u32(src, maskG), BlendBlit::kGModShift);
			const uint32x4_t fgR = vshrq_n_u32(vandq_u32(src, maskR), BlendBlit::kRModShift);
			const uint32x4_t bgB = vshrq_n_u32(vandq_u32(dst, maskB), BlendBlit::kBModShift);
			const uint32x4_t bgG = vshrq_n_u32(vandq_u32(dst, maskG), BlendBlit::kGModShift);
			const uint32x4_t bgR = vshrq_n_u32(vandq_u32(dst, maskR), BlendBlit::kRModShift);

			// Per-channel factor: (src * tint * alpha) >> 16.
			const uint32x4_t factorB = vshrq_n_u32(vmulq_u32(vmulq_u32(fgB, vmovq_n_u32(this->cb)), ina), 16);
			const uint32x4_t factorG = vshrq_n_u32(vmulq_u32(vmulq_u32(fgG, vmovq_n_u32(this->cg)), ina), 16);
			const uint32x4_t factorR = vshrq_n_u32(vmulq_u32(vmulq_u32(fgR, vmovq_n_u32(this->cr)), ina), 16);

			// dst * factor, moved back into the channel's position.
			const uint32x4_t outB = vandq_u32(vshlq_n_u32(vmulq_u32(bgB, factorB), BlendBlit::kBModShift - 8), maskB);
			const uint32x4_t outG = vandq_u32(vshlq_n_u32(vmulq_u32(bgG, factorG), BlendBlit::kGModShift - 8), maskG);
			const uint32x4_t outR = vandq_u32(vshlq_n_u32(vmulq_u32(bgR, factorR), BlendBlit::kRModShift - 8), maskR);

			colour = vorrq_u32(outB, vorrq_u32(outG, outR));
		} else {
			constexpr uint32 rbMask = BlendBlit::kRModMask | BlendBlit::kBModMask;
			const uint32x4_t maskG = vmovq_n_u32(BlendBlit::kGModMask);
			const uint32x4_t maskRB = vmovq_n_u32(rbMask);

			const uint32x4_t fgG = vshrq_n_u32(vandq_u32(src, maskG), BlendBlit::kGModShift);
			const uint32x4_t fgRB = vshrq_n_u32(vandq_u32(src, maskRB), BlendBlit::kBModShift);
			const uint32x4_t bgG = vshrq_n_u32(vandq_u32(dst, maskG), BlendBlit::kGModShift);
			const uint32x4_t bgRB = vshrq_n_u32(vandq_u32(dst, maskRB), BlendBlit::kBModShift);

			const uint32x4_t outG = vandq_u32(vshlq_n_u32(vmulq_u32(bgG, vshrq_n_u32(vmulq_u32(fgG, ina), 8)), 8), maskG);

			// Red and blue are multiplied together as 16-bit lanes.
			const uint32x4_t factorRB = vshrq_n_u32(vandq_u32(vmulq_u32(fgRB, ina), maskRB), 8);
			const uint32x4_t productRB = vreinterpretq_u32_u16(vmulq_u16(vreinterpretq_u16_u32(bgRB), vreinterpretq_u16_u32(factorRB)));
			const uint32x4_t outRB = vandq_u32(productRB, maskRB);

			colour = vorrq_u32(outRB, outG);
		}

		// Source alpha bits plus the multiplied colour channels.
		src = vorrq_u32(srcAlpha, colour);

		const uint32x4_t fromDst = vandq_u32(keepDst, dst);
		const uint32x4_t fromSrc = vandq_u32(vmvnq_u32(keepDst), src);
		return vorrq_u32(fromDst, fromSrc);
	}
};
|
||||
|
||||
// NEON opaque blit: the source pixels replace the destination with every
// alpha bit forced on.
template<bool rgbmod, bool alphamod>
struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend<rgbmod, alphamod> {
public:
	constexpr OpaqueBlend(const uint32 color) : BlendBlitImpl_Base::OpaqueBlend<rgbmod, alphamod>(color) {}

	// Handles four 32-bit pixels. dst is part of the common functor
	// signature but is not read here.
	inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst) const {
		const uint32x4_t fullAlpha = vmovq_n_u32(BlendBlit::kAModMask);
		return vorrq_u32(fullAlpha, src);
	}
};
|
||||
|
||||
// NEON binary (on/off) transparency: a pixel whose source alpha bits are all
// zero keeps the destination, any other pixel becomes the source with the
// alpha bits forced on.
template<bool rgbmod, bool alphamod>
struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend<rgbmod, alphamod> {
public:
	constexpr BinaryBlend(const uint32 color) : BlendBlitImpl_Base::BinaryBlend<rgbmod, alphamod>(color) {}

	inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst) const {
		const uint32x4_t alphaBits = vmovq_n_u32(BlendBlit::kAModMask);

		// All-ones in every lane whose source alpha is zero.
		const uint32x4_t transparent = vceqq_u32(vandq_u32(src, alphaBits), vmovq_n_u32(0));

		const uint32x4_t keptDst = vandq_u32(dst, transparent);
		const uint32x4_t solidSrc = vandq_u32(vorrq_u32(src, alphaBits), vmvnq_u32(transparent));
		return vorrq_u32(keptDst, solidSrc);
	}
};
|
||||
|
||||
// NEON additive blend for four 32-bit pixels at a time: adds the (optionally
// tinted / alpha-scaled) source colour channels onto the destination and
// keeps the destination's alpha bits. Lanes whose effective alpha is zero
// leave the destination untouched.
template<bool rgbmod, bool alphamod>
struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<rgbmod, alphamod> {
public:
	constexpr AdditiveBlend(const uint32 color) : BlendBlitImpl_Base::AdditiveBlend<rgbmod, alphamod>(color) {}

	inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst) const {
		const uint32x4_t maskA = vmovq_n_u32(BlendBlit::kAModMask);

		// Effective alpha: source alpha, scaled by the tint alpha when alphamod is set.
		uint32x4_t ina = vandq_u32(src, maskA);
		if (alphamod)
			ina = vshrq_n_u32(vmulq_u32(ina, vdupq_n_u32(this->ca)), 8);

		// All-ones in lanes that are fully transparent.
		const uint32x4_t transparent = vceqq_u32(ina, vmovq_n_u32(0));

		uint32x4_t colour;
		if (rgbmod) {
			// Per-channel path with an individual tint factor for each channel.
			const uint32x4_t maskB = vmovq_n_u32(BlendBlit::kBModMask);
			const uint32x4_t maskG = vmovq_n_u32(BlendBlit::kGModMask);
			const uint32x4_t maskR = vmovq_n_u32(BlendBlit::kRModMask);

			const uint32x4_t fgB = vandq_u32(src, maskB);
			const uint32x4_t fgG = vshrq_n_u32(vandq_u32(src, maskG), BlendBlit::kGModShift);
			const uint32x4_t fgR = vshrq_n_u32(vandq_u32(src, maskR), BlendBlit::kRModShift);
			const uint32x4_t bgB = vandq_u32(dst, maskB);
			const uint32x4_t bgG = vshrq_n_u32(vandq_u32(dst, maskG), BlendBlit::kGModShift);
			const uint32x4_t bgR = vshrq_n_u32(vandq_u32(dst, maskR), BlendBlit::kRModShift);

			const uint32x4_t addB = vshrq_n_u32(vmulq_u32(fgB, vmulq_u32(vmovq_n_u32(this->cb), ina)), 16);
			const uint32x4_t addG = vmulq_u32(fgG, vmulq_u32(vmovq_n_u32(this->cg), ina));
			const uint32x4_t addR = vshrq_n_u32(vmulq_u32(fgR, vmulq_u32(vmovq_n_u32(this->cr), ina)), BlendBlit::kRModShift - 16);

			const uint32x4_t outB = vandq_u32(vaddq_u32(bgB, addB), maskB);
			const uint32x4_t outG = vandq_u32(vaddq_u32(bgG, addG), maskG);
			const uint32x4_t outR = vandq_u32(vaddq_u32(bgR, addR), maskR);

			colour = vorrq_u32(outB, vorrq_u32(outG, outR));
		} else {
			// No colour tint: red and blue share one register, green is separate.
			const uint32x4_t maskG = vmovq_n_u32(BlendBlit::kGModMask);
			const uint32x4_t maskRB = vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask);

			const uint32x4_t fgG = vandq_u32(src, maskG);
			const uint32x4_t fgRB = vshrq_n_u32(vandq_u32(src, maskRB), BlendBlit::kBModShift);
			const uint32x4_t bgG = vandq_u32(dst, maskG);
			const uint32x4_t bgRB = vshrq_n_u32(vandq_u32(dst, maskRB), BlendBlit::kBModShift);

			uint32x4_t outG, outRB;
			if (alphamod) {
				// Source channels are scaled by the effective alpha before adding.
				outG = vandq_u32(vaddq_u32(bgG, vshrq_n_u32(vmulq_u32(fgG, ina), 8)), maskG);
				outRB = vandq_u32(vaddq_u32(bgRB, vmulq_u32(fgRB, ina)), maskRB);
			} else {
				// Plain channel-wise addition.
				outG = vandq_u32(vaddq_u32(bgG, fgG), maskG);
				outRB = vandq_u32(vshlq_n_u32(vaddq_u32(bgRB, fgRB), 8), maskRB);
			}

			colour = vorrq_u32(outRB, outG);
		}

		// Destination alpha bits plus the summed colour channels.
		src = vorrq_u32(vandq_u32(dst, maskA), colour);

		// Per lane: destination where transparent, blended result otherwise.
		const uint32x4_t keptDst = vandq_u32(transparent, dst);
		const uint32x4_t blended = vandq_u32(vmvnq_u32(transparent), src);
		return vorrq_u32(keptDst, blended);
	}
};
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr SubtractiveBlend(const uint32 color) : BlendBlitImpl_Base::SubtractiveBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst) const {
|
||||
uint32x4_t ina = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
|
||||
uint32x4_t srcb = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
uint32x4_t srcg = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
uint32x4_t srcr = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
uint32x4_t dstb = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
uint32x4_t dstg = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
uint32x4_t dstr = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
|
||||
srcb = vandq_u32(vshlq_n_u32(vreinterpretq_u32_s32(vmaxq_s32(vsubq_s32(vreinterpretq_s32_u32(dstb), vreinterpretq_s32_u32(vshrq_n_u32(vmulq_u32(vmulq_u32(srcb, vmovq_n_u32(this->cb)), vmulq_u32(dstb, ina)), 24))), vmovq_n_s32(0))), BlendBlit::kBModShift), vmovq_n_u32(BlendBlit::kBModMask));
|
||||
srcg = vandq_u32(vshlq_n_u32(vreinterpretq_u32_s32(vmaxq_s32(vsubq_s32(vreinterpretq_s32_u32(dstg), vreinterpretq_s32_u32(vshrq_n_u32(vmulq_u32(vmulq_u32(srcg, vmovq_n_u32(this->cg)), vmulq_u32(dstg, ina)), 24))), vmovq_n_s32(0))), BlendBlit::kGModShift), vmovq_n_u32(BlendBlit::kGModMask));
|
||||
srcr = vandq_u32(vshlq_n_u32(vreinterpretq_u32_s32(vmaxq_s32(vsubq_s32(vreinterpretq_s32_u32(dstr), vreinterpretq_s32_u32(vshrq_n_u32(vmulq_u32(vmulq_u32(srcr, vmovq_n_u32(this->cr)), vmulq_u32(dstr, ina)), 24))), vmovq_n_s32(0))), BlendBlit::kRModShift), vmovq_n_u32(BlendBlit::kRModMask));
|
||||
|
||||
return vorrq_u32(vmovq_n_u32(BlendBlit::kAModMask), vorrq_u32(srcb, vorrq_u32(srcg, srcr)));
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
template<template <bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod>
|
||||
static inline void blitInnerLoop(BlendBlit::Args &args) {
|
||||
const bool loaddst = true; // TODO: Only set this when necessary
|
||||
|
||||
const byte *in;
|
||||
byte *out;
|
||||
|
||||
PixelFunc<rgbmod, alphamod> pixelFunc(args.color);
|
||||
|
||||
int scaleXCtr, scaleYCtr = args.scaleYoff;
|
||||
const byte *inBase;
|
||||
|
||||
if (!doscale && (args.flipping & FLIP_H)) args.ino -= 4 * 3;
|
||||
|
||||
for (uint32 i = 0; i < args.height; i++) {
|
||||
if (doscale) {
|
||||
inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
|
||||
scaleXCtr = args.scaleXoff;
|
||||
} else {
|
||||
in = args.ino;
|
||||
}
|
||||
out = args.outo;
|
||||
uint32 j = 0;
|
||||
for (; j + 4 <= args.width; j += 4) {
|
||||
uint32x4_t dstPixels;
|
||||
if (loaddst) dstPixels = vld1q_u32((const uint32 *)out);
|
||||
uint32x4_t srcPixels;
|
||||
if (!doscale) {
|
||||
srcPixels = vld1q_u32((const uint32 *)in);
|
||||
} else {
|
||||
srcPixels = vsetq_lane_u32(*(const uint32 *)(inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep), vmovq_n_u32(0), 0);
|
||||
scaleXCtr += args.scaleX;
|
||||
srcPixels = vsetq_lane_u32(*(const uint32 *)(inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep), srcPixels, 1);
|
||||
scaleXCtr += args.scaleX;
|
||||
srcPixels = vsetq_lane_u32(*(const uint32 *)(inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep), srcPixels, 2);
|
||||
scaleXCtr += args.scaleX;
|
||||
srcPixels = vsetq_lane_u32(*(const uint32 *)(inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep), srcPixels, 3);
|
||||
scaleXCtr += args.scaleX;
|
||||
}
|
||||
if (!doscale && (args.flipping & FLIP_H)) {
|
||||
srcPixels = vrev64q_u32(srcPixels);
|
||||
srcPixels = vcombine_u32(vget_high_u32(srcPixels), vget_low_u32(srcPixels));
|
||||
}
|
||||
{
|
||||
const uint32x4_t res = pixelFunc.simd(srcPixels, dstPixels);
|
||||
vst1q_u32((uint32 *)out, res);
|
||||
}
|
||||
if (!doscale) in += args.inStep * 4;
|
||||
out += 4 * 4;
|
||||
}
|
||||
if (!doscale && (args.flipping & FLIP_H)) in += 4 * 3;
|
||||
for (; j < args.width; j++) {
|
||||
if (doscale) {
|
||||
in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
|
||||
}
|
||||
|
||||
pixelFunc.normal(in, out);
|
||||
|
||||
if (doscale)
|
||||
scaleXCtr += args.scaleX;
|
||||
else
|
||||
in += args.inStep;
|
||||
out += 4;
|
||||
}
|
||||
if (doscale)
|
||||
scaleYCtr += args.scaleY;
|
||||
else
|
||||
args.ino += args.inoStep;
|
||||
args.outo += args.dstPitch;
|
||||
}
|
||||
}
|
||||
|
||||
}; // end of class BlendBlitImpl_NEON
|
||||
|
||||
// NEON entry point: forwards to the blitT template instantiated with
// BlendBlitImpl_NEON as the implementation class.
void BlendBlit::blitNEON(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
	blitT<BlendBlitImpl_NEON>(args, blendMode, alphaType);
}
|
||||
|
||||
/**
 * Converts a w x h block of 16-bit XRGB1555 pixels to RGB565.
 *
 * Four pixels are converted per NEON operation; the last (w % 4) pixels of
 * every row are converted one at a time with the same formula.
 *
 * @param dst       first destination pixel
 * @param src       first source pixel
 * @param dstPitch  bytes per destination row
 * @param srcPitch  bytes per source row
 * @param w         width in pixels
 * @param h         height in pixels
 */
void fastBlitNEON_XRGB1555_RGB565(byte *dst, const byte *src,
								  const uint dstPitch, const uint srcPitch,
								  const uint w, const uint h) {
	// Bytes to skip at the end of each row
	const uint srcSkip = (srcPitch - w * 2);
	const uint dstSkip = (dstPitch - w * 2);

	for (uint row = 0; row < h; ++row) {
		uint remaining = w;

		while (remaining >= 4) {
			// Load pixels to NEON
			const uint16x4_t in = vld1_u16((const uint16 *)src);

			// Convert from XRGB1555 to RGB565
			// Here we do : ((pixels & 0x7FE0) << 1) | ((pixels & 0x0200) >> 4) | (pixels & 0x001F)
			const uint16x4_t redGreen = vshl_n_u16(vand_u16(in, vmov_n_u16(0x7FE0)), 1);
			const uint16x4_t greenLow = vshr_n_u16(vand_u16(in, vmov_n_u16(0x0200)), 4);
			const uint16x4_t blue = vand_u16(in, vmov_n_u16(0x001F));

			// Store pixels to destination
			vst1_u16((uint16 *)dst, vorr_u16(vorr_u16(redGreen, greenLow), blue));

			src += 4 * 2;
			dst += 4 * 2;
			remaining -= 4;
		}

		while (remaining > 0) {
			// We have remaining pixels, convert them the classic way
			const uint16 in = *(const uint16 *)src;

			*(uint16 *)dst = (((in & 0x7FE0) << 1) | ((in & 0x0200) >> 4) | (in & 0x001F));

			src += 2;
			dst += 2;
			--remaining;
		}

		src += srcSkip;
		dst += dstSkip;
	}
}
|
||||
|
||||
} // end of namespace Graphics
|
||||
|
||||
#if !defined(__aarch64__) && !defined(__ARM_NEON)
|
||||
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute pop
|
||||
#elif defined(__GNUC__)
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
|
||||
#endif // !defined(__aarch64__) && !defined(__ARM_NEON)
|
||||
|
||||
#endif // SCUMMVM_NEON
|
||||
552
graphics/blit/blit-scale.cpp
Normal file
552
graphics/blit/blit-scale.cpp
Normal file
@@ -0,0 +1,552 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*
|
||||
* The bottom part of this file is adapted from SDL_rotozoom.c. The
|
||||
* relevant copyright notice for those specific functions can be found at the
|
||||
* top of that section.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "graphics/blit.h"
|
||||
#include "graphics/pixelformat.h"
|
||||
#include "graphics/transform_struct.h"
|
||||
|
||||
#include "common/endian.h"
|
||||
#include "common/rect.h"
|
||||
#include "math/utils.h"
|
||||
|
||||
namespace Graphics {
|
||||
|
||||
namespace {
|
||||
|
||||
/**
 * Scales an image vertically only: every destination row is a straight copy
 * of the nearest source row, optionally written bottom-up (FLIP_V).
 *
 * @param w              width in pixels, identical for source and destination
 * @param bytesPerPixel  size of one pixel in bytes
 */
static void scaleVertical(byte *dst, const byte *src,
			const uint dstPitch, const uint srcPitch,
			const uint w, const uint dstH, const uint srcH,
			const byte flip, const uint bytesPerPixel) {
	const bool mirrored = flip & FLIP_V;

	// 16.16 fixed point
	const uint32 srcStepY = (srcH << 16) / dstH;
	const uint rowBytes = w * bytesPerPixel;

	int rowAdvance = static_cast<int>(dstPitch);
	byte *outRow = dst;
	if (mirrored) {
		// Start on the last row and walk upwards
		rowAdvance = -rowAdvance;
		outRow += (dstH - 1) * dstPitch;
	}

	uint32 srcPos = 0;
	for (uint32 row = 0; row < dstH; ++row) {
		memcpy(outRow, src + ((srcPos >> 16) * srcPitch), rowBytes);
		outRow += rowAdvance;
		srcPos += srcStepY;
	}
}
|
||||
|
||||
template <typename Color, int Size>
|
||||
static void scaleNN(byte *dst, const byte *src,
|
||||
const uint dstPitch, const uint srcPitch,
|
||||
const uint dstW, const uint dstH,
|
||||
const uint srcW, const uint srcH,
|
||||
const byte flip) {
|
||||
const bool flipx = flip & FLIP_H;
|
||||
const bool flipy = flip & FLIP_V;
|
||||
|
||||
// 16.16 fixed point
|
||||
const uint32 srcIncX = (srcW << 16) / dstW;
|
||||
const uint32 srcIncY = (srcH << 16) / dstH;
|
||||
|
||||
const int dstIncX = (flipx ? -1 : 1);
|
||||
const int dstIncY = (flipy ? -static_cast<int>(dstPitch) : static_cast<int>(dstPitch));
|
||||
|
||||
if (flipx) {
|
||||
dst += (dstW - 1) * Size;
|
||||
}
|
||||
|
||||
if (flipy) {
|
||||
dst += (dstH - 1) * dstPitch;
|
||||
}
|
||||
|
||||
for (uint32 y = 0, yoff = 0; y < dstH; y++, yoff += srcIncY) {
|
||||
const byte *srcP = src + ((yoff >> 16) * srcPitch);
|
||||
byte *dst1 = dst;
|
||||
for (uint32 x = 0, xoff = 0; x < dstW; x++, xoff += srcIncX) {
|
||||
const byte *src1 = srcP + ((xoff >> 16) * Size);
|
||||
if (Size == sizeof(Color)) {
|
||||
*(Color *)dst1 = *(const Color *)src1;
|
||||
} else {
|
||||
memcpy(dst1, src, Size);
|
||||
}
|
||||
dst1 += dstIncX * Size;
|
||||
}
|
||||
dst += dstIncY;
|
||||
}
|
||||
}
|
||||
|
||||
} // End of anonymous namespace
|
||||
|
||||
/**
 * Nearest-neighbour scaling blit between two buffers of the same pixel format.
 *
 * When the width is unchanged and no horizontal flip is requested, whole rows
 * are copied (copyBlit or scaleVertical); otherwise the per-pixel scaler
 * matching the pixel size is used.
 *
 * @return false when fmt.bytesPerPixel is not 1, 2, 3 or 4, true otherwise
 */
bool scaleBlit(byte *dst, const byte *src,
			   const uint dstPitch, const uint srcPitch,
			   const uint dstW, const uint dstH,
			   const uint srcW, const uint srcH,
			   const Graphics::PixelFormat &fmt,
			   const byte flip) {
	// This should be OK since int16 is used in Graphics::Surface.
	assert(srcW <= 65535);
	assert(srcH <= 65535);

	// Rows can be copied as a whole
	if (dstW == srcW && !(flip & FLIP_H)) {
		const bool plainCopy = (dstH == srcH) && !(flip & FLIP_V);
		if (plainCopy)
			copyBlit(dst, src, dstPitch, srcPitch, dstW, dstH, fmt.bytesPerPixel);
		else
			scaleVertical(dst, src, dstPitch, srcPitch, dstW, dstH, srcH, flip, fmt.bytesPerPixel);
		return true;
	}

	const uint pixelSize = fmt.bytesPerPixel;
	if (pixelSize == 1) {
		scaleNN<uint8, 1>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, flip);
		return true;
	}
	if (pixelSize == 2) {
		scaleNN<uint16, 2>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, flip);
		return true;
	}
	if (pixelSize == 3) {
		scaleNN<uint8, 3>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, flip);
		return true;
	}
	if (pixelSize == 4) {
		scaleNN<uint32, 4>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, flip);
		return true;
	}

	return false;
}
|
||||
|
||||
/*
|
||||
|
||||
The functions below are adapted from SDL_rotozoom.c,
|
||||
taken from SDL_gfx-2.0.18.
|
||||
|
||||
Its copyright notice:
|
||||
|
||||
=============================================================================
|
||||
SDL_rotozoom.c: rotozoomer, zoomer and shrinker for 32bit or 8bit surfaces
|
||||
|
||||
Copyright (C) 2001-2012 Andreas Schiffler
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the authors be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
|
||||
3. This notice may not be removed or altered from any source
|
||||
distribution.
|
||||
|
||||
Andreas Schiffler -- aschiffler at ferzkopp dot net
|
||||
=============================================================================
|
||||
|
||||
|
||||
The functions have been adapted for different structures, coordinate
|
||||
systems and pixel formats.
|
||||
|
||||
*/
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename Color, int Size>
|
||||
inline uint32 getPixel(const byte *sp) {
|
||||
if (Size == sizeof(Color)) {
|
||||
return *(const Color *)sp;
|
||||
} else {
|
||||
return READ_UINT24(sp);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Color, int Size>
|
||||
inline void setPixel(byte *pc, const uint32 pix) {
|
||||
if (Size == sizeof(Color)) {
|
||||
*(Color *)pc = pix;
|
||||
} else {
|
||||
WRITE_UINT24(pc, pix);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Bilinear interpolation of a single 8-bit channel.
 *
 * c00/c01 are blended with weight ex, c10/c11 likewise, and the two results
 * are then blended with weight ey. Both weights are 16-bit fractions.
 */
inline byte scaleBlitBilinearInterpolate(byte c01, byte c00, byte c11, byte c10, int ex, int ey) {
	// Horizontal blend of the first pair
	const int firstDelta = ((c01 - c00) * ex) >> 16;
	const int first = (firstDelta + c00) & 0xff;

	// Horizontal blend of the second pair
	const int secondDelta = ((c11 - c10) * ex) >> 16;
	const int second = (secondDelta + c10) & 0xff;

	// Vertical blend between the two
	const int verticalDelta = ((second - first) * ey) >> 16;
	return verticalDelta + first;
}
|
||||
|
||||
template <typename ColorMask, typename Color, int Size>
|
||||
void scaleBlitBilinearInterpolate(byte *dp, const byte *c01, const byte *c00, const byte *c11, const byte *c10, int ex, int ey,
|
||||
const Graphics::PixelFormat &fmt) {
|
||||
byte c01_a, c01_r, c01_g, c01_b;
|
||||
fmt.colorToARGBT<ColorMask>(getPixel<Color, Size>(c01), c01_a, c01_r, c01_g, c01_b);
|
||||
|
||||
byte c00_a, c00_r, c00_g, c00_b;
|
||||
fmt.colorToARGBT<ColorMask>(getPixel<Color, Size>(c00), c00_a, c00_r, c00_g, c00_b);
|
||||
|
||||
byte c11_a, c11_r, c11_g, c11_b;
|
||||
fmt.colorToARGBT<ColorMask>(getPixel<Color, Size>(c11), c11_a, c11_r, c11_g, c11_b);
|
||||
|
||||
byte c10_a, c10_r, c10_g, c10_b;
|
||||
fmt.colorToARGBT<ColorMask>(getPixel<Color, Size>(c10), c10_a, c10_r, c10_g, c10_b);
|
||||
|
||||
byte dp_a = scaleBlitBilinearInterpolate(c01_a, c00_a, c11_a, c10_a, ex, ey);
|
||||
byte dp_r = scaleBlitBilinearInterpolate(c01_r, c00_r, c11_r, c10_r, ex, ey);
|
||||
byte dp_g = scaleBlitBilinearInterpolate(c01_g, c00_g, c11_g, c10_g, ex, ey);
|
||||
byte dp_b = scaleBlitBilinearInterpolate(c01_b, c00_b, c11_b, c10_b, ex, ey);
|
||||
setPixel<Color, Size>(dp, fmt.ARGBToColorT<ColorMask>(dp_a, dp_r, dp_g, dp_b));
|
||||
}
|
||||
|
||||
template <typename ColorMask, typename Color, int Size>
|
||||
void scaleBlitBilinearLogic(byte *dst, const byte *src,
|
||||
const uint dstPitch, const uint srcPitch,
|
||||
const uint dstW, const uint dstH,
|
||||
const uint srcW, const uint srcH,
|
||||
const Graphics::PixelFormat &fmt,
|
||||
int *sax, int *say, byte flip) {
|
||||
const bool flipx = flip & FLIP_H;
|
||||
const bool flipy = flip & FLIP_V;
|
||||
|
||||
int spixelw = (srcW - 1);
|
||||
int spixelh = (srcH - 1);
|
||||
|
||||
const byte *sp = src;
|
||||
|
||||
if (flipx) {
|
||||
sp += spixelw * Size;
|
||||
}
|
||||
if (flipy) {
|
||||
sp += srcPitch * spixelh;
|
||||
}
|
||||
|
||||
int *csay = say;
|
||||
for (uint y = 0; y < dstH; y++) {
|
||||
byte *dp = dst + (dstPitch * y);
|
||||
const byte *csp = sp;
|
||||
int *csax = sax;
|
||||
for (uint x = 0; x < dstW; x++) {
|
||||
/*
|
||||
* Setup color source pointers
|
||||
*/
|
||||
int ex = (*csax & 0xffff);
|
||||
int ey = (*csay & 0xffff);
|
||||
int cx = (*csax >> 16);
|
||||
int cy = (*csay >> 16);
|
||||
|
||||
const byte *c00, *c01, *c10, *c11;
|
||||
c00 = c01 = c10 = sp;
|
||||
if (cy < spixelh) {
|
||||
if (flipy) {
|
||||
c10 -= srcPitch;
|
||||
} else {
|
||||
c10 += srcPitch;
|
||||
}
|
||||
}
|
||||
c11 = c10;
|
||||
if (cx < spixelw) {
|
||||
if (flipx) {
|
||||
c01 -= Size;
|
||||
c11 -= Size;
|
||||
} else {
|
||||
c01 += Size;
|
||||
c11 += Size;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Draw and interpolate colors
|
||||
*/
|
||||
scaleBlitBilinearInterpolate<ColorMask, Color, Size>(dp, c01, c00, c11, c10, ex, ey, fmt);
|
||||
|
||||
/*
|
||||
* Advance source pointer x
|
||||
*/
|
||||
int *salastx = csax;
|
||||
csax++;
|
||||
int sstepx = (*csax >> 16) - (*salastx >> 16);
|
||||
if (flipx) {
|
||||
sp -= sstepx * Size;
|
||||
} else {
|
||||
sp += sstepx * Size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Advance destination pointer x
|
||||
*/
|
||||
dp += Size;
|
||||
}
|
||||
/*
|
||||
* Advance source pointer y
|
||||
*/
|
||||
int *salasty = csay;
|
||||
csay++;
|
||||
int sstepy = (*csay >> 16) - (*salasty >> 16);
|
||||
sstepy *= srcPitch;
|
||||
if (flipy) {
|
||||
sp = csp - sstepy;
|
||||
} else {
|
||||
sp = csp + sstepy;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename ColorMask, typename Color, int Size, bool filtering>
|
||||
void rotoscaleBlitLogic(byte *dst, const byte *src,
|
||||
const uint dstPitch, const uint srcPitch,
|
||||
const uint dstW, const uint dstH,
|
||||
const uint srcW, const uint srcH,
|
||||
const Graphics::PixelFormat &fmt,
|
||||
const TransformStruct &transform,
|
||||
const Common::Point &newHotspot) {
|
||||
const bool flipx = transform._flip & FLIP_H;
|
||||
const bool flipy = transform._flip & FLIP_V;
|
||||
|
||||
assert(transform._angle != kDefaultAngle); // This would not be ideal; rotoscale() should never be called in conditional branches where angle = 0 anyway.
|
||||
|
||||
if (transform._zoom.x == 0 || transform._zoom.y == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32 invAngle = 360 - (transform._angle % 360);
|
||||
float invAngleRad = Math::deg2rad<uint32,float>(invAngle);
|
||||
float invCos = cos(invAngleRad);
|
||||
float invSin = sin(invAngleRad);
|
||||
|
||||
int icosx = (int)(invCos * (65536.0f * kDefaultZoomX / transform._zoom.x));
|
||||
int isinx = (int)(invSin * (65536.0f * kDefaultZoomX / transform._zoom.x));
|
||||
int icosy = (int)(invCos * (65536.0f * kDefaultZoomY / transform._zoom.y));
|
||||
int isiny = (int)(invSin * (65536.0f * kDefaultZoomY / transform._zoom.y));
|
||||
|
||||
int xd = transform._hotspot.x << 16;
|
||||
int yd = transform._hotspot.y << 16;
|
||||
int cx = newHotspot.x;
|
||||
int cy = newHotspot.y;
|
||||
|
||||
int ax = -icosx * cx;
|
||||
int ay = -isiny * cx;
|
||||
int sw = srcW - 1;
|
||||
int sh = srcH - 1;
|
||||
|
||||
byte *pc = dst;
|
||||
|
||||
for (uint y = 0; y < dstH; y++) {
|
||||
int t = cy - y;
|
||||
int sdx = ax + (isinx * t) + xd;
|
||||
int sdy = ay - (icosy * t) + yd;
|
||||
for (uint x = 0; x < dstW; x++) {
|
||||
int dx = (sdx >> 16);
|
||||
int dy = (sdy >> 16);
|
||||
if (flipx) {
|
||||
dx = sw - dx;
|
||||
}
|
||||
if (flipy) {
|
||||
dy = sh - dy;
|
||||
}
|
||||
|
||||
if (filtering) {
|
||||
if ((dx > -1) && (dy > -1) && (dx < sw) && (dy < sh)) {
|
||||
const byte *sp = src + dy * srcPitch + dx * Size;
|
||||
const byte *c00, *c01, *c10, *c11;
|
||||
c00 = sp;
|
||||
sp += Size;
|
||||
c01 = sp;
|
||||
sp += srcPitch;
|
||||
c11 = sp;
|
||||
sp -= Size;
|
||||
c10 = sp;
|
||||
if (flipx) {
|
||||
SWAP(c00, c01);
|
||||
SWAP(c10, c11);
|
||||
}
|
||||
if (flipy) {
|
||||
SWAP(c00, c10);
|
||||
SWAP(c01, c11);
|
||||
}
|
||||
/*
|
||||
* Interpolate colors
|
||||
*/
|
||||
int ex = (sdx & 0xffff);
|
||||
int ey = (sdy & 0xffff);
|
||||
scaleBlitBilinearInterpolate<ColorMask, Color, Size>(pc, c01, c00, c11, c10, ex, ey, fmt);
|
||||
}
|
||||
} else {
|
||||
if ((dx >= 0) && (dy >= 0) && (dx < (int)srcW) && (dy < (int)srcH)) {
|
||||
const byte *sp = src + dy * srcPitch + dx * Size;
|
||||
if (Size == sizeof(Color)) {
|
||||
*(Color *)pc = *(const Color *)sp;
|
||||
} else {
|
||||
memcpy(pc, sp, Size);
|
||||
}
|
||||
}
|
||||
}
|
||||
sdx += icosx;
|
||||
sdy += isiny;
|
||||
pc += Size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // End of anonymous namespace
|
||||
|
||||
/**
 * Bilinear scaling blit between two buffers of the same pixel format.
 *
 * Builds the 16.16 fixed-point source position tables for all destination
 * columns and rows, then runs the scaling loop specialised for the pixel
 * format (with a generic fallback per pixel size).
 *
 * @param flip  combination of FLIP_H / FLIP_V
 * @return false when fmt.bytesPerPixel is not 2, 3 or 4, true otherwise
 */
bool scaleBlitBilinear(byte *dst, const byte *src,
					   const uint dstPitch, const uint srcPitch,
					   const uint dstW, const uint dstH,
					   const uint srcW, const uint srcH,
					   const Graphics::PixelFormat &fmt,
					   const byte flip) {
	if (fmt.bytesPerPixel != 2 && fmt.bytesPerPixel != 3 && fmt.bytesPerPixel != 4)
		return false;

	// One extra entry each: the scaling loop reads the position after the last pixel
	int *sax = new int[dstW + 1];
	int *say = new int[dstH + 1];
	assert(sax && say);

	/*
	 * Precalculate row increments
	 */
	int spixelw = (srcW - 1);
	int spixelh = (srcH - 1);
	// With a single destination column/row there is nothing to step to; use a
	// zero step instead of dividing by (1 - 1).
	int sx = (dstW > 1) ? (int)(65536.0f * (float) spixelw / (float) (dstW - 1)) : 0;
	int sy = (dstH > 1) ? (int)(65536.0f * (float) spixelh / (float) (dstH - 1)) : 0;

	/* Maximum scaled source size */
	int ssx = (srcW << 16) - 1;
	int ssy = (srcH << 16) - 1;

	/* Precalculate horizontal row increments */
	int csx = 0;
	int *csax = sax;
	for (uint x = 0; x <= dstW; x++) {
		*csax = csx;
		csax++;
		csx += sx;

		/* Guard from overflows */
		if (csx > ssx) {
			csx = ssx;
		}
	}

	/* Precalculate vertical row increments */
	int csy = 0;
	int *csay = say;
	for (uint y = 0; y <= dstH; y++) {
		*csay = csy;
		csay++;
		csy += sy;

		/* Guard from overflows */
		if (csy > ssy) {
			csy = ssy;
		}
	}

	bool handled = true;

	// Known formats first (compile-time colour masks), then the generic versions
	if (fmt == createPixelFormat<8888>()) {
		scaleBlitBilinearLogic<ColorMasks<8888>, uint32, 4>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, sax, say, flip);
	} else if (fmt == createPixelFormat<888>()) {
		scaleBlitBilinearLogic<ColorMasks<888>, uint32, 4>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, sax, say, flip);
	} else if (fmt == createPixelFormat<565>()) {
		scaleBlitBilinearLogic<ColorMasks<565>, uint16, 2>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, sax, say, flip);
	} else if (fmt == createPixelFormat<555>()) {
		scaleBlitBilinearLogic<ColorMasks<555>, uint16, 2>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, sax, say, flip);

	} else if (fmt.bytesPerPixel == 4) {
		scaleBlitBilinearLogic<ColorMasks<0>, uint32, 4>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, sax, say, flip);
	} else if (fmt.bytesPerPixel == 3) {
		scaleBlitBilinearLogic<ColorMasks<0>, uint8, 3>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, sax, say, flip);
	} else if (fmt.bytesPerPixel == 2) {
		scaleBlitBilinearLogic<ColorMasks<0>, uint16, 2>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, sax, say, flip);
	} else {
		handled = false;
	}

	delete[] sax;
	delete[] say;

	return handled;
}
|
||||
|
||||
/**
 * Rotating/zooming blit without filtering (nearest source pixel).
 *
 * @return false when fmt.bytesPerPixel is not 1, 2, 3 or 4, true otherwise
 */
bool rotoscaleBlit(byte *dst, const byte *src,
				   const uint dstPitch, const uint srcPitch,
				   const uint dstW, const uint dstH,
				   const uint srcW, const uint srcH,
				   const Graphics::PixelFormat &fmt,
				   const TransformStruct &transform,
				   const Common::Point &newHotspot) {
	// Only the pixel size matters here, the colour layout is never inspected
	switch (fmt.bytesPerPixel) {
	case 4:
		rotoscaleBlitLogic<ColorMasks<0>, uint32, 4, false>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, transform, newHotspot);
		break;
	case 3:
		rotoscaleBlitLogic<ColorMasks<0>, uint8, 3, false>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, transform, newHotspot);
		break;
	case 2:
		rotoscaleBlitLogic<ColorMasks<0>, uint16, 2, false>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, transform, newHotspot);
		break;
	case 1:
		rotoscaleBlitLogic<ColorMasks<0>, uint8, 1, false>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, transform, newHotspot);
		break;
	default:
		return false;
	}

	return true;
}
|
||||
|
||||
/**
 * Rotating/zooming blit with bilinear filtering.
 *
 * Known pixel formats use compile-time colour masks; any other 2, 3 or 4 byte
 * format goes through the generic ColorMasks<0> version.
 *
 * @return false when no version matches fmt, true otherwise
 */
bool rotoscaleBlitBilinear(byte *dst, const byte *src,
						   const uint dstPitch, const uint srcPitch,
						   const uint dstW, const uint dstH,
						   const uint srcW, const uint srcH,
						   const Graphics::PixelFormat &fmt,
						   const TransformStruct &transform,
						   const Common::Point &newHotspot) {
	// Specialised formats
	if (fmt == createPixelFormat<8888>()) {
		rotoscaleBlitLogic<ColorMasks<8888>, uint32, 4, true>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, transform, newHotspot);
		return true;
	}
	if (fmt == createPixelFormat<888>()) {
		rotoscaleBlitLogic<ColorMasks<888>, uint32, 4, true>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, transform, newHotspot);
		return true;
	}
	if (fmt == createPixelFormat<565>()) {
		rotoscaleBlitLogic<ColorMasks<565>, uint16, 2, true>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, transform, newHotspot);
		return true;
	}
	if (fmt == createPixelFormat<555>()) {
		rotoscaleBlitLogic<ColorMasks<555>, uint16, 2, true>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, transform, newHotspot);
		return true;
	}

	// Generic fallback by pixel size
	switch (fmt.bytesPerPixel) {
	case 4:
		rotoscaleBlitLogic<ColorMasks<0>, uint32, 4, true>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, transform, newHotspot);
		return true;
	case 3:
		rotoscaleBlitLogic<ColorMasks<0>, uint8, 3, true>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, transform, newHotspot);
		return true;
	case 2:
		rotoscaleBlitLogic<ColorMasks<0>, uint16, 2, true>(dst, src, dstPitch, srcPitch, dstW, dstH, srcW, srcH, fmt, transform, newHotspot);
		return true;
	default:
		break;
	}

	return false;
}
|
||||
|
||||
} // End of namespace Graphics
|
||||
339
graphics/blit/blit-sse2.cpp
Normal file
339
graphics/blit/blit-sse2.cpp
Normal file
@@ -0,0 +1,339 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "common/scummsys.h"
|
||||
|
||||
#include "graphics/blit/blit-alpha.h"
|
||||
#include "graphics/pixelformat.h"
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
#if !defined(__x86_64__)
|
||||
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute push (__attribute__((target("sse2"))), apply_to=function)
|
||||
#elif defined(__GNUC__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("sse2")
|
||||
#endif
|
||||
|
||||
#endif // !defined(__x86_64__)
|
||||
|
||||
namespace Graphics {
|
||||
|
||||
/**
 * Lane-wise 32-bit multiply keeping the low 32 bits of every product, built
 * from the 32x32->64 bit multiply of SSE2 (which only handles lanes 0 and 2).
 */
static FORCEINLINE __m128i sse2_mul32(__m128i a, __m128i b) {
	// Products of lanes 0 and 2
	const __m128i prodEven = _mm_mul_epu32(a, b);
	// Products of lanes 1 and 3: shift them down into lanes 0 and 2 first
	const __m128i prodOdd = _mm_mul_epu32(_mm_bsrli_si128(a, 4), _mm_bsrli_si128(b, 4));

	// Gather the low halves of both 64-bit products into the two lowest lanes
	const __m128i lowEven = _mm_shuffle_epi32(prodEven, _MM_SHUFFLE(0, 0, 2, 0));
	const __m128i lowOdd = _mm_shuffle_epi32(prodOdd, _MM_SHUFFLE(0, 0, 2, 0));

	// Interleave back into lane order 0, 1, 2, 3
	return _mm_unpacklo_epi32(lowEven, lowOdd);
}
|
||||
|
||||
class BlendBlitImpl_SSE2 : public BlendBlitImpl_Base {
|
||||
friend class BlendBlit;
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr AlphaBlend(const uint32 color) : BlendBlitImpl_Base::AlphaBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline __m128i simd(__m128i src, __m128i dst) const {
|
||||
__m128i ina;
|
||||
if (alphamod)
|
||||
ina = _mm_srli_epi32(_mm_mullo_epi16(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_set1_epi32(this->ca)), 8);
|
||||
else
|
||||
ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
|
||||
__m128i alphaMask = _mm_cmpeq_epi32(ina, _mm_setzero_si128());
|
||||
|
||||
if (rgbmod) {
|
||||
__m128i dstR = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
__m128i dstG = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m128i dstB = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m128i srcR = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
__m128i srcG = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m128i srcB = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
|
||||
dstR = _mm_slli_epi32(_mm_mullo_epi16(dstR, _mm_sub_epi32(_mm_set1_epi32(255), ina)), BlendBlit::kRModShift - 8);
|
||||
dstG = _mm_slli_epi32(_mm_mullo_epi16(dstG, _mm_sub_epi32(_mm_set1_epi32(255), ina)), BlendBlit::kGModShift - 8);
|
||||
dstB = _mm_mullo_epi16(dstB, _mm_sub_epi32(_mm_set1_epi32(255), ina));
|
||||
srcR = _mm_add_epi32(dstR, _mm_slli_epi32(_mm_mullo_epi16(_mm_srli_epi32(_mm_mullo_epi16(srcR, ina), 8), _mm_set1_epi32(this->cr)), BlendBlit::kRModShift - 8));
|
||||
srcG = _mm_add_epi32(dstG, _mm_slli_epi32(_mm_mullo_epi16(_mm_srli_epi32(_mm_mullo_epi16(srcG, ina), 8), _mm_set1_epi32(this->cg)), BlendBlit::kGModShift - 8));
|
||||
srcB = _mm_add_epi32(dstB, _mm_mullo_epi16(_mm_srli_epi32(_mm_mullo_epi16(srcB, ina), 8), _mm_set1_epi32(this->cb)));
|
||||
src = _mm_or_si128(_mm_and_si128(srcB, _mm_set1_epi32(BlendBlit::kBModMask)), _mm_set1_epi32(BlendBlit::kAModMask));
|
||||
src = _mm_or_si128(_mm_and_si128(srcG, _mm_set1_epi32(BlendBlit::kGModMask)), src);
|
||||
src = _mm_or_si128(_mm_and_si128(srcR, _mm_set1_epi32(BlendBlit::kRModMask)), src);
|
||||
} else {
|
||||
__m128i dstRB = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m128i srcRB = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m128i dstG = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m128i srcG = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
|
||||
dstRB = _mm_srli_epi32(sse2_mul32(dstRB, _mm_sub_epi32(_mm_set1_epi32(255), ina)), 8);
|
||||
dstG = _mm_srli_epi32(_mm_mullo_epi16(dstG, _mm_sub_epi32(_mm_set1_epi32(255), ina)), 8);
|
||||
srcRB = _mm_slli_epi32(_mm_add_epi32(dstRB, _mm_srli_epi32(sse2_mul32(srcRB, ina), 8)), BlendBlit::kBModShift);
|
||||
srcG = _mm_slli_epi32(_mm_add_epi32(dstG, _mm_srli_epi32(_mm_mullo_epi16(srcG, ina), 8)), BlendBlit::kGModShift);
|
||||
src = _mm_or_si128(_mm_and_si128(srcG, _mm_set1_epi32(BlendBlit::kGModMask)), _mm_set1_epi32(BlendBlit::kAModMask));
|
||||
src = _mm_or_si128(_mm_and_si128(srcRB, _mm_set1_epi32(BlendBlit::kBModMask | BlendBlit::kRModMask)), src);
|
||||
}
|
||||
|
||||
dst = _mm_and_si128(alphaMask, dst);
|
||||
src = _mm_andnot_si128(alphaMask, src);
|
||||
return _mm_or_si128(dst, src);
|
||||
}
|
||||
};
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr MultiplyBlend(const uint32 color) : BlendBlitImpl_Base::MultiplyBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline __m128i simd(__m128i src, __m128i dst) const {
|
||||
__m128i ina, alphaMask;
|
||||
if (alphamod) {
|
||||
ina = _mm_srli_epi32(_mm_mullo_epi16(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_set1_epi32(this->ca)), 8);
|
||||
alphaMask = _mm_cmpeq_epi32(ina, _mm_setzero_si128());
|
||||
} else {
|
||||
ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
|
||||
alphaMask = _mm_set1_epi32(BlendBlit::kAModMask);
|
||||
}
|
||||
|
||||
if (rgbmod) {
|
||||
__m128i srcB = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m128i srcG = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m128i srcR = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
__m128i dstB = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m128i dstG = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m128i dstR = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
|
||||
srcB = _mm_and_si128(_mm_slli_epi32(_mm_mullo_epi16(dstB, _mm_srli_epi32(sse2_mul32(_mm_mullo_epi16(srcB, _mm_set1_epi32(this->cb)), ina), 16)), BlendBlit::kBModShift - 8), _mm_set1_epi32(BlendBlit::kBModMask));
|
||||
srcG = _mm_and_si128(_mm_slli_epi32(_mm_mullo_epi16(dstG, _mm_srli_epi32(sse2_mul32(_mm_mullo_epi16(srcG, _mm_set1_epi32(this->cg)), ina), 16)), BlendBlit::kGModShift - 8), _mm_set1_epi32(BlendBlit::kGModMask));
|
||||
srcR = _mm_and_si128(_mm_slli_epi32(_mm_mullo_epi16(dstR, _mm_srli_epi32(sse2_mul32(_mm_mullo_epi16(srcR, _mm_set1_epi32(this->cr)), ina), 16)), BlendBlit::kRModShift - 8), _mm_set1_epi32(BlendBlit::kRModMask));
|
||||
|
||||
src = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
|
||||
src = _mm_or_si128(src, _mm_or_si128(srcB, _mm_or_si128(srcG, srcR)));
|
||||
} else {
|
||||
constexpr uint32 rbMask = BlendBlit::kRModMask | BlendBlit::kBModMask;
|
||||
__m128i srcG = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m128i srcRB = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(rbMask)), BlendBlit::kBModShift);
|
||||
__m128i dstG = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m128i dstRB = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(rbMask)), BlendBlit::kBModShift);
|
||||
|
||||
srcG = _mm_and_si128(_mm_slli_epi32(_mm_mullo_epi16(dstG, _mm_srli_epi32(_mm_mullo_epi16(srcG, ina), 8)), 8), _mm_set1_epi32(BlendBlit::kGModMask));
|
||||
srcRB = _mm_and_si128(_mm_mullo_epi16(dstRB, _mm_srli_epi32(_mm_and_si128(sse2_mul32(srcRB, ina), _mm_set1_epi32(rbMask)), 8)), _mm_set1_epi32(rbMask));
|
||||
|
||||
src = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
|
||||
src = _mm_or_si128(src, _mm_or_si128(srcRB, srcG));
|
||||
}
|
||||
|
||||
dst = _mm_and_si128(alphaMask, dst);
|
||||
src = _mm_andnot_si128(alphaMask, src);
|
||||
return _mm_or_si128(dst, src);
|
||||
}
|
||||
};
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr OpaqueBlend(const uint32 color) : BlendBlitImpl_Base::OpaqueBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline __m128i simd(__m128i src, __m128i dst) const {
|
||||
return _mm_or_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
|
||||
}
|
||||
};
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr BinaryBlend(const uint32 color) : BlendBlitImpl_Base::BinaryBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline __m128i simd(__m128i src, __m128i dst) const {
|
||||
__m128i alphaMask = _mm_cmpeq_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_setzero_si128());
|
||||
dst = _mm_and_si128(dst, alphaMask);
|
||||
src = _mm_andnot_si128(alphaMask, _mm_or_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)));
|
||||
return _mm_or_si128(src, dst);
|
||||
}
|
||||
};
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr AdditiveBlend(const uint32 color) : BlendBlitImpl_Base::AdditiveBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline __m128i simd(__m128i src, __m128i dst) const {
|
||||
__m128i ina;
|
||||
if (alphamod)
|
||||
ina = _mm_srli_epi32(sse2_mul32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_set1_epi32(this->ca)), 8);
|
||||
else
|
||||
ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
|
||||
__m128i alphaMask = _mm_cmpeq_epi32(ina, _mm_set1_epi32(0));
|
||||
|
||||
if (rgbmod) {
|
||||
__m128i srcb = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kBModMask));
|
||||
__m128i srcg = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m128i srcr = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
__m128i dstb = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kBModMask));
|
||||
__m128i dstg = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m128i dstr = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
|
||||
srcb = _mm_and_si128(_mm_add_epi32(dstb, _mm_srli_epi32(sse2_mul32(srcb, sse2_mul32(_mm_set1_epi32(this->cb), ina)), 16)), _mm_set1_epi32(BlendBlit::kBModMask));
|
||||
srcg = _mm_and_si128(_mm_add_epi32(dstg, sse2_mul32(srcg, sse2_mul32(_mm_set1_epi32(this->cg), ina))), _mm_set1_epi32(BlendBlit::kGModMask));
|
||||
srcr = _mm_and_si128(_mm_add_epi32(dstr, _mm_srli_epi32(sse2_mul32(srcr, sse2_mul32(_mm_set1_epi32(this->cr), ina)), BlendBlit::kRModShift - 16)), _mm_set1_epi32(BlendBlit::kRModMask));
|
||||
|
||||
src = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kAModMask));
|
||||
src = _mm_or_si128(src, _mm_or_si128(srcb, _mm_or_si128(srcg, srcr)));
|
||||
} else if (alphamod) {
|
||||
__m128i srcg = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask));
|
||||
__m128i srcrb = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m128i dstg = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask));
|
||||
__m128i dstrb = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
|
||||
srcg = _mm_and_si128(_mm_add_epi32(dstg, _mm_srli_epi32(sse2_mul32(srcg, ina), 8)), _mm_set1_epi32(BlendBlit::kGModMask));
|
||||
srcrb = _mm_and_si128(_mm_add_epi32(dstrb, sse2_mul32(srcrb, ina)), _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask));
|
||||
|
||||
src = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kAModMask));
|
||||
src = _mm_or_si128(src, _mm_or_si128(srcrb, srcg));
|
||||
} else {
|
||||
__m128i srcg = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask));
|
||||
__m128i srcrb = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m128i dstg = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask));
|
||||
__m128i dstrb = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
|
||||
srcg = _mm_and_si128(_mm_add_epi32(dstg, srcg), _mm_set1_epi32(BlendBlit::kGModMask));
|
||||
srcrb = _mm_and_si128(_mm_slli_epi32(_mm_add_epi32(dstrb, srcrb), 8), _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask));
|
||||
|
||||
src = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kAModMask));
|
||||
src = _mm_or_si128(src, _mm_or_si128(srcrb, srcg));
|
||||
}
|
||||
|
||||
dst = _mm_and_si128(alphaMask, dst);
|
||||
src = _mm_andnot_si128(alphaMask, src);
|
||||
return _mm_or_si128(dst, src);
|
||||
}
|
||||
};
|
||||
|
||||
template<bool rgbmod, bool alphamod>
|
||||
struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend<rgbmod, alphamod> {
|
||||
public:
|
||||
constexpr SubtractiveBlend(const uint32 color) : BlendBlitImpl_Base::SubtractiveBlend<rgbmod, alphamod>(color) {}
|
||||
|
||||
inline __m128i simd(__m128i src, __m128i dst) const {
|
||||
__m128i ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
|
||||
__m128i srcb = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m128i srcg = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m128i srcr = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
__m128i dstb = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
|
||||
__m128i dstg = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
|
||||
__m128i dstr = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
|
||||
|
||||
srcb = _mm_and_si128(_mm_slli_epi32(_mm_max_epi16(_mm_sub_epi32(dstb, _mm_srli_epi32(sse2_mul32(sse2_mul32(srcb, _mm_set1_epi32(this->cb)), sse2_mul32(dstb, ina)), 24)), _mm_set1_epi32(0)), BlendBlit::kBModShift), _mm_set1_epi32(BlendBlit::kBModMask));
|
||||
srcg = _mm_and_si128(_mm_slli_epi32(_mm_max_epi16(_mm_sub_epi32(dstg, _mm_srli_epi32(sse2_mul32(sse2_mul32(srcg, _mm_set1_epi32(this->cg)), sse2_mul32(dstg, ina)), 24)), _mm_set1_epi32(0)), BlendBlit::kGModShift), _mm_set1_epi32(BlendBlit::kGModMask));
|
||||
srcr = _mm_and_si128(_mm_slli_epi32(_mm_max_epi16(_mm_sub_epi32(dstr, _mm_srli_epi32(sse2_mul32(sse2_mul32(srcr, _mm_set1_epi32(this->cr)), sse2_mul32(dstr, ina)), 24)), _mm_set1_epi32(0)), BlendBlit::kRModShift), _mm_set1_epi32(BlendBlit::kRModMask));
|
||||
|
||||
return _mm_or_si128(_mm_set1_epi32(BlendBlit::kAModMask), _mm_or_si128(srcb, _mm_or_si128(srcg, srcr)));
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
template<template <bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod>
|
||||
static inline void blitInnerLoop(BlendBlit::Args &args) {
|
||||
const bool loaddst = true; // TODO: Only set this when necessary
|
||||
|
||||
const byte *in;
|
||||
byte *out;
|
||||
|
||||
PixelFunc<rgbmod, alphamod> pixelFunc(args.color);
|
||||
|
||||
int scaleXCtr, scaleYCtr = args.scaleYoff;
|
||||
const byte *inBase;
|
||||
|
||||
if (!doscale && (args.flipping & FLIP_H)) args.ino -= 4 * 3;
|
||||
|
||||
for (uint32 i = 0; i < args.height; i++) {
|
||||
if (doscale) {
|
||||
inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
|
||||
scaleXCtr = args.scaleXoff;
|
||||
} else {
|
||||
in = args.ino;
|
||||
}
|
||||
out = args.outo;
|
||||
|
||||
uint32 j = 0;
|
||||
for (; j + 4 <= args.width; j += 4) {
|
||||
__m128i dstPixels, srcPixels;
|
||||
if (loaddst) dstPixels = _mm_loadu_si128((const __m128i *)out);
|
||||
if (!doscale) {
|
||||
srcPixels = _mm_loadu_si128((const __m128i *)in);
|
||||
} else {
|
||||
srcPixels = _mm_setr_epi32(
|
||||
*(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 0) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
|
||||
*(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 1) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
|
||||
*(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 2) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
|
||||
*(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 3) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep)
|
||||
);
|
||||
scaleXCtr += args.scaleX * 4;
|
||||
}
|
||||
if (!doscale && (args.flipping & FLIP_H)) {
|
||||
srcPixels = _mm_shuffle_epi32(srcPixels, _MM_SHUFFLE(0, 1, 2, 3));
|
||||
}
|
||||
{
|
||||
const __m128i res = pixelFunc.simd(srcPixels, dstPixels);
|
||||
_mm_storeu_si128((__m128i *)out, res);
|
||||
}
|
||||
if (!doscale) in += (ptrdiff_t)args.inStep * 4;
|
||||
out += 4ULL * 4;
|
||||
}
|
||||
if (!doscale && (args.flipping & FLIP_H)) in += 4 * 3;
|
||||
for (; j < args.width; j++) {
|
||||
if (doscale) {
|
||||
in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
|
||||
}
|
||||
|
||||
pixelFunc.normal(in, out);
|
||||
|
||||
if (doscale)
|
||||
scaleXCtr += args.scaleX;
|
||||
else
|
||||
in += args.inStep;
|
||||
out += 4;
|
||||
}
|
||||
if (doscale)
|
||||
scaleYCtr += args.scaleY;
|
||||
else
|
||||
args.ino += args.inoStep;
|
||||
args.outo += args.dstPitch;
|
||||
}
|
||||
}
|
||||
|
||||
}; // End of class BlendBlitImpl_SSE2
|
||||
|
||||
// SSE2 entry point of the blender: forwards the arguments unchanged to blitT
// instantiated with the BlendBlitImpl_SSE2 implementation class.
void BlendBlit::blitSSE2(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
|
||||
blitT<BlendBlitImpl_SSE2>(args, blendMode, alphaType);
|
||||
}
|
||||
|
||||
} // End of namespace Graphics
|
||||
|
||||
#if !defined(__x86_64__)
|
||||
|
||||
#if defined(__clang__)
|
||||
#pragma clang attribute pop
|
||||
#elif defined(__GNUC__)
|
||||
#pragma GCC pop_options
|
||||
#endif
|
||||
|
||||
#endif // !defined(__x86_64__)
|
||||
501
graphics/blit/blit.cpp
Normal file
501
graphics/blit/blit.cpp
Normal file
@@ -0,0 +1,501 @@
|
||||
/* ScummVM - Graphic Adventure Engine
|
||||
*
|
||||
* ScummVM is the legal property of its developers, whose names
|
||||
* are too numerous to list here. Please refer to the COPYRIGHT
|
||||
* file distributed with this source distribution.
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "graphics/blit.h"
|
||||
#include "graphics/pixelformat.h"
|
||||
#include "common/endian.h"
|
||||
|
||||
namespace Graphics {
|
||||
|
||||
// see graphics/blit/blit-atari.cpp
|
||||
#ifdef ATARI
|
||||
extern void keyBlitLogicAtari(byte *dst, const byte *src, const uint w, const uint h,
|
||||
const uint srcDelta, const uint dstDelta, const uint32 key);
|
||||
#else
|
||||
// Function to blit a rect
|
||||
void copyBlit(byte *dst, const byte *src,
|
||||
const uint dstPitch, const uint srcPitch,
|
||||
const uint w, const uint h,
|
||||
const uint bytesPerPixel) {
|
||||
if (dst == src)
|
||||
return;
|
||||
|
||||
if (dstPitch == srcPitch && ((w * bytesPerPixel) == dstPitch)) {
|
||||
// Buffers have equal line pitch AND total number of bytes per line matches that pitch
|
||||
// Therefore we may copy a whole subset of h full-width raster lines in one go.
|
||||
memcpy(dst, src, dstPitch * h);
|
||||
} else {
|
||||
// Not transferring whole width of either source or destination buffer, therefore must copy line-by-line
|
||||
for (uint i = 0; i < h; ++i) {
|
||||
// Copy sublength w of one full buffer raster line
|
||||
memcpy(dst, src, w * bytesPerPixel);
|
||||
// Iterate both buffer pointers by respective pitch, to horizontally align starting point of next raster line with that of the one just copied
|
||||
dst += dstPitch;
|
||||
src += srcPitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
template<typename Color, int Size>
|
||||
inline void keyBlitLogic(byte *dst, const byte *src, const uint w, const uint h,
|
||||
const uint srcDelta, const uint dstDelta, const uint32 key) {
|
||||
const uint8 *col = (const uint8 *)&key;
|
||||
#ifdef SCUMM_BIG_ENDIAN
|
||||
if (Size == 3)
|
||||
col++;
|
||||
#endif
|
||||
|
||||
for (uint y = 0; y < h; ++y) {
|
||||
for (uint x = 0; x < w; ++x) {
|
||||
if (Size == sizeof(Color)) {
|
||||
const uint32 color = *(const Color *)src;
|
||||
if (color != key)
|
||||
*(Color *)dst = color;
|
||||
} else {
|
||||
if (memcmp(src, col, Size))
|
||||
memcpy(dst, src, Size);
|
||||
}
|
||||
|
||||
src += Size;
|
||||
dst += Size;
|
||||
}
|
||||
|
||||
src += srcDelta;
|
||||
dst += dstDelta;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef ATARI
|
||||
// ATARI-only specialisation for 1-byte pixels: passes all arguments unchanged
// to keyBlitLogicAtari instead of using the generic loop.
template<>
|
||||
inline void keyBlitLogic<uint8, 1>(byte *dst, const byte *src, const uint w, const uint h,
|
||||
const uint srcDelta, const uint dstDelta, const uint32 key) {
|
||||
keyBlitLogicAtari(dst, src, w, h, srcDelta, dstDelta, key);
|
||||
}
|
||||
#endif
|
||||
|
||||
} // End of anonymous namespace
|
||||
|
||||
// Function to blit a rect with a transparent color key
|
||||
bool keyBlit(byte *dst, const byte *src,
|
||||
const uint dstPitch, const uint srcPitch,
|
||||
const uint w, const uint h,
|
||||
const uint bytesPerPixel, const uint32 key) {
|
||||
if (dst == src)
|
||||
return true;
|
||||
|
||||
// Faster, but larger, to provide optimized handling for each case.
|
||||
const uint srcDelta = (srcPitch - w * bytesPerPixel);
|
||||
const uint dstDelta = (dstPitch - w * bytesPerPixel);
|
||||
|
||||
if (bytesPerPixel == 1) {
|
||||
keyBlitLogic<uint8, 1>(dst, src, w, h, srcDelta, dstDelta, key);
|
||||
} else if (bytesPerPixel == 2) {
|
||||
keyBlitLogic<uint16, 2>(dst, src, w, h, srcDelta, dstDelta, key);
|
||||
} else if (bytesPerPixel == 3) {
|
||||
keyBlitLogic<uint8, 3>(dst, src, w, h, srcDelta, dstDelta, key);
|
||||
} else if (bytesPerPixel == 4) {
|
||||
keyBlitLogic<uint32, 4>(dst, src, w, h, srcDelta, dstDelta, key);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
template<typename Color, int Size>
|
||||
inline void maskBlitLogic(byte *dst, const byte *src, const byte *mask, const uint w, const uint h,
|
||||
const uint srcDelta, const uint dstDelta, const uint maskDelta) {
|
||||
for (uint y = 0; y < h; ++y) {
|
||||
for (uint x = 0; x < w; ++x) {
|
||||
if (*mask) {
|
||||
if (Size == sizeof(Color)) {
|
||||
*(Color *)dst = *(const Color *)src;
|
||||
} else {
|
||||
memcpy(dst, src, Size);
|
||||
}
|
||||
}
|
||||
|
||||
src += Size;
|
||||
dst += Size;
|
||||
mask += 1;
|
||||
}
|
||||
|
||||
src += srcDelta;
|
||||
dst += dstDelta;
|
||||
mask += maskDelta;
|
||||
}
|
||||
}
|
||||
|
||||
} // End of anonymous namespace
|
||||
|
||||
// Function to blit a rect with a transparent color mask
|
||||
bool maskBlit(byte *dst, const byte *src, const byte *mask,
|
||||
const uint dstPitch, const uint srcPitch, const uint maskPitch,
|
||||
const uint w, const uint h,
|
||||
const uint bytesPerPixel) {
|
||||
if (dst == src)
|
||||
return true;
|
||||
|
||||
// Faster, but larger, to provide optimized handling for each case.
|
||||
const uint srcDelta = (srcPitch - w * bytesPerPixel);
|
||||
const uint dstDelta = (dstPitch - w * bytesPerPixel);
|
||||
const uint maskDelta = (maskPitch - w);
|
||||
|
||||
if (bytesPerPixel == 1) {
|
||||
maskBlitLogic<uint8, 1>(dst, src, mask, w, h, srcDelta, dstDelta, maskDelta);
|
||||
} else if (bytesPerPixel == 2) {
|
||||
maskBlitLogic<uint16, 2>(dst, src, mask, w, h, srcDelta, dstDelta, maskDelta);
|
||||
} else if (bytesPerPixel == 3) {
|
||||
maskBlitLogic<uint8, 3>(dst, src, mask, w, h, srcDelta, dstDelta, maskDelta);
|
||||
} else if (bytesPerPixel == 4) {
|
||||
maskBlitLogic<uint32, 4>(dst, src, mask, w, h, srcDelta, dstDelta, maskDelta);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
template<typename SrcColor, int SrcSize, typename DstColor, int DstSize, bool backward, bool hasKey, bool hasMask>
|
||||
inline void crossBlitLogic(byte *dst, const byte *src, const byte *mask, const uint w, const uint h,
|
||||
const PixelFormat &srcFmt, const PixelFormat &dstFmt,
|
||||
const uint srcDelta, const uint dstDelta, const uint maskDelta,
|
||||
const uint32 key) {
|
||||
uint32 color;
|
||||
byte a, r, g, b;
|
||||
uint8 *col = (uint8 *)&color;
|
||||
#ifdef SCUMM_BIG_ENDIAN
|
||||
if (SrcSize == 3 || DstSize == 3)
|
||||
col++;
|
||||
#endif
|
||||
|
||||
for (uint y = 0; y < h; ++y) {
|
||||
for (uint x = 0; x < w; ++x) {
|
||||
if (SrcSize == sizeof(SrcColor))
|
||||
color = *(const SrcColor *)src;
|
||||
else
|
||||
memcpy(col, src, SrcSize);
|
||||
|
||||
if ((!hasKey || color != key) && (!hasMask || *mask != 0)) {
|
||||
srcFmt.colorToARGB(color, a, r, g, b);
|
||||
color = dstFmt.ARGBToColor(a, r, g, b);
|
||||
|
||||
if (DstSize == sizeof(DstColor))
|
||||
*(DstColor *)dst = color;
|
||||
else
|
||||
memcpy(dst, col, DstSize);
|
||||
}
|
||||
|
||||
if (backward) {
|
||||
src -= SrcSize;
|
||||
dst -= DstSize;
|
||||
if (hasMask)
|
||||
mask -= 1;
|
||||
} else {
|
||||
src += SrcSize;
|
||||
dst += DstSize;
|
||||
if (hasMask)
|
||||
mask += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (backward) {
|
||||
src -= srcDelta;
|
||||
dst -= dstDelta;
|
||||
if (hasMask)
|
||||
mask -= maskDelta;
|
||||
} else {
|
||||
src += srcDelta;
|
||||
dst += dstDelta;
|
||||
if (hasMask)
|
||||
mask += maskDelta;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<bool hasKey, bool hasMask>
|
||||
inline bool crossBlitHelper(byte *dst, const byte *src, const byte *mask, const uint w, const uint h,
|
||||
const PixelFormat &srcFmt, const PixelFormat &dstFmt,
|
||||
const uint srcPitch, const uint dstPitch, const uint maskPitch,
|
||||
const uint32 key) {
|
||||
// Faster, but larger, to provide optimized handling for each case.
|
||||
const uint srcDelta = (srcPitch - w * srcFmt.bytesPerPixel);
|
||||
const uint dstDelta = (dstPitch - w * dstFmt.bytesPerPixel);
|
||||
const uint maskDelta = hasMask ? (maskPitch - w) : 0;
|
||||
|
||||
// TODO: optimized cases for dstDelta of 0
|
||||
if (dstFmt.bytesPerPixel == 2) {
|
||||
if (srcFmt.bytesPerPixel == 2) {
|
||||
crossBlitLogic<uint16, 2, uint16, 2, false, hasKey, hasMask>(dst, src, mask, w, h, srcFmt, dstFmt, srcDelta, dstDelta, maskDelta, key);
|
||||
} else if (srcFmt.bytesPerPixel == 3) {
|
||||
crossBlitLogic<uint8, 3, uint16, 2, false, hasKey, hasMask>(dst, src, mask, w, h, srcFmt, dstFmt, srcDelta, dstDelta, maskDelta, key);
|
||||
} else {
|
||||
crossBlitLogic<uint32, 4, uint16, 2, false, hasKey, hasMask>(dst, src, mask, w, h, srcFmt, dstFmt, srcDelta, dstDelta, maskDelta, key);
|
||||
}
|
||||
} else if (dstFmt.bytesPerPixel == 3) {
|
||||
if (srcFmt.bytesPerPixel == 2) {
|
||||
// We need to blit the surface from bottom right to top left here.
|
||||
// This is needed, because when we convert to the same memory
|
||||
// buffer copying the surface from top left to bottom right would
|
||||
// overwrite the source, since we have more bits per destination
|
||||
// color than per source color.
|
||||
dst += h * dstPitch - dstDelta - dstFmt.bytesPerPixel;
|
||||
src += h * srcPitch - srcDelta - srcFmt.bytesPerPixel;
|
||||
if (hasMask) mask += h * maskPitch - maskDelta - 1;
|
||||
crossBlitLogic<uint16, 2, uint8, 3, true, hasKey, hasMask>(dst, src, mask, w, h, srcFmt, dstFmt, srcDelta, dstDelta, maskDelta, key);
|
||||
} else if (srcFmt.bytesPerPixel == 3) {
|
||||
crossBlitLogic<uint8, 3, uint8, 3, false, hasKey, hasMask>(dst, src, mask, w, h, srcFmt, dstFmt, srcDelta, dstDelta, maskDelta, key);
|
||||
} else {
|
||||
crossBlitLogic<uint32, 4, uint8, 3, false, hasKey, hasMask>(dst, src, mask, w, h, srcFmt, dstFmt, srcDelta, dstDelta, maskDelta, key);
|
||||
}
|
||||
} else if (dstFmt.bytesPerPixel == 4) {
|
||||
if (srcFmt.bytesPerPixel == 2) {
|
||||
// We need to blit the surface from bottom right to top left here.
|
||||
// This is neeeded, because when we convert to the same memory
|
||||
// buffer copying the surface from top left to bottom right would
|
||||
// overwrite the source, since we have more bits per destination
|
||||
// color than per source color.
|
||||
dst += h * dstPitch - dstDelta - dstFmt.bytesPerPixel;
|
||||
src += h * srcPitch - srcDelta - srcFmt.bytesPerPixel;
|
||||
if (hasMask) mask += h * maskPitch - maskDelta - 1;
|
||||
crossBlitLogic<uint16, 2, uint32, 4, true, hasKey, hasMask>(dst, src, mask, w, h, srcFmt, dstFmt, srcDelta, dstDelta, maskDelta, key);
|
||||
} else if (srcFmt.bytesPerPixel == 3) {
|
||||
// We need to blit the surface from bottom right to top left here.
|
||||
// This is neeeded, because when we convert to the same memory
|
||||
// buffer copying the surface from top left to bottom right would
|
||||
// overwrite the source, since we have more bits per destination
|
||||
// color than per source color.
|
||||
dst += h * dstPitch - dstDelta - dstFmt.bytesPerPixel;
|
||||
src += h * srcPitch - srcDelta - srcFmt.bytesPerPixel;
|
||||
if (hasMask) mask += h * maskPitch - maskDelta - 1;
|
||||
crossBlitLogic<uint8, 3, uint32, 4, true, hasKey, hasMask>(dst, src, mask, w, h, srcFmt, dstFmt, srcDelta, dstDelta, maskDelta, key);
|
||||
} else {
|
||||
crossBlitLogic<uint32, 4, uint32, 4, false, hasKey, hasMask>(dst, src, mask, w, h, srcFmt, dstFmt, srcDelta, dstDelta, maskDelta, key);
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // End of anonymous namespace
|
||||
|
||||
// Function to blit a rect from one color format to another
|
||||
bool crossBlit(byte *dst, const byte *src,
|
||||
const uint dstPitch, const uint srcPitch,
|
||||
const uint w, const uint h,
|
||||
const Graphics::PixelFormat &dstFmt, const Graphics::PixelFormat &srcFmt) {
|
||||
// Error out if conversion is impossible
|
||||
if ((srcFmt.bytesPerPixel == 1) || (dstFmt.bytesPerPixel == 1)
|
||||
|| (!srcFmt.bytesPerPixel) || (!dstFmt.bytesPerPixel))
|
||||
return false;
|
||||
|
||||
// Don't perform unnecessary conversion
|
||||
if (srcFmt == dstFmt) {
|
||||
copyBlit(dst, src, dstPitch, srcPitch, w, h, dstFmt.bytesPerPixel);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Attempt to use a faster method if possible
|
||||
FastBlitFunc blitFunc = getFastBlitFunc(dstFmt, srcFmt);
|
||||
if (blitFunc) {
|
||||
blitFunc(dst, src, dstPitch, srcPitch, w, h);
|
||||
return true;
|
||||
}
|
||||
|
||||
return crossBlitHelper<false, false>(dst, src, nullptr, w, h, srcFmt, dstFmt, srcPitch, dstPitch, 0, 0);
|
||||
}
|
||||
|
||||
// Function to blit a rect from one color format to another with a transparent color key
|
||||
bool crossKeyBlit(byte *dst, const byte *src,
|
||||
const uint dstPitch, const uint srcPitch,
|
||||
const uint w, const uint h,
|
||||
const Graphics::PixelFormat &dstFmt, const Graphics::PixelFormat &srcFmt, const uint32 key) {
|
||||
// Error out if conversion is impossible
|
||||
if ((srcFmt.bytesPerPixel == 1) || (dstFmt.bytesPerPixel == 1)
|
||||
|| (!srcFmt.bytesPerPixel) || (!dstFmt.bytesPerPixel))
|
||||
return false;
|
||||
|
||||
// Don't perform unnecessary conversion
|
||||
if (srcFmt == dstFmt) {
|
||||
keyBlit(dst, src, dstPitch, srcPitch, w, h, dstFmt.bytesPerPixel, key);
|
||||
return true;
|
||||
}
|
||||
|
||||
return crossBlitHelper<true, false>(dst, src, nullptr, w, h, srcFmt, dstFmt, srcPitch, dstPitch, 0, key);
|
||||
}
|
||||
|
||||
// Function to blit a rect from one color format to another with a transparent color mask
|
||||
bool crossMaskBlit(byte *dst, const byte *src, const byte *mask,
|
||||
const uint dstPitch, const uint srcPitch, const uint maskPitch,
|
||||
const uint w, const uint h,
|
||||
const Graphics::PixelFormat &dstFmt, const Graphics::PixelFormat &srcFmt) {
|
||||
// Error out if conversion is impossible
|
||||
if ((srcFmt.bytesPerPixel == 1) || (dstFmt.bytesPerPixel == 1)
|
||||
|| (!srcFmt.bytesPerPixel) || (!dstFmt.bytesPerPixel))
|
||||
return false;
|
||||
|
||||
// Don't perform unnecessary conversion
|
||||
if (srcFmt == dstFmt) {
|
||||
maskBlit(dst, src, mask, dstPitch, srcPitch, maskPitch, w, h, dstFmt.bytesPerPixel);
|
||||
return true;
|
||||
}
|
||||
|
||||
return crossBlitHelper<false, true>(dst, src, mask, w, h, srcFmt, dstFmt, srcPitch, dstPitch, maskPitch, 0);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
template<typename DstColor, int DstSize, bool backward, bool hasKey, bool hasMask>
|
||||
inline void crossBlitMapLogic(byte *dst, const byte *src, const byte *mask, const uint w, const uint h,
|
||||
const uint srcDelta, const uint dstDelta, const uint maskDelta, const uint32 *map, const uint32 key) {
|
||||
for (uint y = 0; y < h; ++y) {
|
||||
for (uint x = 0; x < w; ++x) {
|
||||
const byte color = *src;
|
||||
if ((!hasKey || color != key) && (!hasMask || *mask != 0)) {
|
||||
if (DstSize == sizeof(DstColor)) {
|
||||
*(DstColor *)dst = map[color];
|
||||
} else {
|
||||
WRITE_UINT24(dst, map[color]);
|
||||
}
|
||||
}
|
||||
|
||||
if (backward) {
|
||||
src -= 1;
|
||||
dst -= DstSize;
|
||||
if (hasMask)
|
||||
mask -= 1;
|
||||
} else {
|
||||
src += 1;
|
||||
dst += DstSize;
|
||||
if (hasMask)
|
||||
mask += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (backward) {
|
||||
src -= srcDelta;
|
||||
dst -= dstDelta;
|
||||
if (hasMask)
|
||||
mask -= maskDelta;
|
||||
} else {
|
||||
src += srcDelta;
|
||||
dst += dstDelta;
|
||||
if (hasMask)
|
||||
mask += maskDelta;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<bool hasKey, bool hasMask>
|
||||
inline bool crossBlitMapHelperLogic(byte *dst, const byte *src, const byte *mask, const uint w, const uint h,
|
||||
const uint bytesPerPixel, const uint32 *map,
|
||||
const uint srcPitch, const uint dstPitch, const uint maskPitch,
|
||||
const uint32 key) {
|
||||
// Faster, but larger, to provide optimized handling for each case.
|
||||
const uint srcDelta = (srcPitch - w);
|
||||
const uint dstDelta = (dstPitch - w * bytesPerPixel);
|
||||
const uint maskDelta = hasMask ? (maskPitch - w) : 0;
|
||||
|
||||
if (bytesPerPixel == 1) {
|
||||
crossBlitMapLogic<uint8, 1, false, hasKey, hasMask>(dst, src, mask, w, h, srcDelta, dstDelta, maskDelta, map, key);
|
||||
} else if (bytesPerPixel == 2) {
|
||||
// We need to blit the surface from bottom right to top left here.
|
||||
// This is neeeded, because when we convert to the same memory
|
||||
// buffer copying the surface from top left to bottom right would
|
||||
// overwrite the source, since we have more bits per destination
|
||||
// color than per source color.
|
||||
dst += h * dstPitch - dstDelta - bytesPerPixel;
|
||||
src += h * srcPitch - srcDelta - 1;
|
||||
if (hasMask) mask += h * maskPitch - maskDelta - 1;
|
||||
crossBlitMapLogic<uint16, 2, true, hasKey, hasMask>(dst, src, mask, w, h, srcDelta, dstDelta, maskDelta, map, key);
|
||||
} else if (bytesPerPixel == 3) {
|
||||
// We need to blit the surface from bottom right to top left here.
|
||||
// This is needed, because when we convert to the same memory
|
||||
// buffer copying the surface from top left to bottom right would
|
||||
// overwrite the source, since we have more bits per destination
|
||||
// color than per source color.
|
||||
dst += h * dstPitch - dstDelta - bytesPerPixel;
|
||||
src += h * srcPitch - srcDelta - 1;
|
||||
if (hasMask) mask += h * maskPitch - maskDelta - 1;
|
||||
crossBlitMapLogic<uint8, 3, true, hasKey, hasMask>(dst, src, mask, w, h, srcDelta, dstDelta, maskDelta, map, key);
|
||||
} else if (bytesPerPixel == 4) {
|
||||
// We need to blit the surface from bottom right to top left here.
|
||||
// This is needed, because when we convert to the same memory
|
||||
// buffer copying the surface from top left to bottom right would
|
||||
// overwrite the source, since we have more bits per destination
|
||||
// color than per source color.
|
||||
dst += h * dstPitch - dstDelta - bytesPerPixel;
|
||||
src += h * srcPitch - srcDelta - 1;
|
||||
if (hasMask) mask += h * maskPitch - maskDelta - 1;
|
||||
crossBlitMapLogic<uint32, 4, true, hasKey, hasMask>(dst, src, mask, w, h, srcDelta, dstDelta, maskDelta, map, key);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // End of anonymous namespace
|
||||
|
||||
// Function to blit a rect from one color format to another using a map
|
||||
bool crossBlitMap(byte *dst, const byte *src,
|
||||
const uint dstPitch, const uint srcPitch,
|
||||
const uint w, const uint h,
|
||||
const uint bytesPerPixel, const uint32 *map) {
|
||||
// Error out if conversion is impossible
|
||||
if (!bytesPerPixel)
|
||||
return false;
|
||||
|
||||
return crossBlitMapHelperLogic<false, false>(dst, src, nullptr, w, h, bytesPerPixel, map, srcPitch, dstPitch, 0, 0);
|
||||
}
|
||||
|
||||
// Function to blit a rect from one color format to another using a map with a transparent color key
|
||||
bool crossKeyBlitMap(byte *dst, const byte *src,
|
||||
const uint dstPitch, const uint srcPitch,
|
||||
const uint w, const uint h,
|
||||
const uint bytesPerPixel, const uint32 *map, const uint32 key) {
|
||||
// Error out if conversion is impossible
|
||||
if (!bytesPerPixel)
|
||||
return false;
|
||||
|
||||
return crossBlitMapHelperLogic<true, false>(dst, src, nullptr, w, h, bytesPerPixel, map, srcPitch, dstPitch, 0, key);
|
||||
}
|
||||
|
||||
// Function to blit a rect from one color format to another using a map with a transparent color mask
|
||||
bool crossMaskBlitMap(byte *dst, const byte *src, const byte *mask,
|
||||
const uint dstPitch, const uint srcPitch, const uint maskPitch,
|
||||
const uint w, const uint h,
|
||||
const uint bytesPerPixel, const uint32 *map) {
|
||||
// Error out if conversion is impossible
|
||||
if (!bytesPerPixel)
|
||||
return false;
|
||||
|
||||
return crossBlitMapHelperLogic<false, true>(dst, src, mask, w, h, bytesPerPixel, map, srcPitch, dstPitch, maskPitch, 0);
|
||||
}
|
||||
|
||||
} // End of namespace Graphics
|
||||
Reference in New Issue
Block a user