Initial NDS Universal-Core

This commit is contained in:
Pk11
2021-01-13 15:51:37 -06:00
commit 8a55d0e7cb
13 changed files with 2060 additions and 0 deletions
+346
View File
@@ -0,0 +1,346 @@
#include "font.hpp"
#include "tonccpy.h"
#ifdef TEXT_BUFFERED
// Off-screen text buffers: two buffers of 256 * 192 bytes each.
// Font::print in this file selects one with its `top` flag (textBuf[top]).
u8 Font::textBuf[2][256 * 192];
#endif
// Loads a font from the first entry of `paths` that can be opened.
//
// Fills tileWidth, tileHeight, tileSize, fontTiles (glyph bitmaps),
// fontWidths (three bytes per glyph) and fontMap (glyph index -> character
// code), then caches the glyph index of '?' in questionMark.
// If none of the paths can be opened, nothing is loaded.
//
// NOTE(review): the chunk names in the locals (HDWC / PAMC) suggest the
// Nintendo NFTR font format - confirm against the font tooling.
// NOTE(review): no fread/fgetc/fseek result is checked, tileSize is used as
// a divisor without a zero check, and fontMap is indexed with values read
// from the file without bounds checks, so a malformed file is not rejected.
Font::Font(const std::vector<std::string> &paths) {
FILE *file = nullptr;
// Try each path in order and keep the first one that opens
for(const auto &path : paths) {
file = fopen(path.c_str(), "rb");
if(file)
break;
}
if(file) {
// Get file size (used below as the upper bound for character map offsets)
fseek(file, 0, SEEK_END);
u32 fileSize = ftell(file);
// Skip font info: the byte at 0x14 gives how far to skip ahead
fseek(file, 0x14, SEEK_SET);
fseek(file, fgetc(file) - 1, SEEK_CUR);
// Load glyph info
u32 chunkSize;
fread(&chunkSize, 4, 1, file);
tileWidth = fgetc(file);
tileHeight = fgetc(file);
// Two bytes are read into tileSize (assumes the member is at least 16 bits wide - TODO confirm)
fread(&tileSize, 2, 1, file);
// Load character glyphs
// The glyph count is derived from the chunk size minus a 0x10 byte header
int tileAmount = ((chunkSize - 0x10) / tileSize);
fontTiles = std::vector<u8>(tileSize * tileAmount);
fseek(file, 4, SEEK_CUR);
fread(fontTiles.data(), tileSize, tileAmount, file);
// Fix top row
// Clears the first three bytes of every glyph
for(int i = 0; i < tileAmount; i++) {
fontTiles[i * tileSize] = 0;
fontTiles[i * tileSize + 1] = 0;
fontTiles[i * tileSize + 2] = 0;
}
// Load character widths
// The offset of the width chunk is stored at 0x24
fseek(file, 0x24, SEEK_SET);
u32 locHDWC;
fread(&locHDWC, 4, 1, file);
fseek(file, locHDWC - 4, SEEK_SET);
fread(&chunkSize, 4, 1, file);
fseek(file, 8, SEEK_CUR);
// Three bytes per glyph; elsewhere in this file byte 0 is added to the
// draw position and byte 2 is used as the horizontal advance
fontWidths = std::vector<u8>(3 * tileAmount);
fread(fontWidths.data(), 3, tileAmount, file);
// Load character maps
// fontMap[glyph index] = character code
fontMap = std::vector<u16>(tileAmount);
// The offset of the first map chunk is stored at 0x28
fseek(file, 0x28, SEEK_SET);
u32 locPAMC, mapType;
fread(&locPAMC, 4, 1, file);
// Follow the chain of map chunks; each chunk stores the offset of the next
// one, and the walk stops once an offset is not below the file size
while(locPAMC < fileSize) {
u16 firstChar, lastChar;
fseek(file, locPAMC, SEEK_SET);
fread(&firstChar, 2, 1, file);
fread(&lastChar, 2, 1, file);
fread(&mapType, 4, 1, file);
fread(&locPAMC, 4, 1, file);
switch(mapType) {
// Type 0: a contiguous character range mapped to consecutive glyphs starting at firstTile
case 0: {
u16 firstTile;
fread(&firstTile, 2, 1, file);
for(unsigned i = firstChar; i <= lastChar; i++) {
fontMap[firstTile + (i - firstChar)] = i;
}
break;
}
// Type 1: one glyph index stored for every character in the range
case 1: {
for(int i = firstChar; i <= lastChar; i++) {
u16 tile;
fread(&tile, 2, 1, file);
fontMap[tile] = i;
}
break;
}
// Type 2: an explicit list of (character, glyph index) pairs
case 2: {
u16 groupAmount;
fread(&groupAmount, 2, 1, file);
for(int i = 0; i < groupAmount; i++) {
u16 charNo, tileNo;
fread(&charNo, 2, 1, file);
fread(&tileNo, 2, 1, file);
fontMap[tileNo] = charNo;
}
break;
}
}
}
fclose(file);
// Cache the fallback glyph that charIndex returns for unknown characters
questionMark = charIndex('?');
}
}
// Returns the glyph index whose fontMap entry equals `c`.
//
// The lookup is a binary search, so it only finds characters reliably when
// fontMap is in ascending character order (NOTE(review): the loader fills
// fontMap by glyph index - confirm the font files keep it sorted).
// If `c` is not found, or the map is empty, questionMark is returned.
u16 Font::charIndex(char16_t c) {
	// Search the closed range [left, right]; right starts at the LAST valid
	// index so that mid can never equal fontMap.size()
	int left = 0;
	int right = int(fontMap.size()) - 1;
	while(left <= right) {
		int mid = left + ((right - left) / 2);
		if(fontMap[mid] == c) {
			return mid;
		}

		if(fontMap[mid] < c) {
			left = mid + 1;
		} else {
			right = mid - 1;
		}
	}

	// If not found, return a question mark
	return questionMark;
}
// Converts UTF-8 `text` to UTF-16 code units.
//
// Only one-, two- and three-byte sequences are decoded (code points up to
// 0xFFFF). Any other byte (four-byte lead bytes, stray continuation bytes)
// is skipped without producing output, and a sequence that is cut off by
// the end of `text` ends the conversion instead of reading past the view.
std::u16string Font::utf8to16(std::string_view text) {
	std::u16string out;
	const size_t length = text.size();

	size_t i = 0;
	while(i < length) {
		const unsigned char lead = text[i];

		if(!(lead & 0x80)) {
			// 0xxxxxxx: plain ASCII
			out += char16_t(lead);
			i++;
		} else if((lead & 0xE0) == 0xC0) {
			// 110xxxxx 10xxxxxx
			if(i + 1 >= length)
				break; // truncated sequence
			char16_t c = (lead & 0x1F) << 6;
			c |= text[i + 1] & 0x3F;
			out += c;
			i += 2;
		} else if((lead & 0xF0) == 0xE0) {
			// 1110xxxx 10xxxxxx 10xxxxxx
			if(i + 2 >= length)
				break; // truncated sequence
			char16_t c = (lead & 0x0F) << 12;
			c |= (text[i + 1] & 0x3F) << 6;
			c |= text[i + 2] & 0x3F;
			out += c;
			i += 3;
		} else {
			// Out of range for a single UTF-16 unit, or not a lead byte:
			// skip it rather than appending an indeterminate value
			i++;
		}
	}

	return out;
}
int Font::calcWidth(std::u16string_view text) {
uint x = 0;
for(auto c : text) {
u16 index = charIndex(c);
x += fontWidths[(index * 3) + 2];
}
return x;
}
// Draws `text` with this font.
//
// text      UTF-16 text; '\n' moves to the next line (y advances by tileHeight).
// x, y      Position. With center alignment x is added to the centred
//           position; with right alignment x is the right edge of the text.
// top       Selects the target: textBuf[top] when TEXT_BUFFERED is defined,
//           otherwise background `layer` (top) or `layer + 4` (bottom).
// layer     Background id used when drawing directly to a background.
// align     Horizontal alignment.
// maxWidth  Not used by this function.
// color     Palette group; each drawn pixel is written as px + color * 4.
// scaleX/Y  Scale factors; 1.0 for both takes the integer fast path.
// sprite    When non-null, glyphs are drawn into sprite->gfx() instead of a
//           background / text buffer.
//
// Characters in 0x0590-0x05FF switch the whole string to right-to-left
// handling, with embedded left-to-right runs printed in reading order.
//
// NOTE(review): in the scaled paths a fractional row `i` is multiplied by the
// row stride before truncation, and the per-glyph advance is not scaled -
// output for scales other than 1 should be verified on hardware.
ITCM_CODE void Font::print(std::u16string_view text, int x, int y, bool top, int layer, Alignment align, int maxWidth,
						   int color, float scaleX, float scaleY, Sprite *sprite) {
	// Adjust x for alignment
	switch(align) {
		case Alignment::left: {
			break;
		}
		case Alignment::center: {
			// Print every line but the last recursively, then centre the last one here
			size_t newline = text.find('\n');
			while(newline != text.npos) {
				print(text.substr(0, newline), x, y, top, layer, align, maxWidth, color, scaleX, scaleY, sprite);
				text = text.substr(newline + 1);
				newline = text.find('\n');
				y += tileHeight;
			}

			x += ((sprite ? sprite->width() : 256) - (calcWidth(text) * scaleX)) / 2;
			break;
		}
		case Alignment::right: {
			// Print every line but the last left-aligned from its own start, then shift the last one here
			size_t newline = text.find('\n');
			while(newline != text.npos) {
				print(text.substr(0, newline), x - (calcWidth(text.substr(0, newline)) * scaleX), y, top, layer,
					  Alignment::left, maxWidth, color, scaleX, scaleY, sprite);
				text = text.substr(newline + 1);
				newline = text.find('\n');
				y += tileHeight;
			}
			x -= calcWidth(text) * scaleX;
			break;
		}
	}
	const int xStart = x;

	// Any character in 0x0590-0x05FF makes the string print right-to-left
	bool rtl = false;
	for(const auto c : text) {
		if(c >= 0x0590 && c <= 0x05FF) {
			rtl = true;
			break;
		}
	}
	auto ltrBegin = text.end(), ltrEnd = text.end();

	// Loop through string and print it
	for(auto it = (rtl ? text.end() - 1 : text.begin()); true; it += (rtl ? -1 : 1)) {
		// If we hit the end of the string in an LTR section of an RTL
		// string, it may not be done, if so jump back to printing RTL
		if(it == (rtl ? text.begin() - 1 : text.end())) {
			if(ltrBegin == text.end()) {
				break;
			} else {
				it = ltrBegin;
				ltrBegin = text.end();
				rtl = true;
			}
		}

		// If at the end of an LTR section within RTL, jump back to the RTL
		if(it == ltrEnd && ltrBegin != text.end()) {
			if(ltrBegin == text.begin())
				break;

			it = ltrBegin;
			ltrBegin = text.end();
			rtl = true;
			// If in RTL and hit a non-RTL character that's not punctuation, switch to LTR
		} else if(rtl &&
				  ((*it < 0x0590 || *it > 0x05FF) &&
				   ((*it >= '0' && *it <= '9') || (*it >= 'A' && *it <= 'Z') || (*it >= 'a' && *it <= 'z') ||
					*it >= 127))) {
			// Save where we are as the end of the LTR section
			ltrEnd = it + 1;

			// Go back until an RTL character or the start of the string
			while((*it < 0x0590 || *it > 0x05FF) && it != text.begin())
				it--;

			// Save where we are to return to after printing the LTR section
			ltrBegin = it;

			// If not at the start, then we're on the first RTL right now, so add one
			if(it != text.begin())
				it++;

			// Skip all punctuation at the end if not at beginning
			while(it != text.begin() &&
				  (*it < '0' || (*it > '9' && *it < 'A') || (*it > 'Z' && *it < 'a') || (*it > 'z' && *it < 127))) {
				it++;
				ltrBegin++;
			}

			rtl = false;
		}

		if(*it == '\n') {
			x = xStart;
			y += tileHeight;
			continue;
		}

		u16 index = charIndex(*it);

		// Brackets are flipped in RTL
		if(rtl) {
			if(*it == '(')
				index = charIndex(')');
			else if(*it == ')')
				index = charIndex('(');
			else if(*it == '[')
				index = charIndex(']');
			else if(*it == ']')
				index = charIndex('[');
		}

		if(sprite) {
			// Don't draw off sprite chars
			if(x >= 0 && x < sprite->width() && y >= 0 && y + tileHeight < sprite->height()) {
				u16 *dst = sprite->gfx() + x + fontWidths[(index * 3)];
				// Use faster integer math if scale is 1
				if(scaleX == 1.0f && scaleY == 1.0f) {
					for(int i = 0; i < tileHeight; i++) {
						for(int j = 0; j < tileWidth; j++) {
							// Glyphs are stored four pixels per byte, first pixel in the top two bits
							u8 px = fontTiles[(index * tileSize) + (i * tileWidth + j) / 4] >>
										((3 - ((i * tileWidth + j) % 4)) * 2) &
									3;
							if(px)
								dst[(y + i) * sprite->width() + j] = px + (color * 4);
						}
					}
				} else {
					for(float i = 0.0f; i < tileHeight; i += 1 / scaleY) {
						// The column step follows the horizontal scale, as in the background path below
						for(float j = 0.0f; j < tileWidth; j += 1 / scaleX) {
							u8 px = fontTiles[(index * tileSize) + (i * tileWidth + j) / 4] >>
										((3 - (int(i * tileWidth + j) % 4)) * 2) &
									3;
							if(px)
								dst[int((y + i) * sprite->width() + j)] = px + (color * 4);
						}
					}
				}
			}
		} else {
			// Don't draw off screen chars
			if(x >= 0 && x + fontWidths[(index * 3) + 2] < 256 && y >= 0 && y + tileHeight < 192) {
#ifdef TEXT_BUFFERED
				u8 *dst = textBuf[top] + x + fontWidths[(index * 3)];
#else
				u8 *dst = (u8 *)bgGetGfxPtr(top ? layer : layer + 4) + x + fontWidths[(index * 3)];
#endif
				// Use faster integer math if scale is 1
				if(scaleX == 1.0f && scaleY == 1.0f) {
					for(int i = 0; i < tileHeight; i++) {
						for(int j = 0; j < tileWidth; j++) {
							u8 px = fontTiles[(index * tileSize) + (i * tileWidth + j) / 4] >>
										((3 - ((i * tileWidth + j) % 4)) * 2) &
									3;
							if(px)
#ifdef TEXT_BUFFERED
								dst[(y + i) * 256 + j] = px + (color * 4);
#else
								toncset(dst + (y + i) * 256 + j, px + (color * 4), 1);
#endif
						}
					}
				} else {
					for(float i = 0.0f; i < tileHeight; i += 1 / scaleY) {
						for(float j = 0.0f; j < tileWidth; j += 1 / scaleX) {
							u8 px = fontTiles[(index * tileSize) + (i * tileWidth + j) / 4] >>
										((3 - (int(i * tileWidth + j) % 4)) * 2) &
									3;
							if(px)
#ifdef TEXT_BUFFERED
								// A pointer cannot be subscripted with a float, so truncate like the unbuffered path
								dst[int((y + i) * 256 + j)] = px + (color * 4);
#else
								toncset(dst + int((y + i) * 256 + j), px + (color * 4), 1);
#endif
						}
					}
				}
			}
		}

		// Advance by the glyph's width entry
		x += fontWidths[(index * 3) + 2];
	}
}
#ifdef TEXT_BUFFERED
void Font::clear(bool top) { dmaFillWords(0, FontGraphic::textBuf[top], 256 * 192); }
void Font::update(bool top) {
tonccpy(bgGetGfxPtr(top ? TEXT_TOP_LAYER : TEXT_BOTTOM_LAYER + 4), FontGraphic::textBuf[top], 256 * 192);
}
#endif
+65
View File
@@ -0,0 +1,65 @@
#include "graphics.hpp"
#include "tonccpy.h"
#include <nds.h>
// Background ids returned by bgInit / bgInitSub in Graphics::init
int Graphics::bg3Main, Graphics::bg2Main, Graphics::bg3Sub, Graphics::bg2Sub;
// Not read or written anywhere else in this file
bool Graphics::wideScreen = false;
// Sets up video for both screens: mode 5 on each engine, VRAM banks A-D,
// sprite (OAM) state, and two 8-bit 256x256 bitmap backgrounds per screen
// whose ids are stored in bg3Main, bg2Main, bg3Sub and bg2Sub.
void Graphics::init(void) {
// Initialize video mode
videoSetMode(MODE_5_2D);
videoSetModeSub(MODE_5_2D);
// initialize all the VRAM banks
// A: main backgrounds, B: main sprites, C: sub backgrounds, D: sub sprites
vramSetPrimaryBanks(VRAM_A_MAIN_BG, VRAM_B_MAIN_SPRITE, VRAM_C_SUB_BG, VRAM_D_SUB_SPRITE);
// Init oam with 1D mapping 128 byte boundary and no external palette support
oamInit(&oamSub, SpriteMapping_Bmp_1D_128, false);
oamInit(&oamMain, SpriteMapping_Bmp_1D_128, false);
// Init the backgrounds
// Layer 3 gets priority 3 and layer 2 gets priority 2 on each screen
bg3Main = bgInit(3, BgType_Bmp8, BgSize_B8_256x256, 0, 0);
bgSetPriority(bg3Main, 3);
bg2Main = bgInit(2, BgType_Bmp8, BgSize_B8_256x256, 3, 0);
bgSetPriority(bg2Main, 2);
bg3Sub = bgInitSub(3, BgType_Bmp8, BgSize_B8_256x256, 0, 0);
bgSetPriority(bg3Sub, 3);
bg2Sub = bgInitSub(2, BgType_Bmp8, BgSize_B8_256x256, 3, 0);
bgSetPriority(bg2Sub, 2);
// Set main background as target for sprite transparency
// (sets bit 11 of the blend control register on each engine)
REG_BLDCNT = 1 << 11;
REG_BLDCNT_SUB = 1 << 11;
}
// Zero-fills the 256 * 192 byte bitmap of a background layer.
// Bottom-screen layers are addressed as `layer + 4`.
void Graphics::clear(bool top, int layer) {
	const int background = top ? layer : layer + 4;
	toncset(bgGetGfxPtr(background), 0, 256 * 192);
}
// Draws a one pixel wide rectangle outline on a background layer.
//
// x, y   Top-left corner of the outline.
// w, h   Size of the outlined box in pixels; nothing is drawn if either is <= 0.
// color  Palette index written to each outline pixel.
// top    true for the top screen; bottom-screen layers are addressed as `layer + 4`.
// layer  Background id.
//
// Everything outside the 256x192 bitmap is clipped: horizontal edges are
// shortened on both sides, and a vertical edge is drawn only when its
// column is on screen.
void Graphics::drawOutline(int x, int y, int w, int h, u8 color, bool top, int layer) {
	if(w <= 0 || h <= 0)
		return;

	u8 *dst = (u8 *)bgGetGfxPtr(top ? layer : layer + 4);

	// Inclusive edge coordinates of the box
	const int left = x;
	const int right = x + w - 1;
	const int bottom = y + h - 1;

	// Visible part of the horizontal edges
	const int spanStart = left < 0 ? 0 : left;
	const int spanEnd = right > 255 ? 255 : right;
	const int spanWidth = spanEnd - spanStart + 1;

	auto drawHorizontal = [&](int row) {
		if(row >= 0 && row < 192 && spanWidth > 0)
			toncset(dst + (row * 256 + spanStart), color, spanWidth);
	};

	// Top edge, and bottom edge when it is a different row
	drawHorizontal(y);
	if(bottom != y)
		drawHorizontal(bottom);

	// Left and right edges between the two horizontal edges
	for(int row = y + 1; row < bottom; row++) {
		if(row < 0 || row >= 192)
			continue;

		if(left >= 0 && left < 256)
			toncset(dst + (row * 256 + left), color, 1);
		if(right != left && right >= 0 && right < 256)
			toncset(dst + (row * 256 + right), color, 1);
	}
}
// Fills a w x h rectangle at (x, y) with a single color by forwarding to the
// two-color overload with the same color for both row colors.
// NOTE(review): `layer` is declared bool here while other drawing functions
// in this file take an int layer - confirm against the header.
void Graphics::drawRectangle(int x, int y, int w, int h, u8 color, bool top, bool layer) {
Graphics::drawRectangle(x, y, w, h, color, color, top, layer);
}
void Graphics::drawRectangle(int x, int y, int w, int h, u8 color1, u8 color2, bool top, bool layer) {
u8 *dst = (u8 *)bgGetGfxPtr(top ? layer : layer + 4);
for(int i = 0; i < h; i++) {
toncset(dst + ((y + i) * 256 + x), ((i % 2) ? color1 : color2), w);
}
}
+141
View File
@@ -0,0 +1,141 @@
#include "image.hpp"
#include "tonccpy.h"
// Loads an image from the first entry of `paths` that can be opened.
//
// The file must start with the four bytes ".GFX", followed by a 2 byte
// width, a 2 byte height, width * height bitmap bytes, a 2 byte palette
// entry count, a 2 byte palette offset and the 16-bit palette entries.
// If no path opens or the magic does not match, the image is left unloaded.
// The file is closed on every path out of the constructor.
// NOTE(review): read results are not checked, so a truncated file is not detected.
Image::Image(const std::vector<std::string> &paths) {
	// Attempt to load from the given paths
	FILE *file = nullptr;
	for(const auto &path : paths) {
		file = fopen(path.c_str(), "rb");
		if(file)
			break;
	}

	// Return if no file found
	if(!file)
		return;

	// Return if not in the correct format
	char magic[5] = {0};
	fread(magic, 1, 4, file);
	if(strcmp(magic, ".GFX")) {
		// Close the handle opened above instead of leaking it
		fclose(file);
		return;
	}

	// Read image
	fread(&_width, 1, 2, file);
	fread(&_height, 1, 2, file);
	_bitmap = std::vector<u8>(_width * _height);
	fread(_bitmap.data(), 1, _bitmap.size(), file);

	// Read palette
	u16 palCount;
	fread(&palCount, 1, 2, file);
	fread(&_palOfs, 1, 2, file);
	_palette = std::vector<u16>(palCount);
	fread(_palette.data(), 2, palCount, file);

	fclose(file);
}
// Loads an image from an already opened file, reading from its current position.
//
// Expects the four bytes ".GFX", then a 2 byte width, a 2 byte height,
// width * height bitmap bytes, a 2 byte palette entry count, a 2 byte
// palette offset and the 16-bit palette entries.
// A null `file` is ignored. Otherwise the constructor takes over the handle
// and closes it, both after a successful load and when the magic does not
// match, so the caller must not close it again.
// NOTE(review): read results are not checked, so a truncated file is not detected.
Image::Image(FILE *file) {
	// Return if no file
	if(!file)
		return;

	// Return if not in the correct format
	// Zero-initialised so the four bytes read are always NUL-terminated for strcmp
	char magic[5] = {0};
	fread(magic, 1, 4, file);
	if(strcmp(magic, ".GFX")) {
		fclose(file);
		return;
	}

	// Read image
	fread(&_width, 1, 2, file);
	fread(&_height, 1, 2, file);
	_bitmap = std::vector<u8>(_width * _height);
	fread(_bitmap.data(), 1, _bitmap.size(), file);

	// Read palette
	u16 palCount;
	fread(&palCount, 1, 2, file);
	fread(&_palOfs, 1, 2, file);
	_palette = std::vector<u16>(palCount);
	fread(_palette.data(), 2, palCount, file);

	fclose(file);
}
// Copies the image to a background layer with DMA, without transparency.
//
// x, y     Destination position in the 256 pixel wide bitmap.
// top      true for the top screen; bottom-screen layers are addressed as `layer + 4`.
// layer    Background id.
// channel  DMA channel passed to dmaCopyHalfWords.
// copyPal  When true, the image palette is first copied to the screen's
//          background palette starting at entry _palOfs.
// No clipping is done.
void Image::draw(int x, int y, bool top, int layer, int channel, bool copyPal) {
	if(copyPal)
		tonccpy((top ? BG_PALETTE : BG_PALETTE_SUB) + _palOfs, _palette.data(), _palette.size() * 2);

	u8 *dst = (u8 *)bgGetGfxPtr(top ? layer : layer + 4);
	const int width = 256;

	// If full width and X is 0, the rows are contiguous in the destination,
	// so copy it all in one go starting at row y
	if(_width == width && x == 0) {
		dmaCopyHalfWords(channel, _bitmap.data(), dst + y * 256, _width * _height);
	} else {
		// Otherwise copy one row at a time
		for(int i = 0; i < _height; i++) {
			dmaCopyHalfWords(channel, _bitmap.data() + (i * _width), dst + (y + i) * 256 + x, _width);
		}
	}
}
void Image::drawSpecial(int x, int y, bool top, int layer, float scaleX, float scaleY, int paletteOffset,
bool copyPal) {
if(copyPal)
tonccpy((top ? BG_PALETTE : BG_PALETTE_SUB) + _palOfs + paletteOffset, _palette.data(), _palette.size() * 2);
u8 *dst = (u8 *)bgGetGfxPtr(top ? layer : layer + 4);
// If the scale is 1 use faster integer math
if(scaleX == 1.0f && scaleY == 1.0f) {
for(int i = 0; i < _height; i++) {
for(float j = 0; j < _width; j++) {
u8 px = _bitmap[i * _width + j];
if(_palette[px - _palOfs] & 0x8000)
toncset(dst + int((y + i) * 256 + x + j), px + paletteOffset, 1);
}
}
} else {
for(float i = 0.0f; i < _height; i += 1 / scaleY) {
for(float j = 0.0f; j < _width; j += 1 / scaleX) {
u8 px = _bitmap[int((i * _width) + j)];
if(_palette[px - _palOfs] & 0x8000)
toncset(dst + int((y + i) * 256 + x + j), px + paletteOffset, 1);
}
}
}
}
void Image::drawSegment(int x, int y, int imageX, int imageY, int w, int h, bool top, int layer, int channel,
bool copyPal) {
if(copyPal)
tonccpy((top ? BG_PALETTE : BG_PALETTE_SUB) + _palOfs, _palette.data(), _palette.size() * 2);
for(int i = 0; i < h; i++) {
dmaCopyHalfWords(channel, _bitmap.data() + ((imageY + i) * _width) + imageX,
(u8 *)bgGetGfxPtr(top ? layer : layer + 4) + ((y + i) * 256) + x, w);
}
}
// Draws a w x h region of the image, starting at (imageX, imageY), to a
// background layer pixel by pixel, with transparency, optional scaling and
// an optional palette offset.
//
// x, y            Destination position in the 256 pixel wide bitmap.
// imageX, imageY  Top-left corner of the source region inside the image.
// w, h            Size of the source region in image pixels.
// top             true for the top screen; bottom-screen layers are addressed as `layer + 4`.
// layer           Background id.
// scaleX/scaleY   Scale factors (nearest-neighbour) applied to the region.
// paletteOffset   Added to every written pixel value and to the palette copy destination.
// copyPal         When true, the image palette is copied to the screen's
//                 background palette at entry _palOfs + paletteOffset.
// A pixel is drawn only if its palette entry has bit 15 set. No clipping is done.
void Image::drawSegmentSpecial(int x, int y, int imageX, int imageY, int w, int h, bool top, int layer, float scaleX,
							   float scaleY, int paletteOffset, bool copyPal) {
	if(copyPal)
		tonccpy((top ? BG_PALETTE : BG_PALETTE_SUB) + _palOfs + paletteOffset, _palette.data(), _palette.size() * 2);

	u8 *dst = (u8 *)bgGetGfxPtr(top ? layer : layer + 4);

	// If the scale is 1 use faster integer math
	if(scaleX == 1.0f && scaleY == 1.0f) {
		for(int i = 0; i < h; i++) {
			for(int j = 0; j < w; j++) {
				// Read from the requested region, not from the image's top-left corner
				u8 px = _bitmap[(imageY + i) * _width + imageX + j];
				if(_palette[px - _palOfs] & 0x8000)
					toncset(dst + ((y + i) * 256 + x + j), px + paletteOffset, 1);
			}
		}
	} else {
		// Walk the DESTINATION pixels and look up the nearest source pixel;
		// rows follow scaleY and columns follow scaleX
		const int outWidth = int(w * scaleX);
		const int outHeight = int(h * scaleY);
		for(int i = 0; i < outHeight; i++) {
			int srcRow = int(i / scaleY);
			if(srcRow >= h)
				srcRow = h - 1; // guard against float rounding
			for(int j = 0; j < outWidth; j++) {
				int srcCol = int(j / scaleX);
				if(srcCol >= w)
					srcCol = w - 1;
				u8 px = _bitmap[(imageY + srcRow) * _width + imageX + srcCol];
				if(_palette[px - _palOfs] & 0x8000)
					toncset(dst + ((y + i) * 256 + x + j), px + paletteOffset, 1);
			}
		}
	}
}
+156
View File
@@ -0,0 +1,156 @@
#include "sprite.hpp"
#include "tonccpy.h"
// Tracks which sprite ids have been handed out automatically; the first
// index is the `top` flag, the second is the sprite id
bool Sprite::_assigned[2][256];
// Creates a sprite, allocates its graphics memory and registers it with OAM.
//
// top            true for the main engine (oamMain), false for the sub engine (oamSub).
// size, format   Passed to oamAllocateGfx / oamSet; `size` also determines
//                the pixel width and height stored in _width / _height.
// x, y           Initial position.
// priority       Sprite priority passed to oamSet.
// id             OAM entry to use, or -1 to take the first id not yet marked
//                in _assigned for this screen.
// paletteAlpha   Palette / alpha value passed to oamSet.
// rotationIndex  Rotation set index, or -1 for none (see rotation / scale / visibility).
// doubleSize, visible, vFlip, hFlip, mosaic  Passed through to oamSet
//                (`visible` is inverted into oamSet's hide flag).
//
// NOTE(review): an explicitly given id is not recorded in _assigned, and if
// every id is taken _id stays -1 and is passed to oamSet unchanged.
Sprite::Sprite(bool top, SpriteSize size, SpriteColorFormat format, int x, int y, int priority, int id,
			   int paletteAlpha, int rotationIndex, bool doubleSize, bool visible, bool vFlip, bool hFlip, bool mosaic)
	: _top(top), _oam(top ? &oamMain : &oamSub), _size(size), _format(format), _x(x), _y(y), _priority(priority),
	  _id(id), _rotationIndex(rotationIndex), _paletteAlpha(paletteAlpha) {
	// If the ID is -1, set it to the first free one
	if(_id == -1) {
		for(uint i = 0; i < sizeof(_assigned[top]) / sizeof(_assigned[top][0]); i++) {
			if(!_assigned[top][i]) {
				_assigned[top][i] = true;
				// Store the id in the member (not the constructor parameter)
				// so oamSet and the destructor see it
				_id = i;
				break;
			}
		}
	}

	// Get the sprite width and height from the SpriteSize
	// Bits 12-13 hold the shape; for square sprites bits 14-15 hold the size
	// step, otherwise the low byte identifies the dimensions
	if(((_size >> 12) & 3) == OBJSHAPE_SQUARE) {
		_width = 8 << ((_size >> 14) & 3);
		_height = 8 << ((_size >> 14) & 3);
	} else if(((_size >> 12) & 3) == OBJSHAPE_WIDE) {
		switch(_size & 0xFF) {
			case 4:
				_width = 16;
				_height = 8;
				break;
			case 8:
				_width = 32;
				_height = 8;
				break;
			case 16:
				_width = 32;
				_height = 16;
				break;
			case 64:
				_width = 64;
				_height = 32;
				break;
		}
	} else if(((_size >> 12) & 3) == OBJSHAPE_TALL) {
		switch(_size & 0xFF) {
			case 4:
				_width = 8;
				_height = 16;
				break;
			case 8:
				_width = 8;
				_height = 32;
				break;
			case 16:
				_width = 16;
				_height = 32;
				break;
			case 64:
				_width = 32;
				_height = 64;
				break;
		}
	}

	// Allocate memory for graphics
	_gfx = oamAllocateGfx(_oam, _size, _format);

	// Set sprite
	oamSet(_oam, _id, _x, _y, _priority, _paletteAlpha, _size, _format, _gfx, _rotationIndex, doubleSize, !visible,
		   vFlip, hFlip, mosaic);
}
// Frees the sprite's graphics memory and releases its id for reuse.
Sprite::~Sprite(void) {
	oamFreeGfx(_oam, _gfx);

	// _id can still be -1 when no id was assigned; only touch the table for
	// ids that are valid indexes into it
	const int idCount = sizeof(_assigned[_top]) / sizeof(_assigned[_top][0]);
	if(_id >= 0 && _id < idCount)
		_assigned[_top][_id] = false;
}
// Stores the rotation and, if the sprite has a rotation set, applies it
// together with the current scale. The scale is passed as the inverse of
// _scaleX / _scaleY multiplied by 256.
void Sprite::rotation(int rotation) {
	_rotation = rotation;

	// Sprites without a rotation set only remember the value
	if(_rotationIndex == -1)
		return;

	const int inverseScaleX = (1 / _scaleX) * (1 << 8);
	const int inverseScaleY = (1 / _scaleY) * (1 << 8);
	oamRotateScale(_oam, _rotationIndex, _rotation, inverseScaleX, inverseScaleY);
}
// Stores the scale and, if the sprite has a rotation set, applies it
// together with the current rotation. The scale is passed as the inverse of
// each factor multiplied by 256.
void Sprite::scale(float scaleX, float scaleY) {
	_scaleX = scaleX;
	_scaleY = scaleY;

	// Sprites without a rotation set only remember the values
	if(_rotationIndex == -1)
		return;

	const int inverseScaleX = (1 / scaleX) * (1 << 8);
	const int inverseScaleY = (1 / scaleY) * (1 << 8);
	oamRotateScale(_oam, _rotationIndex, _rotation, inverseScaleX, inverseScaleY);
}
// Shows or hides the sprite and remembers the state in _visibility.
void Sprite::visibility(bool show) {
	_visibility = show;

	if(_rotationIndex != -1) {
		// Can't hide sprites with rotation / scaling, so move them off screen
		const int posX = _visibility ? _x : -64;
		const int posY = _visibility ? _y : -64;
		oamSetXY(_oam, _id, posX, posY);
		return;
	}

	oamSetHidden(_oam, _id, !_visibility);
}
// Fills the sprite's graphics memory with 0.
// The fill length is the low byte of _size shifted left by 5.
void Sprite::clear(void) {
	const unsigned fillCount = (_size & 0xFF) << 5;
	toncset16(_gfx, 0, fillCount);
}
// Fills the sprite's graphics memory with `color`.
// The fill length is the low byte of _size shifted left by 5.
void Sprite::fillColor(u16 color) {
	const unsigned fillCount = (_size & 0xFF) << 5;
	toncset16(_gfx, color, fillCount);
}
void Sprite::drawRectangle(int x, int y, int w, int h, u16 color1, u16 color2) {
for(int i = 0; i < h; i++) {
toncset(_gfx + ((y + i) * _width + x), ((i % 2) ? color1 : color2), w);
}
}
void Sprite::drawImage(int x, int y, const Image &image, float scaleX, float scaleY) {
// If the scale is 1 use faster integer math
if(scaleX == 1.0f && scaleY == 1.0f) {
for(int i = 0; i < image.height(); i++) {
for(int j = 0; j < image.width(); j++) {
u16 px = image.palette()[image.bitmap()[i * image.width() + j] - image.palOfs()];
if(px & 0x8000)
toncset16(_gfx + (y + i) * _height + x + j, px, 1);
}
}
} else {
for(float i = 0.0f; i < image.height(); i += 1 / scaleY) {
for(float j = 0.0f; j < image.width(); j += 1 / scaleX) {
u16 px = image.palette()[image.bitmap()[int(i * image.width() + j)] - image.palOfs()];
if(px & 0x8000)
toncset16(_gfx + int((y + i) * _height + j) + x, px, 1);
}
}
}
}
void Sprite::drawImageSegment(int x, int y, int imageX, int imageY, int w, int h, const Image &image, float scaleX,
float scaleY) {
// If the scale is 1 use faster integer math
if(scaleX == 1.0f && scaleY == 1.0f) {
for(int i = 0; i < h; i++) {
for(int j = 0; j < w; j++) {
u16 px = image.palette()[image.bitmap()[i * image.width() + j] - image.palOfs()];
if(px & 0x8000)
toncset16(_gfx + ((y + i) * _height + x + j), px, 1);
}
}
} else {
for(float i = 0; i < h; i += 1 / scaleX) {
for(float j = 0; j < image.width(); j += 1 / scaleY) {
u16 px = image.palette()[image.bitmap()[i * image.width() + j] - image.palOfs()];
if(px & 0x8000)
toncset16(_gfx + int((y + i) * _height + x + j), px, 1);
}
}
}
}
+136
View File
@@ -0,0 +1,136 @@
#include "tonccpy.h"
//# tonccpy.c
//! VRAM-safe cpy.
/*! This version mimics memcpy in functionality, with
the benefit of working for VRAM as well. It is also
slightly faster than the original memcpy, but faster
implementations can be made.
\param dst Destination pointer.
\param src Source pointer.
\param size Number of bytes to copy.
\note The pointers and size need not be word-aligned.
\note Returns without copying when \a size is 0 or either pointer is NULL.
\note Every store is 16 or 32 bits wide; a single byte is merged into
the halfword that contains it.
*/
void tonccpy(void *dst, const void *src, uint size) {
if(size == 0 || dst == NULL || src == NULL)
return;
uint count;
u16 *dst16; // hword destination
u8 *src8; // byte source
// Ideal case: copy by 4x words. Leaves tail for later.
// Taken only when both pointers are word-aligned and at least one word is to be copied.
if(((u32)src | (u32)dst) % 4 == 0 && size >= 4) {
u32 *src32 = (u32 *)src, *dst32 = (u32 *)dst;
count = size / 4;
// tmp = words left over after the groups of four, count = number of full groups
uint tmp = count & 3;
count /= 4;
// Duff's Device, good friend!
// The switch jumps into the loop body so the leftover words are copied
// first, then the loop copies four words per iteration
switch(tmp) {
do {
*dst32++ = *src32++;
case 3:
*dst32++ = *src32++;
case 2:
*dst32++ = *src32++;
case 1:
*dst32++ = *src32++;
case 0:;
} while(count--);
}
// Check for tail
size &= 3;
if(size == 0)
return;
// Hand the remaining 1-3 bytes to the halfword code below
src8 = (u8 *)src32;
dst16 = (u16 *)dst32;
} else { // Unaligned.
// dstOfs is 1 when dst starts on an odd address
uint dstOfs = (u32)dst & 1;
src8 = (u8 *)src;
// Round dst down to a halfword boundary (void* arithmetic relies on a compiler extension)
dst16 = (u16 *)(dst - dstOfs);
// Head: 1 byte.
// Keep the existing low byte and place the first source byte in the high byte
if(dstOfs != 0) {
*dst16 = (*dst16 & 0xFF) | *src8++ << 8;
dst16++;
if(--size == 0)
return;
}
}
// Unaligned main: copy by 2x byte.
// Each halfword is assembled from two source bytes, low byte first
count = size / 2;
while(count--) {
*dst16++ = src8[0] | src8[1] << 8;
src8 += 2;
}
// Tail: 1 byte.
// Replace the low byte and keep the existing high byte
if(size & 1)
*dst16 = (*dst16 & ~0xFF) | *src8;
}
//# toncset.c
//! VRAM-safe memset, internal routine.
/*! This version mimics memset in functionality, with
the benefit of working for VRAM as well. It is also
slightly faster than the original memset.
\param dst Destination pointer.
\param fill Word to fill with.
\param size Fill-length in bytes.
\note The \a dst pointer and \a size need not be
word-aligned. In the case of unaligned fills, \a fill
will be masked off to match the situation.
\note Returns without writing when \a size is 0 or \a dst is NULL.
\note Every store is a full 32-bit word; partial words are merged
with the existing contents through a mask.
*/
void __toncset(void *dst, u32 fill, uint size) {
if(size == 0 || dst == NULL)
return;
// left = number of bytes dst lies past the previous word boundary
uint left = (u32)dst & 3;
// Round dst down to that boundary (void* arithmetic relies on a compiler extension)
u32 *dst32 = (u32 *)(dst - left);
u32 count, mask;
// Unaligned head.
if(left != 0) {
// Adjust for very small stint.
// The whole fill fits inside this one word: replace only `size` bytes starting at byte `left`
// (BIT_MASK comes from the header; presumably a mask of the low n bits - confirm)
if(left + size < 4) {
mask = BIT_MASK(size * 8) << (left * 8);
*dst32 = (*dst32 & ~mask) | (fill & mask);
return;
}
// Keep the low `left` bytes and fill the rest of the word
mask = BIT_MASK(left * 8);
*dst32 = (*dst32 & mask) | (fill & ~mask);
dst32++;
size -= 4 - left;
}
// Main stint.
count = size / 4;
// tmp = words left over after the groups of four, count = number of full groups
uint tmp = count & 3;
count /= 4;
// Duff's device: the switch jumps into the loop body so the leftover words
// are written first, then the loop writes four words per iteration
switch(tmp) {
do {
*dst32++ = fill;
case 3:
*dst32++ = fill;
case 2:
*dst32++ = fill;
case 1:
*dst32++ = fill;
case 0:;
} while(count--);
}
// Tail
// Replace only the low `size` bytes of the final word
size &= 3;
if(size) {
mask = BIT_MASK(size * 8);
*dst32 = (*dst32 & ~mask) | (fill & mask);
}
}