Commit 700b3bb6 authored by Sam Lantinga's avatar Sam Lantinga

Merged Martin's code changes from Google Summer of Code 2009

--HG--
extra : convert_revision : svn%3Ac70aab31-4412-0410-b14c-859654838e24/trunk%403787
parent ed007a4d
......@@ -40,6 +40,11 @@ SDLMAIN_TARGET = libSDLmain.a
SDLMAIN_SOURCES = @SDLMAIN_SOURCES@
SDLMAIN_OBJECTS = @SDLMAIN_OBJECTS@
# PS3 SPU programs
# SPU_GCC and EMBEDSPU are filled in by configure (AC_CHECK_PROGS for
# spu-gcc and embedspu in CheckPS3); they are empty when the tools are missing.
SPU_GCC = @SPU_GCC@
EMBEDSPU = @EMBEDSPU@
# Pulls in the spulibs Makefile (presumably the rules that build the SPU helper libraries).
include $(srcdir)/src/video/ps3/spulibs/Makefile
DIST = acinclude.m4 autogen.sh Borland.html Borland.zip BUGS build-scripts configure configure.in COPYING CREDITS docs docs.html include INSTALL Makefile.dc Makefile.minimal Makefile.in README* sdl-config.in sdl.m4 sdl.pc.in SDL.qpg.in SDL.spec SDL.spec.in src test TODO VisualC.html VisualC VisualCE Watcom-OS2.zip Watcom-Win32.zip WhatsNew Xcode
HDRS = SDL.h SDL_atomic.h SDL_audio.h SDL_cdrom.h SDL_compat.h SDL_cpuinfo.h SDL_endian.h SDL_error.h SDL_events.h SDL_haptic.h SDL_joystick.h SDL_keyboard.h SDL_keysym.h SDL_loadso.h SDL_main.h SDL_mouse.h SDL_mutex.h SDL_name.h SDL_opengl.h SDL_opengles.h SDL_pixels.h SDL_platform.h SDL_power.h SDL_quit.h SDL_rect.h SDL_revision.h SDL_rwops.h SDL_scancode.h SDL_stdinc.h SDL_surface.h SDL_syswm.h SDL_thread.h SDL_timer.h SDL_types.h SDL_version.h SDL_video.h begin_code.h close_code.h
......
SDL on Sony Playstation3
------------------------
Installation:
First, you have to install the Cell SDK
- Download the Cell SDK installer RPM and ISO images to
a temporary directory such as /tmp/cellsdk.
- Mount the image: mount -o loop CellSDK-Devel-Fedora_3.1.0.0.0.iso /tmp/cellsdk
- Install the SDK installer: rpm -ivh cell-install-3.1.0-0.0.noarch.rpm
- Install the SDK: cd /opt/cell && ./cellsdk --iso /tmp/cellsdkiso install
You'll then need to install the SPU-libs
- Run make ps3-libs && make ps3libs-install
Finally, install SDL
- Go to SDL-1.2/ and build SDL like any other GNU style package.
e.g.
- Build the configure-script with ./autogen.sh
- Configure SDL for your needs: ./configure --enable-video-ps3 ...
- Build and install it: make && make install
Todo:
- Mouse & Keyboard support
- On SPU-side the current scaler and converter restrictions are:
- resolution has to be a multiple of 8 (will work on that)
- scaler/converter only supports the YV12 and IYUV format
- the scaler only supports bilinear filtering (Lanczos would be nice)
- Optimize the SPU-program handling on the PPE side
- Integrate spumedia in SDL
Have fun!
Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
......@@ -1509,6 +1509,46 @@ AC_HELP_STRING([--enable-video-ps2gs], [use PlayStation 2 GS video driver [[defa
fi
}
dnl See if we're running on PlayStation 3 Cell hardware
dnl The driver is on by default and is only selected when the ps3fb and
dnl libspe2 headers and libspe2 itself plus the spu-gcc and embedspu tools are found.
CheckPS3()
{
AC_ARG_ENABLE(video-ps3,
AC_HELP_STRING([--enable-video-ps3], [use PlayStation 3 Cell driver [[default=yes]]]),
, enable_video_ps3=yes)
if test x$enable_video = xyes -a x$enable_video_ps3 = xyes; then
video_ps3=no
AC_CHECK_HEADER([linux/fb.h])
dnl asm/ps3fb.h is probed with linux/types.h included first
AC_CHECK_HEADER([asm/ps3fb.h], [have_ps3fb_hdr=yes], [],
[#ifndef _LINUX_TYPES_H
#include <linux/types.h>
#endif])
AC_CHECK_HEADER([libspe2.h], have_libspe2_hdr=yes)
AC_CHECK_LIB([spe2], spe_context_create, have_spe2_lib=yes)
dnl Tools used to build and embed the SPU programs - exported to the Makefile as SPU_GCC and EMBEDSPU
AC_CHECK_PROGS(SPU_GCC, [spu-gcc])
AC_CHECK_PROGS(EMBEDSPU, [embedspu])
dnl The three SPU helper libraries are optional at configure time - a miss only produces the warning further down
have_spu_libs=yes
AC_CHECK_LIB([fb_writer_spu], [main], [], [have_spu_libs=no])
AC_CHECK_LIB([yuv2rgb_spu], [main], [], [have_spu_libs=no])
AC_CHECK_LIB([bilin_scaler_spu], [main], [], [have_spu_libs=no])
if test x$have_ps3fb_hdr = xyes -a x$have_libspe2_hdr = xyes -a x$have_spe2_lib = xyes -a "$SPU_GCC" -a "$EMBEDSPU"; then
AC_DEFINE(SDL_VIDEO_DRIVER_PS3)
video_ps3=yes
have_video=yes
SOURCES="$SOURCES $srcdir/src/video/ps3/*.c"
dnl NOTE review - the Cell SDK paths are hard-coded to /opt/cell/sdk and the SPU libraries
dnl are added to the link line even when have_spu_libs is no
EXTRA_CFLAGS="$EXTRA_CFLAGS -I/opt/cell/sdk/usr/include"
EXTRA_LDFLAGS="$EXTRA_LDFLAGS -L/opt/cell/sdk/usr/lib -lspe2 -lfb_writer_spu -lyuv2rgb_spu -lbilin_scaler_spu"
if test x$have_spu_libs = xno; then
AC_MSG_WARN([ps3libs missing, please run make ps3libs])
fi
fi
AC_MSG_CHECKING([for PlayStation 3 Cell support])
AC_MSG_RESULT([$video_ps3])
fi
}
dnl Find the SVGAlib includes and libraries
CheckSVGA()
{
......@@ -2401,6 +2441,7 @@ case "$host" in
CheckDirectFB
CheckFusionSound
CheckPS2GS
CheckPS3
CheckSVGA
CheckVGL
CheckWscons
......
......@@ -273,6 +273,7 @@
#undef SDL_VIDEO_DRIVER_PHOTON
#undef SDL_VIDEO_DRIVER_QNXGF
#undef SDL_VIDEO_DRIVER_PS2GS
#undef SDL_VIDEO_DRIVER_PS3
#undef SDL_VIDEO_DRIVER_RISCOS
#undef SDL_VIDEO_DRIVER_SVGALIB
#undef SDL_VIDEO_DRIVER_VGL
......
......@@ -359,6 +359,9 @@ extern VideoBootStrap DirectFB_bootstrap;
#if SDL_VIDEO_DRIVER_PS2GS
extern VideoBootStrap PS2GS_bootstrap;
#endif
/* PlayStation 3 video driver bootstrap; only declared when configure
 * defines SDL_VIDEO_DRIVER_PS3 */
#if SDL_VIDEO_DRIVER_PS3
extern VideoBootStrap PS3_bootstrap;
#endif
#if SDL_VIDEO_DRIVER_VGL
extern VideoBootStrap VGL_bootstrap;
#endif
......
......@@ -73,6 +73,9 @@ static VideoBootStrap *bootstrap[] = {
#if SDL_VIDEO_DRIVER_PS2GS
&PS2GS_bootstrap,
#endif
#if SDL_VIDEO_DRIVER_PS3
&PS3_bootstrap,
#endif
#if SDL_VIDEO_DRIVER_VGL
&VGL_bootstrap,
#endif
......
......@@ -88,32 +88,6 @@
#include "SDL_yuv_sw_c.h"
/* State of a software YUV texture.
 * NOTE(review): the per-field notes below are inferred from the field names
 * and from how the PS3 renderer uses pixels/pitches/planes - confirm against
 * the software YUV implementation. */
struct SDL_SW_YUVTexture
{
/* Pixel format of the texture and (presumably) of the RGB conversion target */
Uint32 format;
Uint32 target_format;
/* Texture dimensions */
int w, h;
/* Backing store for all planes; planes[] point into it */
Uint8 *pixels;
/* Lookup tables passed to the Display1X/Display2X converters */
int *colortab;
Uint32 *rgb_2_pix;
/* Conversion routines; presumably the 1:1 and 2x-magnified YUV->RGB paths */
void (*Display1X) (int *colortab, Uint32 * rgb_2_pix,
unsigned char *lum, unsigned char *cr,
unsigned char *cb, unsigned char *out,
int rows, int cols, int mod);
void (*Display2X) (int *colortab, Uint32 * rgb_2_pix,
unsigned char *lum, unsigned char *cr,
unsigned char *cb, unsigned char *out,
int rows, int cols, int mod);
/* These are just so we don't have to allocate them separately */
Uint16 pitches[3];
Uint8 *planes[3];
/* This is a temporary surface in case we have to stretch copy */
SDL_Surface *stretch;
SDL_Surface *display;
};
/* The colorspace conversion functions */
#if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
......
......@@ -26,6 +26,32 @@
/* This is the software implementation of the YUV texture support */
/* State of a software YUV texture. The definition lives in this header so
 * that code outside the software YUV implementation can reach the fields.
 * NOTE(review): the per-field notes below are inferred from the field names
 * and from how the PS3 renderer uses pixels/pitches/planes - confirm against
 * the software YUV implementation. */
struct SDL_SW_YUVTexture
{
/* Pixel format of the texture and (presumably) of the RGB conversion target */
Uint32 format;
Uint32 target_format;
/* Texture dimensions */
int w, h;
/* Backing store for all planes; planes[] point into it */
Uint8 *pixels;
/* Lookup tables passed to the Display1X/Display2X converters */
int *colortab;
Uint32 *rgb_2_pix;
/* Conversion routines; presumably the 1:1 and 2x-magnified YUV->RGB paths */
void (*Display1X) (int *colortab, Uint32 * rgb_2_pix,
unsigned char *lum, unsigned char *cr,
unsigned char *cb, unsigned char *out,
int rows, int cols, int mod);
void (*Display2X) (int *colortab, Uint32 * rgb_2_pix,
unsigned char *lum, unsigned char *cr,
unsigned char *cb, unsigned char *out,
int rows, int cols, int mod);
/* These are just so we don't have to allocate them separately */
Uint16 pitches[3];
Uint8 *planes[3];
/* This is a temporary surface in case we have to stretch copy */
SDL_Surface *stretch;
SDL_Surface *display;
};
typedef struct SDL_SW_YUVTexture SDL_SW_YUVTexture;
SDL_SW_YUVTexture *SDL_SW_CreateYUVTexture(Uint32 format, int w, int h);
......
/*
SDL - Simple DirectMedia Layer
Copyright (C) 1997-2009 Sam Lantinga
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Sam Lantinga
slouken@libsdl.org
*/
#include "SDL_config.h"
#include "../../events/SDL_sysevents.h"
#include "../../events/SDL_events_c.h"
#include "SDL_ps3video.h"
#include "SDL_ps3events_c.h"
/* Event pump of the PS3 video driver. It is a stub: the function body is
 * empty, so calling it never generates an event. */
void
PS3_PumpEvents(_THIS)
{
/* do nothing. */
}
/* vi: set ts=4 sw=4 expandtab: */
/*
SDL - Simple DirectMedia Layer
Copyright (C) 1997-2009 Sam Lantinga
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Sam Lantinga
slouken@libsdl.org
*/
#include "SDL_config.h"
#include "SDL_ps3video.h"
/* Event pump of the PS3 video driver; the implementation currently does nothing. */
extern void PS3_PumpEvents(_THIS);
/* vi: set ts=4 sw=4 expandtab: */
/*
SDL - Simple DirectMedia Layer
Copyright (C) 1997-2009 Sam Lantinga
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Sam Lantinga
slouken@libsdl.org
*/
#include "SDL_config.h"
#include "SDL_ps3video.h"
/*
 * Register the PS3 display with SDL.
 *
 * Queries the framebuffer for the current resolution (PS3FB_IOCTL_SCREENINFO)
 * and the current ps3fb video mode id (PS3FB_IOCTL_GETMODE), builds one
 * SDL_DisplayMode from them and adds a display that uses it as both desktop
 * and current mode.  The mode's driverdata is a heap-allocated
 * PS3_DisplayModeData holding the ps3fb mode id.
 *
 * On allocation failure or when the screen info cannot be read, an SDL error
 * is set and no display is added.
 */
void
PS3_InitModes(_THIS)
{
    SDL_VideoData *data = (SDL_VideoData *) _this->driverdata;
    SDL_VideoDisplay display;
    SDL_DisplayMode mode;
    PS3_DisplayModeData *modedata;
    struct ps3fb_ioctl_res res;
    /* The ps3fb mode ioctls transfer an int, so the buffer handed to the
     * kernel must be int-sized: a wider big-endian integer would receive
     * the value in its upper half. */
    int vid = 0;

    deprintf(1, "+PS3_InitModes()\n");

    modedata = (PS3_DisplayModeData *) SDL_malloc(sizeof(*modedata));
    if (!modedata) {
        SDL_OutOfMemory();
        return;
    }

    /* Setting up the DisplayMode based on current settings */
    if (ioctl(data->fbdev, PS3FB_IOCTL_SCREENINFO, (unsigned long)&res)) {
        /* res is uninitialized here; do not build a mode from garbage */
        SDL_SetError("Can't get PS3FB_IOCTL_SCREENINFO");
        SDL_free(modedata);
        return;
    }
    mode.format = SDL_PIXELFORMAT_RGB888;
    mode.refresh_rate = 0;
    mode.w = res.xres;
    mode.h = res.yres;

    /* Setting up driver specific mode data,
     * Get the current ps3 specific videomode number.
     * Best effort: on failure the error is recorded and mode id 0 is kept. */
    if (ioctl(data->fbdev, PS3FB_IOCTL_GETMODE, (unsigned long)&vid)) {
        SDL_SetError("Can't get PS3FB_IOCTL_GETMODE");
    }
    deprintf(2, "PS3FB_IOCTL_GETMODE = %d\n", vid);
    modedata->mode = vid;
    mode.driverdata = modedata;

    /* Set display's videomode and add it */
    SDL_zero(display);
    display.desktop_mode = mode;
    display.current_mode = mode;
    SDL_AddVideoDisplay(&display);

    deprintf(1, "-PS3_InitModes()\n");
}
/* DisplayModes available on the PS3.
 * The entry order must stay in sync with ps3fb_data: PS3_GetDisplayModes
 * pairs entry n of this table with entry n of ps3fb_data.  The driverdata
 * slot is NULL here and filled in by PS3_GetDisplayModes. */
static SDL_DisplayMode ps3fb_modedb[] = {
/* VESA */
{SDL_PIXELFORMAT_RGB888, 1280, 768, 0, NULL}, // WXGA
{SDL_PIXELFORMAT_RGB888, 1280, 1024, 0, NULL}, // SXGA
{SDL_PIXELFORMAT_RGB888, 1920, 1200, 0, NULL}, // WUXGA
/* Native resolutions (progressive, "fullscreen") */
{SDL_PIXELFORMAT_RGB888, 720, 480, 0, NULL}, // 480p
{SDL_PIXELFORMAT_RGB888, 1280, 720, 0, NULL}, // 720p
{SDL_PIXELFORMAT_RGB888, 1920, 1080, 0, NULL} // 1080p
};
/* PS3 videomode number according to ps3fb_modedb: one entry per mode, in the
 * same order (WXGA, SXGA, WUXGA, 480p, 720p, 1080p).
 * NOTE(review): 130/131/133 look like the 480p/720p/1080p mode ids with a
 * "fullscreen" flag of 128 added - confirm against the ps3fb/ps3av mode list. */
static PS3_DisplayModeData ps3fb_data[] = {
{11}, {12}, {13}, {130}, {131}, {133},
};
/*
 * Publish every entry of ps3fb_modedb as a display mode of the current
 * display.  Each mode's driverdata is pointed at the ps3fb_data entry with
 * the same index, which carries the ps3fb video mode number.
 */
void
PS3_GetDisplayModes(_THIS)
{
    const unsigned int nummodes =
        sizeof(ps3fb_modedb) / sizeof(ps3fb_modedb[0]);
    unsigned int n;

    deprintf(1, "+PS3_GetDisplayModes()\n");

    for (n = 0; n < nummodes; ++n) {
        /* Get driver specific mode data (static storage, never freed) */
        ps3fb_modedb[n].driverdata = &ps3fb_data[n];

        /* Add DisplayMode to list */
        deprintf(2, "Adding resolution %u x %u\n", ps3fb_modedb[n].w, ps3fb_modedb[n].h);
        SDL_AddDisplayMode(_this->current_display, &ps3fb_modedb[n]);
    }

    deprintf(1, "-PS3_GetDisplayModes()\n");
}
/*
 * Switch the framebuffer to the ps3fb video mode stored in mode->driverdata.
 *
 * Returns 0 on success, or -1 with the SDL error set when the
 * PS3FB_IOCTL_SETMODE ioctl fails.
 */
int
PS3_SetDisplayMode(_THIS, SDL_DisplayMode * mode)
{
    SDL_VideoData *videodata;
    PS3_DisplayModeData *modedata;
    int failed;

    deprintf(1, "+PS3_SetDisplayMode()\n");

    videodata = (SDL_VideoData *) _this->driverdata;
    modedata = (PS3_DisplayModeData *) mode->driverdata;

    /* Hand the driver-specific mode number to the framebuffer driver */
    deprintf(2, "Setting PS3FB_MODE to %u\n", modedata->mode);
    failed = ioctl(videodata->fbdev, PS3FB_IOCTL_SETMODE, (unsigned long)&modedata->mode);
    if (!failed) {
        deprintf(1, "-PS3_SetDisplayMode()\n");
        return 0;
    }

    deprintf(2, "Could not set PS3FB_MODE\n");
    SDL_SetError("Could not set PS3FB_MODE\n");
    return -1;
}
/*
 * Detach the driver data from every registered display mode.
 *
 * The modes added by PS3_GetDisplayModes point at static ps3fb_data entries,
 * so the pointers are only reset to NULL here and never freed (presumably so
 * that nobody tries to free static storage later - confirm against the SDL
 * core shutdown code).
 */
void
PS3_QuitModes(_THIS)
{
    int d, m;

    deprintf(1, "+PS3_QuitModes()\n");

    d = _this->num_displays;
    while (d--) {
        SDL_VideoDisplay *disp = &_this->displays[d];

        m = disp->num_display_modes;
        while (m--) {
            disp->display_modes[m].driverdata = NULL;
        }
    }

    deprintf(1, "-PS3_QuitModes()\n");
}
/* vi: set ts=4 sw=4 expandtab: */
/*
SDL - Simple DirectMedia Layer
Copyright (C) 1997-2009 Sam Lantinga
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Sam Lantinga
slouken@libsdl.org
*/
#include "SDL_config.h"
#ifndef _SDL_ps3modes_h
#define _SDL_ps3modes_h
/* Adds the PS3 display, using the framebuffer's current mode as desktop mode */
extern void PS3_InitModes(_THIS);
/* Adds the driver's table of supported modes to the current display */
extern void PS3_GetDisplayModes(_THIS);
/* Switches the framebuffer video mode; returns 0 on success, -1 on failure */
extern int PS3_SetDisplayMode(_THIS, SDL_DisplayMode * mode);
/* Clears the driverdata pointer of every registered display mode */
extern void PS3_QuitModes(_THIS);
#endif /* _SDL_ps3modes_h */
/* vi: set ts=4 sw=4 expandtab: */
/*
SDL - Simple DirectMedia Layer
Copyright (C) 1997-2009 Sam Lantinga
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Sam Lantinga
slouken@libsdl.org
*/
#include "SDL_config.h"
#include "SDL_video.h"
#include "../SDL_sysvideo.h"
#include "../SDL_yuv_sw_c.h"
#include "../SDL_renderer_sw.h"
#include "SDL_ps3video.h"
#include "SDL_ps3spe_c.h"
#include <fcntl.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kd.h>
#include <linux/fb.h>
#include <sys/mman.h>
#include <asm/ps3fb.h>
/* Handles of the SPU programs used by this renderer: the YUV->RGB converter
 * and the bilinear scaler.  They are defined outside this file (presumably by
 * the embedded SPU libraries yuv2rgb_spu / bilin_scaler_spu that configure
 * links in). */
extern spe_program_handle_t yuv2rgb_spu;
extern spe_program_handle_t bilin_scaler_spu;
/* Renderer entry points (drawing happens on an SDL surface kept in the render data) */
static SDL_Renderer *SDL_PS3_CreateRenderer(SDL_Window * window, Uint32 flags);
static int SDL_PS3_DisplayModeChanged(SDL_Renderer * renderer);
static int SDL_PS3_ActivateRenderer(SDL_Renderer * renderer);
static int SDL_PS3_RenderPoint(SDL_Renderer * renderer, int x, int y);
static int SDL_PS3_RenderLine(SDL_Renderer * renderer,
                              int x1, int y1, int x2, int y2);
static int SDL_PS3_RenderFill(SDL_Renderer * renderer, const SDL_Rect * rect);
static int SDL_PS3_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
                              const SDL_Rect * srcrect,
                              const SDL_Rect * dstrect);
static void SDL_PS3_RenderPresent(SDL_Renderer * renderer);
static void SDL_PS3_DestroyRenderer(SDL_Renderer * renderer);

/* Texture entry points */
static int PS3_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture);
static int PS3_QueryTexturePixels(SDL_Renderer * renderer,
                                  SDL_Texture * texture,
                                  void **pixels, int *pitch);
static int PS3_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                             const SDL_Rect * rect,
                             const void *pixels, int pitch);
static int PS3_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                           const SDL_Rect * rect, int markDirty,
                           void **pixels, int *pitch);
static void PS3_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture);
static void PS3_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture);
/* Render driver description: the factory function followed by the
 * capabilities advertised under the name "ps3".
 * NOTE(review): SDL_PS3_CreateRenderer sets either SDL_RENDERER_PRESENTFLIP2
 * or SDL_RENDERER_PRESENTCOPY on the renderer it creates, but PRESENTCOPY is
 * not part of the flags advertised here - confirm whether that is intended. */
SDL_RenderDriver SDL_PS3_RenderDriver = {
SDL_PS3_CreateRenderer,
{
"ps3",
(SDL_RENDERER_SINGLEBUFFER | SDL_RENDERER_PRESENTVSYNC |
SDL_RENDERER_PRESENTFLIP2 | SDL_RENDERER_PRESENTDISCARD |
SDL_RENDERER_ACCELERATED),
(SDL_TEXTUREMODULATE_NONE),
(SDL_BLENDMODE_NONE),
/* We use bilinear scaling on the SPE for YV12 & IYUV
* (width and height % 8 = 0) */
(SDL_TEXTURESCALEMODE_SLOW)
}
};
/* Per-renderer state of the PS3 renderer (stored in renderer->driverdata) */
typedef struct
{
/* Index of the screen currently in use; set to 0 when the renderer is created */
int current_screen;
/* Surface all Render* calls draw into */
SDL_Surface *screen;
SDL_VideoDisplay *display;
/* address of the centered image in the framebuffer (double buffered) */
uint8_t *center[2];
/* width of input (bounded by writeable width) */
unsigned int bounded_width;
/* height of input (bounded by writeable height) */
unsigned int bounded_height;
/* offset from the left side (used for centering) */
unsigned int offset_left;
/* offset from the upper side (used for centering) */
unsigned int offset_top;
/* width of screen which is writeable */
unsigned int wr_width;
/* height of screen which is writeable */
unsigned int wr_height;
/* size of a screen line: width * bpp/8 */
unsigned int line_length;
/* Is the kernels fb size bigger than ~12MB
* double buffering will work for 1080p */
unsigned int double_buffering;
/* SPE threading stuff */
spu_data_t *converter_thread_data;
spu_data_t *scaler_thread_data;
/* NOTE(review): the aligned(128) attributes below apply to the pointer
* members themselves, not to the memory they point to; the parameter blocks
* are allocated with memalign(16, ...) in SDL_PS3_CreateRenderer. */
/* YUV converting transfer data */
volatile struct yuv2rgb_parms_t * converter_parms __attribute__((aligned(128)));
/* Scaler transfer data */
volatile struct scale_parms_t * scaler_parms __attribute__((aligned(128)));
} SDL_PS3_RenderData;
/* Per-texture state of the PS3 renderer (stored in texture->driverdata) */
typedef struct
{
/* Bytes per texture row: texture width * SDL_BYTESPERPIXEL(format) */
int pitch;
/* Image data for non-FOURCC textures; left NULL for FOURCC textures */
volatile void *pixels;
/* Software YUV texture that backs FOURCC formats; NULL otherwise */
SDL_SW_YUVTexture *yuv;
} PS3_TextureData;
/*
 * Create the PS3 renderer for a window.
 *
 * Allocates the renderer and its SDL_PS3_RenderData, creates the screen
 * surface all drawing goes to (with a 16-byte aligned pixel buffer),
 * allocates the parameter blocks and thread descriptors for the two SPU
 * programs and starts the YUV->RGB converter, which is kept running.
 *
 * window: window the renderer is created for.
 * flags:  requested renderer flags (not evaluated here).
 *
 * Returns the new renderer, or NULL with the SDL error set.  Every failure
 * path after the renderer allocation releases what was built so far through
 * SDL_PS3_DestroyRenderer().
 */
SDL_Renderer *
SDL_PS3_CreateRenderer(SDL_Window * window, Uint32 flags)
{
    SDL_VideoDisplay *display;
    SDL_DisplayMode *displayMode;
    SDL_VideoData *devdata;
    SDL_Renderer *renderer;
    SDL_PS3_RenderData *data;
    struct ps3fb_ioctl_res res;
    int bpp;
    Uint32 Rmask, Gmask, Bmask, Amask;

    deprintf(1, "+SDL_PS3_CreateRenderer()\n");

    display = SDL_GetDisplayFromWindow(window);
    displayMode = &display->current_mode;
    devdata = display->device->driverdata;

    if (!SDL_PixelFormatEnumToMasks
        (displayMode->format, &bpp, &Rmask, &Gmask, &Bmask, &Amask)) {
        SDL_SetError("Unknown display format");
        return NULL;
    }

    renderer = (SDL_Renderer *) SDL_calloc(1, sizeof(*renderer));
    if (!renderer) {
        SDL_OutOfMemory();
        return NULL;
    }

    /* Zero-initialized so the cleanup path sees NULL for everything
     * that has not been set up yet */
    data = (SDL_PS3_RenderData *) SDL_calloc(1, sizeof(*data));
    if (!data) {
        SDL_PS3_DestroyRenderer(renderer);
        SDL_OutOfMemory();
        return NULL;
    }

    renderer->CreateTexture = PS3_CreateTexture;
    renderer->DestroyTexture = PS3_DestroyTexture;
    renderer->QueryTexturePixels = PS3_QueryTexturePixels;
    renderer->UpdateTexture = PS3_UpdateTexture;
    renderer->LockTexture = PS3_LockTexture;
    renderer->UnlockTexture = PS3_UnlockTexture;
    renderer->ActivateRenderer = SDL_PS3_ActivateRenderer;
    renderer->DisplayModeChanged = SDL_PS3_DisplayModeChanged;
    renderer->RenderPoint = SDL_PS3_RenderPoint;
    renderer->RenderLine = SDL_PS3_RenderLine;
    renderer->RenderFill = SDL_PS3_RenderFill;
    renderer->RenderCopy = SDL_PS3_RenderCopy;
    renderer->RenderPresent = SDL_PS3_RenderPresent;
    renderer->DestroyRenderer = SDL_PS3_DestroyRenderer;
    renderer->info.name = SDL_PS3_RenderDriver.info.name;
    renderer->info.flags = 0;
    renderer->window = window->id;
    renderer->driverdata = data;

    deprintf(1, "window->w = %u\n", window->w);
    deprintf(1, "window->h = %u\n", window->h);

    data->double_buffering = 0;

    /* Get ps3 screeninfo.  res is cleared first so that a failing ioctl
     * leaves num_frames at 0 and we fall back to single buffering instead
     * of reading uninitialized memory. */
    SDL_memset(&res, 0, sizeof(res));
    if (ioctl(devdata->fbdev, PS3FB_IOCTL_SCREENINFO, (unsigned long)&res) < 0) {
        SDL_SetError("[PS3] PS3FB_IOCTL_SCREENINFO failed");
    }
    deprintf(2, "res.num_frames = %d\n", res.num_frames);

    /* Only use double buffering if enough fb memory is available */
    if (res.num_frames > 1) {
        renderer->info.flags |= SDL_RENDERER_PRESENTFLIP2;
        data->double_buffering = 1;
    } else {
        renderer->info.flags |= SDL_RENDERER_PRESENTCOPY;
    }

    data->screen =
        SDL_CreateRGBSurface(0, window->w, window->h, bpp, Rmask, Gmask,
                             Bmask, Amask);
    if (!data->screen) {
        SDL_PS3_DestroyRenderer(renderer);
        return NULL;
    }

    /* Allocate aligned memory for pixels */
    SDL_free(data->screen->pixels);
    data->screen->pixels = (void *)memalign(16, data->screen->h * data->screen->pitch);
    if (!data->screen->pixels) {
        /* Drop the surface ourselves and clear the pointer so the common
         * cleanup neither leaks the renderer nor frees the surface twice */
        SDL_FreeSurface(data->screen);
        data->screen = NULL;
        SDL_PS3_DestroyRenderer(renderer);
        SDL_OutOfMemory();
        return NULL;
    }
    SDL_memset(data->screen->pixels, 0, data->screen->h * data->screen->pitch);
    SDL_SetSurfacePalette(data->screen, display->palette);

    data->current_screen = 0;

    /* Create SPU parms structure */
    data->converter_parms = (struct yuv2rgb_parms_t *) memalign(16, sizeof(struct yuv2rgb_parms_t));
    data->scaler_parms = (struct scale_parms_t *) memalign(16, sizeof(struct scale_parms_t));
    if (data->converter_parms == NULL || data->scaler_parms == NULL) {
        SDL_PS3_DestroyRenderer(renderer);
        SDL_OutOfMemory();
        return NULL;
    }

    /* Set up the SPE threading data */
    data->converter_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t));
    data->scaler_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t));
    if (data->converter_thread_data == NULL || data->scaler_thread_data == NULL) {
        SDL_PS3_DestroyRenderer(renderer);
        SDL_OutOfMemory();
        return NULL;
    }

    /* Set up the SPE scaler (booted) */
    data->scaler_thread_data->program = bilin_scaler_spu;
    data->scaler_thread_data->program_name = "bilin_scaler_spu";
    data->scaler_thread_data->keepalive = 0;
    data->scaler_thread_data->booted = 0;

    /* Set up the SPE converter (always running) */
    data->converter_thread_data->program = yuv2rgb_spu;
    data->converter_thread_data->program_name = "yuv2rgb_spu";
    data->converter_thread_data->keepalive = 1;
    data->converter_thread_data->booted = 0;

    /* NOTE(review): the result of SPE_Start() is not checked - confirm
     * whether it can fail and how that should be reported */
    SPE_Start(data->converter_thread_data);

    deprintf(1, "-SDL_PS3_CreateRenderer()\n");
    return renderer;
}
/*
 * Renderer activation hook.  There is nothing to switch for this renderer,
 * so the call only traces and reports success.
 *
 * Returns 0 always.
 */
static int
SDL_PS3_ActivateRenderer(SDL_Renderer * renderer)
{
    (void) renderer;            /* no per-renderer state is needed here */

    deprintf(1, "+PS3_ActivateRenderer()\n");
    deprintf(1, "-PS3_ActivateRenderer()\n");
    return 0;
}
/*
 * Display-mode-changed hook.  The renderer keeps no state that depends on
 * the display mode at this point, so the call only traces and reports success.
 *
 * Returns 0 always.
 */
static int
SDL_PS3_DisplayModeChanged(SDL_Renderer * renderer)
{
    (void) renderer;            /* no per-renderer state is needed here */

    deprintf(1, "+PS3_DisplayModeChanged()\n");
    deprintf(1, "-PS3_DisplayModeChanged()\n");
    return 0;
}
/*
 * Allocate the driver data for a texture.
 *
 * FOURCC (YUV) formats are backed by SDL's software YUV texture whose pixel
 * buffer is replaced by a 16-byte aligned one; the plane pointers and pitches
 * are recomputed for the new buffer.  All other formats get a plain pixel
 * buffer of texture->h rows of data->pitch bytes.
 *
 * Returns 0 on success and stores the data in texture->driverdata.
 * Returns -1 with the SDL error set on failure; everything allocated up to
 * that point is released and texture->driverdata is left untouched.
 */
static int
PS3_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
{
    PS3_TextureData *data;

    deprintf(1, "+PS3_CreateTexture()\n");

    data = (PS3_TextureData *) SDL_calloc(1, sizeof(*data));
    if (!data) {
        SDL_OutOfMemory();
        return -1;
    }
    data->pitch = (texture->w * SDL_BYTESPERPIXEL(texture->format));

    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
        SDL_SW_YUVTexture *swdata;

        /* Use SDLs SW_YUVTexture */
        data->yuv =
            SDL_SW_CreateYUVTexture(texture->format, texture->w, texture->h);
        if (!data->yuv) {
            SDL_free(data);
            SDL_OutOfMemory();
            return -1;
        }
        swdata = data->yuv;

        /* but align pixels */
        SDL_free(swdata->pixels);
        swdata->pixels = (Uint8 *)memalign(16, texture->w * texture->h * 2);
        if (!swdata->pixels) {
            SDL_SW_DestroyYUVTexture(swdata);
            SDL_free(data);
            SDL_OutOfMemory();
            return -1;
        }

        /* Redo: Find the pitch and offset values for the overlay */
        switch (texture->format) {
        case SDL_PIXELFORMAT_YV12:
        case SDL_PIXELFORMAT_IYUV:
            /* Planar: full-size first plane followed by two half-width,
             * half-height planes */
            swdata->pitches[0] = texture->w;
            swdata->pitches[1] = swdata->pitches[0] / 2;
            swdata->pitches[2] = swdata->pitches[0] / 2;
            swdata->planes[0] = swdata->pixels;
            swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * texture->h;
            swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * texture->h / 2;
            break;
        case SDL_PIXELFORMAT_YUY2:
        case SDL_PIXELFORMAT_UYVY:
        case SDL_PIXELFORMAT_YVYU:
            /* Packed: a single plane with two bytes per pixel */
            swdata->pitches[0] = texture->w * 2;
            swdata->planes[0] = swdata->pixels;
            break;
        default:
            /* We should never get here (caught above) */
            break;
        }
    } else {
        data->pixels = SDL_malloc(texture->h * data->pitch);
        if (!data->pixels) {
            /* texture->driverdata is not set yet, so PS3_DestroyTexture()
             * could not release this allocation */
            SDL_free(data);
            SDL_OutOfMemory();
            return -1;
        }
    }

    texture->driverdata = data;
    deprintf(1, "-PS3_CreateTexture()\n");
    return 0;
}
/*
 * Report the pixel buffer and pitch of a texture.
 *
 * FOURCC textures are answered by the software YUV texture (its result is
 * returned as-is); for all other formats the driver's own buffer and pitch
 * are stored in *pixels / *pitch and 0 is returned.
 */
static int
PS3_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture,
                       void **pixels, int *pitch)
{
    PS3_TextureData *txdata;

    deprintf(1, "+PS3_QueryTexturePixels()\n");
    txdata = (PS3_TextureData *) texture->driverdata;

    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
        return SDL_SW_QueryYUVTexturePixels(txdata->yuv, pixels, pitch);
    }

    *pitch = txdata->pitch;
    *pixels = (void *) txdata->pixels;

    deprintf(1, "-PS3_QueryTexturePixels()\n");
    return 0;
}
/*
 * Copy caller-supplied pixels into a rectangle of the texture.
 *
 * rect:   destination rectangle inside the texture.
 * pixels: source rows, `pitch` bytes apart.
 *
 * FOURCC textures are forwarded to the software YUV texture and its result
 * is returned.  Otherwise rect->h rows of rect->w pixels are copied into the
 * driver's pixel buffer and 0 is returned.
 */
static int
PS3_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                  const SDL_Rect * rect, const void *pixels, int pitch)
{
    PS3_TextureData *data;

    deprintf(1, "+PS3_UpdateTexture()\n");
    data = (PS3_TextureData *) texture->driverdata;

    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
        return SDL_SW_UpdateYUVTexture(data->yuv, rect, pixels, pitch);
    } else {
        const int bytespp = SDL_BYTESPERPIXEL(texture->format);
        const Uint8 *src = (const Uint8 *) pixels;
        /* The destination is the texture's own buffer; the previous code
         * started from an uninitialized pointer here */
        Uint8 *dst = (Uint8 *) data->pixels
            + rect->y * data->pitch + rect->x * bytespp;
        size_t length = rect->w * bytespp;
        int row;

        /* Update the texture */
        for (row = 0; row < rect->h; ++row) {
            SDL_memcpy(dst, src, length);
            src += pitch;
            dst += data->pitch;
        }
    }

    deprintf(1, "-PS3_UpdateTexture()\n");
    return 0;
}
/*
 * Give the caller direct access to a rectangle of the texture.
 *
 * FOURCC textures are locked through the software YUV texture and its result
 * is returned.  For other formats *pixels is set to the first pixel of `rect`
 * inside the driver's buffer, *pitch to the row size, and 0 is returned.
 */
static int
PS3_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
                const SDL_Rect * rect, int markDirty, void **pixels,
                int *pitch)
{
    PS3_TextureData *txdata;
    Uint8 *start;

    deprintf(1, "+PS3_LockTexture()\n");
    txdata = (PS3_TextureData *) texture->driverdata;

    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
        deprintf(1, "-PS3_LockTexture()\n");
        return SDL_SW_LockYUVTexture(txdata->yuv, rect, markDirty, pixels, pitch);
    }

    /* Walk to the requested row, then to the requested column */
    start = (Uint8 *) txdata->pixels;
    start += rect->y * txdata->pitch;
    start += rect->x * SDL_BYTESPERPIXEL(texture->format);

    *pixels = (void *) start;
    *pitch = txdata->pitch;
    deprintf(1, "-PS3_LockTexture()\n");
    return 0;
}
/*
 * Counterpart of PS3_LockTexture().  Only FOURCC textures need work: they
 * are unlocked through the software YUV texture.  Other formats have nothing
 * to release.
 */
static void
PS3_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture)
{
    PS3_TextureData *txdata;

    deprintf(1, "+PS3_UnlockTexture()\n");
    txdata = (PS3_TextureData *) texture->driverdata;

    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
        SDL_SW_UnlockYUVTexture(txdata->yuv);
    }

    deprintf(1, "-PS3_UnlockTexture()\n");
}
/*
 * Release everything PS3_CreateTexture() attached to the texture: the
 * software YUV texture (FOURCC formats), the plain pixel buffer (other
 * formats) and the PS3_TextureData record itself.  texture->driverdata is
 * reset to NULL afterwards, so a second call is a harmless no-op.
 */
static void
PS3_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture)
{
    PS3_TextureData *data;

    deprintf(1, "+PS3_DestroyTexture()\n");
    data = (PS3_TextureData *) texture->driverdata;

    if (data) {
        if (data->yuv) {
            SDL_SW_DestroyYUVTexture(data->yuv);
        }
        if (data->pixels) {
            SDL_free((void *)data->pixels);
        }
        /* The record itself was allocated in PS3_CreateTexture() and was
         * previously leaked */
        SDL_free(data);
        texture->driverdata = NULL;
    }

    deprintf(1, "-PS3_DestroyTexture()\n");
}
/*
 * Draw one point on the renderer's screen surface using the renderer's
 * current colour.  Blend modes NONE and MASK write the mapped colour
 * directly; every other blend mode goes through SDL_BlendPoint().
 *
 * Returns the status of the drawing call.
 */
static int
SDL_PS3_RenderPoint(SDL_Renderer * renderer, int x, int y)
{
    SDL_PS3_RenderData *rdata = (SDL_PS3_RenderData *) renderer->driverdata;
    SDL_Surface *surface = rdata->screen;
    Uint32 pixel;

    if (renderer->blendMode != SDL_BLENDMODE_NONE &&
        renderer->blendMode != SDL_BLENDMODE_MASK) {
        return SDL_BlendPoint(surface, x, y, renderer->blendMode, renderer->r,
                              renderer->g, renderer->b, renderer->a);
    }

    pixel = SDL_MapRGBA(surface->format, renderer->r, renderer->g,
                        renderer->b, renderer->a);
    return SDL_DrawPoint(surface, x, y, pixel);
}
/*
 * Draw a line from (x1, y1) to (x2, y2) on the renderer's screen surface
 * using the renderer's current colour.  Blend modes NONE and MASK write the
 * mapped colour directly; every other blend mode goes through SDL_BlendLine().
 *
 * Returns the status of the drawing call.
 */
static int
SDL_PS3_RenderLine(SDL_Renderer * renderer, int x1, int y1, int x2, int y2)
{
    SDL_PS3_RenderData *rdata = (SDL_PS3_RenderData *) renderer->driverdata;
    SDL_Surface *surface = rdata->screen;
    Uint32 pixel;

    if (renderer->blendMode != SDL_BLENDMODE_NONE &&
        renderer->blendMode != SDL_BLENDMODE_MASK) {
        return SDL_BlendLine(surface, x1, y1, x2, y2, renderer->blendMode,
                             renderer->r, renderer->g, renderer->b,
                             renderer->a);
    }

    pixel = SDL_MapRGBA(surface->format, renderer->r, renderer->g,
                        renderer->b, renderer->a);
    return SDL_DrawLine(surface, x1, y1, x2, y2, pixel);
}
/*
 * Fill a rectangle on the renderer's screen surface with the renderer's
 * current colour.  Blend mode NONE uses SDL_FillRect(); every other blend
 * mode goes through SDL_BlendRect().  The drawing calls receive a private
 * copy of the rectangle, never the caller's.
 *
 * Returns the status of the drawing call.
 */
static int
SDL_PS3_RenderFill(SDL_Renderer * renderer, const SDL_Rect * rect)
{
    SDL_PS3_RenderData *rdata;
    SDL_Surface *surface;
    SDL_Rect area;
    Uint32 pixel;

    deprintf(1, "SDL_PS3_RenderFill()\n");

    rdata = (SDL_PS3_RenderData *) renderer->driverdata;
    surface = rdata->screen;
    area = *rect;

    if (renderer->blendMode != SDL_BLENDMODE_NONE) {
        return SDL_BlendRect(surface, &area, renderer->blendMode,
                             renderer->r, renderer->g, renderer->b,
                             renderer->a);
    }

    pixel = SDL_MapRGBA(surface->format, renderer->r, renderer->g,
                        renderer->b, renderer->a);
    return SDL_FillRect(surface, &area, pixel);
}
/*
 * Copy (part of) a texture onto the renderer's screen surface.
 *
 * srcrect: area of the texture to copy from.
 * dstrect: area of the screen surface to copy to.
 *
 * Three paths exist:
 *  - YV12/IYUV textures whose size and destination size are multiples of 8
 *    are scaled (if needed) and converted to RGB on the SPEs;
 *  - all other FOURCC textures are converted by SDL's software YUV code;
 *  - non-FOURCC textures are copied row by row without scaling or format
 *    conversion (the texture is assumed to use the screen's pixel layout).
 *
 * Returns 0 on success, -1 on failure, or the result of
 * SDL_SW_CopyYUVToRGB() on the software YUV path.
 */
static int
SDL_PS3_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
                   const SDL_Rect * srcrect, const SDL_Rect * dstrect)
{
    SDL_PS3_RenderData *data;
    SDL_Window *window;
    SDL_VideoDisplay *display;
    PS3_TextureData *txdata;
    Uint8 *dst;

    deprintf(1, "+SDL_PS3_RenderCopy()\n");

    data = (SDL_PS3_RenderData *) renderer->driverdata;
    window = SDL_GetWindowFromID(renderer->window);
    display = SDL_GetDisplayFromWindow(window);
    txdata = (PS3_TextureData *) texture->driverdata;

    /* Address of the destination rectangle inside the screen surface.  The
     * column offset has to use the SCREEN's bytes per pixel: the texture
     * format may be a FOURCC code, for which SDL_BYTESPERPIXEL() does not
     * describe the RGB destination. */
    dst = (Uint8 *) data->screen->pixels
        + dstrect->y * data->screen->pitch
        + dstrect->x * data->screen->format->BytesPerPixel;

    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
        deprintf(1, "Texture is in a FOURCC format\n");
        if ((texture->format == SDL_PIXELFORMAT_YV12 || texture->format == SDL_PIXELFORMAT_IYUV)
            && texture->w % 8 == 0 && texture->h % 8 == 0
            && dstrect->w % 8 == 0 && dstrect->h % 8 == 0) {
            SDL_SW_YUVTexture *swdata = (SDL_SW_YUVTexture *) txdata->yuv;
            Uint8 *lum, *Cr, *Cb;
            Uint8 *scaler_out = NULL;

            deprintf(1, "Use SPE for scaling/converting\n");

            /* The two planar formats only differ in the chroma plane order */
            switch (texture->format) {
            case SDL_PIXELFORMAT_YV12:
                lum = swdata->planes[0];
                Cr = swdata->planes[1];
                Cb = swdata->planes[2];
                break;
            case SDL_PIXELFORMAT_IYUV:
                lum = swdata->planes[0];
                Cr = swdata->planes[2];
                Cb = swdata->planes[1];
                break;
            default:
                /* We should never get here (caught above) */
                return -1;
            }

            if (srcrect->w != dstrect->w || srcrect->h != dstrect->h) {
                deprintf(1, "We need to scale the texture from %u x %u to %u x %u\n",
                         srcrect->w, srcrect->h, dstrect->w, dstrect->h);
                /* Alloc mem for scaled YUV picture */
                scaler_out = (Uint8 *) memalign(16, dstrect->w * dstrect->h + ((dstrect->w * dstrect->h) >> 1));
                if (scaler_out == NULL) {
                    SDL_OutOfMemory();
                    return -1;
                }

                /* Set parms for scaling.
                 * NOTE(review): the planes are passed from their start, so
                 * the srcrect origin is not taken into account here. */
                data->scaler_parms->src_pixel_width = srcrect->w;
                data->scaler_parms->src_pixel_height = srcrect->h;
                data->scaler_parms->dst_pixel_width = dstrect->w;
                data->scaler_parms->dst_pixel_height = dstrect->h;
                data->scaler_parms->y_plane = lum;
                data->scaler_parms->v_plane = Cr;
                data->scaler_parms->u_plane = Cb;
                data->scaler_parms->dstBuffer = scaler_out;
                data->scaler_thread_data->argp = (void *)data->scaler_parms;

                /* Scale the YUV overlay to given size */
                SPE_Start(data->scaler_thread_data);
                SPE_Stop(data->scaler_thread_data);

                /* Set parms for converting after scaling */
                data->converter_parms->y_plane = scaler_out;
                data->converter_parms->v_plane = scaler_out + dstrect->w * dstrect->h;
                data->converter_parms->u_plane = scaler_out + dstrect->w * dstrect->h + ((dstrect->w * dstrect->h) >> 2);
            } else {
                data->converter_parms->y_plane = lum;
                data->converter_parms->v_plane = Cr;
                data->converter_parms->u_plane = Cb;
            }

            data->converter_parms->src_pixel_width = dstrect->w;
            data->converter_parms->src_pixel_height = dstrect->h;
            data->converter_parms->dstBuffer = dst;
            data->converter_thread_data->argp = (void *)data->converter_parms;

            /* Convert YUV texture to RGB.
             * NOTE(review): the parameter block address is sent as an
             * unsigned int, which assumes 32-bit pointers. */
            SPE_SendMsg(data->converter_thread_data, SPU_START);
            SPE_SendMsg(data->converter_thread_data, (unsigned int)data->converter_thread_data->argp);

            /* We can probably move that to RenderPresent() */
            SPE_WaitForMsg(data->converter_thread_data, SPU_FIN);

            /* NULL when no scaling was needed; free(NULL) is a no-op */
            free(scaler_out);
        } else {
            deprintf(1, "Use software for scaling/converting\n");
            return SDL_SW_CopyYUVToRGB(txdata->yuv, srcrect, display->current_mode.format,
                                       dstrect->w, dstrect->h, dst,
                                       data->screen->pitch);
        }
    } else {
        const int bytespp = SDL_BYTESPERPIXEL(texture->format);
        /* This path cannot scale: copy the overlap of both rectangles so
         * that we never read past the requested source area */
        const int copy_w = srcrect->w < dstrect->w ? srcrect->w : dstrect->w;
        const int copy_h = srcrect->h < dstrect->h ? srcrect->h : dstrect->h;
        /* Start at the srcrect origin instead of the texture origin */
        const Uint8 *src = (const Uint8 *) txdata->pixels
            + srcrect->y * txdata->pitch + srcrect->x * bytespp;
        int row;

        deprintf(1, "SDL_ISPIXELFORMAT_FOURCC = false\n");

        if (copy_w > 0) {
            const size_t length = (size_t) copy_w * bytespp;

            for (row = 0; row < copy_h; ++row) {
                SDL_memcpy(dst, src, length);
                src += txdata->pitch;
                dst += data->screen->pitch;
            }
        }
    }

    deprintf(1, "-SDL_PS3_RenderCopy()\n");
    return 0;
}
/*
 * Present the renderer's off-screen surface on the PS3 framebuffer.
 *
 * Queries the framebuffer geometry, centers the window inside the visible
 * resolution, asks the fb_writer SPE program to copy data->screen into the
 * currently selected buffer, optionally waits for vsync and flips the page.
 *
 * If the geometry cannot be queried an SDL error is set and the frame is
 * dropped; continuing would compute framebuffer addresses from
 * uninitialized stack data.
 */
static void
SDL_PS3_RenderPresent(SDL_Renderer * renderer)
{
    deprintf(1, "+SDL_PS3_RenderPresent()\n");
    SDL_PS3_RenderData *data =
        (SDL_PS3_RenderData *) renderer->driverdata;
    SDL_Window *window = SDL_GetWindowFromID(renderer->window);
    SDL_VideoDisplay *display = SDL_GetDisplayFromWindow(window);
    SDL_VideoData *devdata = display->device->driverdata;

    /* Send the data to the screen */
    /* Get screeninfo */
    struct fb_fix_screeninfo fb_finfo;
    if (ioctl(devdata->fbdev, FBIOGET_FSCREENINFO, &fb_finfo)) {
        SDL_SetError("[PS3] Can't get fixed screeninfo");
        return;
    }
    struct fb_var_screeninfo fb_vinfo;
    if (ioctl(devdata->fbdev, FBIOGET_VSCREENINFO, &fb_vinfo)) {
        SDL_SetError("[PS3] Can't get VSCREENINFO");
        return;
    }

    /* NOTE: the pixel size is hard-coded to 4 bytes below; the reported
     * bits_per_pixel is not consulted. */

    /* Adjust centering */
    data->bounded_width = window->w < fb_vinfo.xres ? window->w : fb_vinfo.xres;
    data->bounded_height = window->h < fb_vinfo.yres ? window->h : fb_vinfo.yres;
    /* We could use SDL's CENTERED flag for centering */
    data->offset_left = (fb_vinfo.xres - data->bounded_width) >> 1;
    data->offset_top = (fb_vinfo.yres - data->bounded_height) >> 1;
    /* center[0] addresses the first buffer, center[1] the one that starts
     * yres lines further into the mapping */
    data->center[0] = devdata->frame_buffer + data->offset_left * 4 +
        data->offset_top * fb_finfo.line_length;
    data->center[1] = data->center[0] + fb_vinfo.yres * fb_finfo.line_length;

    deprintf(1, "offset_left = %u\n", data->offset_left);
    deprintf(1, "offset_top = %u\n", data->offset_top);

    /* Set SPU parms for copying the surface to framebuffer */
    devdata->fb_parms->data = (unsigned char *)data->screen->pixels;
    devdata->fb_parms->center = data->center[data->current_screen];
    devdata->fb_parms->out_line_stride = fb_finfo.line_length;
    devdata->fb_parms->in_line_stride = window->w * 4;
    devdata->fb_parms->bounded_input_height = data->bounded_height;
    devdata->fb_parms->bounded_input_width = data->bounded_width;
    devdata->fb_parms->fb_pixel_size = 4;

    deprintf(3, "[PS3->SPU] fb_thread_data->argp = 0x%x\n", devdata->fb_thread_data->argp);

    /* Copying.. (blocks until the SPE program reports SPU_FIN) */
    SPE_SendMsg(devdata->fb_thread_data, SPU_START);
    SPE_SendMsg(devdata->fb_thread_data, (unsigned int)devdata->fb_thread_data->argp);
    SPE_WaitForMsg(devdata->fb_thread_data, SPU_FIN);

    /* Wait for vsync */
    if (renderer->info.flags & SDL_RENDERER_PRESENTVSYNC) {
        unsigned long crt = 0;
        deprintf(1, "[PS3] Wait for vsync\n");
        ioctl(devdata->fbdev, FBIO_WAITFORVSYNC, &crt);
    }

    /* Page flip */
    deprintf(1, "[PS3] Page flip to buffer #%u 0x%x\n", data->current_screen, data->center[data->current_screen]);
    ioctl(devdata->fbdev, PS3FB_IOCTL_FSEL, (unsigned long)&data->current_screen);

    /* Update the flipping chain, if any */
    if (data->double_buffering) {
        data->current_screen = (data->current_screen + 1) % 2;
    }
    deprintf(1, "-SDL_PS3_RenderPresent()\n");
}
/*
 * Release everything owned by the PS3 renderer and the renderer itself.
 *
 * Frees the off-screen surface, the scaler/converter SPE bookkeeping and
 * parameter blocks, shuts the converter SPE program down, and finally frees
 * the driver data and the SDL_Renderer.
 */
static void
SDL_PS3_DestroyRenderer(SDL_Renderer * renderer)
{
    deprintf(1, "+SDL_PS3_DestroyRenderer()\n");
    SDL_PS3_RenderData *data =
        (SDL_PS3_RenderData *) renderer->driverdata;

    if (data) {
        /* data->screen is a single surface. The former
         * SDL_arraysize(data->screen) loop evaluated to zero iterations on
         * a pointer, so the surface was never released. */
        if (data->screen) {
            SDL_FreeSurface(data->screen);
        }

        /* Shutdown SPE and release related resources */
        if (data->scaler_thread_data) {
            free((void *)data->scaler_thread_data);
        }
        if (data->scaler_parms) {
            free((void *)data->scaler_parms);
        }
        if (data->converter_thread_data) {
            SPE_Shutdown(data->converter_thread_data);
            free((void *)data->converter_thread_data);
        }
        if (data->converter_parms) {
            free((void *)data->converter_parms);
        }
        SDL_free(data);
    }
    SDL_free(renderer);
    deprintf(1, "-SDL_PS3_DestroyRenderer()\n");
}
/* vi: set ts=4 sw=4 expandtab: */
/*
SDL - Simple DirectMedia Layer
Copyright (C) 1997-2009 Sam Lantinga
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Sam Lantinga
slouken@libsdl.org
*/
#include "SDL_config.h"
/* Default framebuffer device on PS3 */
/* SDL surface based renderer implementation.
 * PS3_VideoInit() registers this driver through SDL_AddRenderDriver(). */
extern SDL_RenderDriver SDL_PS3_RenderDriver;
/* vi: set ts=4 sw=4 expandtab: */
/*
SDL - Simple DirectMedia Layer
Copyright (C) 1997-2009 Sam Lantinga
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Sam Lantinga
slouken@libsdl.org
*/
#include "SDL_config.h"
#include "SDL_video.h"
#include "SDL_ps3spe_c.h"
#include "SDL_ps3video.h"
#include "SDL_ps3render_c.h"
/*
 * Start the SPE thread.
 *
 * Boots the program first if that has not happened yet, then creates a
 * pthread that runs the SPE context via SPE_RunContext(). For keepalive
 * programs the call blocks until the program's first mailbox message
 * arrives and checks that it is SPU_READY.
 *
 * Returns 0 on success, -1 if booting or thread creation failed or the
 * keepalive program did not answer with SPU_READY.
 */
int SPE_Start(spu_data_t * spe_data)
{
    deprintf(2, "[PS3->SPU] Start SPE: %s\n", spe_data->program_name);

    /* Without a loaded context there is nothing to run */
    if (!(spe_data->booted)) {
        if (SPE_Boot(spe_data) < 0) {
            return -1;
        }
    }

    /* To allow re-running of context, spe_ctx_entry has to be set before each call */
    spe_data->entry = SPE_DEFAULT_ENTRY;
    spe_data->error_code = 0;

    /* Create SPE thread and run */
    deprintf(2, "[PS3->SPU] Create Thread: %s\n", spe_data->program_name);
    if (pthread_create
        (&spe_data->thread, NULL, (void *)&SPE_RunContext, (void *)spe_data)) {
        deprintf(2, "[PS3->SPU] Could not create pthread for spe: %s\n", spe_data->program_name);
        SDL_SetError("[PS3->SPU] Could not create pthread for spe");
        return -1;
    }

    if (spe_data->keepalive) {
        return SPE_WaitForMsg(spe_data, SPU_READY);
    }

    /* The success path previously fell off the end of a non-void function */
    return 0;
}
/*
 * Stop the SPE thread: join the pthread created by SPE_Start().
 * Blocks until that thread has terminated.
 * Returns 0 on success, -1 (with the SDL error set) if joining failed.
 */
int SPE_Stop(spu_data_t * spe_data)
{
    int join_status;

    deprintf(2, "[PS3->SPU] Stop SPE: %s\n", spe_data->program_name);

    /* Wait for SPE thread to complete */
    deprintf(2, "[PS3->SPU] Wait for SPE thread to complete: %s\n", spe_data->program_name);
    join_status = pthread_join(spe_data->thread, NULL);
    if (join_status == 0) {
        return 0;
    }

    deprintf(2, "[PS3->SPU] Failed joining the thread: %s\n", spe_data->program_name);
    SDL_SetError("[PS3->SPU] Failed joining the thread");
    return -1;
}
/*
 * Create an SPE context and load spe_data->program into it.
 * Marks the structure as booted on success.
 * Returns 0 on success, -1 (with the SDL error set) otherwise.
 */
int SPE_Boot(spu_data_t * spe_data)
{
    int load_status;

    /* Step 1: create the SPE context */
    deprintf(2, "[PS3->SPU] Create SPE Context: %s\n", spe_data->program_name);
    spe_data->ctx = spe_context_create(0, NULL);
    if (!spe_data->ctx) {
        deprintf(2, "[PS3->SPU] Failed creating SPE context: %s\n", spe_data->program_name);
        SDL_SetError("[PS3->SPU] Failed creating SPE context");
        return -1;
    }

    /* Step 2: load the SPE object into the SPE local store */
    deprintf(2, "[PS3->SPU] Load Program into SPE: %s\n", spe_data->program_name);
    load_status = spe_program_load(spe_data->ctx, &spe_data->program);
    if (load_status != 0) {
        deprintf(2, "[PS3->SPU] Failed loading program into SPE context: %s\n", spe_data->program_name);
        SDL_SetError("[PS3->SPU] Failed loading program into SPE context");
        return -1;
    }

    spe_data->booted = 1;
    deprintf(2, "[PS3->SPU] SPE boot successful\n");
    return 0;
}
/*
 * (Stop and) shut down the SPE.
 * A booted keepalive program is first told to leave its loop (SPU_EXIT)
 * and its thread is joined; afterwards the SPE context is destroyed.
 * Returns 0 on success, -1 (with the SDL error set) if the context could
 * not be destroyed.
 */
int SPE_Shutdown(spu_data_t * spe_data)
{
    const int running_loop = spe_data->keepalive && spe_data->booted;
    int destroy_status;

    if (running_loop) {
        SPE_SendMsg(spe_data, SPU_EXIT);
        SPE_Stop(spe_data);
    }

    /* Destroy SPE context */
    deprintf(2, "[PS3->SPU] Destroy SPE context: %s\n", spe_data->program_name);
    destroy_status = spe_context_destroy(spe_data->ctx);
    if (destroy_status == 0) {
        deprintf(2, "[PS3->SPU] SPE shutdown successful: %s\n", spe_data->program_name);
        return 0;
    }

    deprintf(2, "[PS3->SPU] Failed destroying context: %s\n", spe_data->program_name);
    SDL_SetError("[PS3->SPU] Failed destroying context");
    return -1;
}
/*
 * Send one message to the SPE via its inbound mailbox.
 * The write uses SPE_MBOX_ALL_BLOCKING.
 * Returns 0 if the message was written, -1 (with the SDL error set)
 * otherwise.
 */
int SPE_SendMsg(spu_data_t * spe_data, unsigned int msg)
{
    unsigned int outgoing = msg;
    int written;

    deprintf(2, "[PS3->SPU] Sending message %u to %s\n", msg, spe_data->program_name);

    /* Exactly one mailbox entry is handed over */
    written = spe_in_mbox_write(spe_data->ctx, &outgoing, 1, SPE_MBOX_ALL_BLOCKING);
    if (written >= 1) {
        return 0;
    }

    deprintf(2, "[PS3->SPU] No message could be written to %s\n", spe_data->program_name);
    SDL_SetError("[PS3->SPU] No message could be written");
    return -1;
}
/*
 * Read 1 message from the SPE, blocking until at least 1 message is
 * available in its outbound mailbox.
 *
 * Returns 0 if the received message equals msg, -1 if a different message
 * was received or if no message could be read (SDL error set in the latter
 * case).
 */
int SPE_WaitForMsg(spu_data_t * spe_data, unsigned int msg)
{
    deprintf(2, "[PS3->SPU] Waiting for message from %s\n", spe_data->program_name);
    unsigned int out_messages[1] = { 0 };

    /* Spin until the mailbox status becomes non-zero */
    while (!spe_out_mbox_status(spe_data->ctx));

    /* A failed read must not be compared against msg: the buffer would
     * not contain a real message. */
    int mbox_read = spe_out_mbox_read(spe_data->ctx, out_messages, 1);
    if (mbox_read < 1) {
        deprintf(2, "[PS3->SPU] No message could be read from %s\n", spe_data->program_name);
        SDL_SetError("[PS3->SPU] No message could be read");
        return -1;
    }

    deprintf(2, "[PS3->SPU] Got message from %s, message was %u\n", spe_data->program_name, out_messages[0]);
    if (out_messages[0] == msg)
        return 0;
    else
        return -1;
}
/*
 * Re-runnable invocation of the spe_context_run call.
 * Used as the body of the thread created by SPE_Start(); thread_argp is
 * the spu_data_t describing the program to run.
 * If spe_context_run() fails the whole process is terminated with exit(1).
 */
void SPE_RunContext(void *thread_argp)
{
    /* The thread argument carries the SPE bookkeeping; its argp member is
     * what gets handed to the SPE program itself */
    spu_data_t *spe = (spu_data_t *) thread_argp;
    int run_status;

    deprintf(3, "[PS3->SPU] void* argp=0x%x\n", (unsigned int)spe->argp);

    /* Run it.. */
    deprintf(2, "[PS3->SPU] Run SPE program: %s\n", spe->program_name);
    run_status = spe_context_run(spe->ctx, &spe->entry, 0,
                                 (void *)spe->argp, NULL, NULL);
    if (run_status < 0) {
        deprintf(2, "[PS3->SPU] Failed running SPE context: %s\n", spe->program_name);
        SDL_SetError("[PS3->SPU] Failed running SPE context: %s", spe->program_name);
        exit(1);
    }

    pthread_exit(NULL);
}
/* vi: set ts=4 sw=4 expandtab: */
/*
SDL - Simple DirectMedia Layer
Copyright (C) 1997-2009 Sam Lantinga
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Sam Lantinga
slouken@libsdl.org
*/
/* This SPE API basically provides 3 ways to run and control a program
 * on the SPE:
 * - Start and stop the program (keepalive=0).
 * SPE_Start() will implicitly boot up the program, create a thread and run
 * the context.
 * SPE_Stop() will join the (terminated) thread (may block) and return.
 * - Boot the program and run it (keepalive=0).
 * SPE_Boot() will create a context and load the program; the context is
 * then started with SPE_Start().
 * SPE_Stop() will safely end the program.
 * - Boot, run and send messages to the program (keepalive=1).
 * Start the program by using one of the methods described above. Once the
 * READY message has been received, the program is in its infinite loop,
 * waiting for new messages.
 * Every time you run the program, send SPU_START and the address of the
 * corresponding parameter struct using SPE_SendMsg().
 * SPE_WaitForMsg() will then wait for SPU_FIN; it blocks.
 * SPE_Shutdown() sends SPU_EXIT and finally stops the program.
 *
 * Therefore the SPE program
 * - either runs once and returns
 * - or runs in an infinite loop and is controlled by messages.
 */
#include "SDL_config.h"
#include "spulibs/spu_common.h"
#include <libspe2.h>
#ifndef _SDL_ps3spe_h
#define _SDL_ps3spe_h
/* SPU handling data: the state kept on the PPE side for one SPE program */
typedef struct spu_data {
/* Context to be executed; created by SPE_Boot(), destroyed by SPE_Shutdown() */
spe_context_ptr_t ctx;
/* Handle of the SPE program that SPE_Boot() loads into ctx */
spe_program_handle_t program;
/* Thread running the context; created by SPE_Start(), joined by SPE_Stop() */
pthread_t thread;
/* For debugging: name printed in the deprintf() traces */
char * program_name;
/* SPE_Start() or SPE_Boot() called; SPE_Boot() sets this to 1 once the
 * program has been loaded */
unsigned int booted;
/* Runs the program in an infinite loop? When non-zero, SPE_Start() waits
 * for SPU_READY and SPE_Shutdown() sends SPU_EXIT before joining */
unsigned int keepalive;
/* Entry point passed to spe_context_run(); SPE_Start() resets it to
 * SPE_DEFAULT_ENTRY before every run */
unsigned int entry;
/* Exit code of the program; SPE_Start() clears it to 0 */
int error_code;
/* Arguments passed to the program */
void * argp;
} spu_data_t;
/* SPU specific API functions.
 * Every function returning int yields 0 on success and -1 on failure. */
/* Boot if necessary, then run the context in a new thread */
int SPE_Start(spu_data_t * spe_data);
/* Join the thread created by SPE_Start() */
int SPE_Stop(spu_data_t * spe_data);
/* Create the context and load the program */
int SPE_Boot(spu_data_t * spe_data);
/* Stop a booted keepalive program and destroy the context */
int SPE_Shutdown(spu_data_t * spe_data);
/* Write one message to the SPE's inbound mailbox */
int SPE_SendMsg(spu_data_t * spe_data, unsigned int msg);
/* Block for one outbound mailbox message and compare it with msg */
int SPE_WaitForMsg(spu_data_t * spe_data, unsigned int msg);
/* Thread body used by SPE_Start(); thread_argp is a spu_data_t pointer */
void SPE_RunContext(void *thread_argp);
#endif /* _SDL_ps3spe_h */
/* vi: set ts=4 sw=4 expandtab: */
/*
SDL - Simple DirectMedia Layer
Copyright (C) 1997-2009 Sam Lantinga
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Sam Lantinga
slouken@libsdl.org
*/
#include "SDL_config.h"
/* SDL PS3 video driver implementation based on dummy video driver
*
* Initial work by Ryan C. Gordon (icculus@icculus.org). A good portion
* of this was cut-and-pasted from Stephane Peter's work in the AAlib
* SDL video driver. Renamed to "DUMMY" by Sam Lantinga.
*/
#include "SDL_video.h"
#include "SDL_mouse.h"
#include "../SDL_sysvideo.h"
#include "../SDL_pixels_c.h"
#include "../../events/SDL_events_c.h"
#include "SDL_ps3video.h"
#include "SDL_ps3spe_c.h"
#include "SDL_ps3events_c.h"
#include "SDL_ps3render_c.h"
#include "SDL_ps3modes_c.h"
#include <fcntl.h>
#include <linux/fb.h>
#include <asm/ps3fb.h>
#include <sys/mman.h>
#define PS3VID_DRIVER_NAME "ps3"
/* Initialization/Query functions */
static int PS3_VideoInit(_THIS);
static void PS3_VideoQuit(_THIS);
/* Stores the SPE executable name of fb_writer_spu */
extern spe_program_handle_t fb_writer_spu;
/* PS3 driver bootstrap functions */

/*
 * Report whether the PS3 driver should be used: it is available only when
 * the SDL_VIDEODRIVER environment variable names it explicitly.
 * Returns 1 if so, 0 otherwise.
 */
static int
PS3_Available(void)
{
    const char *requested;

    deprintf(1, "+PS3_Available()\n");
    requested = SDL_getenv("SDL_VIDEODRIVER");
    if (requested == NULL || SDL_strcmp(requested, PS3VID_DRIVER_NAME) != 0) {
        deprintf(1, "-PS3_Available()\n");
        return (0);
    }
    return (1);
}
/*
 * Free a device created by PS3_CreateDevice(): first its private driver
 * data, then the device structure itself.
 */
static void
PS3_DeleteDevice(SDL_VideoDevice * device)
{
    void *driver_private;

    deprintf(1, "+PS3_DeleteDevice()\n");
    driver_private = device->driverdata;
    SDL_free(driver_private);
    SDL_free(device);
    deprintf(1, "-PS3_DeleteDevice()\n");
}
/*
 * Allocate and wire up the SDL_VideoDevice for the PS3 driver.
 *
 * Both the device and its private SDL_VideoData are zero-initialized.
 * devindex is not used.
 * Returns the new device, or NULL (with the SDL out-of-memory error set)
 * if an allocation failed.
 */
static SDL_VideoDevice *
PS3_CreateDevice(int devindex)
{
    deprintf(1, "+PS3_CreateDevice()\n");
    SDL_VideoDevice *device;
    SDL_VideoData *data;

    /* Initialize all variables that we clean on shutdown */
    device = (SDL_VideoDevice *) SDL_calloc(1, sizeof(SDL_VideoDevice));
    if (!device) {
        /* Nothing has been allocated yet, so there is nothing to free */
        SDL_OutOfMemory();
        return NULL;
    }
    data = (struct SDL_VideoData *) SDL_calloc(1, sizeof(SDL_VideoData));
    if (!data) {
        SDL_OutOfMemory();
        SDL_free(device);
        return NULL;
    }
    device->driverdata = data;

    /* Set the function pointers */
    device->VideoInit = PS3_VideoInit;
    device->VideoQuit = PS3_VideoQuit;
    device->SetDisplayMode = PS3_SetDisplayMode;
    device->GetDisplayModes = PS3_GetDisplayModes;
    device->PumpEvents = PS3_PumpEvents;
    device->free = PS3_DeleteDevice;

    deprintf(1, "-PS3_CreateDevice()\n");
    return device;
}
/* Bootstrap record for the PS3 driver: driver name, description, and the
 * availability-check and device-creation callbacks defined in this file */
VideoBootStrap PS3_bootstrap = {
PS3VID_DRIVER_NAME, "SDL PS3 Cell video driver",
PS3_Available, PS3_CreateDevice
};
/*
 * Initialize the PS3 video driver.
 *
 * Sets up the fb_writer SPE program (parameter block + thread data) and
 * starts it, opens the framebuffer device, takes control of the frame
 * buffer from the kernel, maps and clears it, and registers the display
 * modes and the PS3 render driver.
 *
 * Returns 0 on success, -1 (with the SDL error set) on failure.
 */
int
PS3_VideoInit(_THIS)
{
    deprintf(1, "PS3_VideoInit()\n");
    SDL_VideoData *data = (SDL_VideoData *) _this->driverdata;
    struct fb_fix_screeninfo fb_finfo;
    void *fb_map;

    /* Create SPU fb_parms and thread structure */
    data->fb_parms = (struct fb_writer_parms_t *)
        memalign(16, sizeof(struct fb_writer_parms_t));
    data->fb_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t));
    if (data->fb_parms == NULL || data->fb_thread_data == NULL) {
        SDL_OutOfMemory();
        return -1;
    }
    data->fb_thread_data->program = fb_writer_spu;
    data->fb_thread_data->program_name = "fb_writer_spu";
    data->fb_thread_data->argp = (void *)data->fb_parms;
    data->fb_thread_data->keepalive = 1;
    data->fb_thread_data->booted = 0;

    SPE_Start(data->fb_thread_data);

    /* Open the device */
    data->fbdev = open(PS3DEV, O_RDWR);
    if (data->fbdev < 0) {
        SDL_SetError("[PS3] Unable to open device %s", PS3DEV);
        return -1;
    }

    /* Take control of frame buffer from kernel, for details see
     * http://felter.org/wesley/files/ps3/linux-20061110-docs/ApplicationProgrammingEnvironment.html
     * kernel will no longer flip the screen itself
     */
    ioctl(data->fbdev, PS3FB_IOCTL_ON, 0);

    /* Unblank screen */
    ioctl(data->fbdev, FBIOBLANK, 0);

    if (ioctl(data->fbdev, FBIOGET_FSCREENINFO, &fb_finfo)) {
        SDL_SetError("[PS3] Can't get fixed screeninfo");
        /* This is a failure; it used to be reported as success (0) */
        return -1;
    }

    /* Note: on PS3, fb_finfo.smem_len is enough for double buffering */
    fb_map = mmap(0, fb_finfo.smem_len, PROT_READ | PROT_WRITE, MAP_SHARED,
                  data->fbdev, 0);
    if (fb_map == MAP_FAILED) {
        /* Keep frame_buffer NULL so that PS3_VideoQuit() does not try to
         * unmap an invalid address */
        data->frame_buffer = NULL;
        SDL_SetError("[PS3] Can't mmap for %s", PS3DEV);
        return -1;
    }
    data->frame_buffer = (uint8_t *) fb_map;

    /* Blank screen */
    memset(data->frame_buffer, 0x00, fb_finfo.smem_len);

    PS3_InitModes(_this);
    SDL_AddRenderDriver(0, &SDL_PS3_RenderDriver);

    /* We're done! */
    return 0;
}
/*
 * Shut the PS3 video driver down: release the display modes, unmap the
 * framebuffer, stop the fb_writer SPE program, free its bookkeeping and
 * hand the frame buffer back to the kernel.
 */
void
PS3_VideoQuit(_THIS)
{
    deprintf(1, "PS3_VideoQuit()\n");
    SDL_VideoData *data = (SDL_VideoData *) _this->driverdata;

    PS3_QuitModes(_this);

    /* Unmap framebuffer (skip the MAP_FAILED value a failed mmap leaves) */
    if (data->frame_buffer && data->frame_buffer != (uint8_t *) MAP_FAILED) {
        struct fb_fix_screeninfo fb_finfo;
        if (ioctl(data->fbdev, FBIOGET_FSCREENINFO, &fb_finfo) != -1) {
            munmap(data->frame_buffer, fb_finfo.smem_len);
            data->frame_buffer = 0;
        }
    }

    /* Shutdown SPE and related resources. The SPE program is stopped
     * before its parameter block is released, so the block stays valid
     * for as long as the program can run. */
    if (data->fb_thread_data) {
        SPE_Shutdown(data->fb_thread_data);
        free((void *)data->fb_thread_data);
        data->fb_thread_data = NULL;
    }
    if (data->fb_parms) {
        free((void *)data->fb_parms);
        data->fb_parms = NULL;
    }

    /* Close device. The driver data is zero-initialized and a failed
     * open() stores -1, so only a positive value denotes a descriptor
     * opened by PS3_VideoInit(). */
    if (data->fbdev > 0) {
        /* Give control of frame buffer back to kernel */
        ioctl(data->fbdev, PS3FB_IOCTL_OFF, 0);
        close(data->fbdev);
        data->fbdev = -1;
    }
}
/* vi: set ts=4 sw=4 expandtab: */
/*
SDL - Simple DirectMedia Layer
Copyright (C) 1997-2009 Sam Lantinga
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Sam Lantinga
slouken@libsdl.org
*/
#include "SDL_config.h"
#ifndef _SDL_ps3video_h
#define _SDL_ps3video_h
#include "../SDL_sysvideo.h"
#include "SDL_ps3spe_c.h"
#include <linux/fb.h>
#include <asm/ps3fb.h>
/* Debugging
 * 0: No debug messages
 * 1: Video debug messages
 * 2: SPE debug messages
 * 3: Memory addresses
 *
 * deprintf(level, fmt, ...) prints to stdout and flushes when level is
 * less than or equal to DEBUG_LEVEL; otherwise its arguments are not
 * evaluated at run time.
 */
#define DEBUG_LEVEL 0

#ifdef DEBUG_LEVEL
#define deprintf(level, ...)                                \
    do {                                                    \
        if ((unsigned)(level) <= DEBUG_LEVEL) {             \
            fprintf(stdout, __VA_ARGS__);                   \
            fflush(stdout);                                 \
        }                                                   \
    } while (0)
#else
#define deprintf(level, ...)
#endif
/* Default framebuffer device on PS3 */
#define PS3DEV "/dev/fb0"
/* Private display data: allocated zeroed by PS3_CreateDevice(), filled in
 * by PS3_VideoInit() and released by PS3_VideoQuit() */
typedef struct SDL_VideoData
{
/* Framebuffer device descriptor (result of open(PS3DEV)) */
int fbdev;
/* mmap'd access to fbdev */
uint8_t * frame_buffer;
/* SPE threading stuff of the framebuffer (fb_writer_spu program) */
spu_data_t * fb_thread_data;
/* Framebuffer transfer data; handed to the SPE program as its argp */
volatile struct fb_writer_parms_t * fb_parms __attribute__((aligned(128)));
} SDL_VideoData;
/* Per-display-mode driver data. Note that the struct tag is
 * SDL_DisplayModeData while the typedef name is PS3_DisplayModeData. */
typedef struct SDL_DisplayModeData
{
/* NOTE(review): presumably the PS3 video mode id used by the mode-setting
 * code - confirm against SDL_ps3modes.c */
unsigned long mode;
//struct ps3fb_ioctl_res res;
} PS3_DisplayModeData;
#endif /* _SDL_ps3video_h */
/* vi: set ts=4 sw=4 expandtab: */
# This Makefile is for building the CELL BE SPU libs
# libfb_writer_spu.so, libyuv2rgb_spu.so, libbilin_scaler_spu.so
# (and the matching static archives lib*_spu.a).
#
# It is written to be included from the top-level Makefile and expects
# srcdir, SPU_GCC, EMBEDSPU, AR, INSTALL, DESTDIR and libdir to be set there.

# These targets are commands, not files.
.PHONY: ps3libs ps3libs-install ps3libs-uninstall ps3libs-clean

# Toolchain
PPU_LD=/usr/bin/ld
SPU_SRCDIR=$(srcdir)/src/video/ps3/spulibs
SPU_LIBDIR=$(srcdir)/src/video/ps3/spulibs/libs
SPU_CFLAGS=-g -W -Wall -Winline -Wno-main -I. -I /usr/spu/include -I /opt/cell/sdk/usr/spu/include -finline-limit=10000 -Winline -ftree-vectorize -funroll-loops -fmodulo-sched -ffast-math -fPIC -O2

# Every SPU program is rebuilt when the shared header changes
DEPS = $(SPU_SRCDIR)/spu_common.h
LIBS= fb_writer yuv2rgb bilin_scaler

OBJLIBS = $(foreach lib,$(LIBS),lib$(lib)_spu.a)
SHALIBS = $(foreach lib,$(LIBS),lib$(lib)_spu.so)

ps3libs: $(foreach lib,$(OBJLIBS),$(SPU_LIBDIR)/$(lib)) $(foreach lib,$(SHALIBS),$(SPU_LIBDIR)/$(lib))

# Static archive from the embedded object
$(SPU_LIBDIR)/lib%_spu.a: $(SPU_LIBDIR)/%-embed.o
	$(AR) -qcs $@ $<

# Shared library from the embedded object
$(SPU_LIBDIR)/lib%_spu.so: $(SPU_LIBDIR)/%-embed.o
	$(PPU_LD) -o $@ -shared -soname=$(notdir $@) $<

# Wrap the SPU program into a PPU object exporting the symbol <name>_spu
$(SPU_LIBDIR)/%-embed.o: $(SPU_LIBDIR)/%.o
	$(EMBEDSPU) -m32 $(subst -embed.o,,$(notdir $@))_spu $< $@

# Build the SPU program itself
$(SPU_LIBDIR)/%.o: $(SPU_SRCDIR)/%.c $(DEPS)
	$(SPU_GCC) $(SPU_CFLAGS) -o $@ $< -lm

# Static archives are plain data files: install them 0644 (was 0655, which
# marked them executable for group/others but not for the owner).
ps3libs-install: $(foreach obj,$(OBJLIBS),$(SPU_LIBDIR)/$(obj)) $(foreach obj,$(SHALIBS),$(SPU_LIBDIR)/$(obj))
	for file in $(OBJLIBS); do \
		$(INSTALL) -c -m 0644 $(SPU_LIBDIR)/$$file $(DESTDIR)$(libdir)/$$file; \
	done
	for file in $(SHALIBS); do \
		$(INSTALL) -c -m 0755 $(SPU_LIBDIR)/$$file $(DESTDIR)$(libdir)/$$file; \
	done

ps3libs-uninstall:
	for file in $(OBJLIBS) $(SHALIBS); do \
		rm -f $(DESTDIR)$(libdir)/$$file; \
	done

ps3libs-clean:
	rm -f $(SPU_LIBDIR)/*
/*
* SDL - Simple DirectMedia Layer
* CELL BE Support for PS3 Framebuffer
* Copyright (C) 2008, 2009 International Business Machines Corporation
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*
* Martin Lowinski <lowinski [at] de [dot] ibm [ibm] com>
* Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
* SPE code based on research by:
* Rene Becker
* Thimo Emmerich
*/
#include "spu_common.h"
#include <spu_intrinsics.h>
#include <spu_mfcio.h>
// Debugging: define DEBUG to get trace output from the SPU program
//#define DEBUG
#ifdef DEBUG
/* Wrapped in do/while(0) so that the two statements stay together when the
 * macro is used as the body of an unbraced if/else. */
#define deprintf(fmt, args... ) \
    do { \
        fprintf( stdout, fmt, ##args ); \
        fflush( stdout ); \
    } while ( 0 )
#else
#define deprintf( fmt, args... )
#endif
/* Scaling parameters; main() fills this block from main memory via DMA */
struct scale_parms_t parms __attribute__((aligned(128)));
/* Source line buffers, two sets ([2]) per plane.
 * A maximum of 8 lines Y, therefore 4 lines V, 4 lines U are stored
 * (counting both sets); there might be the need to retrieve misaligned
 * data, adjust incoming v and u plane to be able to handle this (add 128)
 */
unsigned char y_plane[2][(MAX_HDTV_WIDTH+128)*4] __attribute__((aligned(128)));
unsigned char v_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128)));
unsigned char u_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128)));
/* temp-buffer for scaling: 4 lines Y, therefore 2 lines V, 2 lines U
 * (again counting both sets) */
unsigned char scaled_y_plane[2][MAX_HDTV_WIDTH*2] __attribute__((aligned(128)));
unsigned char scaled_v_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128)));
unsigned char scaled_u_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128)));
/* some vectors needed by the float to int conversion: vfloat_to_vuint()
 * uses vec_0_1 as the lower and vec_255 as the upper clamp bound */
static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f };
static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f };
/* Line scalers: the scale_* routines call the w16 variant for Y lines and
 * the w8 variant for the half-width V and U lines */
void bilinear_scale_line_w8(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride);
void bilinear_scale_line_w16(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride);
/* Whole-picture scalers; main() picks one depending on whether the source
 * and destination widths are multiples of 32 (w32) or not (w16) */
void scale_srcw16_dstw16();
void scale_srcw16_dstw32();
void scale_srcw32_dstw16();
void scale_srcw32_dstw32();
/*
 * Entry point of the bilinear scaler SPU program.
 *
 * argp is the main-memory address of a struct scale_parms_t; it is fetched
 * into the local `parms` via DMA. The scaling routine is then chosen by
 * whether the source and destination widths are multiples of 32.
 * Always returns 0.
 */
int main( unsigned long long spe_id __attribute__((unused)), unsigned long long argp )
{
    deprintf("[SPU] bilin_scaler_spu is up... (on SPE #%llu)\n", spe_id);

    /* DMA transfer for the input parameters */
    spu_mfcdma32(&parms, (unsigned int)argp, sizeof(struct scale_parms_t), TAG_INIT, MFC_GET_CMD);
    DMA_WAIT_TAG(TAG_INIT);

    deprintf("[SPU] Scale %ux%u to %ux%u\n", parms.src_pixel_width, parms.src_pixel_height,
             parms.dst_pixel_width, parms.dst_pixel_height);

    /* Non-zero low five bits mean the width is not a multiple of 32 */
    const unsigned int src_odd_width = parms.src_pixel_width & 0x1f;
    const unsigned int dst_odd_width = parms.dst_pixel_width & 0x1F;

    if (src_odd_width && dst_odd_width) {
        deprintf("[SPU] Using scale_srcw16_dstw16\n");
        scale_srcw16_dstw16();
    } else if (src_odd_width) {
        deprintf("[SPU] Using scale_srcw16_dstw32\n");
        scale_srcw16_dstw32();
    } else if (dst_odd_width) {
        deprintf("[SPU] Using scale_srcw32_dstw16\n");
        scale_srcw32_dstw16();
    } else {
        deprintf("[SPU] Using scale_srcw32_dstw32\n");
        scale_srcw32_dstw32();
    }

    deprintf("[SPU] bilin_scaler_spu... done!\n");
    return 0;
}
/*
 * vfloat_to_vuint()
 *
 * converts a float vector to an unsigned int vector using saturated
 * arithmetic: every lane is first clamped to the range [0.1, 255.0]
 * (vec_0_1 / vec_255) and then converted with spu_convtu (scale 0)
 *
 * @param vec_s float vector for conversion
 * @returns converted unsigned int vector
 */
inline static vector unsigned int vfloat_to_vuint(vector float vec_s) {
    /* lanes below the lower bound are replaced by the lower bound */
    vector unsigned int below_floor = spu_cmpgt(vec_0_1, vec_s);
    vector float floored = spu_sel(vec_s, vec_0_1, below_floor);

    /* lanes above the upper bound are replaced by the upper bound */
    vector unsigned int above_ceiling = spu_cmpgt(floored, vec_255);
    vector float clamped = spu_sel(floored, vec_255, above_ceiling);

    return spu_convtu(clamped,0);
}
/*
* scale_srcw16_dstw16()
*
* processes an input image of width 16
* scaling is done to a width 16
* result stored in RAM
*/
void scale_srcw16_dstw16() {
// extract parameters
unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
unsigned int src_width = parms.src_pixel_width;
unsigned int src_height = parms.src_pixel_height;
unsigned int dst_width = parms.dst_pixel_width;
unsigned int dst_height = parms.dst_pixel_height;
// YVU
unsigned int src_linestride_y = src_width;
unsigned int src_dbl_linestride_y = src_width<<1;
unsigned int src_linestride_vu = src_width>>1;
unsigned int src_dbl_linestride_vu = src_width;
// scaled YVU
unsigned int scaled_src_linestride_y = dst_width;
// ram addresses
unsigned char* src_addr_y = parms.y_plane;
unsigned char* src_addr_v = parms.v_plane;
unsigned char* src_addr_u = parms.u_plane;
// for handling misalignment, addresses are precalculated
unsigned char* precalc_src_addr_v = src_addr_v;
unsigned char* precalc_src_addr_u = src_addr_u;
unsigned int dst_picture_size = dst_width*dst_height;
// Sizes for destination
unsigned int dst_dbl_linestride_y = dst_width<<1;
unsigned int dst_dbl_linestride_vu = dst_width>>1;
// Perform address calculation for Y, V and U in main memory with dst_addr as base
unsigned char* dst_addr_main_memory_y = dst_addr;
unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
// calculate scale factors
vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
float y_scale = (float)src_height/(float)dst_height;
// double buffered processing
// buffer switching
unsigned int curr_src_idx = 0;
unsigned int curr_dst_idx = 0;
unsigned int next_src_idx, next_dst_idx;
// 2 lines y as output, upper and lowerline
unsigned int curr_interpl_y_upper = 0;
unsigned int next_interpl_y_upper;
unsigned int curr_interpl_y_lower, next_interpl_y_lower;
// only 1 line v/u output, both planes have the same dimension
unsigned int curr_interpl_vu = 0;
unsigned int next_interpl_vu;
// weights, calculated in every loop iteration
vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
vector float vf_next_NSweight_y_upper;
vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
vector float vf_next_NSweight_vu;
// line indices for the src picture
float curr_src_y_upper = 0.0f, next_src_y_upper;
float curr_src_y_lower, next_src_y_lower;
float curr_src_vu = 0.0f, next_src_vu;
// line indices for the dst picture
unsigned int dst_y=0, dst_vu=0;
// offset for the v and u plane to handle misalignement
unsigned int curr_lsoff_v = 0, next_lsoff_v;
unsigned int curr_lsoff_u = 0, next_lsoff_u;
// calculate lower line indices
curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
// lower line weight
vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
// start partially double buffered processing
// get initial data, 2 sets of y, 1 set v, 1 set u
mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
(unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
src_dbl_linestride_y,
RETR_BUF,
0, 0 );
mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
/* iteration loop
* within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
* the scaled output is 2 lines y, 1 line v, 1 line u
* the yuv2rgb-converted output is stored to RAM
*/
for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
dst_y = dst_vu<<1;
// calculate next indices
next_src_vu = ((float)dst_vu+1)*y_scale;
next_src_y_upper = ((float)dst_y+2)*y_scale;
next_src_y_lower = ((float)dst_y+3)*y_scale;
next_interpl_vu = (unsigned int) next_src_vu;
next_interpl_y_upper = (unsigned int) next_src_y_upper;
next_interpl_y_lower = (unsigned int) next_src_y_lower;
// calculate weight NORTH-SOUTH
vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
// get next lines
next_src_idx = curr_src_idx^1;
next_dst_idx = curr_dst_idx^1;
// 4 lines y
mfc_get( y_plane[next_src_idx],
(unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
src_dbl_linestride_y,
RETR_BUF+next_src_idx,
0, 0 );
mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
(unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
src_dbl_linestride_y,
RETR_BUF+next_src_idx,
0, 0 );
// 2 lines v
precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu);
next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F;
mfc_get( v_plane[next_src_idx],
((unsigned int) precalc_src_addr_v)&0xFFFFFFF0,
src_dbl_linestride_vu+(next_lsoff_v<<1),
RETR_BUF+next_src_idx,
0, 0 );
// 2 lines u
precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu);
next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F;
mfc_get( u_plane[next_src_idx],
((unsigned int) precalc_src_addr_u)&0xFFFFFFF0,
src_dbl_linestride_vu+(next_lsoff_v<<1),
RETR_BUF+next_src_idx,
0, 0 );
DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
// scaling
// work line y_upper
bilinear_scale_line_w16( y_plane[curr_src_idx],
scaled_y_plane[curr_src_idx],
dst_width,
vf_x_scale,
vf_curr_NSweight_y_upper,
src_linestride_y );
// work line y_lower
bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
dst_width,
vf_x_scale,
vf_curr_NSweight_y_lower,
src_linestride_y );
// work line v
bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
scaled_v_plane[curr_src_idx],
dst_width>>1,
vf_x_scale,
vf_curr_NSweight_vu,
src_linestride_vu );
// work line u
bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
scaled_u_plane[curr_src_idx],
dst_width>>1,
vf_x_scale,
vf_curr_NSweight_vu,
src_linestride_vu );
// Store the result back to main memory into a destination buffer in YUV format
//---------------------------------------------------------------------------------------------
DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
// Perform three DMA transfers to 3 different locations in the main memory!
// dst_width: Pixel width of destination image
// dst_addr: Destination address in main memory
// dst_vu: Counter which is incremented one by one
// dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
(unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
STR_BUF+curr_dst_idx, // Tag
0, 0 );
mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
(unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
STR_BUF+curr_dst_idx, // Tag
0, 0 );
mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
(unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
STR_BUF+curr_dst_idx, // Tag
0, 0 );
//---------------------------------------------------------------------------------------------
// update for next cycle
curr_src_idx = next_src_idx;
curr_dst_idx = next_dst_idx;
curr_interpl_y_upper = next_interpl_y_upper;
curr_interpl_y_lower = next_interpl_y_lower;
curr_interpl_vu = next_interpl_vu;
vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper;
vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower;
vf_curr_NSweight_vu = vf_next_NSweight_vu;
curr_src_y_upper = next_src_y_upper;
curr_src_y_lower = next_src_y_lower;
curr_src_vu = next_src_vu;
curr_lsoff_v = next_lsoff_v;
curr_lsoff_u = next_lsoff_u;
}
DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
// scaling
// work line y_upper
bilinear_scale_line_w16( y_plane[curr_src_idx],
scaled_y_plane[curr_src_idx],
dst_width,
vf_x_scale,
vf_curr_NSweight_y_upper,
src_linestride_y );
// work line y_lower
bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
dst_width,
vf_x_scale,
vf_curr_NSweight_y_lower,
src_linestride_y );
// work line v
bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
scaled_v_plane[curr_src_idx],
dst_width>>1,
vf_x_scale,
vf_curr_NSweight_vu,
src_linestride_vu );
// work line u
bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
scaled_u_plane[curr_src_idx],
dst_width>>1,
vf_x_scale,
vf_curr_NSweight_vu,
src_linestride_vu );
// Store the result back to main memory into a destination buffer in YUV format
//---------------------------------------------------------------------------------------------
DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
// Perform three DMA transfers to 3 different locations in the main memory!
// dst_width: Pixel width of destination image
// dst_addr: Destination address in main memory
// dst_vu: Counter which is incremented one by one
// dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
(unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
STR_BUF+curr_dst_idx, // Tag
0, 0 );
mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
(unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
STR_BUF+curr_dst_idx, // Tag
0, 0 );
mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
(unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
STR_BUF+curr_dst_idx, // Tag
0, 0 );
// wait for completion
DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
//---------------------------------------------------------------------------------------------
}
/*
 * scale_srcw16_dstw32()
 *
 * processes an input image of width 16
 * scaling is done to a width 32
 *
 * Reads the picture geometry and plane addresses from the global `parms`,
 * scales the planar Y/V/U source bilinearly and stores the scaled planes to
 * parms.dstBuffer in main memory. No colour conversion is done here; the
 * output stays YVU:
 *
 *   Y plane at dstBuffer
 *   V plane at dstBuffer + dst_width*dst_height
 *   U plane at V plane   + (dst_width*dst_height)/4
 *
 * Every pass produces two luma lines and one line each of V and U. While the
 * current set of source lines is being scaled, the set for the next pass is
 * already being fetched into the other local-store buffer.
 *
 * Returns without touching memory when dst_width is 0 or dst_height < 2,
 * because no complete pair of output lines exists in that case.
 */
void scale_srcw16_dstw32() {
    // extract parameters
    unsigned char *dst_addr = (unsigned char *)parms.dstBuffer;
    unsigned int src_width = parms.src_pixel_width;
    unsigned int src_height = parms.src_pixel_height;
    unsigned int dst_width = parms.dst_pixel_width;
    unsigned int dst_height = parms.dst_pixel_height;

    // source YVU, sizes in bytes ("dbl" = two consecutive lines)
    unsigned int src_linestride_y = src_width;
    unsigned int src_dbl_linestride_y = src_width<<1;
    unsigned int src_linestride_vu = src_width>>1;
    unsigned int src_dbl_linestride_vu = src_width;

    // scaled YVU
    unsigned int scaled_src_linestride_y = dst_width;

    // ram addresses
    unsigned char *src_addr_y = parms.y_plane;
    unsigned char *src_addr_v = parms.v_plane;
    unsigned char *src_addr_u = parms.u_plane;
    unsigned int dst_picture_size = dst_width*dst_height;

    // Sizes for destination
    // two luma lines per pass ...
    unsigned int dst_dbl_linestride_y = dst_width<<1;
    // ... but, despite the name, only ONE chroma line (dst_width/2 bytes)
    unsigned int dst_dbl_linestride_vu = dst_width>>1;

    // Perform address calculation for Y, V and U in main memory with dst_addr as base
    unsigned char *dst_addr_main_memory_y = dst_addr;
    unsigned char *dst_addr_main_memory_v = dst_addr + dst_picture_size;
    unsigned char *dst_addr_main_memory_u = dst_addr_main_memory_v + (dst_picture_size>>2);

    // for handling misalignment, addresses are precalculated
    unsigned char *precalc_src_addr_v = src_addr_v;
    unsigned char *precalc_src_addr_u = src_addr_u;

    // number of passes (= chroma lines of the destination)
    unsigned int dst_rows_vu = dst_height>>1;

    // scale factors, filled in once the geometry has been validated
    vector float vf_x_scale;
    float y_scale;

    // double buffered processing
    // buffer switching
    unsigned int curr_src_idx = 0;
    unsigned int curr_dst_idx = 0;
    unsigned int next_src_idx = 0, next_dst_idx = 0;

    // first source line used for the lower luma output line
    unsigned int curr_interpl_y_lower;
    unsigned int next_interpl_y_upper = 0, next_interpl_y_lower = 0;
    // only 1 line v/u output, both planes have the same dimension
    unsigned int next_interpl_vu = 0;

    // NORTH-SOUTH weights: "curr" belongs to the buffer being scaled,
    // "next" to the buffer being fetched
    vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
    vector float vf_next_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
    vector float vf_curr_NSweight_y_lower;
    vector float vf_next_NSweight_y_lower = { 0.0f, 0.0f, 0.0f, 0.0f };
    vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
    vector float vf_next_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };

    // fractional line positions in the src picture
    float curr_src_y_lower;
    float next_src_y_upper, next_src_y_lower, next_src_vu;

    // line indices for the dst picture
    unsigned int dst_y = 0, dst_vu = 0;

    // byte offset of the chroma data inside the 16-byte aligned fetch
    // (0 for the first fetch: assumes the V/U plane bases are 16-byte
    // aligned -- TODO confirm against the caller)
    unsigned int curr_lsoff_v = 0, next_lsoff_v = 0;
    unsigned int curr_lsoff_u = 0, next_lsoff_u = 0;

    // Without at least one pair of output lines there is nothing to do.
    // This also keeps the divisions below away from zero and prevents the
    // unsigned pass count from wrapping around.
    if( dst_width == 0 || dst_rows_vu == 0 ) {
        return;
    }

    // calculate scale factors
    vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
    y_scale = (float)src_height/(float)dst_height;

    // destination line 1 (lower line of the first pass): source position,
    // first contributing source line and its weight
    curr_src_y_lower = y_scale;
    curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
    vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );

    // start partially double buffered processing
    // get initial data, 2 sets of y, 1 set v, 1 set u
    mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
    mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
             (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
             src_dbl_linestride_y,
             RETR_BUF,
             0, 0 );
    mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
    mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );

    // iteration loop
    // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
    // the scaled output is 2 lines y, 1 line v, 1 line u
    // the scaled output is stored to RAM
    for( dst_vu=0; ; dst_vu++ ) {
        // the last pass has nothing left to prefetch
        int has_next = ( dst_vu+1 < dst_rows_vu );

        if( has_next ) {
            dst_y = dst_vu<<1;

            // calculate next indices
            next_src_vu = ((float)dst_vu+1)*y_scale;
            next_src_y_upper = ((float)dst_y+2)*y_scale;
            next_src_y_lower = ((float)dst_y+3)*y_scale;

            next_interpl_vu = (unsigned int) next_src_vu;
            next_interpl_y_upper = (unsigned int) next_src_y_upper;
            next_interpl_y_lower = (unsigned int) next_src_y_lower;

            // calculate weight NORTH-SOUTH
            vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
            vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
            vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );

            // get next lines
            next_src_idx = curr_src_idx^1;
            next_dst_idx = curr_dst_idx^1;

            // 4 lines y
            mfc_get( y_plane[next_src_idx],
                     (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
                     src_dbl_linestride_y,
                     RETR_BUF+next_src_idx,
                     0, 0 );
            mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
                     (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
                     src_dbl_linestride_y,
                     RETR_BUF+next_src_idx,
                     0, 0 );

            // 2 lines v
            // The fetch starts at the address rounded down to 16 bytes; the
            // dropped low bits are remembered as the local-store offset and
            // the transfer is enlarged by twice that offset.
            precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu);
            next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F;
            mfc_get( v_plane[next_src_idx],
                     ((unsigned int) precalc_src_addr_v)&0xFFFFFFF0,
                     src_dbl_linestride_vu+(next_lsoff_v<<1),
                     RETR_BUF+next_src_idx,
                     0, 0 );

            // 2 lines u
            // sized with the U plane's own offset, which need not equal V's
            precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu);
            next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F;
            mfc_get( u_plane[next_src_idx],
                     ((unsigned int) precalc_src_addr_u)&0xFFFFFFF0,
                     src_dbl_linestride_vu+(next_lsoff_u<<1),
                     RETR_BUF+next_src_idx,
                     0, 0 );
        }

        // the current source lines must have arrived before they are scaled
        DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );

        // scaling
        // work line y_upper
        bilinear_scale_line_w16( y_plane[curr_src_idx],
                                 scaled_y_plane[curr_src_idx],
                                 dst_width,
                                 vf_x_scale,
                                 vf_curr_NSweight_y_upper,
                                 src_linestride_y );
        // work line y_lower
        bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
                                 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
                                 dst_width,
                                 vf_x_scale,
                                 vf_curr_NSweight_y_lower,
                                 src_linestride_y );
        // work line v
        bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
                                scaled_v_plane[curr_src_idx],
                                dst_width>>1,
                                vf_x_scale,
                                vf_curr_NSweight_vu,
                                src_linestride_vu );
        // work line u
        bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
                                scaled_u_plane[curr_src_idx],
                                dst_width>>1,
                                vf_x_scale,
                                vf_curr_NSweight_vu,
                                src_linestride_vu );

        // Store the result back to main memory into a destination buffer in YUV format
        //---------------------------------------------------------------------------------------------
        // wait for the earlier store that used this tag before queueing new ones on it
        DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );

        // Perform three DMA transfers to 3 different locations in the main memory!
        // dst_vu: index of the output chroma line / output luma line pair
        mfc_put( scaled_y_plane[curr_src_idx],                                          // What from local store (addr)
                 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
                 dst_dbl_linestride_y,                                                  // Two Y lines of the destination width
                 STR_BUF+curr_dst_idx,                                                  // Tag
                 0, 0 );
        mfc_put( scaled_v_plane[curr_src_idx],                                           // What from local store (addr)
                 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
                 dst_dbl_linestride_vu,                                                  // One V line (dst_width/2 bytes)
                 STR_BUF+curr_dst_idx,                                                   // Tag
                 0, 0 );
        mfc_put( scaled_u_plane[curr_src_idx],                                           // What from local store (addr)
                 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
                 dst_dbl_linestride_vu,                                                  // One U line (dst_width/2 bytes)
                 STR_BUF+curr_dst_idx,                                                   // Tag
                 0, 0 );
        //---------------------------------------------------------------------------------------------

        if( !has_next ) {
            break;
        }

        // update for next cycle: the prefetched set becomes the current one,
        // together with the weights and offsets that were computed for it
        curr_src_idx = next_src_idx;
        curr_dst_idx = next_dst_idx;

        vf_curr_NSweight_y_upper = vf_next_NSweight_y_upper;
        vf_curr_NSweight_y_lower = vf_next_NSweight_y_lower;
        vf_curr_NSweight_vu = vf_next_NSweight_vu;

        curr_lsoff_v = next_lsoff_v;
        curr_lsoff_u = next_lsoff_u;
    }

    // wait for completion of the last store
    // NOTE(review): only the tag of the final pass is waited for here; the
    // store of the pass before it was issued on the other STR_BUF tag --
    // confirm that the caller synchronises on it.
    DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
    //---------------------------------------------------------------------------------------------
}
/*
 * scale_srcw32_dstw16()
 *
 * processes an input image of width 32
 * scaling is done to a width 16
 *
 * Reads the picture geometry and plane addresses from the global `parms`,
 * scales the planar Y/V/U source bilinearly and stores the scaled planes to
 * parms.dstBuffer in main memory. No colour conversion is done here; the
 * output stays YVU:
 *
 *   Y plane at dstBuffer
 *   V plane at dstBuffer + dst_width*dst_height
 *   U plane at V plane   + (dst_width*dst_height)/4
 *
 * Every pass produces two luma lines and one line each of V and U. While the
 * current set of source lines is being scaled, the set for the next pass is
 * already being fetched into the other local-store buffer.
 *
 * Returns without touching memory when dst_width is 0 or dst_height < 2,
 * because no complete pair of output lines exists in that case.
 */
void scale_srcw32_dstw16() {
    // extract parameters
    unsigned char *dst_addr = (unsigned char *)parms.dstBuffer;
    unsigned int src_width = parms.src_pixel_width;
    unsigned int src_height = parms.src_pixel_height;
    unsigned int dst_width = parms.dst_pixel_width;
    unsigned int dst_height = parms.dst_pixel_height;

    // source YVU, sizes in bytes ("dbl" = two consecutive lines)
    unsigned int src_linestride_y = src_width;
    unsigned int src_dbl_linestride_y = src_width<<1;
    unsigned int src_linestride_vu = src_width>>1;
    unsigned int src_dbl_linestride_vu = src_width;

    // scaled YVU
    unsigned int scaled_src_linestride_y = dst_width;

    // ram addresses
    unsigned char *src_addr_y = parms.y_plane;
    unsigned char *src_addr_v = parms.v_plane;
    unsigned char *src_addr_u = parms.u_plane;
    unsigned int dst_picture_size = dst_width*dst_height;

    // Sizes for destination
    // two luma lines per pass ...
    unsigned int dst_dbl_linestride_y = dst_width<<1;
    // ... but, despite the name, only ONE chroma line (dst_width/2 bytes)
    unsigned int dst_dbl_linestride_vu = dst_width>>1;

    // Perform address calculation for Y, V and U in main memory with dst_addr as base
    unsigned char *dst_addr_main_memory_y = dst_addr;
    unsigned char *dst_addr_main_memory_v = dst_addr + dst_picture_size;
    unsigned char *dst_addr_main_memory_u = dst_addr_main_memory_v + (dst_picture_size>>2);

    // number of passes (= chroma lines of the destination)
    unsigned int dst_rows_vu = dst_height>>1;

    // scale factors, filled in once the geometry has been validated
    vector float vf_x_scale;
    float y_scale;

    // double buffered processing
    // buffer switching
    unsigned int curr_src_idx = 0;
    unsigned int curr_dst_idx = 0;
    unsigned int next_src_idx = 0, next_dst_idx = 0;

    // first source line used for the lower luma output line
    unsigned int curr_interpl_y_lower;
    unsigned int next_interpl_y_upper = 0, next_interpl_y_lower = 0;
    // only 1 line v/u output, both planes have the same dimension
    unsigned int next_interpl_vu = 0;

    // NORTH-SOUTH weights: "curr" belongs to the buffer being scaled,
    // "next" to the buffer being fetched
    vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
    vector float vf_next_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
    vector float vf_curr_NSweight_y_lower;
    vector float vf_next_NSweight_y_lower = { 0.0f, 0.0f, 0.0f, 0.0f };
    vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
    vector float vf_next_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };

    // fractional line positions in the src picture
    float curr_src_y_lower;
    float next_src_y_upper, next_src_y_lower, next_src_vu;

    // line indices for the dst picture
    unsigned int dst_y = 0, dst_vu = 0;

    // Without at least one pair of output lines there is nothing to do.
    // This also keeps the divisions below away from zero and prevents the
    // unsigned pass count from wrapping around.
    if( dst_width == 0 || dst_rows_vu == 0 ) {
        return;
    }

    // calculate scale factors
    vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
    y_scale = (float)src_height/(float)dst_height;

    // destination line 1 (lower line of the first pass): source position,
    // first contributing source line and its weight
    curr_src_y_lower = y_scale;
    curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
    vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );

    // start partially double buffered processing
    // get initial data, 2 sets of y, 1 set v, 1 set u
    mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
    mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
             (unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
             src_dbl_linestride_y,
             RETR_BUF,
             0, 0 );
    mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
    mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );

    // iteration loop
    // within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
    // the scaled output is 2 lines y, 1 line v, 1 line u
    // the scaled output is stored to RAM
    for( dst_vu=0; ; dst_vu++ ) {
        // the last pass has nothing left to prefetch
        int has_next = ( dst_vu+1 < dst_rows_vu );

        if( has_next ) {
            dst_y = dst_vu<<1;

            // calculate next indices
            next_src_vu = ((float)dst_vu+1)*y_scale;
            next_src_y_upper = ((float)dst_y+2)*y_scale;
            next_src_y_lower = ((float)dst_y+3)*y_scale;

            next_interpl_vu = (unsigned int) next_src_vu;
            next_interpl_y_upper = (unsigned int) next_src_y_upper;
            next_interpl_y_lower = (unsigned int) next_src_y_lower;

            // calculate weight NORTH-SOUTH
            vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
            vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
            vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );

            // get next lines
            next_src_idx = curr_src_idx^1;
            next_dst_idx = curr_dst_idx^1;

            // 4 lines y
            mfc_get( y_plane[next_src_idx],
                     (unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
                     src_dbl_linestride_y,
                     RETR_BUF+next_src_idx,
                     0, 0 );
            mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
                     (unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
                     src_dbl_linestride_y,
                     RETR_BUF+next_src_idx,
                     0, 0 );

            // 2 lines v
            mfc_get( v_plane[next_src_idx],
                     (unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu),
                     src_dbl_linestride_vu,
                     RETR_BUF+next_src_idx,
                     0, 0 );

            // 2 lines u
            mfc_get( u_plane[next_src_idx],
                     (unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu),
                     src_dbl_linestride_vu,
                     RETR_BUF+next_src_idx,
                     0, 0 );
        }

        // the current source lines must have arrived before they are scaled
        DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );

        // scaling
        // work line y_upper
        bilinear_scale_line_w16( y_plane[curr_src_idx],
                                 scaled_y_plane[curr_src_idx],
                                 dst_width,
                                 vf_x_scale,
                                 vf_curr_NSweight_y_upper,
                                 src_linestride_y );
        // work line y_lower
        bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
                                 scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
                                 dst_width,
                                 vf_x_scale,
                                 vf_curr_NSweight_y_lower,
                                 src_linestride_y );
        // work line v
        bilinear_scale_line_w16( v_plane[curr_src_idx],
                                 scaled_v_plane[curr_src_idx],
                                 dst_width>>1,
                                 vf_x_scale,
                                 vf_curr_NSweight_vu,
                                 src_linestride_vu );
        // work line u
        bilinear_scale_line_w16( u_plane[curr_src_idx],
                                 scaled_u_plane[curr_src_idx],
                                 dst_width>>1,
                                 vf_x_scale,
                                 vf_curr_NSweight_vu,
                                 src_linestride_vu );

        // Store the result back to main memory into a destination buffer in YUV format
        //---------------------------------------------------------------------------------------------
        // wait for the earlier store that used this tag before queueing new ones on it
        DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );

        // Perform three DMA transfers to 3 different locations in the main memory!
        // dst_vu: index of the output chroma line / output luma line pair
        mfc_put( scaled_y_plane[curr_src_idx],                                          // What from local store (addr)
                 (unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
                 dst_dbl_linestride_y,                                                  // Two Y lines of the destination width
                 STR_BUF+curr_dst_idx,                                                  // Tag
                 0, 0 );
        mfc_put( scaled_v_plane[curr_src_idx],                                           // What from local store (addr)
                 (unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
                 dst_dbl_linestride_vu,                                                  // One V line (dst_width/2 bytes)
                 STR_BUF+curr_dst_idx,                                                   // Tag
                 0, 0 );
        mfc_put( scaled_u_plane[curr_src_idx],                                           // What from local store (addr)
                 (unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
                 dst_dbl_linestride_vu,                                                  // One U line (dst_width/2 bytes)
                 STR_BUF+curr_dst_idx,                                                   // Tag
                 0, 0 );
        //---------------------------------------------------------------------------------------------

        if( !has_next ) {
            break;
        }

        // update for next cycle: the prefetched set becomes the current one,
        // together with the weights that were computed for it
        curr_src_idx = next_src_idx;
        curr_dst_idx = next_dst_idx;

        vf_curr_NSweight_y_upper = vf_next_NSweight_y_upper;
        vf_curr_NSweight_y_lower = vf_next_NSweight_y_lower;
        vf_curr_NSweight_vu = vf_next_NSweight_vu;
    }

    // wait for completion of the last store
    // NOTE(review): only the tag of the final pass is waited for here; the
    // store of the pass before it was issued on the other STR_BUF tag --
    // confirm that the caller synchronises on it.
    DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
    //---------------------------------------------------------------------------------------------
}
/**
* scale_srcw32_dstw32()
*
* processes an input image of width 32
* scaling is done to a width 32
* yuv2rgb conversion on a width of 32
* result stored in RAM
*/
void scale_srcw32_dstw32() {
// extract parameters
unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
unsigned int src_width = parms.src_pixel_width;
unsigned int src_height = parms.src_pixel_height;
unsigned int dst_width = parms.dst_pixel_width;
unsigned int dst_height = parms.dst_pixel_height;
// YVU
unsigned int src_linestride_y = src_width;
unsigned int src_dbl_linestride_y = src_width<<1;
unsigned int src_linestride_vu = src_width>>1;
unsigned int src_dbl_linestride_vu = src_width;
// scaled YVU
unsigned int scaled_src_linestride_y = dst_width;
// ram addresses
unsigned char* src_addr_y = parms.y_plane;
unsigned char* src_addr_v = parms.v_plane;
unsigned char* src_addr_u = parms.u_plane;
unsigned int dst_picture_size = dst_width*dst_height;
// Sizes for destination
unsigned int dst_dbl_linestride_y = dst_width<<1;
unsigned int dst_dbl_linestride_vu = dst_width>>1;
// Perform address calculation for Y, V and U in main memory with dst_addr as base
unsigned char* dst_addr_main_memory_y = dst_addr;
unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
// calculate scale factors
vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
float y_scale = (float)src_height/(float)dst_height;
// double buffered processing
// buffer switching
unsigned int curr_src_idx = 0;
unsigned int curr_dst_idx = 0;
unsigned int next_src_idx, next_dst_idx;
// 2 lines y as output, upper and lowerline
unsigned int curr_interpl_y_upper = 0;
unsigned int next_interpl_y_upper;
unsigned int curr_interpl_y_lower, next_interpl_y_lower;
// only 1 line v/u output, both planes have the same dimension
unsigned int curr_interpl_vu = 0;
unsigned int next_interpl_vu;
// weights, calculated in every loop iteration
vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
vector float vf_next_NSweight_y_upper;
vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
vector float vf_next_NSweight_vu;
// line indices for the src picture
float curr_src_y_upper = 0.0f, next_src_y_upper;
float curr_src_y_lower, next_src_y_lower;
float curr_src_vu = 0.0f, next_src_vu;
// line indices for the dst picture
unsigned int dst_y=0, dst_vu=0;
// calculate lower line idices
curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
// lower line weight
vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
// start partially double buffered processing
// get initial data, 2 sets of y, 1 set v, 1 set u
mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
(unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
src_dbl_linestride_y,
RETR_BUF,
0, 0 );
mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
// iteration loop
// within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
// the scaled output is 2 lines y, 1 line v, 1 line u
// the yuv2rgb-converted output is stored to RAM
for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
dst_y = dst_vu<<1;
// calculate next indices
next_src_vu = ((float)dst_vu+1)*y_scale;
next_src_y_upper = ((float)dst_y+2)*y_scale;
next_src_y_lower = ((float)dst_y+3)*y_scale;
next_interpl_vu = (unsigned int) next_src_vu;
next_interpl_y_upper = (unsigned int) next_src_y_upper;
next_interpl_y_lower = (unsigned int) next_src_y_lower;
// calculate weight NORTH-SOUTH
vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
// get next lines
next_src_idx = curr_src_idx^1;
next_dst_idx = curr_dst_idx^1;
// 4 lines y
mfc_get( y_plane[next_src_idx],
(unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
src_dbl_linestride_y,
RETR_BUF+next_src_idx,
0, 0 );
mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
(unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
src_dbl_linestride_y,
RETR_BUF+next_src_idx,
0, 0 );
// 2 lines v
mfc_get( v_plane[next_src_idx],
(unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu),
src_dbl_linestride_vu,
RETR_BUF+next_src_idx,
0, 0 );
// 2 lines u
mfc_get( u_plane[next_src_idx],
(unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu),
src_dbl_linestride_vu,
RETR_BUF+next_src_idx,
0, 0 );
DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
// scaling
// work line y_upper
bilinear_scale_line_w16( y_plane[curr_src_idx],
scaled_y_plane[curr_src_idx],
dst_width,
vf_x_scale,
vf_curr_NSweight_y_upper,
src_linestride_y );
// work line y_lower
bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
dst_width,
vf_x_scale,
vf_curr_NSweight_y_lower,
src_linestride_y );
// work line v
bilinear_scale_line_w16( v_plane[curr_src_idx],
scaled_v_plane[curr_src_idx],
dst_width>>1,
vf_x_scale,
vf_curr_NSweight_vu,
src_linestride_vu );
// work line u
bilinear_scale_line_w16( u_plane[curr_src_idx],
scaled_u_plane[curr_src_idx],
dst_width>>1,
vf_x_scale,
vf_curr_NSweight_vu,
src_linestride_vu );
// Store the result back to main memory into a destination buffer in YUV format
//---------------------------------------------------------------------------------------------
DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
// Perform three DMA transfers to 3 different locations in the main memory!
// dst_width: Pixel width of destination image
// dst_addr: Destination address in main memory
// dst_vu: Counter which is incremented one by one
// dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
(unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
STR_BUF+curr_dst_idx, // Tag
0, 0 );
mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
STR_BUF+curr_dst_idx, // Tag
0, 0 );
mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
(unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
STR_BUF+curr_dst_idx, // Tag
0, 0 );
//---------------------------------------------------------------------------------------------
// update for next cycle
curr_src_idx = next_src_idx;
curr_dst_idx = next_dst_idx;
curr_interpl_y_upper = next_interpl_y_upper;
curr_interpl_y_lower = next_interpl_y_lower;
curr_interpl_vu = next_interpl_vu;
vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper;
vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower;
vf_curr_NSweight_vu = vf_next_NSweight_vu;
curr_src_y_upper = next_src_y_upper;
curr_src_y_lower = next_src_y_lower;
curr_src_vu = next_src_vu;
}
DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
// scaling
// work line y_upper
bilinear_scale_line_w16( y_plane[curr_src_idx],
scaled_y_plane[curr_src_idx],
dst_width,
vf_x_scale,
vf_curr_NSweight_y_upper,
src_linestride_y );
// work line y_lower
bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
dst_width,
vf_x_scale,
vf_curr_NSweight_y_lower,
src_linestride_y );
// work line v
bilinear_scale_line_w16( v_plane[curr_src_idx],
scaled_v_plane[curr_src_idx],
dst_width>>1,
vf_x_scale,
vf_curr_NSweight_vu,
src_linestride_vu );
// work line u
bilinear_scale_line_w16( u_plane[curr_src_idx],
scaled_u_plane[curr_src_idx],
dst_width>>1,
vf_x_scale,
vf_curr_NSweight_vu,
src_linestride_vu );
// Store the result back to main memory into a destination buffer in YUV format
//---------------------------------------------------------------------------------------------
DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
// Perform three DMA transfers to 3 different locations in the main memory!
// dst_width: Pixel width of destination image
// dst_addr: Destination address in main memory
// dst_vu: Counter which is incremented one by one
// dst_y: Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
mfc_put( scaled_y_plane[curr_src_idx], // What from local store (addr)
(unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y), // Destination in main memory (addr)
dst_dbl_linestride_y, // Two Y lines (depending on the widht of the destination resolution)
STR_BUF+curr_dst_idx, // Tag
0, 0 );
mfc_put( scaled_v_plane[curr_src_idx], // What from local store (addr)
(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
dst_dbl_linestride_vu, // Two V lines (depending on the widht of the destination resolution)
STR_BUF+curr_dst_idx, // Tag
0, 0 );
mfc_put( scaled_u_plane[curr_src_idx], // What from local store (addr)
(unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
dst_dbl_linestride_vu, // Two U lines (depending on the widht of the destination resolution)
STR_BUF+curr_dst_idx, // Tag
0, 0 );
// wait for completion
DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
//---------------------------------------------------------------------------------------------
}
/*
* bilinear_scale_line_w8()
*
* processes a line of yuv-input, width has to be a multiple of 8
* scaled yuv-output is written to local store buffer
*
* @param src buffer for 2 lines input
* @param dst_ buffer for 1 line output
* @param dst_width the width of the destination line
* @param vf_x_scale a float vector, at each entry is the x_scale-factor
* @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line
* @param src_linestride the stride of the srcline
*/
void bilinear_scale_line_w8( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) {
unsigned char* dst = dst_;
unsigned int dst_x;
for( dst_x=0; dst_x<dst_width; dst_x+=8) {
// address calculation for loading the 4 surrounding pixel of each calculated
// destination pixel
vector unsigned int vui_dst_x_tmp = spu_splats( dst_x );
// lower range->first 4 pixel
// upper range->next 4 pixel
vector unsigned int vui_inc_dst_x_lower_range = { 0, 1, 2, 3 };
vector unsigned int vui_inc_dst_x_upper_range = { 4, 5, 6, 7 };
vector unsigned int vui_dst_x_lower_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_lower_range );
vector unsigned int vui_dst_x_upper_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_upper_range );
// calculate weight EAST-WEST
vector float vf_dst_x_lower_range = spu_convtf( vui_dst_x_lower_range, 0 );
vector float vf_dst_x_upper_range = spu_convtf( vui_dst_x_upper_range, 0 );
vector float vf_src_x_lower_range = spu_mul( vf_dst_x_lower_range, vf_x_scale );
vector float vf_src_x_upper_range = spu_mul( vf_dst_x_upper_range, vf_x_scale );
vector unsigned int vui_interpl_x_lower_range = spu_convtu( vf_src_x_lower_range, 0 );
vector unsigned int vui_interpl_x_upper_range = spu_convtu( vf_src_x_upper_range, 0 );
vector float vf_interpl_x_lower_range = spu_convtf( vui_interpl_x_lower_range, 0 );
vector float vf_interpl_x_upper_range = spu_convtf( vui_interpl_x_upper_range, 0 );
vector float vf_EWweight_lower_range = spu_sub( vf_src_x_lower_range, vf_interpl_x_lower_range );
vector float vf_EWweight_upper_range = spu_sub( vf_src_x_upper_range, vf_interpl_x_upper_range );
// calculate address offset
//
// pixel NORTH WEST
vector unsigned int vui_off_pixelNW_lower_range = vui_interpl_x_lower_range;
vector unsigned int vui_off_pixelNW_upper_range = vui_interpl_x_upper_range;
// pixel NORTH EAST-->(offpixelNW+1)
vector unsigned int vui_add_1 = { 1, 1, 1, 1 };
vector unsigned int vui_off_pixelNE_lower_range = spu_add( vui_off_pixelNW_lower_range, vui_add_1 );
vector unsigned int vui_off_pixelNE_upper_range = spu_add( vui_off_pixelNW_upper_range, vui_add_1 );
// SOUTH-WEST-->(offpixelNW+src_linestride)
vector unsigned int vui_srclinestride = spu_splats( src_linestride );
vector unsigned int vui_off_pixelSW_lower_range = spu_add( vui_srclinestride, vui_off_pixelNW_lower_range );
vector unsigned int vui_off_pixelSW_upper_range = spu_add( vui_srclinestride, vui_off_pixelNW_upper_range );
// SOUTH-EAST-->(offpixelNW+src_linestride+1)
vector unsigned int vui_off_pixelSE_lower_range = spu_add( vui_srclinestride, vui_off_pixelNE_lower_range );
vector unsigned int vui_off_pixelSE_upper_range = spu_add( vui_srclinestride, vui_off_pixelNE_upper_range );
// calculate each address
vector unsigned int vui_src_ls = spu_splats( (unsigned int) src );
vector unsigned int vui_addr_pixelNW_lower_range = spu_add( vui_src_ls, vui_off_pixelNW_lower_range );
vector unsigned int vui_addr_pixelNW_upper_range = spu_add( vui_src_ls, vui_off_pixelNW_upper_range );
vector unsigned int vui_addr_pixelNE_lower_range = spu_add( vui_src_ls, vui_off_pixelNE_lower_range );
vector unsigned int vui_addr_pixelNE_upper_range = spu_add( vui_src_ls, vui_off_pixelNE_upper_range );
vector unsigned int vui_addr_pixelSW_lower_range = spu_add( vui_src_ls, vui_off_pixelSW_lower_range );
vector unsigned int vui_addr_pixelSW_upper_range = spu_add( vui_src_ls, vui_off_pixelSW_upper_range );
vector unsigned int vui_addr_pixelSE_lower_range = spu_add( vui_src_ls, vui_off_pixelSE_lower_range );
vector unsigned int vui_addr_pixelSE_upper_range = spu_add( vui_src_ls, vui_off_pixelSE_upper_range );
// get each pixel
//
// scalar load, afterwards insertion into the right position
// NORTH WEST
vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
vector unsigned char vuc_pixel_NW_lower_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 0 )), null_vector, 3 );
vuc_pixel_NW_lower_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 1 )),
vuc_pixel_NW_lower_range, 7 );
vuc_pixel_NW_lower_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 2 )),
vuc_pixel_NW_lower_range, 11 );
vuc_pixel_NW_lower_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 3 )),
vuc_pixel_NW_lower_range, 15 );
vector unsigned char vuc_pixel_NW_upper_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 0 )), null_vector, 3 );
vuc_pixel_NW_upper_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 1 )),
vuc_pixel_NW_upper_range, 7 );
vuc_pixel_NW_upper_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 2 )),
vuc_pixel_NW_upper_range, 11 );
vuc_pixel_NW_upper_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 3 )),
vuc_pixel_NW_upper_range, 15 );
// NORTH EAST
vector unsigned char vuc_pixel_NE_lower_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 0 )), null_vector, 3 );
vuc_pixel_NE_lower_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 1 )),
vuc_pixel_NE_lower_range, 7 );
vuc_pixel_NE_lower_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 2 )),
vuc_pixel_NE_lower_range, 11 );
vuc_pixel_NE_lower_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 3 )),
vuc_pixel_NE_lower_range, 15 );
vector unsigned char vuc_pixel_NE_upper_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 0 )), null_vector, 3 );
vuc_pixel_NE_upper_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 1 )),
vuc_pixel_NE_upper_range, 7 );
vuc_pixel_NE_upper_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 2 )),
vuc_pixel_NE_upper_range, 11 );
vuc_pixel_NE_upper_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 3 )),
vuc_pixel_NE_upper_range, 15 );
// SOUTH WEST
vector unsigned char vuc_pixel_SW_lower_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 0 )), null_vector, 3 );
vuc_pixel_SW_lower_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 1 )),
vuc_pixel_SW_lower_range, 7 );
vuc_pixel_SW_lower_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 2 )),
vuc_pixel_SW_lower_range, 11 );
vuc_pixel_SW_lower_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 3 )),
vuc_pixel_SW_lower_range, 15 );
vector unsigned char vuc_pixel_SW_upper_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 0 )), null_vector, 3 );
vuc_pixel_SW_upper_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 1 )),
vuc_pixel_SW_upper_range, 7 );
vuc_pixel_SW_upper_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 2 )),
vuc_pixel_SW_upper_range, 11 );
vuc_pixel_SW_upper_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 3 )),
vuc_pixel_SW_upper_range, 15 );
// SOUTH EAST
vector unsigned char vuc_pixel_SE_lower_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 0 )), null_vector, 3 );
vuc_pixel_SE_lower_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 1 )),
vuc_pixel_SE_lower_range, 7 );
vuc_pixel_SE_lower_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 2 )),
vuc_pixel_SE_lower_range, 11 );
vuc_pixel_SE_lower_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 3 )),
vuc_pixel_SE_lower_range, 15 );
vector unsigned char vuc_pixel_SE_upper_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 0 )), null_vector, 3 );
vuc_pixel_SE_upper_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 1 )),
vuc_pixel_SE_upper_range, 7 );
vuc_pixel_SE_upper_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 2 )),
vuc_pixel_SE_upper_range, 11 );
vuc_pixel_SE_upper_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 3 )),
vuc_pixel_SE_upper_range, 15 );
// convert to float
vector float vf_pixel_NW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_lower_range, 0 );
vector float vf_pixel_NW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_upper_range, 0 );
vector float vf_pixel_SW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_lower_range, 0 );
vector float vf_pixel_SW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_upper_range, 0 );
vector float vf_pixel_NE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_lower_range, 0 );
vector float vf_pixel_NE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_upper_range, 0 );
vector float vf_pixel_SE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_lower_range, 0 );
vector float vf_pixel_SE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_upper_range, 0 );
// first linear interpolation: EWtop
// EWtop = NW + EWweight*(NE-NW)
//
// lower range
vector float vf_EWtop_lower_range_tmp = spu_sub( vf_pixel_NE_lower_range, vf_pixel_NW_lower_range );
vector float vf_EWtop_lower_range = spu_madd( vf_EWweight_lower_range,
vf_EWtop_lower_range_tmp,
vf_pixel_NW_lower_range );
// upper range
vector float vf_EWtop_upper_range_tmp = spu_sub( vf_pixel_NE_upper_range, vf_pixel_NW_upper_range );
vector float vf_EWtop_upper_range = spu_madd( vf_EWweight_upper_range,
vf_EWtop_upper_range_tmp,
vf_pixel_NW_upper_range );
// second linear interpolation: EWbottom
// EWbottom = SW + EWweight*(SE-SW)
//
// lower range
vector float vf_EWbottom_lower_range_tmp = spu_sub( vf_pixel_SE_lower_range, vf_pixel_SW_lower_range );
vector float vf_EWbottom_lower_range = spu_madd( vf_EWweight_lower_range,
vf_EWbottom_lower_range_tmp,
vf_pixel_SW_lower_range );
// upper range
vector float vf_EWbottom_upper_range_tmp = spu_sub( vf_pixel_SE_upper_range, vf_pixel_SW_upper_range );
vector float vf_EWbottom_upper_range = spu_madd( vf_EWweight_upper_range,
vf_EWbottom_upper_range_tmp,
vf_pixel_SW_upper_range );
// third linear interpolation: the bilinear interpolated value
// result = EWtop + NSweight*(EWbottom-EWtop);
//
// lower range
vector float vf_result_lower_range_tmp = spu_sub( vf_EWbottom_lower_range, vf_EWtop_lower_range );
vector float vf_result_lower_range = spu_madd( vf_NSweight,
vf_result_lower_range_tmp,
vf_EWtop_lower_range );
// upper range
vector float vf_result_upper_range_tmp = spu_sub( vf_EWbottom_upper_range, vf_EWtop_upper_range );
vector float vf_result_upper_range = spu_madd( vf_NSweight,
vf_result_upper_range_tmp,
vf_EWtop_upper_range );
// convert back: using saturated arithmetic
vector unsigned int vui_result_lower_range = vfloat_to_vuint( vf_result_lower_range );
vector unsigned int vui_result_upper_range = vfloat_to_vuint( vf_result_upper_range );
// merge results->lower,upper
vector unsigned char vuc_mask_merge_result = { 0x03, 0x07, 0x0B, 0x0F,
0x13, 0x17, 0x1B, 0x1F,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00 };
vector unsigned char vuc_result = spu_shuffle( (vector unsigned char) vui_result_lower_range,
(vector unsigned char) vui_result_upper_range,
vuc_mask_merge_result );
// partial storing
vector unsigned char vuc_mask_out = { 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF };
// get currently stored data
vector unsigned char vuc_orig = *((vector unsigned char*)dst);
// clear currently stored data
vuc_orig = spu_and( vuc_orig,
spu_rlqwbyte( vuc_mask_out, ((unsigned int)dst)&0x0F) );
// rotate result according to storing address
vuc_result = spu_rlqwbyte( vuc_result, ((unsigned int)dst)&0x0F );
// store result
*((vector unsigned char*)dst) = spu_or( vuc_result,
vuc_orig );
dst += 8;
}
}
/*
* bilinear_scale_line_w16()
*
* processes a line of yuv-input, width has to be a multiple of 16
* scaled yuv-output is written to local store buffer
*
* @param src buffer for 2 lines input
* @param dst_ buffer for 1 line output
* @param dst_width the width of the destination line
* @param vf_x_scale a float vector, at each entry is the x_scale-factor
* @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line
* @param src_linestride the stride of the srcline
*/
void bilinear_scale_line_w16( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) {
unsigned char* dst = dst_;
unsigned int dst_x;
for( dst_x=0; dst_x<dst_width; dst_x+=16) {
// address calculation for loading the 4 surrounding pixel of each calculated
// destination pixel
vector unsigned int vui_dst_x_tmp = spu_splats( dst_x );
// parallelised processing
// first range->pixel 1 2 3 4
// second range->pixel 5 6 7 8
// third range->pixel 9 10 11 12
// fourth range->pixel 13 14 15 16
vector unsigned int vui_inc_dst_x_first_range = { 0, 1, 2, 3 };
vector unsigned int vui_inc_dst_x_second_range = { 4, 5, 6, 7 };
vector unsigned int vui_inc_dst_x_third_range = { 8, 9, 10, 11 };
vector unsigned int vui_inc_dst_x_fourth_range = { 12, 13, 14, 15 };
vector unsigned int vui_dst_x_first_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_first_range );
vector unsigned int vui_dst_x_second_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_second_range );
vector unsigned int vui_dst_x_third_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_third_range );
vector unsigned int vui_dst_x_fourth_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_fourth_range );
// calculate weight EAST-WEST
vector float vf_dst_x_first_range = spu_convtf( vui_dst_x_first_range, 0 );
vector float vf_dst_x_second_range = spu_convtf( vui_dst_x_second_range, 0 );
vector float vf_dst_x_third_range = spu_convtf( vui_dst_x_third_range, 0 );
vector float vf_dst_x_fourth_range = spu_convtf( vui_dst_x_fourth_range, 0 );
vector float vf_src_x_first_range = spu_mul( vf_dst_x_first_range, vf_x_scale );
vector float vf_src_x_second_range = spu_mul( vf_dst_x_second_range, vf_x_scale );
vector float vf_src_x_third_range = spu_mul( vf_dst_x_third_range, vf_x_scale );
vector float vf_src_x_fourth_range = spu_mul( vf_dst_x_fourth_range, vf_x_scale );
vector unsigned int vui_interpl_x_first_range = spu_convtu( vf_src_x_first_range, 0 );
vector unsigned int vui_interpl_x_second_range = spu_convtu( vf_src_x_second_range, 0 );
vector unsigned int vui_interpl_x_third_range = spu_convtu( vf_src_x_third_range, 0 );
vector unsigned int vui_interpl_x_fourth_range = spu_convtu( vf_src_x_fourth_range, 0 );
vector float vf_interpl_x_first_range = spu_convtf( vui_interpl_x_first_range, 0 );
vector float vf_interpl_x_second_range = spu_convtf( vui_interpl_x_second_range, 0 );
vector float vf_interpl_x_third_range = spu_convtf( vui_interpl_x_third_range, 0 );
vector float vf_interpl_x_fourth_range = spu_convtf( vui_interpl_x_fourth_range, 0 );
vector float vf_EWweight_first_range = spu_sub( vf_src_x_first_range, vf_interpl_x_first_range );
vector float vf_EWweight_second_range = spu_sub( vf_src_x_second_range, vf_interpl_x_second_range );
vector float vf_EWweight_third_range = spu_sub( vf_src_x_third_range, vf_interpl_x_third_range );
vector float vf_EWweight_fourth_range = spu_sub( vf_src_x_fourth_range, vf_interpl_x_fourth_range );
// calculate address offset
//
// pixel NORTH WEST
vector unsigned int vui_off_pixelNW_first_range = vui_interpl_x_first_range;
vector unsigned int vui_off_pixelNW_second_range = vui_interpl_x_second_range;
vector unsigned int vui_off_pixelNW_third_range = vui_interpl_x_third_range;
vector unsigned int vui_off_pixelNW_fourth_range = vui_interpl_x_fourth_range;
// pixel NORTH EAST-->(offpixelNW+1)
vector unsigned int vui_add_1 = { 1, 1, 1, 1 };
vector unsigned int vui_off_pixelNE_first_range = spu_add( vui_off_pixelNW_first_range, vui_add_1 );
vector unsigned int vui_off_pixelNE_second_range = spu_add( vui_off_pixelNW_second_range, vui_add_1 );
vector unsigned int vui_off_pixelNE_third_range = spu_add( vui_off_pixelNW_third_range, vui_add_1 );
vector unsigned int vui_off_pixelNE_fourth_range = spu_add( vui_off_pixelNW_fourth_range, vui_add_1 );
// SOUTH-WEST-->(offpixelNW+src_linestride)
vector unsigned int vui_srclinestride = spu_splats( src_linestride );
vector unsigned int vui_off_pixelSW_first_range = spu_add( vui_srclinestride, vui_off_pixelNW_first_range );
vector unsigned int vui_off_pixelSW_second_range = spu_add( vui_srclinestride, vui_off_pixelNW_second_range );
vector unsigned int vui_off_pixelSW_third_range = spu_add( vui_srclinestride, vui_off_pixelNW_third_range );
vector unsigned int vui_off_pixelSW_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNW_fourth_range );
// SOUTH-EAST-->(offpixelNW+src_linestride+1)
vector unsigned int vui_off_pixelSE_first_range = spu_add( vui_srclinestride, vui_off_pixelNE_first_range );
vector unsigned int vui_off_pixelSE_second_range = spu_add( vui_srclinestride, vui_off_pixelNE_second_range );
vector unsigned int vui_off_pixelSE_third_range = spu_add( vui_srclinestride, vui_off_pixelNE_third_range );
vector unsigned int vui_off_pixelSE_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNE_fourth_range );
// calculate each address
vector unsigned int vui_src_ls = spu_splats( (unsigned int) src );
vector unsigned int vui_addr_pixelNW_first_range = spu_add( vui_src_ls, vui_off_pixelNW_first_range );
vector unsigned int vui_addr_pixelNW_second_range = spu_add( vui_src_ls, vui_off_pixelNW_second_range );
vector unsigned int vui_addr_pixelNW_third_range = spu_add( vui_src_ls, vui_off_pixelNW_third_range );
vector unsigned int vui_addr_pixelNW_fourth_range = spu_add( vui_src_ls, vui_off_pixelNW_fourth_range );
vector unsigned int vui_addr_pixelNE_first_range = spu_add( vui_src_ls, vui_off_pixelNE_first_range );
vector unsigned int vui_addr_pixelNE_second_range = spu_add( vui_src_ls, vui_off_pixelNE_second_range );
vector unsigned int vui_addr_pixelNE_third_range = spu_add( vui_src_ls, vui_off_pixelNE_third_range );
vector unsigned int vui_addr_pixelNE_fourth_range = spu_add( vui_src_ls, vui_off_pixelNE_fourth_range );
vector unsigned int vui_addr_pixelSW_first_range = spu_add( vui_src_ls, vui_off_pixelSW_first_range );
vector unsigned int vui_addr_pixelSW_second_range = spu_add( vui_src_ls, vui_off_pixelSW_second_range );
vector unsigned int vui_addr_pixelSW_third_range = spu_add( vui_src_ls, vui_off_pixelSW_third_range );
vector unsigned int vui_addr_pixelSW_fourth_range = spu_add( vui_src_ls, vui_off_pixelSW_fourth_range );
vector unsigned int vui_addr_pixelSE_first_range = spu_add( vui_src_ls, vui_off_pixelSE_first_range );
vector unsigned int vui_addr_pixelSE_second_range = spu_add( vui_src_ls, vui_off_pixelSE_second_range );
vector unsigned int vui_addr_pixelSE_third_range = spu_add( vui_src_ls, vui_off_pixelSE_third_range );
vector unsigned int vui_addr_pixelSE_fourth_range = spu_add( vui_src_ls, vui_off_pixelSE_fourth_range );
// get each pixel
//
// scalar load, afterwards insertion into the right position
// NORTH WEST
// first range
vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
vector unsigned char vuc_pixel_NW_first_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 0 )), null_vector, 3 );
vuc_pixel_NW_first_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 1 )),
vuc_pixel_NW_first_range, 7 );
vuc_pixel_NW_first_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 2 )),
vuc_pixel_NW_first_range, 11 );
vuc_pixel_NW_first_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 3 )),
vuc_pixel_NW_first_range, 15 );
// second range
vector unsigned char vuc_pixel_NW_second_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 0 )), null_vector, 3 );
vuc_pixel_NW_second_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 1 )),
vuc_pixel_NW_second_range, 7 );
vuc_pixel_NW_second_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 2 )),
vuc_pixel_NW_second_range, 11 );
vuc_pixel_NW_second_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 3 )),
vuc_pixel_NW_second_range, 15 );
// third range
vector unsigned char vuc_pixel_NW_third_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 0 )), null_vector, 3 );
vuc_pixel_NW_third_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 1 )),
vuc_pixel_NW_third_range, 7 );
vuc_pixel_NW_third_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 2 )),
vuc_pixel_NW_third_range, 11 );
vuc_pixel_NW_third_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 3 )),
vuc_pixel_NW_third_range, 15 );
// fourth range
vector unsigned char vuc_pixel_NW_fourth_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 0 )), null_vector, 3 );
vuc_pixel_NW_fourth_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 1 )),
vuc_pixel_NW_fourth_range, 7 );
vuc_pixel_NW_fourth_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 2 )),
vuc_pixel_NW_fourth_range, 11 );
vuc_pixel_NW_fourth_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 3 )),
vuc_pixel_NW_fourth_range, 15 );
// NORTH EAST
// first range
vector unsigned char vuc_pixel_NE_first_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 0 )), null_vector, 3 );
vuc_pixel_NE_first_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 1 )),
vuc_pixel_NE_first_range, 7 );
vuc_pixel_NE_first_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 2 )),
vuc_pixel_NE_first_range, 11 );
vuc_pixel_NE_first_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 3 )),
vuc_pixel_NE_first_range, 15 );
// second range
vector unsigned char vuc_pixel_NE_second_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 0 )), null_vector, 3 );
vuc_pixel_NE_second_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 1 )),
vuc_pixel_NE_second_range, 7 );
vuc_pixel_NE_second_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 2 )),
vuc_pixel_NE_second_range, 11 );
vuc_pixel_NE_second_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 3 )),
vuc_pixel_NE_second_range, 15 );
// third range
vector unsigned char vuc_pixel_NE_third_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 0 )), null_vector, 3 );
vuc_pixel_NE_third_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 1 )),
vuc_pixel_NE_third_range, 7 );
vuc_pixel_NE_third_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 2 )),
vuc_pixel_NE_third_range, 11 );
vuc_pixel_NE_third_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 3 )),
vuc_pixel_NE_third_range, 15 );
// fourth range
vector unsigned char vuc_pixel_NE_fourth_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 0 )), null_vector, 3 );
vuc_pixel_NE_fourth_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 1 )),
vuc_pixel_NE_fourth_range, 7 );
vuc_pixel_NE_fourth_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 2 )),
vuc_pixel_NE_fourth_range, 11 );
vuc_pixel_NE_fourth_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 3 )),
vuc_pixel_NE_fourth_range, 15 );
// SOUTH WEST
// first range
vector unsigned char vuc_pixel_SW_first_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 0 )), null_vector, 3 );
vuc_pixel_SW_first_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 1 )),
vuc_pixel_SW_first_range, 7 );
vuc_pixel_SW_first_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 2 )),
vuc_pixel_SW_first_range, 11 );
vuc_pixel_SW_first_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 3 )),
vuc_pixel_SW_first_range, 15 );
// second range
vector unsigned char vuc_pixel_SW_second_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 0 )), null_vector, 3 );
vuc_pixel_SW_second_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 1 )),
vuc_pixel_SW_second_range, 7 );
vuc_pixel_SW_second_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 2 )),
vuc_pixel_SW_second_range, 11 );
vuc_pixel_SW_second_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 3 )),
vuc_pixel_SW_second_range, 15 );
// third range
vector unsigned char vuc_pixel_SW_third_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 0 )), null_vector, 3 );
vuc_pixel_SW_third_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 1 )),
vuc_pixel_SW_third_range, 7 );
vuc_pixel_SW_third_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 2 )),
vuc_pixel_SW_third_range, 11 );
vuc_pixel_SW_third_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 3 )),
vuc_pixel_SW_third_range, 15 );
// fourth range
vector unsigned char vuc_pixel_SW_fourth_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 0 )), null_vector, 3 );
vuc_pixel_SW_fourth_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 1 )),
vuc_pixel_SW_fourth_range, 7 );
vuc_pixel_SW_fourth_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 2 )),
vuc_pixel_SW_fourth_range, 11 );
vuc_pixel_SW_fourth_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 3 )),
vuc_pixel_SW_fourth_range, 15 );
// SOUTH EAST
// first range
vector unsigned char vuc_pixel_SE_first_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 0 )), null_vector, 3 );
vuc_pixel_SE_first_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 1 )),
vuc_pixel_SE_first_range, 7 );
vuc_pixel_SE_first_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 2 )),
vuc_pixel_SE_first_range, 11 );
vuc_pixel_SE_first_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 3 )),
vuc_pixel_SE_first_range, 15 );
// second range
vector unsigned char vuc_pixel_SE_second_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 0 )), null_vector, 3 );
vuc_pixel_SE_second_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 1 )),
vuc_pixel_SE_second_range, 7 );
vuc_pixel_SE_second_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 2 )),
vuc_pixel_SE_second_range, 11 );
vuc_pixel_SE_second_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 3 )),
vuc_pixel_SE_second_range, 15 );
// third range
vector unsigned char vuc_pixel_SE_third_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 0 )), null_vector, 3 );
vuc_pixel_SE_third_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 1 )),
vuc_pixel_SE_third_range, 7 );
vuc_pixel_SE_third_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 2 )),
vuc_pixel_SE_third_range, 11 );
vuc_pixel_SE_third_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 3 )),
vuc_pixel_SE_third_range, 15 );
// fourth range
vector unsigned char vuc_pixel_SE_fourth_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 0 )), null_vector, 3 );
vuc_pixel_SE_fourth_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 1 )),
vuc_pixel_SE_fourth_range, 7 );
vuc_pixel_SE_fourth_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 2 )),
vuc_pixel_SE_fourth_range, 11 );
vuc_pixel_SE_fourth_range = spu_insert(
*((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 3 )),
vuc_pixel_SE_fourth_range, 15 );
// convert to float
vector float vf_pixel_NW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_first_range, 0 );
vector float vf_pixel_NW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_second_range, 0 );
vector float vf_pixel_NW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_third_range, 0 );
vector float vf_pixel_NW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_fourth_range, 0 );
vector float vf_pixel_NE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_first_range, 0 );
vector float vf_pixel_NE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_second_range, 0 );
vector float vf_pixel_NE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_third_range, 0 );
vector float vf_pixel_NE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_fourth_range, 0 );
vector float vf_pixel_SW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_first_range, 0 );
vector float vf_pixel_SW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_second_range, 0 );
vector float vf_pixel_SW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_third_range, 0 );
vector float vf_pixel_SW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_fourth_range, 0 );
vector float vf_pixel_SE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_first_range, 0 );
vector float vf_pixel_SE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_second_range, 0 );
vector float vf_pixel_SE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_third_range, 0 );
vector float vf_pixel_SE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_fourth_range, 0 );
// first linear interpolation: EWtop
// EWtop = NW + EWweight*(NE-NW)
//
// first range
vector float vf_EWtop_first_range_tmp = spu_sub( vf_pixel_NE_first_range, vf_pixel_NW_first_range );
vector float vf_EWtop_first_range = spu_madd( vf_EWweight_first_range,
vf_EWtop_first_range_tmp,
vf_pixel_NW_first_range );
// second range
vector float vf_EWtop_second_range_tmp = spu_sub( vf_pixel_NE_second_range, vf_pixel_NW_second_range );
vector float vf_EWtop_second_range = spu_madd( vf_EWweight_second_range,
vf_EWtop_second_range_tmp,
vf_pixel_NW_second_range );
// third range
vector float vf_EWtop_third_range_tmp = spu_sub( vf_pixel_NE_third_range, vf_pixel_NW_third_range );
vector float vf_EWtop_third_range = spu_madd( vf_EWweight_third_range,
vf_EWtop_third_range_tmp,
vf_pixel_NW_third_range );
// fourth range
vector float vf_EWtop_fourth_range_tmp = spu_sub( vf_pixel_NE_fourth_range, vf_pixel_NW_fourth_range );
vector float vf_EWtop_fourth_range = spu_madd( vf_EWweight_fourth_range,
vf_EWtop_fourth_range_tmp,
vf_pixel_NW_fourth_range );
// second linear interpolation: EWbottom
// EWbottom = SW + EWweight*(SE-SW)
//
// first range
vector float vf_EWbottom_first_range_tmp = spu_sub( vf_pixel_SE_first_range, vf_pixel_SW_first_range );
vector float vf_EWbottom_first_range = spu_madd( vf_EWweight_first_range,
vf_EWbottom_first_range_tmp,
vf_pixel_SW_first_range );
// second range
vector float vf_EWbottom_second_range_tmp = spu_sub( vf_pixel_SE_second_range, vf_pixel_SW_second_range );
vector float vf_EWbottom_second_range = spu_madd( vf_EWweight_second_range,
vf_EWbottom_second_range_tmp,
vf_pixel_SW_second_range );
// third range
vector float vf_EWbottom_third_range_tmp = spu_sub( vf_pixel_SE_third_range, vf_pixel_SW_third_range );
vector float vf_EWbottom_third_range = spu_madd( vf_EWweight_third_range,
vf_EWbottom_third_range_tmp,
vf_pixel_SW_third_range );
// fourth range
vector float vf_EWbottom_fourth_range_tmp = spu_sub( vf_pixel_SE_fourth_range, vf_pixel_SW_fourth_range );
vector float vf_EWbottom_fourth_range = spu_madd( vf_EWweight_fourth_range,
vf_EWbottom_fourth_range_tmp,
vf_pixel_SW_fourth_range );
// third linear interpolation: the bilinear interpolated value
// result = EWtop + NSweight*(EWbottom-EWtop);
//
// first range
vector float vf_result_first_range_tmp = spu_sub( vf_EWbottom_first_range, vf_EWtop_first_range );
vector float vf_result_first_range = spu_madd( vf_NSweight,
vf_result_first_range_tmp,
vf_EWtop_first_range );
// second range
vector float vf_result_second_range_tmp = spu_sub( vf_EWbottom_second_range, vf_EWtop_second_range );
vector float vf_result_second_range = spu_madd( vf_NSweight,
vf_result_second_range_tmp,
vf_EWtop_second_range );
// third range
vector float vf_result_third_range_tmp = spu_sub( vf_EWbottom_third_range, vf_EWtop_third_range );
vector float vf_result_third_range = spu_madd( vf_NSweight,
vf_result_third_range_tmp,
vf_EWtop_third_range );
// fourth range
vector float vf_result_fourth_range_tmp = spu_sub( vf_EWbottom_fourth_range, vf_EWtop_fourth_range );
vector float vf_result_fourth_range = spu_madd( vf_NSweight,
vf_result_fourth_range_tmp,
vf_EWtop_fourth_range );
// convert back: using saturated arithmetic
vector unsigned int vui_result_first_range = vfloat_to_vuint( vf_result_first_range );
vector unsigned int vui_result_second_range = vfloat_to_vuint( vf_result_second_range );
vector unsigned int vui_result_third_range = vfloat_to_vuint( vf_result_third_range );
vector unsigned int vui_result_fourth_range = vfloat_to_vuint( vf_result_fourth_range );
// merge results->lower,upper
vector unsigned char vuc_mask_merge_result_first_second = { 0x03, 0x07, 0x0B, 0x0F,
0x13, 0x17, 0x1B, 0x1F,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00 };
vector unsigned char vuc_mask_merge_result_third_fourth = { 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x03, 0x07, 0x0B, 0x0F,
0x13, 0x17, 0x1B, 0x1F };
vector unsigned char vuc_result_first_second =
spu_shuffle( (vector unsigned char) vui_result_first_range,
(vector unsigned char) vui_result_second_range,
vuc_mask_merge_result_first_second );
vector unsigned char vuc_result_third_fourth =
spu_shuffle( (vector unsigned char) vui_result_third_range,
(vector unsigned char) vui_result_fourth_range,
vuc_mask_merge_result_third_fourth );
// store result
*((vector unsigned char*)dst) = spu_or( vuc_result_first_second,
vuc_result_third_fourth );
dst += 16;
}
}
/*
* SDL - Simple DirectMedia Layer
* CELL BE Support for PS3 Framebuffer
* Copyright (C) 2008, 2009 International Business Machines Corporation
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*
* Martin Lowinski <lowinski [at] de [dot] ibm [dot] com>
* Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
* SPE code based on research by:
* Rene Becker
* Thimo Emmerich
*/
#include "spu_common.h"
#include <spu_intrinsics.h>
#include <spu_mfcio.h>
#include <stdio.h>
#include <string.h>
/*
 * Debug tracing.
 *
 * Uncomment the DEBUG define below to make deprintf() print to stdout and
 * flush immediately; without it deprintf() does nothing.
 * Both variants are wrapped in do { } while (0), so a call followed by ';'
 * is always exactly one statement, even as an unbraced if/else body.
 */
//#define DEBUG
#ifdef DEBUG
#define deprintf(fmt, args...) \
    do { \
        fprintf(stdout, fmt, ##args); \
        fflush(stdout); \
    } while (0)
#else
#define deprintf(fmt, args...) \
    do { } while (0)
#endif
/* Copies the picture described by parms to the framebuffer; the argument is
 * the first tag of the tag group reserved in main(). */
void cpy_to_fb(unsigned int);
/* fb_writer_spu parms */
/* SPU-side copy of the parameter block: main() fills it with a DMA get from
 * the address received through the mailbox, cpy_to_fb() reads it. */
static volatile struct fb_writer_parms_t parms __attribute__ ((aligned(128)));
/* Code running on SPU */
int main(unsigned long long spe_id __attribute__ ((unused)), unsigned long long argp __attribute__ ((unused)))
{
deprintf("[SPU] fb_writer_spu is up... (on SPE #%llu)\n", spe_id);
uint32_t ea_mfc, mbox;
// send ready message
spu_write_out_mbox(SPU_READY);
while (1) {
/* Check mailbox */
mbox = spu_read_in_mbox();
deprintf("[SPU] Message is %u\n", mbox);
switch (mbox) {
case SPU_EXIT:
deprintf("[SPU] fb_writer goes down...\n");
return 0;
case SPU_START:
break;
default:
deprintf("[SPU] Cannot handle message\n");
continue;
}
/* Tag Manager setup */
unsigned int tags;
tags = mfc_multi_tag_reserve(5);
if (tags == MFC_TAG_INVALID) {
deprintf("[SPU] Failed to reserve mfc tags on fb_writer\n");
return 0;
}
/* Framebuffer parms */
ea_mfc = spu_read_in_mbox();
deprintf("[SPU] Message on fb_writer is %u\n", ea_mfc);
spu_mfcdma32(&parms, (unsigned int)ea_mfc,
sizeof(struct fb_writer_parms_t), tags,
MFC_GET_CMD);
deprintf("[SPU] argp = %u\n", (unsigned int)argp);
DMA_WAIT_TAG(tags);
/* Copy parms->data to framebuffer */
deprintf("[SPU] Copying to framebuffer started\n");
cpy_to_fb(tags);
deprintf("[SPU] Copying to framebuffer done!\n");
mfc_multi_tag_release(tags, 5);
deprintf("[SPU] fb_writer_spu... done!\n");
/* Send FIN msg */
spu_write_out_mbox(SPU_FIN);
}
return 0;
}
/*
 * cpy_to_fb()
 *
 * Copies the picture described by parms to the framebuffer, one line per
 * DMA transfer. Every line is fetched from parms.data into a buffer and
 * written from there to the 16-byte aligned framebuffer address derived from
 * parms.center. Four buffers, each with its own tag, are used in turn;
 * before a buffer is refilled the function waits for the store that was
 * previously issued from it.
 *
 * All parms.bounded_input_height lines are copied, and the function only
 * returns after the transfers of all four tags have completed.
 *
 * NOTE(review): the line size (bounded_input_width * fb_pixel_size) is not
 * checked against BUFFER_SIZE here; presumably the sender of the parameter
 * block guarantees that it fits - confirm.
 *
 * @param tag_id_base first tag of the group reserved by the caller; the tags
 *                    tag_id_base + 1 ... tag_id_base + 4 are used here
 */
void cpy_to_fb(unsigned int tag_id_base)
{
    /* Number of line buffers, and therefore of tags, cycled through */
    enum { NUM_LINE_BUFS = 4 };

    uint8_t *in = parms.data;
    /* Align fb pointer which was centered before */
    uint8_t *fb =
        (unsigned char *)((unsigned int)parms.center & 0xFFFFFFF0);
    const uint32_t line_count = parms.bounded_input_height;
    const uint32_t out_line_stride = parms.out_line_stride;
    const uint32_t in_line_stride = parms.in_line_stride;
    /* Bytes transferred per line */
    const uint32_t in_line_size =
        parms.bounded_input_width * parms.fb_pixel_size;

    /* Local store buffer */
    static volatile uint8_t buf[NUM_LINE_BUFS][BUFFER_SIZE]
        __attribute__ ((aligned(128)));

    uint32_t line;
    unsigned int slot;

    for (line = 0; line < line_count; line++) {
        slot = line % NUM_LINE_BUFS;
        const unsigned int tag = tag_id_base + 1 + slot;

        /* The store issued from this buffer four lines ago must be done
         * before the buffer is overwritten */
        DMA_WAIT_TAG(tag);
        // retrieve buffer
        spu_mfcdma32(buf[slot], (unsigned int)in, in_line_size,
                     tag, MFC_GETB_CMD);
        DMA_WAIT_TAG(tag);
        // store buffer
        spu_mfcdma32(buf[slot], (unsigned int)fb, in_line_size,
                     tag, MFC_PUTB_CMD);

        in += in_line_stride;
        fb += out_line_stride;
        deprintf("[SPU] Line %u copied via buffer %u, in=0x%x, fb=0x%x\n",
                 (unsigned int)line, slot, (unsigned int)in,
                 (unsigned int)fb);
    }

    /* Do not return while a store is still in flight on any of the tags */
    for (slot = 0; slot < NUM_LINE_BUFS; slot++) {
        DMA_WAIT_TAG(tag_id_base + 1 + slot);
    }
}
/*
* SDL - Simple DirectMedia Layer
* CELL BE Support for PS3 Framebuffer
* Copyright (C) 2008, 2009 International Business Machines Corporation
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*
* Martin Lowinski <lowinski [at] de [dot] ibm [dot] com>
* Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
* SPE code based on research by:
* Rene Becker
* Thimo Emmerich
*/
/* Common definitions/macros for SPUs */
#ifndef _SPU_COMMON_H
#define _SPU_COMMON_H
#include <stdio.h>
#include <stdint.h>
#include <string.h>
/* Tag management */
/*
 * DMA_WAIT_TAG(_tag)
 *
 * Writes a tag mask that contains only tag group _tag and then reads the
 * tag status with mfc_read_tag_status_all().
 *
 * The expansion is a single statement (do { } while (0)), so the macro is
 * also safe as the unbraced body of an if/else or a loop. The mask is built
 * from an unsigned constant so that tag 31 does not overflow a signed int.
 * The including file has to provide the mfc_* functions (spu_mfcio.h).
 */
#define DMA_WAIT_TAG(_tag) \
    do { \
        mfc_write_tag_mask(1u << (_tag)); \
        mfc_read_tag_status_all(); \
    } while (0)
/* SPU mailbox messages */
/* SPU_READY and SPU_FIN are written to the outbound mailbox by the SPU
 * programs (at start-up and after a finished job); SPU_START and SPU_EXIT
 * are the commands they accept from the inbound mailbox. */
#define SPU_READY 0
#define SPU_START 1
#define SPU_FIN 2
#define SPU_EXIT 3
/* Tags */
/* Fixed DMA tags: the YUV converter fetches with RETR_BUF + buffer index
 * (0 or 1) and stores with STR_BUF, so RETR_BUF + 1 and STR_BUF are the same
 * tag. NOTE(review): TAG_INIT has no user in the code reviewed here. */
#define RETR_BUF 0
#define STR_BUF 1
#define TAG_INIT 2
/* Buffersizes */
#define MAX_HDTV_WIDTH 1920
#define MAX_HDTV_HEIGHT 1080
/* One stride of HDTV */
/* 7680 = MAX_HDTV_WIDTH * 4, i.e. one line at 4 bytes per pixel */
#define BUFFER_SIZE 7680
/* fb_writer ppu/spu exchange parms */
/*
 * Parameter block of the framebuffer writer. The SPU program fetches it by
 * DMA and copies bounded_input_height lines of
 * bounded_input_width * fb_pixel_size bytes each.
 */
struct fb_writer_parms_t {
    /* Address of the first source line */
    uint8_t *data;
    /* Destination address in the framebuffer; the writer rounds it down to
     * a multiple of 16 before use */
    uint8_t *center;
    /* Distance in bytes between two destination lines */
    uint32_t out_line_stride;
    /* Distance in bytes between two source lines */
    uint32_t in_line_stride;
    /* Number of lines to copy */
    uint32_t bounded_input_height;
    /* Pixels per copied line */
    uint32_t bounded_input_width;
    /* Bytes per pixel */
    uint32_t fb_pixel_size;
    /* This padding fills the structure up to 128 bytes. It is computed from
     * the members above like in the other parameter structures; on parm
     * change, update the member counts in the expression! */
    char padding[128 - ((2 * sizeof(uint8_t *) + 5 * sizeof(uint32_t)) & 0x7F)];
} __attribute__((aligned(128)));
/* yuv2rgb ppu/spu exchange parms */
/*
 * Parameter block of the YUV to BGRA converter; the SPU program fetches it
 * by DMA before converting.
 */
struct yuv2rgb_parms_t {
    /* Addresses of the three source planes */
    uint8_t *y_plane;
    uint8_t *v_plane;
    uint8_t *u_plane;
    /* Address the converted BGRA picture is written to */
    uint8_t *dstBuffer;
    /* Size of the source picture in pixels */
    unsigned int src_pixel_width;
    unsigned int src_pixel_height;
    /* Filler that brings the members up to 128 bytes; it is derived from the
     * member counts, so adjust the expression on parm change! */
    char padding[128 - ((4 * sizeof(uint8_t *) + 2 * sizeof(unsigned int)) % 128)];
} __attribute__((aligned(128)));
/* bilin_scaler ppu/spu exchange parms */
/*
 * Parameter block of the bilinear scaler. Same layout as the converter
 * parameters, followed by the size of the destination picture.
 */
struct scale_parms_t {
    /* Addresses of the three source planes */
    uint8_t *y_plane;
    uint8_t *v_plane;
    uint8_t *u_plane;
    /* Address of the output buffer */
    uint8_t *dstBuffer;
    /* Size of the source picture in pixels */
    unsigned int src_pixel_width;
    unsigned int src_pixel_height;
    /* Size of the destination picture in pixels */
    unsigned int dst_pixel_width;
    unsigned int dst_pixel_height;
    /* Filler that brings the members up to 128 bytes; it is derived from the
     * member counts, so adjust the expression on parm change! */
    char padding[128 - ((4 * sizeof(uint8_t *) + 4 * sizeof(unsigned int)) % 128)];
} __attribute__((aligned(128)));
#endif /* _SPU_COMMON_H */
/*
* SDL - Simple DirectMedia Layer
* CELL BE Support for PS3 Framebuffer
* Copyright (C) 2008, 2009 International Business Machines Corporation
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*
* Martin Lowinski <lowinski [at] de [dot] ibm [dot] com>
* Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
* SPE code based on research by:
* Rene Becker
* Thimo Emmerich
*/
#include "spu_common.h"
#include <spu_intrinsics.h>
#include <spu_mfcio.h>
/*
 * Build switches.
 *
 * DEBUG:   uncomment to make deprintf() print to stdout and flush
 *          immediately; without it deprintf() does nothing.
 * TESTING: test environment for /2 resolutions; uncomment to use the scalar
 *          yuv_to_rgb_w2_line() converter in yuv_to_rgb_w16().
 *
 * Both deprintf() variants are wrapped in do { } while (0), so a call
 * followed by ';' is always exactly one statement, even as an unbraced
 * if/else body.
 */
//#define DEBUG
//#define TESTING
#ifdef DEBUG
#define deprintf(fmt, args...) \
    do { \
        fprintf(stdout, fmt, ##args); \
        fflush(stdout); \
    } while (0)
#else
#define deprintf(fmt, args...) \
    do { } while (0)
#endif
/* SPU-side copy of the conversion parameters; main() fills it by DMA for
 * every SPU_START command */
struct yuv2rgb_parms_t parms_converter __attribute__((aligned(128)));
/* A maximum of 8 lines Y, therefore 4 lines V, 4 lines U are stored
* there might be the need to retrieve misaligned data, adjust
* incoming v and u plane to be able to handle this (add 128)
*/
/* Each plane has two buffer sets (first index); the conversion routines
 * fill one set with four Y lines and two V/U lines per transfer */
unsigned char y_plane[2][(MAX_HDTV_WIDTH + 128) * 4] __attribute__((aligned(128)));
unsigned char v_plane[2][(MAX_HDTV_WIDTH + 128) * 2] __attribute__((aligned(128)));
unsigned char u_plane[2][(MAX_HDTV_WIDTH + 128) * 2] __attribute__((aligned(128)));
/* A maximum of 4 lines BGRA are stored, 4 byte per pixel */
unsigned char bgra[4 * MAX_HDTV_WIDTH * 4] __attribute__((aligned(128)));
/* some vectors needed by the float to int conversion */
/* Upper and lower clamp values used by float_to_char() and vfloat_to_vuint() */
static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f };
static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f };
/* Whole-picture conversion routines, selected in main() by the picture width */
void yuv_to_rgb_w16();
void yuv_to_rgb_w32();
/* Converters for two lines of Y with the matching line of V and U.
 * NOTE(review): yuv_to_rgb_w16() also calls yuv_to_rgb_w16_line(), for which
 * no prototype is declared here - confirm where it is declared. */
void yuv_to_rgb_w2_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr, unsigned int width);
void yuv_to_rgb_w32_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width);
/*
 * SPU entry point of the YUV to BGRA converter.
 *
 * Announces itself with SPU_READY and then serves mailbox commands:
 *   SPU_EXIT   leave the program
 *   SPU_START  read a second mailbox word holding the address of a
 *              struct yuv2rgb_parms_t, fetch that structure by DMA,
 *              convert the picture and answer with SPU_FIN
 *   any other  ignored
 *
 * The conversion routine is chosen by the picture width: yuv_to_rgb_w32()
 * works in steps of 32 pixels and is therefore only used when the width is
 * a multiple of 32; every other width is handled by yuv_to_rgb_w16().
 *
 * @param spe_id only used in debug output
 * @param argp   unused
 * @returns 0, both on SPU_EXIT and when no MFC tag could be reserved
 */
int main(unsigned long long spe_id __attribute__((unused)), unsigned long long argp __attribute__ ((unused)))
{
    deprintf("[SPU] yuv2rgb_spu is up... (on SPE #%llu)\n", spe_id);
    uint32_t ea_mfc, mbox;

    // send ready message
    spu_write_out_mbox(SPU_READY);

    while (1) {
        /* Check mailbox */
        mbox = spu_read_in_mbox();
        deprintf("[SPU] Message is %u\n", mbox);
        switch (mbox) {
        case SPU_EXIT:
            deprintf("[SPU] yuv2rgb_converter goes down...\n");
            return 0;
        case SPU_START:
            break;
        default:
            deprintf("[SPU] Cannot handle message\n");
            continue;
        }

        /* Tag Manager setup */
        /* NOTE(review): this tag is only used for the parameter transfer;
         * the conversion routines use the fixed tags RETR_BUF and STR_BUF */
        unsigned int tag_id;
        tag_id = mfc_multi_tag_reserve(1);
        if (tag_id == MFC_TAG_INVALID) {
            deprintf("[SPU] Failed to reserve mfc tags on yuv2rgb_converter\n");
            return 0;
        }

        /* DMA transfer for the input parameters */
        ea_mfc = spu_read_in_mbox();
        deprintf("[SPU] Message on yuv2rgb_converter is %u\n", ea_mfc);
        spu_mfcdma32(&parms_converter, (unsigned int)ea_mfc, sizeof(struct yuv2rgb_parms_t), tag_id, MFC_GET_CMD);
        DMA_WAIT_TAG(tag_id);

        /* There are alignment issues that involve handling of special cases
         * a width of 32 results in a width of 16 in the chrominance
         * --> choose the proper handling to optimize the performance
         */
        deprintf("[SPU] Convert %ix%i from YUV to RGB\n", parms_converter.src_pixel_width, parms_converter.src_pixel_height);
        if (parms_converter.src_pixel_width & 0x1f) {
            /* Width is not a multiple of 32: the 32-pixel routine must not
             * be used */
            deprintf("[SPU] Using yuv_to_rgb_w16\n");
            yuv_to_rgb_w16();
        } else {
            deprintf("[SPU] Using yuv_to_rgb_w32\n");
            yuv_to_rgb_w32();
        }

        mfc_multi_tag_release(tag_id, 1);
        deprintf("[SPU] yuv2rgb_spu... done!\n");

        /* Send FIN message */
        spu_write_out_mbox(SPU_FIN);
    }

    return 0;
}
/*
* float_to_char()
*
* converts a float to a character using saturated
* arithmetic
*
* @param s float for conversion
* @returns converted character
*/
inline static unsigned char float_to_char(float s) {
vector float vec_s = spu_splats(s);
vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s);
vec_s = spu_sel(vec_s, vec_0_1, select_1);
vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255);
vec_s = spu_sel(vec_s, vec_255, select_2);
return (unsigned char) spu_extract(vec_s,0);
}
/*
 * vfloat_to_vuint()
 *
 * Clamps every element of a float vector to the range [0.1, 255] and
 * converts the result to an unsigned int vector.
 *
 * @param vec_s float vector for conversion
 * @returns converted unsigned int vector
 */
inline static vector unsigned int vfloat_to_vuint(vector float vec_s) {
    /* elements below the lower bound are replaced by the lower bound */
    const vector unsigned int below_min = spu_cmpgt(vec_0_1, vec_s);
    const vector float floor_clamped = spu_sel(vec_s, vec_0_1, below_min);
    /* elements above the upper bound are replaced by the upper bound */
    const vector unsigned int above_max = spu_cmpgt(floor_clamped, vec_255);
    return spu_convtu(spu_sel(floor_clamped, vec_255, above_max), 0);
}
/*
 * yuv_to_rgb_w16()
 *
 * Converts the picture described by parms_converter to BGRA in groups of
 * four lines. For every group four lines of Y and two lines each of V and U
 * are fetched by DMA, converted two Y lines at a time into the bgra buffer
 * and written to parms_converter.dstBuffer in two transfers.
 * The two buffer sets of y_plane/v_plane/u_plane are used alternately: while
 * one set is converted, the other one already holds the following group.
 *
 * Only complete groups are handled: a remainder of 1-3 lines at the bottom
 * of the picture is not converted, and a picture with fewer than four lines
 * is left untouched.
 *
 * There is a single bgra buffer, so the store of the previous group is
 * waited for before the buffer is overwritten with the next group.
 */
void yuv_to_rgb_w16() {
    // Pixel dimensions of the picture
    const uint32_t width = parms_converter.src_pixel_width;
    const uint32_t height = parms_converter.src_pixel_height;
    // Number of complete 4-line groups
    const unsigned int groups = height >> 2;

    // Plane data management: source planes Y, V, U and the BGRA output
    unsigned char* ram_addr_y = parms_converter.y_plane;
    unsigned char* ram_addr_v = parms_converter.v_plane;
    unsigned char* ram_addr_u = parms_converter.u_plane;
    unsigned char* ram_addr_bgra = parms_converter.dstBuffer;

    // Strides
    const unsigned int stride_y = width;
    const unsigned int stride_vu = width >> 1;

    // Transfer sizes
    const unsigned int size_4lines_y = stride_y << 2;
    const unsigned int size_2lines_y = stride_y << 1;
    const unsigned int size_2lines_vu = stride_vu << 1;
    // 2*width*4byte_per_pixel
    const unsigned int size_2lines_bgra = width << 3;

    // Buffer management: index of the buffer set that is converted next
    unsigned int buf_idx = 0;
    unsigned int i;

    // Nothing to convert; do not start any transfer
    if (groups == 0) {
        return;
    }

    // Fetch the first group: 4 lines y, 2 lines v, 2 lines u
    spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y, size_4lines_y, RETR_BUF + buf_idx, MFC_GET_CMD);
    spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD);
    spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD);
    // Wait for these transfers to be completed
    DMA_WAIT_TAG((RETR_BUF + buf_idx));

    for (i = 0; i < groups; i++) {
        if (i + 1 < groups) {
            // Fetch the following group into the other buffer set
            const unsigned int next = buf_idx ^ 1;
            spu_mfcdma32(y_plane[next], (unsigned int) ram_addr_y + size_4lines_y, size_4lines_y, RETR_BUF + next, MFC_GET_CMD);
            spu_mfcdma32(v_plane[next], (unsigned int) ram_addr_v + size_2lines_vu, size_2lines_vu, RETR_BUF + next, MFC_GET_CMD);
            spu_mfcdma32(u_plane[next], (unsigned int) ram_addr_u + size_2lines_vu, size_2lines_vu, RETR_BUF + next, MFC_GET_CMD);
            DMA_WAIT_TAG((RETR_BUF + next));
        }

        // bgra is still the source of the previous store: wait for that
        // transfer before the conversion overwrites the buffer
        DMA_WAIT_TAG(STR_BUF);

        // Convert YUV to BGRA (first two lines, then the next two lines)
#ifndef TESTING
        yuv_to_rgb_w16_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
        yuv_to_rgb_w16_line(y_plane[buf_idx] + size_2lines_y,
                            v_plane[buf_idx] + stride_vu,
                            u_plane[buf_idx] + stride_vu,
                            bgra + size_2lines_bgra,
                            width);
#else
        yuv_to_rgb_w2_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
        yuv_to_rgb_w2_line(y_plane[buf_idx] + size_2lines_y,
                           v_plane[buf_idx] + stride_vu,
                           u_plane[buf_idx] + stride_vu,
                           bgra + size_2lines_bgra,
                           width);
#endif

        // Store converted lines in two steps->max transfer size 16384
        spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
        ram_addr_bgra += size_2lines_bgra;
        spu_mfcdma32(bgra + size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
        ram_addr_bgra += size_2lines_bgra;

        // Move 4 lines
        ram_addr_y += size_4lines_y;
        ram_addr_v += size_2lines_vu;
        ram_addr_u += size_2lines_vu;
        buf_idx ^= 1;
    }

    // Wait for the last storing transfer to be completed
    DMA_WAIT_TAG(STR_BUF);
}
/*
 * yuv_to_rgb_w32()
 *
 * Converts the picture described by parms_converter to BGRA in groups of
 * four lines, using yuv_to_rgb_w32_line() (32 pixels per step). For every
 * group four lines of Y and two lines each of V and U are fetched by DMA,
 * converted two Y lines at a time into the bgra buffer and written to
 * parms_converter.dstBuffer in two transfers.
 * The two buffer sets of y_plane/v_plane/u_plane are used alternately: while
 * one set is converted, the other one already holds the following group.
 *
 * Only complete groups are handled: a remainder of 1-3 lines at the bottom
 * of the picture is not converted, and a picture with fewer than four lines
 * is left untouched.
 *
 * There is a single bgra buffer, so the store of the previous group is
 * waited for before the buffer is overwritten with the next group.
 */
void yuv_to_rgb_w32() {
    // Pixel dimensions of the picture
    const uint32_t width = parms_converter.src_pixel_width;
    const uint32_t height = parms_converter.src_pixel_height;
    // Number of complete 4-line groups
    const unsigned int groups = height >> 2;

    // Plane data management: source planes Y, V, U and the BGRA output
    unsigned char* ram_addr_y = parms_converter.y_plane;
    unsigned char* ram_addr_v = parms_converter.v_plane;
    unsigned char* ram_addr_u = parms_converter.u_plane;
    unsigned char* ram_addr_bgra = parms_converter.dstBuffer;

    // Strides
    const unsigned int stride_y = width;
    const unsigned int stride_vu = width >> 1;

    // Transfer sizes
    const unsigned int size_4lines_y = stride_y << 2;
    const unsigned int size_2lines_y = stride_y << 1;
    const unsigned int size_2lines_vu = stride_vu << 1;
    // 2*width*4byte_per_pixel
    const unsigned int size_2lines_bgra = width << 3;

    // Buffer management: index of the buffer set that is converted next
    unsigned int buf_idx = 0;
    unsigned int i;

    // Nothing to convert; do not start any transfer
    if (groups == 0) {
        return;
    }

    // Fetch the first group: 4 lines y, 2 lines v, 2 lines u
    spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y, size_4lines_y, RETR_BUF + buf_idx, MFC_GET_CMD);
    spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD);
    spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD);
    // Wait for these transfers to be completed
    DMA_WAIT_TAG((RETR_BUF + buf_idx));

    for (i = 0; i < groups; i++) {
        if (i + 1 < groups) {
            // Fetch the following group into the other buffer set
            const unsigned int next = buf_idx ^ 1;
            spu_mfcdma32(y_plane[next], (unsigned int) ram_addr_y + size_4lines_y, size_4lines_y, RETR_BUF + next, MFC_GET_CMD);
            deprintf("4lines = %d\n", size_4lines_y);
            spu_mfcdma32(v_plane[next], (unsigned int) ram_addr_v + size_2lines_vu, size_2lines_vu, RETR_BUF + next, MFC_GET_CMD);
            deprintf("2lines = %d\n", size_2lines_vu);
            spu_mfcdma32(u_plane[next], (unsigned int) ram_addr_u + size_2lines_vu, size_2lines_vu, RETR_BUF + next, MFC_GET_CMD);
            deprintf("2lines = %d\n", size_2lines_vu);
            DMA_WAIT_TAG((RETR_BUF + next));
        }

        // bgra is still the source of the previous store: wait for that
        // transfer before the conversion overwrites the buffer
        DMA_WAIT_TAG(STR_BUF);

        // Convert YUV to BGRA (first two lines, then the next two lines)
        yuv_to_rgb_w32_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
        yuv_to_rgb_w32_line(y_plane[buf_idx] + size_2lines_y,
                            v_plane[buf_idx] + stride_vu,
                            u_plane[buf_idx] + stride_vu,
                            bgra + size_2lines_bgra,
                            width);

        // Store converted lines in two steps->max transfer size 16384
        spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
        ram_addr_bgra += size_2lines_bgra;
        spu_mfcdma32(bgra + size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
        ram_addr_bgra += size_2lines_bgra;

        // Move 4 lines
        ram_addr_y += size_4lines_y;
        ram_addr_v += size_2lines_vu;
        ram_addr_u += size_2lines_vu;
        buf_idx ^= 1;
    }

    // Wait for the last storing transfer to be completed
    DMA_WAIT_TAG(STR_BUF);
}
/* Some vectors needed by the yuv 2 rgb conversion algorithm */
/* Offset added to the U and V values after their conversion to float */
const vector float vec_minus_128 = { -128.0f, -128.0f, -128.0f, -128.0f };
/* All-zero vector; first operand of the shuffles below, it supplies the
 * zero bytes */
const vector unsigned char vec_null = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
/* Shuffle patterns that widen four consecutive bytes of the second operand
 * (0x10-0x13, 0x14-0x17, 0x18-0x1B, 0x1C-0x1F) to four 32-bit words; the
 * three upper bytes of every word come from byte 0 of the first operand */
const vector unsigned char vec_char2int_first = { 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x13 };
const vector unsigned char vec_char2int_second = { 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x17 };
const vector unsigned char vec_char2int_third = { 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x1B };
const vector unsigned char vec_char2int_fourth = { 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1F };
/* Conversion coefficients, the same values as in the scalar
 * yuv_to_rgb_w2_line(): R uses 1.403 * V, G uses -0.344 * U - 0.714 * V and
 * B uses 1.773 * U (U and V after subtracting 128) */
const vector float vec_R_precalc_coeff = {1.403f, 1.403f, 1.403f, 1.403f};
const vector float vec_Gu_precalc_coeff = {-0.344f, -0.344f, -0.344f, -0.344f};
const vector float vec_Gv_precalc_coeff = {-0.714f, -0.714f, -0.714f, -0.714f};
const vector float vec_B_precalc_coeff = {1.773f, 1.773f, 1.773f, 1.773f};
/* 255 in the most significant byte of every pixel word; the shift is done on
 * an unsigned constant because 255 << 24 does not fit into a signed int */
const vector unsigned int vec_alpha = { 255u << 24, 255u << 24, 255u << 24, 255u << 24 };
/* Byte patterns that repeat words 0,0,1,1 (upper) and words 2,2,3,3 (lower)
 * of a vector. NOTE(review): their use is outside the code reviewed here;
 * presumably each chroma-derived float is duplicated for two neighbouring
 * pixels - confirm. */
const vector unsigned char vec_select_floats_upper = { 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07 };
const vector unsigned char vec_select_floats_lower = { 0x08, 0x09, 0x0A, 0x0B, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x0C, 0x0D, 0x0E, 0x0F };
#ifdef TESTING
/*
 * yuv_to_rgb_w2_line()
 *
 * Scalar reference converter (TESTING builds only).
 * - converts two lines of YUV to BGRA, 2x2 pixels per step
 * - every U and V value is shared by four Y values (two in the current
 *   line, two in the line below)
 * - width has to be a multiple of 2
 *
 * Every output pixel is one 32 bit word built as
 * B | G << 8 | R << 16 | 255 << 24.
 *
 * @param y_addr address of the y plane (local store), two lines of width bytes
 * @param v_addr address of the v plane (local store), width / 2 bytes are read
 * @param u_addr address of the u plane (local store), width / 2 bytes are read
 * @param bgra_addr_char address of the bgra output buffer (local store),
 *                       two lines of width pixels are written
 * @param width the width of a line in pixel
 */
void yuv_to_rgb_w2_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_char, unsigned int width) {
    // each pixel is stored as an integer
    unsigned int* bgra_addr = (unsigned int*) bgra_addr_char;
    unsigned int x;

    // Go through each line in steps of 2, because every U and V value is connected to 4 pixels Y (YUV 4:2:0)
    for (x = 0; x < width; x += 2) {
        // Positions of the four pixels of this 2x2 block, valid for the
        // Y input as well as for the pixel output
        const unsigned int pixel_pos[4] = { x, x + 1, x + width, x + width + 1 };

        // The chroma contribution is the same for all four pixels
        const float V_minus_128 = (float) v_addr[x >> 1] - 128.0f;
        const float U_minus_128 = (float) u_addr[x >> 1] - 128.0f;
        const float R_precalculate = 1.403f * V_minus_128;
        const float G_precalculate = -(0.344f * U_minus_128 + 0.714f * V_minus_128);
        const float B_precalculate = 1.773f * U_minus_128;

        unsigned int k;
        for (k = 0; k < 4; k++) {
            const float Y = (float) y_addr[pixel_pos[k]];
            // Clamp and cast the results
            const unsigned int R = float_to_char(Y + R_precalculate);
            const unsigned int G = float_to_char(Y + G_precalculate);
            const unsigned int B = float_to_char(Y + B_precalculate);
            // Write back; unsigned arithmetic keeps the alpha shift in range
            bgra_addr[pixel_pos[k]] = B | (G << 8) | (R << 16) | (255u << 24);
        }
    }
}
#endif
/*
* yuv_to_rgb_w32()
*
* processes to line of yuv-input, width has to be a multiple of 32
* two lines of yuv are taken as input
*
* @param y_addr address of the y plane in local store
* @param v_addr address of the v plane in local store
* @param u_addr address of the u plane in local store
* @param bgra_addr_ address of the bgra output buffer
* @param width the width in pixel
*/
void yuv_to_rgb_w32_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width) {
// each pixel is stored as an integer
unsigned int* bgra_addr = (unsigned int*) bgra_addr_;
unsigned int x;
for(x = 0; x < width; x+=32) {
// Gehe zweischrittig durch die zeile, da jeder u und v wert fuer 4 pixel(zwei hoch, zwei breit) gilt
const vector unsigned char vchar_Y_1 = *((vector unsigned char*)(y_addr + x));
const vector unsigned char vchar_Y_2 = *((vector unsigned char*)(y_addr + x + 16));
const vector unsigned char vchar_Y_3 = *((vector unsigned char*)(y_addr + x + width));
const vector unsigned char vchar_Y_4 = *((vector unsigned char*)(y_addr + x + width + 16));
const vector unsigned char vchar_U = *((vector unsigned char*)(u_addr + (x >> 1)));
const vector unsigned char vchar_V = *((vector unsigned char*)(v_addr + (x >> 1)));
const vector float vfloat_U_1 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_first), 0),vec_minus_128);
const vector float vfloat_U_2 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_second), 0),vec_minus_128);
const vector float vfloat_U_3 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_third), 0),vec_minus_128);
const vector float vfloat_U_4 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_fourth), 0),vec_minus_128);
const vector float vfloat_V_1 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_first), 0),vec_minus_128);
const vector float vfloat_V_2 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_second), 0),vec_minus_128);
const vector float vfloat_V_3 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_third), 0),vec_minus_128);
const vector float vfloat_V_4 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_fourth), 0),vec_minus_128);
vector float Y_1 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_first), 0);
vector float Y_2 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_second), 0);
vector float Y_3 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_third), 0);
vector float Y_4 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_fourth), 0);
vector float Y_5 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_first), 0);
vector float Y_6 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_second), 0);
vector float Y_7 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_third), 0);
vector float Y_8 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_fourth), 0);
vector float Y_9 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_first), 0);
vector float Y_10 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_second), 0);
vector float Y_11 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_third), 0);
vector float Y_12 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_fourth), 0);
vector float Y_13 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_first), 0);
vector float Y_14 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_second), 0);
vector float Y_15 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_third), 0);
vector float Y_16 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_fourth), 0);
const vector float R1a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_1);
const vector float R2a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_2);
const vector float R3a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_3);
const vector float R4a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_4);
const vector float R1_precalculate = spu_shuffle(R1a_precalculate, R1a_precalculate, vec_select_floats_upper);
const vector float R2_precalculate = spu_shuffle(R1a_precalculate, R1a_precalculate, vec_select_floats_lower);
const vector float R3_precalculate = spu_shuffle(R2a_precalculate, R2a_precalculate, vec_select_floats_upper);
const vector float R4_precalculate = spu_shuffle(R2a_precalculate, R2a_precalculate, vec_select_floats_lower);
const vector float R5_precalculate = spu_shuffle(R3a_precalculate, R3a_precalculate, vec_select_floats_upper);
const vector float R6_precalculate = spu_shuffle(R3a_precalculate, R3a_precalculate, vec_select_floats_lower);
const vector float R7_precalculate = spu_shuffle(R4a_precalculate, R4a_precalculate, vec_select_floats_upper);
const vector float R8_precalculate = spu_shuffle(R4a_precalculate, R4a_precalculate, vec_select_floats_lower);
const vector float G1a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_1, spu_mul(vfloat_V_1, vec_Gv_precalc_coeff));
const vector float G2a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_2, spu_mul(vfloat_V_2, vec_Gv_precalc_coeff));
const vector float G3a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_3, spu_mul(vfloat_V_3, vec_Gv_precalc_coeff));
const vector float G4a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_4, spu_mul(vfloat_V_4, vec_Gv_precalc_coeff));
const vector float G1_precalculate = spu_shuffle(G1a_precalculate, G1a_precalculate, vec_select_floats_upper);
const vector float G2_precalculate = spu_shuffle(G1a_precalculate, G1a_precalculate, vec_select_floats_lower);
const vector float G3_precalculate = spu_shuffle(G2a_precalculate, G2a_precalculate, vec_select_floats_upper);
const vector float G4_precalculate = spu_shuffle(G2a_precalculate, G2a_precalculate, vec_select_floats_lower);
const vector float G5_precalculate = spu_shuffle(G3a_precalculate, G3a_precalculate, vec_select_floats_upper);
const vector float G6_precalculate = spu_shuffle(G3a_precalculate, G3a_precalculate, vec_select_floats_lower);
const vector float G7_precalculate = spu_shuffle(G4a_precalculate, G4a_precalculate, vec_select_floats_upper);
const vector float G8_precalculate = spu_shuffle(G4a_precalculate, G4a_precalculate, vec_select_floats_lower);
const vector float B1a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_1);
const vector float B2a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_2);
const vector float B3a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_3);
const vector float B4a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_4);
const vector float B1_precalculate = spu_shuffle(B1a_precalculate, B1a_precalculate, vec_select_floats_upper);
const vector float B2_precalculate = spu_shuffle(B1a_precalculate, B1a_precalculate, vec_select_floats_lower);
const vector float B3_precalculate = spu_shuffle(B2a_precalculate, B2a_precalculate, vec_select_floats_upper);
const vector float B4_precalculate = spu_shuffle(B2a_precalculate, B2a_precalculate, vec_select_floats_lower);
const vector float B5_precalculate = spu_shuffle(B3a_precalculate, B3a_precalculate, vec_select_floats_upper);
const vector float B6_precalculate = spu_shuffle(B3a_precalculate, B3a_precalculate, vec_select_floats_lower);
const vector float B7_precalculate = spu_shuffle(B4a_precalculate, B4a_precalculate, vec_select_floats_upper);
const vector float B8_precalculate = spu_shuffle(B4a_precalculate, B4a_precalculate, vec_select_floats_lower);
const vector unsigned int R_1 = vfloat_to_vuint(spu_add( Y_1, R1_precalculate));
const vector unsigned int R_2 = vfloat_to_vuint(spu_add( Y_2, R2_precalculate));
const vector unsigned int R_3 = vfloat_to_vuint(spu_add( Y_3, R3_precalculate));
const vector unsigned int R_4 = vfloat_to_vuint(spu_add( Y_4, R4_precalculate));
const vector unsigned int R_5 = vfloat_to_vuint(spu_add( Y_5, R5_precalculate));
const vector unsigned int R_6 = vfloat_to_vuint(spu_add( Y_6, R6_precalculate));
const vector unsigned int R_7 = vfloat_to_vuint(spu_add( Y_7, R7_precalculate));
const vector unsigned int R_8 = vfloat_to_vuint(spu_add( Y_8, R8_precalculate));
const vector unsigned int R_9 = vfloat_to_vuint(spu_add( Y_9, R1_precalculate));
const vector unsigned int R_10 = vfloat_to_vuint(spu_add(Y_10, R2_precalculate));
const vector unsigned int R_11 = vfloat_to_vuint(spu_add(Y_11, R3_precalculate));
const vector unsigned int R_12 = vfloat_to_vuint(spu_add(Y_12, R4_precalculate));
const vector unsigned int R_13 = vfloat_to_vuint(spu_add(Y_13, R5_precalculate));
const vector unsigned int R_14 = vfloat_to_vuint(spu_add(Y_14, R6_precalculate));
const vector unsigned int R_15 = vfloat_to_vuint(spu_add(Y_15, R7_precalculate));
const vector unsigned int R_16 = vfloat_to_vuint(spu_add(Y_16, R8_precalculate));
const vector unsigned int G_1 = vfloat_to_vuint(spu_add( Y_1, G1_precalculate));
const vector unsigned int G_2 = vfloat_to_vuint(spu_add( Y_2, G2_precalculate));
const vector unsigned int G_3 = vfloat_to_vuint(spu_add( Y_3, G3_precalculate));
const vector unsigned int G_4 = vfloat_to_vuint(spu_add( Y_4, G4_precalculate));
const vector unsigned int G_5 = vfloat_to_vuint(spu_add( Y_5, G5_precalculate));
const vector unsigned int G_6 = vfloat_to_vuint(spu_add( Y_6, G6_precalculate));
const vector unsigned int G_7 = vfloat_to_vuint(spu_add( Y_7, G7_precalculate));
const vector unsigned int G_8 = vfloat_to_vuint(spu_add( Y_8, G8_precalculate));
const vector unsigned int G_9 = vfloat_to_vuint(spu_add( Y_9, G1_precalculate));
const vector unsigned int G_10 = vfloat_to_vuint(spu_add(Y_10, G2_precalculate));
const vector unsigned int G_11 = vfloat_to_vuint(spu_add(Y_11, G3_precalculate));
const vector unsigned int G_12 = vfloat_to_vuint(spu_add(Y_12, G4_precalculate));
const vector unsigned int G_13 = vfloat_to_vuint(spu_add(Y_13, G5_precalculate));
const vector unsigned int G_14 = vfloat_to_vuint(spu_add(Y_14, G6_precalculate));
const vector unsigned int G_15 = vfloat_to_vuint(spu_add(Y_15, G7_precalculate));
const vector unsigned int G_16 = vfloat_to_vuint(spu_add(Y_16, G8_precalculate));
const vector unsigned int B_1 = vfloat_to_vuint(spu_add( Y_1, B1_precalculate));
const vector unsigned int B_2 = vfloat_to_vuint(spu_add( Y_2, B2_precalculate));
const vector unsigned int B_3 = vfloat_to_vuint(spu_add( Y_3, B3_precalculate));
const vector unsigned int B_4 = vfloat_to_vuint(spu_add( Y_4, B4_precalculate));
const vector unsigned int B_5 = vfloat_to_vuint(spu_add( Y_5, B5_precalculate));
const vector unsigned int B_6 = vfloat_to_vuint(spu_add( Y_6, B6_precalculate));
const vector unsigned int B_7 = vfloat_to_vuint(spu_add( Y_7, B7_precalculate));
const vector unsigned int B_8 = vfloat_to_vuint(spu_add( Y_8, B8_precalculate));
const vector unsigned int B_9 = vfloat_to_vuint(spu_add( Y_9, B1_precalculate));
const vector unsigned int B_10 = vfloat_to_vuint(spu_add(Y_10, B2_precalculate));
const vector unsigned int B_11 = vfloat_to_vuint(spu_add(Y_11, B3_precalculate));
const vector unsigned int B_12 = vfloat_to_vuint(spu_add(Y_12, B4_precalculate));
const vector unsigned int B_13 = vfloat_to_vuint(spu_add(Y_13, B5_precalculate));
const vector unsigned int B_14 = vfloat_to_vuint(spu_add(Y_14, B6_precalculate));
const vector unsigned int B_15 = vfloat_to_vuint(spu_add(Y_15, B7_precalculate));
const vector unsigned int B_16 = vfloat_to_vuint(spu_add(Y_16, B8_precalculate));
*((vector unsigned int*)(bgra_addr + x)) = spu_or(spu_or(vec_alpha, B_1), spu_or(spu_slqwbyte( R_1, 2),spu_slqwbyte(G_1, 1)));
*((vector unsigned int*)(bgra_addr + x + 4)) = spu_or(spu_or(vec_alpha, B_2), spu_or(spu_slqwbyte( R_2, 2),spu_slqwbyte(G_2, 1)));
*((vector unsigned int*)(bgra_addr + x + 8)) = spu_or(spu_or(vec_alpha, B_3), spu_or(spu_slqwbyte( R_3, 2),spu_slqwbyte(G_3, 1)));
*((vector unsigned int*)(bgra_addr + x + 12)) = spu_or(spu_or(vec_alpha, B_4), spu_or(spu_slqwbyte( R_4, 2),spu_slqwbyte(G_4, 1)));
*((vector unsigned int*)(bgra_addr + x + 16)) = spu_or(spu_or(vec_alpha, B_5), spu_or(spu_slqwbyte( R_5, 2),spu_slqwbyte(G_5, 1)));
*((vector unsigned int*)(bgra_addr + x + 20)) = spu_or(spu_or(vec_alpha, B_6), spu_or(spu_slqwbyte( R_6, 2),spu_slqwbyte(G_6, 1)));
*((vector unsigned int*)(bgra_addr + x + 24)) = spu_or(spu_or(vec_alpha, B_7), spu_or(spu_slqwbyte( R_7, 2),spu_slqwbyte(G_7, 1)));
*((vector unsigned int*)(bgra_addr + x + 28)) = spu_or(spu_or(vec_alpha, B_8), spu_or(spu_slqwbyte( R_8, 2),spu_slqwbyte(G_8, 1)));
*((vector unsigned int*)(bgra_addr + x + width)) = spu_or(spu_or(vec_alpha, B_9), spu_or(spu_slqwbyte( R_9, 2),spu_slqwbyte(G_9, 1)));
*((vector unsigned int*)(bgra_addr + x + width + 4)) = spu_or(spu_or(vec_alpha, B_10), spu_or(spu_slqwbyte(R_10, 2),spu_slqwbyte(G_10, 1)));
*((vector unsigned int*)(bgra_addr + x + width + 8)) = spu_or(spu_or(vec_alpha, B_11), spu_or(spu_slqwbyte(R_11, 2),spu_slqwbyte(G_11, 1)));
*((vector unsigned int*)(bgra_addr + x + width + 12)) = spu_or(spu_or(vec_alpha, B_12), spu_or(spu_slqwbyte(R_12, 2),spu_slqwbyte(G_12, 1)));
*((vector unsigned int*)(bgra_addr + x + width + 16)) = spu_or(spu_or(vec_alpha, B_13), spu_or(spu_slqwbyte(R_13, 2),spu_slqwbyte(G_13, 1)));
*((vector unsigned int*)(bgra_addr + x + width + 20)) = spu_or(spu_or(vec_alpha, B_14), spu_or(spu_slqwbyte(R_14, 2),spu_slqwbyte(G_14, 1)));
*((vector unsigned int*)(bgra_addr + x + width + 24)) = spu_or(spu_or(vec_alpha, B_15), spu_or(spu_slqwbyte(R_15, 2),spu_slqwbyte(G_15, 1)));
*((vector unsigned int*)(bgra_addr + x + width + 28)) = spu_or(spu_or(vec_alpha, B_16), spu_or(spu_slqwbyte(R_16, 2),spu_slqwbyte(G_16, 1)));
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment