dnl
AC_ARG_ENABLE(maemo,
[ --enable-maemo Internet tablets based on Maemo SDK (default disabled)])
-if test "${enable_maemo}" != "no"
+if test "${enable_maemo}" = "yes"
then
PKG_CHECK_MODULES(HILDON, [hildon-1 hildon-fm-2], [
VLC_ADD_CFLAGS([maemo],[${HILDON_CFLAGS}])
VLC_ADD_LIBS([maemo],[${HILDON_LIBS}])
VLC_ADD_PLUGIN([maemo])
+ VLC_ADD_PLUGIN([swscale_maemo])
AC_DEFINE([BUILD_MAEMO], 1, [Define if you're using Maemo interfaces])
ALIASES="${ALIASES} mvlc"
], [
SOURCES_postproc = postproc.c
SOURCES_swscale = swscale.c ../codec/avcodec/chroma.c
SOURCES_imgresample = imgresample.c ../codec/avcodec/chroma.c
+SOURCES_swscale_maemo = swscale_maemo.c libswscale_nokia770/arm_jit_swscale.c libswscale_nokia770/arm_colorconv.S libswscale_nokia770/arm_jit_swscale.h libswscale_nokia770/arm_colorconv.h
SOURCES_scene = scene.c
SOURCES_yuvp = yuvp.c
noinst_HEADERS = filter_common.h filter_picture.h
--- /dev/null
+/*
+ * ARM assembly optimized color format conversion functions
+ * (YV12 -> YUY2, YV12 -> some custom YUV420 format used by
+ * Epson graphics chip in Nokia N800)
+ *
+ * Copyright (C) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+ .text
+
+/*******************************************************/
+
+/*
+ * yv12_to_yuy2_line_arm
+ *
+ * Interleaves one line of separate Y, U and V bytes into 32-bit words built
+ * as Y0 | (U << 8) | (Y1 << 16) | (V << 24), i.e. two pixels per stored word.
+ *
+ * In: r0 = destination, r1 = Y source, r2 = U source, r3 = V source,
+ *     word at [sp] = number of pixels (its lowest bit is cleared below).
+ */
+ .align
+ .global yv12_to_yuy2_line_arm
+ .func yv12_to_yuy2_line_arm
+yv12_to_yuy2_line_arm:
+
+#define DST r0
+#define SRC_Y r1
+#define SRC_U r2
+#define SRC_V r3
+#define WIDTH ip
+
+ /* Fetch the pixel count from the stack before the register save moves sp */
+ ldr ip, [sp], #0
+ stmfd sp!, {r4-r8, r10, lr}
+
+#define TMP1 r8
+#define TMP2 r10
+#define TMP3 lr
+
+ /* Make the pixel count even */
+ bic WIDTH, #1
+
+ /* Main loop: 8 pixels (four 32-bit words) per iteration */
+ subs WIDTH, #8
+ blt 2f
+1:
+ ldrb r4, [SRC_Y], #1
+ ldrb TMP1, [SRC_U], #1
+ ldrb TMP2, [SRC_Y], #1
+ ldrb TMP3, [SRC_V], #1
+ add r4, r4, TMP1, lsl #8
+ add r4, r4, TMP2, lsl #16
+ add r4, r4, TMP3, lsl #24
+
+ ldrb r5, [SRC_Y], #1
+ ldrb TMP1, [SRC_U], #1
+ ldrb TMP2, [SRC_Y], #1
+ ldrb TMP3, [SRC_V], #1
+ add r5, r5, TMP1, lsl #8
+ add r5, r5, TMP2, lsl #16
+ add r5, r5, TMP3, lsl #24
+
+ ldrb r6, [SRC_Y], #1
+ ldrb TMP1, [SRC_U], #1
+ ldrb TMP2, [SRC_Y], #1
+ ldrb TMP3, [SRC_V], #1
+ add r6, r6, TMP1, lsl #8
+ add r6, r6, TMP2, lsl #16
+ add r6, r6, TMP3, lsl #24
+
+ ldrb r7, [SRC_Y], #1
+ ldrb TMP1, [SRC_U], #1
+ ldrb TMP2, [SRC_Y], #1
+ ldrb TMP3, [SRC_V], #1
+ add r7, r7, TMP1, lsl #8
+ add r7, r7, TMP2, lsl #16
+ add r7, r7, TMP3, lsl #24
+
+ stmia DST!, {r4-r7}
+ subs WIDTH, WIDTH, #8
+ bge 1b
+2:
+ /* Tail: undo the last subtraction, then convert 2 pixels (one word) per iteration */
+ adds WIDTH, WIDTH, #8
+ ble 4f
+3:
+ ldrb r4, [SRC_Y], #1
+ ldrb TMP1, [SRC_U], #1
+ ldrb TMP2, [SRC_Y], #1
+ ldrb TMP3, [SRC_V], #1
+ add r4, r4, TMP1, lsl #8
+ add r4, r4, TMP2, lsl #16
+ add r4, r4, TMP3, lsl #24
+ str r4, [DST], #4
+ subs WIDTH, WIDTH, #2
+ bgt 3b
+4:
+ /* Restore the saved registers; loading pc performs the return */
+ ldmfd sp!, {r4-r8, r10, pc}
+
+#undef DST
+#undef SRC_Y
+#undef SRC_U
+#undef SRC_V
+#undef WIDTH
+#undef TMP1
+#undef TMP2
+#undef TMP3
+
+ .endfunc
+
+/*******************************************************/
+
+/* Register aliases used by the YV12 -> YUV420 line conversion template */
+#define DST r0
+#define SRC_Y r1
+#define SRC_U r2
+#define WIDTH r3
+#define TMP1 r10
+#define TMP2 r11
+#define TMP3 lr
+
+/*
+ * Template that emits one YV12 -> YUV420 line conversion function.
+ *   function_name - global symbol to define
+ *   USE_PLD       - non-zero: emit pld cache preload instructions
+ *                   (the ARMv6 macro variant emits its pld on PLD_FLAG alone)
+ *   USE_ARMV6     - non-zero: use the ldrh/rev16 based helper macros
+ * The emitted function uses DST, SRC_Y, SRC_U and WIDTH as its inputs and
+ * saves/restores r4-r8, r10-r11 and lr around the conversion.
+ */
+.macro YUV420_function_template function_name, USE_PLD, USE_ARMV6
+
+ .align
+ .global \function_name
+ .func \function_name
+\function_name:
+
+/* Read information about 4 pixels, convert them to YUV420 and store into 6 bytes using 16-bit writes */
+/* The three halfwords hold Y0|C0<<8, C1|Y1<<8 and Y3|Y2<<8 (C = byte from SRC_U) */
+.macro CONVERT_4_PIXELS_MACROBLOCK
+ ldrb r4, [SRC_Y], #1
+ ldrb TMP1, [SRC_U], #1
+ ldrb r5, [SRC_U], #1
+ ldrb TMP2, [SRC_Y], #1
+ ldrb r6, [SRC_Y, #1]
+ ldrb TMP3, [SRC_Y], #2
+ add r4, r4, TMP1, lsl #8
+ add r5, r5, TMP2, lsl #8
+ add r6, r6, TMP3, lsl #8
+ strh r4, [DST], #2
+ strh r5, [DST], #2
+ strh r6, [DST], #2
+.endm
+
+.if \USE_ARMV6
+
+/*
+ * ARMv6 variants. With FLAG1 == 0 a macro loads its own inputs into
+ * DST_REG1/TMP1/TMP2. With FLAG2 == 1 it additionally loads the inputs of the
+ * following macro invocation into DST_REG2/TMP1/TMP2, so callers chain them
+ * with the next DST_REG1 equal to the previous DST_REG2.
+ */
+.macro CONVERT_8_PIXELS_MACROBLOCK_1 DST_REG1, DST_REG2, FLAG1, FLAG2, PLD_FLAG
+.if \FLAG1 == 0
+ ldrb \DST_REG1, [SRC_U], #1
+ ldrh TMP1, [SRC_Y], #2
+ ldrb TMP2, [SRC_U], #1
+.endif
+.if \FLAG2 == 1
+ ldrh \DST_REG2, [SRC_Y], #2
+.endif
+.if \PLD_FLAG == 1
+ pld [SRC_Y, #48]
+.endif
+ add \DST_REG1, \DST_REG1, TMP1, lsl #8
+ add \DST_REG1, \DST_REG1, TMP2, lsl #24
+.if \FLAG2 == 1
+ ldrb TMP1, [SRC_U], #1
+ ldrb TMP2, [SRC_Y], #1
+.endif
+ rev16 \DST_REG1, \DST_REG1
+.endm
+
+.macro CONVERT_8_PIXELS_MACROBLOCK_2 DST_REG1, DST_REG2, FLAG1, FLAG2, DUMMY1
+.if \FLAG1 == 0
+ ldrh \DST_REG1, [SRC_Y], #2
+ ldrb TMP1, [SRC_U], #1
+ ldrb TMP2, [SRC_Y], #1
+.endif
+.if \FLAG2 == 1
+ ldrb \DST_REG2, [SRC_Y], #1
+.endif
+ add \DST_REG1, \DST_REG1, TMP1, lsl #16
+ add \DST_REG1, \DST_REG1, TMP2, lsl #24
+.if \FLAG2 == 1
+ ldrb TMP1, [SRC_U], #1
+ ldrh TMP2, [SRC_Y], #2
+.endif
+ rev16 \DST_REG1, \DST_REG1
+.endm
+
+.macro CONVERT_8_PIXELS_MACROBLOCK_3 DST_REG1, DST_REG2, FLAG1, FLAG2, DUMMY1
+.if \FLAG1 == 0
+ ldrb \DST_REG1, [SRC_Y], #1
+ ldrb TMP1, [SRC_U], #1
+ ldrh TMP2, [SRC_Y], #2
+.endif
+.if \FLAG2 == 1
+ ldrb \DST_REG2, [SRC_U], #1
+.endif
+ add \DST_REG1, \DST_REG1, TMP1, lsl #8
+ add \DST_REG1, \DST_REG1, TMP2, lsl #16
+.if \FLAG2 == 1
+ ldrh TMP1, [SRC_Y], #2
+ ldrb TMP2, [SRC_U], #1
+.endif
+ rev16 \DST_REG1, \DST_REG1
+.endm
+
+.else
+
+/* Byte-load variants: every macro loads its own four source bytes, the extra arguments are ignored */
+
+/* Prepare the first 32-bit output value for 8 pixels macroblock */
+.macro CONVERT_8_PIXELS_MACROBLOCK_1 DST_REG, DUMMY1, DUMMY2, DUMMY3, PLD_FLAG
+ ldrb \DST_REG, [SRC_Y], #1
+ ldrb TMP1, [SRC_U], #1
+ ldrb TMP2, [SRC_U], #1
+ ldrb TMP3, [SRC_Y], #1
+.if \USE_PLD && (\PLD_FLAG == 1)
+ pld [SRC_Y, #48]
+.endif
+ add \DST_REG, \DST_REG, TMP1, lsl #8
+ add \DST_REG, \DST_REG, TMP2, lsl #16
+ add \DST_REG, \DST_REG, TMP3, lsl #24
+.endm
+
+/* Prepare the second 32-bit output value for 8 pixels macroblock */
+.macro CONVERT_8_PIXELS_MACROBLOCK_2 DST_REG, DUMMY1, DUMMY2, DUMMY3, DUMMY4
+ ldrb \DST_REG, [SRC_Y, #1]
+ ldrb TMP1, [SRC_Y], #2
+ ldrb TMP2, [SRC_Y], #1
+ ldrb TMP3, [SRC_U], #1
+ add \DST_REG, \DST_REG, TMP1, lsl #8
+ add \DST_REG, \DST_REG, TMP2, lsl #16
+ add \DST_REG, \DST_REG, TMP3, lsl #24
+.endm
+
+/* Prepare the third 32-bit output value for 8 pixels macroblock */
+.macro CONVERT_8_PIXELS_MACROBLOCK_3 DST_REG, DUMMY1, DUMMY2, DUMMY3, DUMMY4
+ ldrb \DST_REG, [SRC_U], #1
+ ldrb TMP1, [SRC_Y], #1
+ ldrb TMP2, [SRC_Y, #1]
+ ldrb TMP3, [SRC_Y], #2
+ add \DST_REG, \DST_REG, TMP1, lsl #8
+ add \DST_REG, \DST_REG, TMP2, lsl #16
+ add \DST_REG, \DST_REG, TMP3, lsl #24
+.endm
+
+.endif
+
+.if \USE_PLD
+ pld [SRC_Y]
+.endif
+ stmfd sp!, {r4-r8, r10-r11, lr}
+
+ /* Destination buffer should be at least 16-bit aligned, image width should be multiple of 4 */
+ bic DST, #1
+ bic WIDTH, #3
+
+ /* Ensure 32-bit alignment of the destination buffer */
+ tst DST, #2
+ beq 1f
+ subs WIDTH, #4
+ blt 6f
+ CONVERT_4_PIXELS_MACROBLOCK
+1:
+ subs WIDTH, #32
+ blt 3f
+2: /* Convert 32 pixels per loop iteration */
+ CONVERT_8_PIXELS_MACROBLOCK_1 r4, r6, 0, 1, 1 /* Also do cache preload for SRC_Y */
+ CONVERT_8_PIXELS_MACROBLOCK_2 r6, r7, 1, 1, 0
+ CONVERT_8_PIXELS_MACROBLOCK_3 r7, r8, 1, 1, 0
+ CONVERT_8_PIXELS_MACROBLOCK_1 r8, r5, 1, 1, 0
+ stmia DST!, {r4, r6, r7, r8}
+
+ /* Counter update placed mid-loop; nothing below changes the flags before the bge */
+ subs WIDTH, #32
+
+ CONVERT_8_PIXELS_MACROBLOCK_2 r5, r6, 1, 1, 0
+ CONVERT_8_PIXELS_MACROBLOCK_3 r6, r7, 1, 1, 0
+ CONVERT_8_PIXELS_MACROBLOCK_1 r7, r8, 1, 1, 0
+ CONVERT_8_PIXELS_MACROBLOCK_2 r8, r4, 1, 1, 0
+ stmia DST!, {r5, r6, r7, r8}
+.if \USE_PLD
+ /* Do cache preload for SRC_U */
+ pld [SRC_U, #48]
+.endif
+ CONVERT_8_PIXELS_MACROBLOCK_3 r4, r6, 1, 1, 0
+ CONVERT_8_PIXELS_MACROBLOCK_1 r6, r7, 1, 1, 0
+ CONVERT_8_PIXELS_MACROBLOCK_2 r7, r8, 1, 1, 0
+ CONVERT_8_PIXELS_MACROBLOCK_3 r8, r4, 1, 0, 0
+ stmia DST!, {r4, r6, r7, r8}
+
+ bge 2b
+3:
+ /* Undo the last subtraction to get the number of pixels still to convert */
+ adds WIDTH, WIDTH, #32
+ ble 6f
+
+ subs WIDTH, WIDTH, #8
+ blt 5f
+4: /* Convert remaining pixels processing them 8 per iteration */
+ CONVERT_8_PIXELS_MACROBLOCK_1 r4, r5, 0, 1, 0
+ CONVERT_8_PIXELS_MACROBLOCK_2 r5, r6, 1, 1, 0
+ CONVERT_8_PIXELS_MACROBLOCK_3 r6, r7, 1, 0, 0
+ stmia DST!, {r4-r6}
+ subs WIDTH, WIDTH, #8
+ bge 4b
+5: /* Convert the last 4 pixels if needed */
+ adds WIDTH, WIDTH, #8
+ ble 6f
+ CONVERT_4_PIXELS_MACROBLOCK
+ subs WIDTH, #4
+ bgt 4b
+6: /* Restore all registers and return */
+ ldmfd sp!, {r4-r8, r10-r11, pc}
+
+/* Drop the helper macros so the next expansion of this template can define them again */
+.purgem CONVERT_4_PIXELS_MACROBLOCK
+.purgem CONVERT_8_PIXELS_MACROBLOCK_1
+.purgem CONVERT_8_PIXELS_MACROBLOCK_2
+.purgem CONVERT_8_PIXELS_MACROBLOCK_3
+
+#undef DST
+#undef SRC_Y
+#undef SRC_U
+#undef WIDTH
+#undef TMP1
+#undef TMP2
+#undef TMP3
+
+ .endfunc
+
+.endm
+
+/* Emit three variants: no pld and byte loads, pld with byte loads, pld with the ARMv6 macros */
+YUV420_function_template yv12_to_yuv420_line_arm, 0, 0
+YUV420_function_template yv12_to_yuv420_line_armv5, 1, 0
+YUV420_function_template yv12_to_yuv420_line_armv6, 1, 1
--- /dev/null
+/*
+ * ARM assembly optimized color format conversion functions
+ * (YV12 -> YUY2, YV12 -> some custom YUV420 format used by
+ * Epson graphics chip in Nokia N800)
+ *
+ * Copyright (C) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef __ARM_COLORCONV_H__
+#define __ARM_COLORCONV_H__
+
+#include <stdint.h>
+
+/**
+ * Convert a line of pixels from YV12 to YUY2 color format.
+ * Each pair of pixels is written as one 32-bit word.
+ * @param dst - destination buffer for YUY2 pixel data, it should be 32-bit aligned
+ * @param src_y - pointer to Y plane
+ *                NOTE(review): declared as a uint16_t pointer although the
+ *                assembly implementation reads it with byte loads - confirm
+ *                the intended type
+ * @param src_u - pointer to U plane
+ * @param src_v - pointer to V plane
+ * @param w - number of pixels to convert (should be multiple of 2)
+ */
+void yv12_to_yuy2_line_arm(uint32_t *dst, const uint16_t *src_y, const uint8_t *src_u, const uint8_t *src_v, int w);
+
+/**
+ * Convert a line of pixels from YV12 to YUV420 color format
+ * (variant built from byte loads, without cache preload instructions)
+ * @param dst - destination buffer for YUV420 pixel data, it should be at least 16-bit aligned
+ * @param src_y - pointer to Y plane
+ * @param src_c - pointer to chroma plane (U for even lines, V for odd lines)
+ * @param w - number of pixels to convert (should be multiple of 4)
+ */
+void yv12_to_yuv420_line_arm(uint16_t *dst, const uint8_t *src_y, const uint8_t *src_c, int w);
+
+/**
+ * Convert a line of pixels from YV12 to YUV420 color format
+ * (variant built from byte loads, with pld cache preload instructions)
+ * @param dst - destination buffer for YUV420 pixel data, it should be at least 16-bit aligned
+ * @param src_y - pointer to Y plane
+ * @param src_c - pointer to chroma plane (U for even lines, V for odd lines)
+ * @param w - number of pixels to convert (should be multiple of 4)
+ */
+void yv12_to_yuv420_line_armv5(uint16_t *dst, const uint8_t *src_y, const uint8_t *src_c, int w);
+
+/**
+ * Convert a line of pixels from YV12 to YUV420 color format
+ * (variant using halfword loads and rev16, with pld cache preload instructions)
+ * @param dst - destination buffer for YUV420 pixel data, it should be at least 16-bit aligned
+ * @param src_y - pointer to Y plane, it should be 16-bit aligned
+ * @param src_c - pointer to chroma plane (U for even lines, V for odd lines)
+ * @param w - number of pixels to convert (should be multiple of 4)
+ */
+void yv12_to_yuv420_line_armv6(uint16_t *dst, const uint16_t *src_y, const uint8_t *src_c, int w);
+
+#endif
--- /dev/null
+/*
+ * Fast JIT powered scaler for ARM
+ *
+ * Copyright (C) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <stdio.h>
+#include <math.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#include "arm_jit_swscale.h"
+#include "arm_colorconv.h"
+
+/* Size of cpu instructions cache, we should never exceed it in generated code */
+/* This is also the size in bytes of the buffer mapped for the generated code */
+#define INSTRUCTIONS_CACHE_SIZE 32768
+
+/* Supported output formats */
+#define FMT_OMAPFB_YUV422 1
+#define FMT_OMAPFB_YUV420 2
+
+/* Called on the code buffer after generation; presumably the toolchain
+ * runtime's instruction cache flush - confirm for the target compiler */
+extern void __clear_cache (char *beg, char *end);
+
+/*
+ * API is similar to API from ffmpeg libswscale
+ *
+ * One context describes a fixed source size -> target size conversion.
+ */
+typedef struct SwsContextArmJit {
+ int fmt; /* output format, one of the FMT_OMAPFB_* values */
+ int source_w; /* source width in pixels */
+ int source_h; /* source height in pixels */
+ int target_w; /* target width in pixels */
+ int target_h; /* target height in pixels */
+ uint32_t *codebuffer; /* mmap-ed buffer holding the generated line scaler code */
+ int *linebuffer; /* for every target line, the index of the source line to read */
+ int armv6_is_supported; /* non-zero: use the ARMv6 line converter when no horizontal scaling is needed */
+} SwsContextArmJit;
+
+
+//#define JIT_DEBUG
+
+/*
+ * Interpolation kinds reported by get_pix(). The code generator only handles
+ * COPY_FIRST (use one source pixel) and AVERAGE_2_2 (mean of two pixels).
+ */
+#define INTERPOLATE_COPY_FIRST 0
+#define INTERPOLATE_AVERAGE_1_3 1
+#define INTERPOLATE_AVERAGE_2_2 2
+#define INTERPOLATE_AVERAGE_3_1 3
+
+/**
+ * Get two nearest pixels from the source image
+ *
+ * Maps the centre of destination pixel x back into the source line and
+ * reports which source pixel(s) should be used for it.
+ *
+ * @param quality - with quality >= 2 a destination pixel lying between two
+ *                  source pixels is reported as the average of both
+ * @param orig_w  - number of pixels in the source line
+ * @param dest_w  - number of pixels in the destination line
+ * @param x       - destination pixel index
+ * @param p1      - receives the index of the first source pixel
+ * @param p2      - receives the index of the second source pixel
+ *                  (equal to *p1 unless INTERPOLATE_AVERAGE_2_2 is returned)
+ * @return INTERPOLATE_COPY_FIRST or INTERPOLATE_AVERAGE_2_2
+ *
+ * @todo get rid of the floating point math
+ */
+static inline int get_pix(int quality, int orig_w, int dest_w, int x, int *p1, int *p2)
+{
+    double offs, dist;
+    int pix1, pix2;
+
+    /* An empty line (e.g. a 1 pixel wide image divided by the chroma
+     * subsampling factor) would divide by zero below: use pixel 0 instead */
+    if (orig_w <= 0 || dest_w <= 0) {
+        *p1 = *p2 = 0;
+        return INTERPOLATE_COPY_FIRST;
+    }
+
+    offs = ((double)x + 0.5) / (double)dest_w * (double)orig_w;
+    pix1 = (int)floor(offs - 0.5);
+    pix2 = (int)ceil(offs - 0.5);
+
+    // Special boundary cases
+    if (pix1 < 0) {
+        *p1 = *p2 = 0;
+        return INTERPOLATE_COPY_FIRST;
+    }
+    if (pix2 >= orig_w) {
+        *p1 = *p2 = orig_w - 1;
+        return INTERPOLATE_COPY_FIRST;
+    }
+
+    // Distance from the centre of pix1, in source pixels
+    dist = offs - ((double)pix1 + 0.5);
+
+    // The 1:3 and 3:1 weighted modes are not produced: the code generator
+    // only implements plain copies and 2:2 averages
+    if (quality >= 2 && dist > 0.25 && dist < 0.75) {
+        *p1 = pix1;
+        *p2 = pix2;
+        return INTERPOLATE_AVERAGE_2_2;
+    }
+
+    // Nearest neighbour
+    *p1 = *p2 = (dist < 0.5) ? pix1 : pix2;
+    return INTERPOLATE_COPY_FIRST;
+}
+
+/* Append "ldrb r<dstreg>, [r<basereg>, #<offset>]" to the code buffer and
+ * return the position following it. No range checking is done on the operands. */
+static uint32_t *generate_arm_cmd_ldrb_r_r_offs(uint32_t *cmdbuffer, int dstreg, int basereg, int offset)
+{
+    uint32_t *next = cmdbuffer + 1;
+#ifdef JIT_DEBUG
+    printf("ldrb r%d, [r%d, #%d]\n", dstreg, basereg, offset);
+#endif
+    /* base register in bits 16-19, destination in bits 12-15, offset in the low bits */
+    cmdbuffer[0] = 0xE5D00000 | (basereg << 16) | (dstreg << 12) | (offset);
+    return next;
+}
+
+/* Append "add r<dstreg>, r<r1>, r<r2>, lsl #<r2_shift>" to the code buffer and
+ * return the position following it. No range checking is done on the operands. */
+static uint32_t *generate_arm_cmd_add_r_r_r_lsl(uint32_t *cmdbuffer, int dstreg, int r1, int r2, int r2_shift)
+{
+    uint32_t *next = cmdbuffer + 1;
+#ifdef JIT_DEBUG
+    printf("add r%d, r%d, r%d, lsl #%d\n", dstreg, r1, r2, r2_shift);
+#endif
+    /* first operand in bits 16-19, destination in bits 12-15, shift amount from bit 7, second operand in bits 0-3 */
+    cmdbuffer[0] = 0xE0800000 | (r1 << 16) | (dstreg << 12) | (r2_shift << 7) | (r2);
+    return next;
+}
+
+/* Append "mov r<dstreg>, r<r>, lsr #<shift>" to the code buffer and return the
+ * position following it. No range checking is done on the operands. */
+static uint32_t *generate_arm_cmd_mov_r_r_lsr(uint32_t *cmdbuffer, int dstreg, int r, int shift)
+{
+    uint32_t *next = cmdbuffer + 1;
+#ifdef JIT_DEBUG
+    printf("mov r%d, r%d, lsr #%d\n", dstreg, r, shift);
+#endif
+    /* destination in bits 12-15, shift amount from bit 7, source register in bits 0-3 */
+    cmdbuffer[0] = 0xE1A00020 | (dstreg << 12) | (shift << 7) | (r);
+    return next;
+}
+
+/**
+ * Generation of 32-bit output scaled data
+ *
+ * Emits code that loads up to two source bytes for each of the four output
+ * bytes, combines them into r4 (byte N shifted left by 8 * (N - 1)) and
+ * stores r4 through r3 with post-increment. Between 9 and 20 instructions
+ * are emitted per call.
+ *
+ * @param p - position in the code buffer where the instructions are written
+ * @param quality - scaling quality level
+ * @param orig_w - source line width in pixels
+ * @param dest_w - destination line width in pixels
+ * @param buf1reg - register that holds a pointer to the buffer with data for the first output byte
+ * @param buf2reg - register that holds a pointer to the buffer with data for the second output byte
+ * @param buf3reg - register that holds a pointer to the buffer with data for the third output byte
+ * @param buf4reg - register that holds a pointer to the buffer with data for the fourth output byte
+ * @param sizeN - divisor applied to orig_w, dest_w and offsN for output byte N
+ *                (the callers in this file pass 1 for luma and 2 for chroma)
+ * @param offsN - destination pixel position used for output byte N
+ * @return position in the code buffer after the emitted instructions
+ */
+static uint32_t *generate_32bit_scaled_data_write(
+ uint32_t *p,
+ int quality, int orig_w, int dest_w,
+ int buf1reg, int size1, int offs1,
+ int buf2reg, int size2, int offs2,
+ int buf3reg, int size3, int offs3,
+ int buf4reg, int size4, int offs4)
+{
+ int p1, p2;
+ int type_y1, type_y2, type_u, type_v;
+ // First stage: perform data loading
+ type_y1 = get_pix(quality, orig_w / size1, dest_w / size1, offs1 / size1, &p1, &p2);
+ if (type_y1 == INTERPOLATE_COPY_FIRST) {
+ // Special case, no interpolation is needed, so load this data
+ // directly into destination register
+ p = generate_arm_cmd_ldrb_r_r_offs(p, 4, buf1reg, p1);
+ } else {
+ p = generate_arm_cmd_ldrb_r_r_offs(p, 5, buf1reg, p1);
+ p = generate_arm_cmd_ldrb_r_r_offs(p, 6, buf1reg, p2);
+ }
+ // u
+ type_u = get_pix(quality, orig_w / size2, dest_w / size2, offs2 / size2, &p1, &p2);
+ p = generate_arm_cmd_ldrb_r_r_offs(p, 7, buf2reg, p1);
+ if (type_u != INTERPOLATE_COPY_FIRST) p = generate_arm_cmd_ldrb_r_r_offs(p, 8, buf2reg, p2);
+ // y2
+ type_y2 = get_pix(quality, orig_w / size3, dest_w / size3, offs3 / size3, &p1, &p2);
+ p = generate_arm_cmd_ldrb_r_r_offs(p, 9, buf3reg, p1);
+ if (type_y2 != INTERPOLATE_COPY_FIRST) p = generate_arm_cmd_ldrb_r_r_offs(p, 10, buf3reg, p2);
+ // v
+ type_v = get_pix(quality, orig_w / size4, dest_w / size4, offs4 / size4, &p1, &p2);
+ p = generate_arm_cmd_ldrb_r_r_offs(p, 11, buf4reg, p1);
+ if (type_v != INTERPOLATE_COPY_FIRST) p = generate_arm_cmd_ldrb_r_r_offs(p, 12, buf4reg, p2);
+ // Second stage: perform data shuffling
+ // Averages are computed in r14 as (a + b) >> 1 before being merged into r4
+ if (type_y1 == INTERPOLATE_AVERAGE_2_2) {
+ p = generate_arm_cmd_add_r_r_r_lsl(p, 14, 5, 6, 0);
+ p = generate_arm_cmd_mov_r_r_lsr(p, 4, 14, 1);
+ }
+ if (type_u == INTERPOLATE_COPY_FIRST) {
+ p = generate_arm_cmd_add_r_r_r_lsl(p, 4, 4, 7, 8);
+ } else if (type_u == INTERPOLATE_AVERAGE_2_2) {
+ p = generate_arm_cmd_add_r_r_r_lsl(p, 14, 7, 8, 0);
+ p = generate_arm_cmd_mov_r_r_lsr(p, 14, 14, 1);
+ p = generate_arm_cmd_add_r_r_r_lsl(p, 4, 4, 14, 8);
+ }
+ if (type_y2 == INTERPOLATE_COPY_FIRST) {
+ p = generate_arm_cmd_add_r_r_r_lsl(p, 4, 4, 9, 16);
+ } else if (type_y2 == INTERPOLATE_AVERAGE_2_2) {
+ p = generate_arm_cmd_add_r_r_r_lsl(p, 14, 9, 10, 0);
+ p = generate_arm_cmd_mov_r_r_lsr(p, 14, 14, 1);
+ p = generate_arm_cmd_add_r_r_r_lsl(p, 4, 4, 14, 16);
+ }
+ if (type_v == INTERPOLATE_COPY_FIRST) {
+ p = generate_arm_cmd_add_r_r_r_lsl(p, 4, 4, 11, 24);
+ } else if (type_v == INTERPOLATE_AVERAGE_2_2) {
+ p = generate_arm_cmd_add_r_r_r_lsl(p, 14, 11, 12, 0);
+ p = generate_arm_cmd_mov_r_r_lsr(p, 14, 14, 1);
+ p = generate_arm_cmd_add_r_r_r_lsl(p, 4, 4, 14, 24);
+ }
+ // Third stage: store data and advance output buffer pointer
+ *p++ = 0xE4834004; // str r4, [r3], #4
+ return p;
+}
+
+/**
+ * Generate a function that scales one line and writes it as packed 32-bit
+ * words holding Y, U, Y, V bytes (two destination pixels per word).
+ *
+ * Scaler code should assume:
+ * r0 - y plane
+ * r1 - u plane
+ * r2 - v plane
+ * r3 - destination buffer
+ * r4 - result for storage into output buffer
+ * r5, r6 - source data for y1 calculation
+ * r7, r8 - source data for u calculation
+ * r9, r10 - source data for y2 calculation
+ * r11, r12 - source data for v calculation
+ * r14 (lr) - accumulator
+ *
+ * Generation stops before the buffer would overflow. The generated function
+ * is always terminated by its epilogue, so it stays callable even when it
+ * only covers the beginning of the line.
+ *
+ * @param cmdbuffer - buffer for dynamically generated code
+ * @param maxcmdcount - capacity of cmdbuffer, in instructions
+ * @param orig_w - source line width in pixels
+ * @param dest_w - destination line width in pixels
+ * @param quality - scaling quality level
+ * @return - number of instructions generated, or -1 if the code for the whole
+ *           line did not fit into maxcmdcount instructions
+ */
+static int generate_yuv420p_to_yuyv422_line_scaler(uint32_t *cmdbuffer, int maxcmdcount, int orig_w, int dest_w, int quality)
+{
+    /* Worst case for one generate_32bit_scaled_data_write() call:
+     * 8 loads + 11 arithmetic instructions + 1 store */
+    enum { MAX_CMDS_PER_WORD = 20 };
+    uint32_t *p = cmdbuffer;
+    int i, cmdcount;
+    int truncated = 0;
+
+    // Not even the prologue and the epilogue would fit
+    if (maxcmdcount < 2) return -1;
+
+    *p++ = 0xE92D4FF0; // stmfd sp!, {r4-r11, lr} @ save all registers
+
+    // Process a pair of destination pixels per loop iteration (it should result in 32-bit value write)
+    for (i = 0; i < dest_w; i += 2) {
+        // Keep room for this word and for the final epilogue instruction
+        if ((p - cmdbuffer) + MAX_CMDS_PER_WORD + 1 > maxcmdcount) {
+            truncated = 1;
+            break;
+        }
+        p = generate_32bit_scaled_data_write(
+            p, quality, orig_w, dest_w,
+            0, 1, i + 0,
+            1, 2, i,
+            0, 1, i + 1,
+            2, 2, i);
+    }
+    *p++ = 0xE8BD8FF0; // ldmfd sp!, {r4-r11, pc} @ restore all registers and return
+    cmdcount = p - cmdbuffer;
+
+#ifdef JIT_DEBUG
+    printf("@ number of instructions = %d\n", cmdcount);
+    FILE *f = fopen("cmdbuf.bin", "w+");
+    if (f) {
+        // Dump only the instructions that were actually generated
+        fwrite(cmdbuffer, sizeof(*cmdbuffer), cmdcount, f);
+        fclose(f);
+    }
+#endif
+    return truncated ? -1 : cmdcount;
+}
+
+/**
+ * Generate a function that scales one line and writes it in the 12 bytes per
+ * 8 pixels layout also produced by the yv12_to_yuv420_line_* converters:
+ * Y0 C0 C1 Y1 | Y3 Y2 Y4 C2 | C3 Y5 Y7 Y6 (C = chroma sample).
+ *
+ * The generated code reads luma through r0, chroma through r1 and writes
+ * through r3. Only whole groups of 8 destination pixels are generated.
+ *
+ * Generation stops before the buffer would overflow. The generated function
+ * is always terminated by its epilogue, so it stays callable even when it
+ * only covers the beginning of the line.
+ *
+ * @param cmdbuffer - buffer for dynamically generated code
+ * @param maxcmdcount - capacity of cmdbuffer, in instructions
+ * @param orig_w - source line width in pixels
+ * @param dest_w - destination line width in pixels
+ * @param quality - scaling quality level
+ * @return - number of instructions generated, or -1 if the code for the whole
+ *           line did not fit into maxcmdcount instructions
+ */
+static int generate_yuv420p_to_yuv420_line_scaler(uint32_t *cmdbuffer, int maxcmdcount, int orig_w, int dest_w, int quality)
+{
+    /* Registers holding the source pointers in the generated code, and the
+     * worst case size of one generate_32bit_scaled_data_write() call
+     * (8 loads + 11 arithmetic instructions + 1 store) */
+    enum { REG_SRC_Y = 0, REG_SRC_U = 1, MAX_CMDS_PER_WORD = 20 };
+    uint32_t *p = cmdbuffer;
+    int i = 0, cmdcount;
+    int truncated = 0;
+
+    // Not even the prologue and the epilogue would fit
+    if (maxcmdcount < 2) return -1;
+
+    *p++ = 0xE92D4FF0; // stmfd sp!, {r4-r11, lr} @ save all registers
+
+    while (i + 8 <= dest_w) {
+        // Keep room for three output words and for the final epilogue instruction
+        if ((p - cmdbuffer) + 3 * MAX_CMDS_PER_WORD + 1 > maxcmdcount) {
+            truncated = 1;
+            break;
+        }
+        p = generate_32bit_scaled_data_write(
+            p, quality, orig_w, dest_w,
+            REG_SRC_Y, 1, i + 0 * 1,
+            REG_SRC_U, 2, i + 0 * 2,
+            REG_SRC_U, 2, i + 1 * 2,
+            REG_SRC_Y, 1, i + 1 * 1);
+        p = generate_32bit_scaled_data_write(
+            p, quality, orig_w, dest_w,
+            REG_SRC_Y, 1, i + 3 * 1,
+            REG_SRC_Y, 1, i + 2 * 1,
+            REG_SRC_Y, 1, i + 4 * 1,
+            REG_SRC_U, 2, i + 2 * 2);
+        p = generate_32bit_scaled_data_write(
+            p, quality, orig_w, dest_w,
+            REG_SRC_U, 2, i + 3 * 2,
+            REG_SRC_Y, 1, i + 5 * 1,
+            REG_SRC_Y, 1, i + 7 * 1,
+            REG_SRC_Y, 1, i + 6 * 1);
+        i += 8;
+    }
+    *p++ = 0xE8BD8FF0; // ldmfd sp!, {r4-r11, pc} @ restore all registers and return
+    cmdcount = p - cmdbuffer;
+
+#ifdef JIT_DEBUG
+    printf("@ number of instructions = %d\n", cmdcount);
+    FILE *f = fopen("cmdbuf.bin", "w+");
+    if (f) {
+        // Dump only the instructions that were actually generated
+        fwrite(cmdbuffer, sizeof(*cmdbuffer), cmdcount, f);
+        fclose(f);
+    }
+#endif
+    return truncated ? -1 : cmdcount;
+}
+
+
+/******************************************************************************/
+
+/**
+ * Create a scaler context: generate the horizontal line scaler code and the
+ * table mapping every target line to a source line.
+ *
+ * @param source_w, source_h - source picture size in pixels
+ * @param target_w, target_h - target picture size in pixels
+ * @param quality - scaling quality level used for the horizontal scaler
+ * @param fmt - FMT_OMAPFB_YUV422 or FMT_OMAPFB_YUV420
+ * @return new context to be released with sws_arm_jit_free(), or NULL when
+ *         the format or sizes are invalid, a resource could not be
+ *         allocated, or the code generator reported a failure
+ */
+static struct SwsContextArmJit *sws_arm_jit_create_scaler_internal(int source_w, int source_h, int target_w, int target_h, int quality, int fmt)
+{
+    int i, p1, p2, cmdcount;
+    uint32_t *code;
+    int *linebuffer = NULL;
+    SwsContextArmJit *context = NULL;
+
+    // Validate the arguments before any resource is acquired
+    if (fmt != FMT_OMAPFB_YUV422 && fmt != FMT_OMAPFB_YUV420) return NULL;
+    if (source_w <= 0 || source_h <= 0 || target_w <= 0 || target_h <= 0) return NULL;
+
+    code = mmap(0, INSTRUCTIONS_CACHE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (code == MAP_FAILED) return NULL;
+
+    if (fmt == FMT_OMAPFB_YUV422) {
+        cmdcount = generate_yuv420p_to_yuyv422_line_scaler(code, INSTRUCTIONS_CACHE_SIZE / 4, source_w, target_w, quality);
+    } else {
+        cmdcount = generate_yuv420p_to_yuv420_line_scaler(code, INSTRUCTIONS_CACHE_SIZE / 4, source_w, target_w, quality);
+    }
+    // A negative count is treated as a generator failure
+    if (cmdcount < 0) goto fail_unmap;
+
+    linebuffer = malloc(target_h * sizeof(*linebuffer));
+    context = calloc(1, sizeof(*context));
+    if (!linebuffer || !context) goto fail_free;
+
+    // Vertical scaling is nearest neighbour: remember one source line per target line
+    for (i = 0; i < target_h; i++) {
+        get_pix(1, source_h, target_h, i, &p1, &p2);
+        linebuffer[i] = p1;
+    }
+
+    // The code was written through the data side, flush it before it gets executed
+    __clear_cache((char *)code, (char *)code + INSTRUCTIONS_CACHE_SIZE);
+
+    context->source_w = source_w;
+    context->source_h = source_h;
+    context->target_w = target_w;
+    context->target_h = target_h;
+    context->codebuffer = code;
+    context->linebuffer = linebuffer;
+    context->fmt = fmt;
+    context->armv6_is_supported = 0;
+    return context;
+
+fail_free:
+    free(context);
+    free(linebuffer);
+fail_unmap:
+    munmap(code, INSTRUCTIONS_CACHE_SIZE);
+    return NULL;
+}
+
+/* Create a scaler producing the FMT_OMAPFB_YUV422 output format; returns NULL on failure */
+struct SwsContextArmJit *sws_arm_jit_create_omapfb_yuv422_scaler(int source_w, int source_h, int target_w, int target_h, int quality)
+{
+    struct SwsContextArmJit *scaler;
+
+    scaler = sws_arm_jit_create_scaler_internal(source_w, source_h,
+                                                target_w, target_h,
+                                                quality, FMT_OMAPFB_YUV422);
+    return scaler;
+}
+
+/* Create a scaler producing the FMT_OMAPFB_YUV420 output format; returns NULL on failure */
+struct SwsContextArmJit *sws_arm_jit_create_omapfb_yuv420_scaler(int source_w, int source_h, int target_w, int target_h, int quality)
+{
+    struct SwsContextArmJit *scaler;
+
+    scaler = sws_arm_jit_create_scaler_internal(source_w, source_h,
+                                                target_w, target_h,
+                                                quality, FMT_OMAPFB_YUV420);
+    return scaler;
+}
+
+/* Same as the FMT_OMAPFB_YUV420 scaler, with the context additionally marked
+ * as allowed to use the ARMv6 line converter; returns NULL on failure */
+struct SwsContextArmJit *sws_arm_jit_create_omapfb_yuv420_scaler_armv6(int source_w, int source_h, int target_w, int target_h, int quality)
+{
+    struct SwsContextArmJit *scaler =
+        sws_arm_jit_create_scaler_internal(source_w, source_h,
+                                           target_w, target_h,
+                                           quality, FMT_OMAPFB_YUV420);
+
+    if (scaler != NULL) {
+        scaler->armv6_is_supported = 1;
+    }
+    return scaler;
+}
+
+/* Release a scaler context: the code mapping, the line table and the context
+ * itself. A NULL context is accepted and ignored. */
+void sws_arm_jit_free(SwsContextArmJit *context)
+{
+    if (context != NULL) {
+        munmap(context->codebuffer, INSTRUCTIONS_CACHE_SIZE);
+        free(context->linebuffer);
+        free(context);
+    }
+}
+
+/**
+ * Convert a picture whose width does not change: every target line is
+ * produced from one source line by the assembly line converters, only the
+ * line selection (vertical scaling) comes from the context.
+ *
+ * @param context - scaler context
+ * @param src, srcStride - source planes (Y, U, V) and their pitches in bytes
+ * @param dst, dstStride - destination plane (only index 0 is used) and its pitch
+ * @return 1 on success, 0 if the context format is not supported
+ */
+static int sws_arm_jit_vscaleonly_internal(SwsContextArmJit *context, uint8_t* src[], int srcStride[], uint8_t* dst[], int dstStride[])
+{
+    int i, j;
+
+    if (context->fmt == FMT_OMAPFB_YUV420) {
+        for (i = 0; i < context->target_h; i++) {
+            // Even target lines take chroma from plane 1, odd lines from plane 2
+            const int c = (i & 1) ? 2 : 1;
+            uint16_t *out;
+            const uint8_t *luma, *chroma;
+
+            j = context->linebuffer[i];
+            out = (uint16_t *)(dst[0] + i * dstStride[0]);
+            luma = src[0] + j * srcStride[0];
+            chroma = src[c] + (j / 2) * srcStride[c];
+
+            // Each converter is called directly so that the call matches its
+            // prototype. The ARMv6 one wants a 16-bit aligned luma pointer,
+            // otherwise the byte based converter is used.
+            if (context->armv6_is_supported && ((uintptr_t)luma & 1) == 0) {
+                yv12_to_yuv420_line_armv6(out, (const uint16_t *)luma, chroma, context->target_w);
+            } else {
+                yv12_to_yuv420_line_arm(out, luma, chroma, context->target_w);
+            }
+        }
+        return 1;
+    } else if (context->fmt == FMT_OMAPFB_YUV422) {
+        for (i = 0; i < context->target_h; i++) {
+            j = context->linebuffer[i];
+            // Casts follow the prototype in arm_colorconv.h
+            yv12_to_yuy2_line_arm(
+                (uint32_t *)(dst[0] + i * dstStride[0]),
+                (const uint16_t *)(src[0] + j * srcStride[0]),
+                src[1] + (j / 2) * srcStride[1],
+                src[2] + (j / 2) * srcStride[2],
+                context->target_w);
+        }
+        return 1;
+    }
+    return 0;
+}
+
+/**
+ * Scale a whole picture. When the width is unchanged the work is handed to
+ * sws_arm_jit_vscaleonly_internal(), otherwise the generated line scaler in
+ * the context code buffer is called once per target line.
+ *
+ * @return 1 on success, 0 if the context format is not supported
+ */
+static int sws_arm_jit_scale_internal(SwsContextArmJit *context, uint8_t* src[], int srcStride[], uint8_t* dst[], int dstStride[])
+{
+    int line, srcline;
+    void (*scale_line)(uint8_t *y, uint8_t *u, uint8_t *v, uint8_t *out) =
+        (void (*)(uint8_t *, uint8_t *, uint8_t *, uint8_t *))context->codebuffer;
+
+    // No horizontal scaling needed: use the plain line converters
+    if (context->source_w == context->target_w)
+        return sws_arm_jit_vscaleonly_internal(context, src, srcStride, dst, dstStride);
+
+    switch (context->fmt) {
+    case FMT_OMAPFB_YUV422:
+        for (line = 0; line < context->target_h; line++) {
+            srcline = context->linebuffer[line];
+            scale_line(
+                src[0] + srcline * srcStride[0],
+                src[1] + (srcline / 2) * srcStride[1],
+                src[2] + (srcline / 2) * srcStride[2],
+                dst[0] + line * dstStride[0]);
+        }
+        return 1;
+
+    case FMT_OMAPFB_YUV420:
+        for (line = 0; line < context->target_h; line++) {
+            srcline = context->linebuffer[line];
+            // One chroma plane per line: plane 2 on odd target lines, plane 1 on even ones
+            scale_line(
+                src[0] + srcline * srcStride[0],
+                (line & 1) ? (src[2] + (srcline / 2) * srcStride[2]) : (src[1] + (srcline / 2) * srcStride[1]),
+                0,
+                dst[0] + line * dstStride[0]);
+        }
+        return 1;
+
+    default:
+        return 0;
+    }
+}
+
+/**
+ * Scale a picture with the given context.
+ *
+ * @param context - scaler context (a NULL context makes the call fail)
+ * @param src, srcStride - source planes and their pitches in bytes
+ * @param y, h - first source line and number of source lines; only the whole
+ *               picture is accepted (y == 0 and h == source height)
+ * @param dst, dstStride - destination planes and their pitches in bytes
+ * @return 1 on success, 0 on failure
+ */
+int sws_arm_jit_scale(SwsContextArmJit *context, uint8_t* src[], int srcStride[], int y, int h, uint8_t* dst[], int dstStride[])
+{
+    if (!context) return 0; // Nothing to scale with
+    if (y != 0 || h != context->source_h) return 0; // Slices are not supported yet
+    return sws_arm_jit_scale_internal(context, src, srcStride, dst, dstStride);
+}
--- /dev/null
+/*
+ * Fast JIT powered scaler for ARM
+ *
+ * Copyright (C) 2007 Siarhei Siamashka <ssvb@users.sourceforge.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef ARM_JIT_SWSCALE_H
+#define ARM_JIT_SWSCALE_H
+
+#include <stdint.h>
+
+/* Opaque scaler context, created by the sws_arm_jit_create_* functions */
+struct SwsContextArmJit;
+
+/*
+ * Create a scaler for a fixed source size -> target size conversion.
+ * The three variants differ in the output format (omapfb YUV422 or YUV420)
+ * and in whether the ARMv6 line converter may be used.
+ * With quality >= 2 the horizontal scaler averages neighbouring pixels.
+ * Each function returns NULL on failure; release the result with sws_arm_jit_free().
+ */
+struct SwsContextArmJit *sws_arm_jit_create_omapfb_yuv422_scaler(int source_w, int source_h, int target_w, int target_h, int quality);
+struct SwsContextArmJit *sws_arm_jit_create_omapfb_yuv420_scaler(int source_w, int source_h, int target_w, int target_h, int quality);
+struct SwsContextArmJit *sws_arm_jit_create_omapfb_yuv420_scaler_armv6(int source_w, int source_h, int target_w, int target_h, int quality);
+
+/*
+ * Scale a whole picture from the src planes into dst[0].
+ * Slices are not supported: y must be 0 and h must be the source height.
+ * Returns 1 on success and 0 otherwise.
+ */
+int sws_arm_jit_scale(struct SwsContextArmJit *context, uint8_t* src[], int srcStride[], int y, int h, uint8_t* dst[], int dstStride[]);
+
+/* Release a scaler context; a NULL context is ignored */
+void sws_arm_jit_free(struct SwsContextArmJit *context);
+
+#endif
--- /dev/null
+/*****************************************************************************
+ * swscale_maemo.c: scaling and chroma conversion using libswscale_nokia770
+ *****************************************************************************
+ * Copyright (C) 1999-2008 the VideoLAN team
+ * $Id$
+ *
+ * Authors: Antoine Lejeune <phytos@videolan.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ *****************************************************************************/
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <vlc_common.h>
+#include <vlc_plugin.h>
+#include <vlc_vout.h>
+#include <vlc_filter.h>
+
+#include "libswscale_nokia770/arm_jit_swscale.h"
+#include "libswscale_nokia770/arm_colorconv.h"
+
+/****************************************************************************
+ * Local prototypes
+ ****************************************************************************/
+static int OpenScaler( vlc_object_t * );
+static void CloseScaler( vlc_object_t * );
+
+static picture_t *Filter( filter_t *, picture_t * );
+static int Init( filter_t * );
+
+/*****************************************************************************
+ * Module descriptor
+ *****************************************************************************/
+/* Registers the plugin as a "video filter2" module with score 1000, using
+ * OpenScaler/CloseScaler as its activation and deactivation callbacks */
+vlc_module_begin();
+ set_description( N_("Video scaling filter") );
+ set_capability( "video filter2", 1000 );
+ set_category( CAT_VIDEO );
+ set_subcategory( SUBCAT_VIDEO_VFILTER );
+ set_callbacks( OpenScaler, CloseScaler );
+vlc_module_end();
+
+/*****************************************************************************
+ * filter_sys_t : filter descriptor
+ *****************************************************************************/
+struct filter_sys_t
+{
+ /* Scaler context, NULL until Init() has created one */
+ struct SwsContextArmJit *ctx;
+
+ /* Formats the current scaler context was created for */
+ es_format_t fmt_in;
+ es_format_t fmt_out;
+};
+
+/*****************************************************************************
+ * OpenScaler: allocate the filter state and create the initial scaler
+ *****************************************************************************
+ * Returns VLC_SUCCESS, VLC_ENOMEM if the state could not be allocated, or
+ * VLC_EGENERIC if Init() rejects the requested conversion.
+ *****************************************************************************/
+static int OpenScaler( vlc_object_t *p_this )
+{
+    filter_t *p_filter = (filter_t*)p_this;
+    filter_sys_t *p_sys = (filter_sys_t *)malloc( sizeof(filter_sys_t) );
+    int i_ret;
+
+    /* The private state is attached to the filter before it is checked */
+    p_filter->p_sys = p_sys;
+    if( p_sys == NULL )
+        return VLC_ENOMEM;
+
+    /* Start without a scaler and with empty cached formats */
+    p_sys->ctx = NULL;
+    p_filter->pf_video_filter = Filter;
+    es_format_Init( &p_sys->fmt_in, 0, 0 );
+    es_format_Init( &p_sys->fmt_out, 0, 0 );
+
+    i_ret = Init( p_filter );
+    if( i_ret )
+    {
+        free( p_sys );
+        return VLC_EGENERIC;
+    }
+
+    msg_Dbg( p_filter, "%ix%i chroma: %4.4s -> %ix%i chroma: %4.4s",
+             p_filter->fmt_in.video.i_width, p_filter->fmt_in.video.i_height,
+             (char *)&p_filter->fmt_in.video.i_chroma,
+             p_filter->fmt_out.video.i_width, p_filter->fmt_out.video.i_height,
+             (char *)&p_filter->fmt_out.video.i_chroma );
+
+    return VLC_SUCCESS;
+}
+
+/*****************************************************************************
+ * CloseScaler: release the scaler context and the filter state
+ *****************************************************************************/
+static void CloseScaler( vlc_object_t *p_this )
+{
+    filter_sys_t *p_sys = ((filter_t*)p_this)->p_sys;
+    struct SwsContextArmJit *p_ctx = p_sys->ctx;
+
+    if( p_ctx != NULL )
+    {
+        sws_arm_jit_free( p_ctx );
+    }
+    free( p_sys );
+}
+
+/*****************************************************************************
+ * Helpers
+ *****************************************************************************/
+
+/* Two video formats are similar when chroma, width and height all match */
+static bool IsFmtSimilar( const video_format_t *p_fmt1, const video_format_t *p_fmt2 )
+{
+    if( p_fmt1->i_chroma != p_fmt2->i_chroma )
+        return false;
+    if( p_fmt1->i_width != p_fmt2->i_width )
+        return false;
+    return p_fmt1->i_height == p_fmt2->i_height;
+}
+
+/*****************************************************************************
+ * Init: (re)create the scaler context when the filter formats changed
+ *****************************************************************************
+ * Keeps the current context when it was created for the same input and
+ * output chroma and size. Otherwise checks that the conversion is supported
+ * (I420/IYUV/YV12 input, Y420 output) and creates a new context.
+ * Returns VLC_SUCCESS or VLC_EGENERIC.
+ *****************************************************************************/
+static int Init( filter_t *p_filter )
+{
+    filter_sys_t *p_sys = p_filter->p_sys;
+    const video_format_t *p_fmt_in = &p_filter->fmt_in.video;
+    const video_format_t *p_fmt_out = &p_filter->fmt_out.video;
+
+    /* Compare against the video part of the cached formats: the cached
+     * members are es_format_t, not video_format_t */
+    if( p_sys->ctx &&
+        IsFmtSimilar( p_fmt_in, &p_sys->fmt_in.video ) &&
+        IsFmtSimilar( p_fmt_out, &p_sys->fmt_out.video ) )
+    {
+        return VLC_SUCCESS;
+    }
+
+    if( ( p_fmt_in->i_chroma != VLC_FOURCC('I','4','2','0') &&
+          p_fmt_in->i_chroma != VLC_FOURCC('I','Y','U','V') &&
+          p_fmt_in->i_chroma != VLC_FOURCC('Y','V','1','2') ) ||
+        p_fmt_out->i_chroma != VLC_FOURCC('Y','4','2','0') )
+    {
+        msg_Err( p_filter, "format not supported" );
+        return VLC_EGENERIC;
+    }
+
+    /* Drop the previous scaler, if any, before creating the new one */
+    if( p_sys->ctx )
+    {
+        sws_arm_jit_free( p_sys->ctx );
+        p_sys->ctx = NULL;
+    }
+
+    /* NOTE(review): the ARMv6 variant is requested unconditionally - confirm
+     * that every supported device can execute ARMv6 instructions */
+    p_sys->ctx =
+        sws_arm_jit_create_omapfb_yuv420_scaler_armv6(
+            p_fmt_in->i_width,
+            p_fmt_in->i_height,
+            p_fmt_out->i_width,
+            p_fmt_out->i_height, 2 );
+
+    if( !p_sys->ctx )
+    {
+        msg_Err( p_filter, "could not init SwScaler" );
+        return VLC_EGENERIC;
+    }
+
+    /* Remember which formats the context was created for */
+    p_sys->fmt_in = p_filter->fmt_in;
+    p_sys->fmt_out = p_filter->fmt_out;
+
+    return VLC_SUCCESS;
+}
+
+/****************************************************************************
+ * Filter: scale and convert one picture
+ ****************************************************************************
+ * The input picture is released on every path. Returns a newly requested
+ * output picture holding the converted image, or NULL if the scaler could
+ * not be set up, no output picture was available, or the scaling failed.
+ ****************************************************************************/
+static picture_t *Filter( filter_t *p_filter, picture_t *p_pic )
+{
+    filter_sys_t *p_sys = p_filter->p_sys;
+    uint8_t *src[3] = { NULL, NULL, NULL }; int src_stride[3] = { 0, 0, 0 };
+    uint8_t *dst[3] = { NULL, NULL, NULL }; int dst_stride[3] = { 0, 0, 0 };
+    picture_t *p_pic_dst;
+    int i_plane;
+
+    /* Check if format properties changed */
+    if( Init( p_filter ) != VLC_SUCCESS )
+    {
+        picture_Release( p_pic );
+        return NULL;
+    }
+
+    /* Request output picture */
+    p_pic_dst = p_filter->pf_vout_buffer_new( p_filter );
+    if( !p_pic_dst )
+    {
+        msg_Warn( p_filter, "can't get output picture" );
+        picture_Release( p_pic );
+        return NULL;
+    }
+
+    /* Each picture describes its own planes: use its own plane count */
+    for( i_plane = 0; i_plane < __MIN(3, p_pic->i_planes); i_plane++ )
+    {
+        src[i_plane] = p_pic->p[i_plane].p_pixels;
+        src_stride[i_plane] = p_pic->p[i_plane].i_pitch;
+    }
+    for( i_plane = 0; i_plane < __MIN(3, p_pic_dst->i_planes); i_plane++ )
+    {
+        dst[i_plane] = p_pic_dst->p[i_plane].p_pixels;
+        dst_stride[i_plane] = p_pic_dst->p[i_plane].i_pitch;
+    }
+
+    if( !sws_arm_jit_scale( p_sys->ctx, src, src_stride, 0,
+                            p_filter->fmt_in.video.i_height, dst, dst_stride ) )
+    {
+        /* Nothing was written to the output picture: drop the frame */
+        msg_Err( p_filter, "scaling failed" );
+        picture_Release( p_pic_dst );
+        picture_Release( p_pic );
+        return NULL;
+    }
+
+    picture_CopyProperties( p_pic_dst, p_pic );
+    picture_Release( p_pic );
+
+    return p_pic_dst;
+}