diff --git a/src/radeon_reg.h b/src/radeon_reg.h
index c4177cc..cb65b72 100644
--- a/src/radeon_reg.h
+++ b/src/radeon_reg.h
@@ -4392,6 +4392,7 @@
 #define R300_TX_INVALTAGS				0x4100
 #define R300_TX_FILTER0_0				0x4400
 #define R300_TX_FILTER0_1				0x4404
+#define R300_TX_FILTER0_2				0x4408
 #       define R300_TX_CLAMP_S(x)                       ((x) << 0)
 #       define R300_TX_CLAMP_T(x)                       ((x) << 3)
 #       define R300_TX_CLAMP_R(x)                       ((x) << 6)
@@ -4410,8 +4411,10 @@
 #       define R300_TX_ID_SHIFT                         28
 #define R300_TX_FILTER1_0				0x4440
 #define R300_TX_FILTER1_1				0x4444
+#define R300_TX_FILTER1_2				0x4448
 #define R300_TX_FORMAT0_0				0x4480
 #define R300_TX_FORMAT0_1				0x4484
+#define R300_TX_FORMAT0_2				0x4488
 #       define R300_TXWIDTH_SHIFT                       0
 #       define R300_TXHEIGHT_SHIFT                      11
 #       define R300_NUM_LEVELS_SHIFT                    26
@@ -4420,6 +4423,7 @@
 #       define R300_TXPITCH_EN                          (1 << 31)
 #define R300_TX_FORMAT1_0				0x44c0
 #define R300_TX_FORMAT1_1				0x44c4
+#define R300_TX_FORMAT1_2				0x44c8
 #	define R300_TX_FORMAT_X8		    0x0
 #	define R300_TX_FORMAT_X16		    0x1
 #	define R300_TX_FORMAT_Y4X4		    0x2
@@ -4492,13 +4496,23 @@
 #       define R300_TX_FORMAT_YUV_TO_RGB_NO_CLAMP      (2 << 22)
 #       define R300_TX_FORMAT_SWAP_YUV                 (1 << 24)
 
+#       define R300_TX_FORMAT_CACHE_WHOLE              (0 << 27)
+#       define R300_TX_FORMAT_CACHE_HALF_REGION_0      (2 << 27)
+#       define R300_TX_FORMAT_CACHE_HALF_REGION_1      (3 << 27)
+#       define R300_TX_FORMAT_CACHE_FOURTH_REGION_0    (4 << 27)
+#       define R300_TX_FORMAT_CACHE_FOURTH_REGION_1    (5 << 27)
+#       define R300_TX_FORMAT_CACHE_FOURTH_REGION_2    (6 << 27)
+#       define R300_TX_FORMAT_CACHE_FOURTH_REGION_3    (7 << 27)
+
 #define R300_TX_FORMAT2_0				0x4500
 #define R300_TX_FORMAT2_1				0x4504
+#define R300_TX_FORMAT2_2				0x4508
 #       define R500_TXWIDTH_11                          (1 << 15)
 #       define R500_TXHEIGHT_11                         (1 << 16)
 
 #define R300_TX_OFFSET_0				0x4540
 #define R300_TX_OFFSET_1				0x4544
+#define R300_TX_OFFSET_2				0x4548
 #       define R300_ENDIAN_SWAP_16_BIT                  (1 << 0)
 #       define R300_ENDIAN_SWAP_32_BIT                  (2 << 0)
 #       define R300_ENDIAN_SWAP_HALF_DWORD              (3 << 0)
@@ -4509,6 +4523,7 @@
 #define R300_TX_ENABLE				        0x4104
 #       define R300_TEX_0_ENABLE                        (1 << 0)
 #       define R300_TEX_1_ENABLE                        (1 << 1)
+#       define R300_TEX_2_ENABLE                        (1 << 2)
 
 #define R300_US_W_FMT				        0x46b4
 #define R300_US_OUT_FMT_1				0x46a8
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index 08a3829..91762e5 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -165,8 +165,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
     RADEONInfoPtr info = RADEONPTR(pScrn);
     RADEONPortPrivPtr pPriv = (RADEONPortPrivPtr)data;
     INT32 x1, x2, y1, y2;
-    int srcPitch, srcPitch2, dstPitch;
+    int srcPitch, srcPitch2, dstPitch, dstPitch2 = 0;
     int s2offset, s3offset, tmp;
+    int d2line, d3line;
     int top, left, npixels, nlines, size;
     BoxRec dstBox;
     int dst_width = width, dst_height = height;
@@ -199,22 +200,31 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
     switch(id) {
     case FOURCC_YV12:
     case FOURCC_I420:
-	dstPitch = ((dst_width << 1) + 15) & ~15;
 	srcPitch = (width + 3) & ~3;
 	srcPitch2 = ((width >> 1) + 3) & ~3;
-	size = dstPitch * dst_height;
+	if (pPriv->planar_hw) {	/* Y plane followed by two half-resolution chroma planes */
+	    dstPitch = (dst_width + 15) & ~15;
+	    dstPitch = (dstPitch + 63) & ~63;
+	    /* derive the chroma pitch from the luma pitch, exactly as the texture setup does from src_pitch */
+	    dstPitch2 = ((dstPitch >> 1) + 63) & ~63;
+	    size = dstPitch * dst_height + 2 * dstPitch2 * ((dst_height + 1) >> 1);
+	} else {	/* single packed plane, dst_width << 1 bytes per line before alignment */
+	    dstPitch = ((dst_width << 1) + 15) & ~15;
+	    dstPitch = (dstPitch + 63) & ~63;
+	    size = dstPitch * dst_height;
+	}
 	break;
     case FOURCC_UYVY:
     case FOURCC_YUY2:
     default:
 	dstPitch = ((dst_width << 1) + 15) & ~15;
+	dstPitch = (dstPitch + 63) & ~63;
 	srcPitch = (width << 1);
 	srcPitch2 = 0;
 	size = dstPitch * dst_height;
 	break;
     }
-
-   dstPitch = (dstPitch + 63) & ~63;
+    
 
     if (pPriv->video_memory != NULL && size != pPriv->size) {
 	radeon_legacy_free_memory(pScrn, pPriv->video_memory);
@@ -282,10 +292,16 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
     pPriv->src_offset = pPriv->video_offset + info->fbLocation + pScrn->fbOffset;
     pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset + (top * dstPitch));
     pPriv->src_pitch = dstPitch;
+    pPriv->planeu_offset = dstPitch * dst_height;
+    pPriv->planev_offset = pPriv->planeu_offset + dstPitch2 * ((dst_height + 1) >> 1);
     pPriv->size = size;
     pPriv->pDraw = pDraw;
+    
 
 #if 0
+    ErrorF("planeu_offset: 0x%x\n", pPriv->planeu_offset);
+    ErrorF("planev_offset: 0x%x\n", pPriv->planev_offset);
+    ErrorF("dstPitch2: 0x%x\n", dstPitch2);
     ErrorF("src_offset: 0x%x\n", pPriv->src_offset);
     ErrorF("src_addr: 0x%x\n", pPriv->src_addr);
     ErrorF("src_pitch: 0x%x\n", pPriv->src_pitch);
@@ -294,22 +310,46 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
     switch(id) {
     case FOURCC_YV12:
     case FOURCC_I420:
-	top &= ~1;
-	nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
-	s2offset = srcPitch * height;
-	s3offset = (srcPitch2 * (height >> 1)) + s2offset;
-	pPriv->src_addr += left << 1;
-	tmp = ((top >> 1) * srcPitch2) + (left >> 1);
-	s2offset += tmp;
-	s3offset += tmp;
-	if (id == FOURCC_I420) {
-	    tmp = s2offset;
-	    s2offset = s3offset;
-	    s3offset = tmp;
+        if (pPriv->planar_hw) {	/* copy the three source planes into three destination planes */
+	    top &= ~1;	/* clear the low bit so copying starts on an even line */
+	    s2offset = srcPitch * ((height + 1) & ~1);	/* offset in buf past (height rounded up to even) rows of srcPitch bytes */
+	    s3offset = s2offset + srcPitch2 * ((height + 1) >> 1);	/* a further (height + 1) / 2 rows of srcPitch2 bytes */
+	    s2offset += (top >> 1) * srcPitch2 + (left >> 1);
+	    s3offset += (top >> 1) * srcPitch2 + (left >> 1);
+	    d2line = pPriv->planeu_offset;
+	    d3line = pPriv->planev_offset;
+	    d2line += (top >> 1) * dstPitch2 - (top * dstPitch);	/* src_addr already includes top * dstPitch, so take it back out */
+	    d3line += (top >> 1) * dstPitch2 - (top * dstPitch);
+	    nlines = ((y2 + 0xffff) >> 16) - top;
+	    if(id == FOURCC_YV12) {	/* for YV12 exchange the two half-resolution source offsets */
+		tmp = s2offset;
+		s2offset = s3offset;
+		s3offset = tmp;
+	    }
+	    RADEONCopyData(pScrn, buf + (top * srcPitch) + left, pPriv->src_addr + left,
+		srcPitch, dstPitch, nlines, npixels, 1);	/* full-resolution plane */
+	    RADEONCopyData(pScrn, buf + s2offset,  pPriv->src_addr + d2line + (left >> 1),
+		srcPitch2, dstPitch2, (nlines + 1) >> 1, npixels >> 1, 1);	/* lands at planeu_offset */
+	    RADEONCopyData(pScrn, buf + s3offset, pPriv->src_addr + d3line + (left >> 1),
+		srcPitch2, dstPitch2, (nlines + 1) >> 1, npixels >> 1, 1);	/* lands at planev_offset */
+	} else {
+	    top &= ~1;
+	    nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
+	    s2offset = srcPitch * height;
+	    s3offset = (srcPitch2 * (height >> 1)) + s2offset;
+	    pPriv->src_addr += left << 1;
+	    tmp = ((top >> 1) * srcPitch2) + (left >> 1);
+	    s2offset += tmp;
+	    s3offset += tmp;
+	    if (id == FOURCC_I420) {
+		tmp = s2offset;
+		s2offset = s3offset;
+		s3offset = tmp;
+	    }
+	    RADEONCopyMungedData(pScrn, buf + (top * srcPitch) + left,
+				 buf + s2offset, buf + s3offset, pPriv->src_addr,
+				 srcPitch, srcPitch2, dstPitch, nlines, npixels);
 	}
-	RADEONCopyMungedData(pScrn, buf + (top * srcPitch) + left,
-			     buf + s2offset, buf + s3offset, pPriv->src_addr,
-			     srcPitch, srcPitch2, dstPitch, nlines, npixels);
 	break;
     case FOURCC_UYVY:
     case FOURCC_YUY2:
@@ -386,17 +426,19 @@ static XF86AttributeRec Attributes[NUM_ATTRIBUTES+1] =
     {0, 0, 0, NULL}
 };
 
-#define NUM_ATTRIBUTES_R300 2
+#define NUM_ATTRIBUTES_R300 3
 
 static XF86AttributeRec Attributes_r300[NUM_ATTRIBUTES_R300+1] =
 {
     {XvSettable | XvGettable, 0, 2, "XV_BICUBIC"},
     {XvSettable | XvGettable, 0, 1, "XV_VSYNC"},
+    {XvSettable | XvGettable, 0, 1, "XV_HWPLANAR"},
     {0, 0, 0, NULL}
 };
 
 static Atom xvBicubic;
 static Atom xvVSync;
+static Atom xvHWPlanar;
 
 #define NUM_IMAGES 4
 
@@ -423,6 +465,8 @@ RADEONGetTexPortAttribute(ScrnInfoPtr  pScrn,
 	*value = pPriv->bicubic_state;
     else if (attribute == xvVSync)
 	*value = pPriv->vsync;
+    else if (attribute == xvHWPlanar)
+	*value = pPriv->planar_hw;
     else
 	return BadMatch;
 
@@ -444,6 +488,8 @@ RADEONSetTexPortAttribute(ScrnInfoPtr  pScrn,
 	pPriv->bicubic_state = ClipValue (value, 0, 2);
     else if (attribute == xvVSync)
 	pPriv->vsync = ClipValue (value, 0, 1);
+    else if (attribute == xvHWPlanar)
+	pPriv->planar_hw = ClipValue (value, 0, 1);
     else
 	return BadMatch;
 
@@ -467,6 +513,7 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen)
 
     xvBicubic         = MAKE_ATOM("XV_BICUBIC");
     xvVSync           = MAKE_ATOM("XV_VSYNC");
+    xvHWPlanar        = MAKE_ATOM("XV_HWPLANAR");
 
     adapt->type = XvWindowMask | XvInputMask | XvImageMask;
     adapt->flags = 0;
@@ -514,6 +561,7 @@ RADEONSetupImageTexturedVideo(ScreenPtr pScreen)
 	pPriv->doubleBuffer = 0;
 	pPriv->bicubic_state = BICUBIC_AUTO;
 	pPriv->vsync = TRUE;
+	pPriv->planar_hw = IS_R300_3D ? TRUE : FALSE;
 
 	/* gotta uninit this someplace, XXX: shouldn't be necessary for textured */
 	REGION_NULL(pScreen, &pPriv->clip);
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index c6ed472..607a0eb 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -97,6 +97,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
     uint32_t dst_offset, dst_pitch, dst_format;
     uint32_t txenable, colorpitch;
     uint32_t blendcntl;
+    Bool isplanar = FALSE;
     int dstxoff, dstyoff, pixel_shift, vtx_count;
     BoxPtr pBox = REGION_RECTS(&pPriv->clip);
     int nBox = REGION_NUM_RECTS(&pPriv->clip);
@@ -181,16 +182,29 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 	if (RADEONTilingEnabled(pScrn, pPixmap))
 	    colorpitch |= R300_COLORTILE;
 
-	if (pPriv->id == FOURCC_UYVY)
-	    txformat1 = R300_TX_FORMAT_YVYU422;
-	else
-	    txformat1 = R300_TX_FORMAT_VYUY422;
+	if (pPriv->planar_hw && (pPriv->id == FOURCC_I420 || pPriv->id == FOURCC_YV12)) {
+	    isplanar = TRUE;
+	}
+
+	if (isplanar) {
+	    txformat1 = R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_HALF_REGION_0;
+	    txpitch = pPriv->src_pitch;
+	} else {
+	    if (pPriv->id == FOURCC_UYVY)
+		txformat1 = R300_TX_FORMAT_YVYU422;
+	    else
+		txformat1 = R300_TX_FORMAT_VYUY422;
 
-	txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
+	    txformat1 |= R300_TX_FORMAT_YUV_TO_RGB_CLAMP;
+
+	    /* pitch is in pixels */
+	    txpitch = pPriv->src_pitch / 2;
+	}
+	txpitch -= 1;
 
 	txformat0 = ((((pPriv->w - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
-		     (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
-		     R300_TXPITCH_EN);
+		    (((pPriv->h - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
+		    R300_TXPITCH_EN);
 
 	info->accel_state->texW[0] = pPriv->w;
 	info->accel_state->texH[0] = pPriv->h;
@@ -201,9 +215,6 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 		    R300_TX_MIN_FILTER_LINEAR |
 		    (0 << R300_TX_ID_SHIFT));
 
-	/* pitch is in pixels */
-	txpitch = pPriv->src_pitch / 2;
-	txpitch -= 1;
 
 	if (IS_R500_3D && ((pPriv->w - 1) & 0x800))
 	    txpitch |= R500_TXWIDTH_11;
@@ -224,6 +235,34 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 
 	txenable = R300_TEX_0_ENABLE;
 
+	if (isplanar) {	/* program texture units 1 and 2 to read the data at planeu_offset and planev_offset */
+	    txformat0 = ((((((pPriv->w + 1 ) >> 1) - 1) & 0x7ff) << R300_TXWIDTH_SHIFT) |
+			(((((pPriv->h + 1 ) >> 1 ) - 1) & 0x7ff) << R300_TXHEIGHT_SHIFT) |
+			R300_TXPITCH_EN);	/* half the width and height, rounded up */
+	    txpitch = ((pPriv->src_pitch >> 1) + 63) & ~63;	/* half of src_pitch, rounded up to a multiple of 64 */
+	    txpitch -= 1;	/* same minus-one adjustment as applied to the texture 0 pitch */
+	    txfilter = (R300_TX_CLAMP_S(R300_TX_CLAMP_CLAMP_LAST) |
+		        R300_TX_CLAMP_T(R300_TX_CLAMP_CLAMP_LAST) |
+			R300_TX_MIN_FILTER_LINEAR |
+			R300_TX_MAG_FILTER_LINEAR);
+
+		BEGIN_ACCEL(12);	/* six registers for each of the two units */
+		OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT));
+		OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
+		OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
+		OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2);
+		OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
+		OUT_ACCEL_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset);
+		OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT));
+		OUT_ACCEL_REG(R300_TX_FILTER1_2, 0);
+		OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0);
+		OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3);
+		OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch);
+		OUT_ACCEL_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset);
+		FINISH_ACCEL();
+		txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE;	/* in addition to texture 0, enabled above */
+	}
+
 	if (pPriv->bicubic_enabled) {
 		/* Size is 128x1 */
 		txformat0 = ((0x7f << R300_TXWIDTH_SHIFT) |
@@ -691,6 +730,192 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 		OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), 0);
 
 		FINISH_ACCEL();
+	    } else if (isplanar) {
+		BEGIN_ACCEL(45);
+		/* 2 components: same 2 for tex0/1/2 */
+		OUT_ACCEL_REG(R300_RS_COUNT,
+			  ((2 << R300_RS_COUNT_IT_COUNT_SHIFT) |
+			   R300_RS_COUNT_HIRES_EN));
+		/* R300_INST_COUNT_RS - highest RS instruction used */
+		OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+
+		OUT_ACCEL_REG(R300_US_PIXSIZE, 2); /* highest temp used */
+
+		/* Indirection levels */
+		OUT_ACCEL_REG(R300_US_CONFIG, ((0 << R300_NLEVEL_SHIFT) |
+							R300_FIRST_TEX));
+
+		OUT_ACCEL_REG(R300_US_CODE_OFFSET, (R300_ALU_CODE_OFFSET(0) |
+						   R300_ALU_CODE_SIZE(5) |
+						   R300_TEX_CODE_OFFSET(0) |
+						   R300_TEX_CODE_SIZE(3)));
+
+		OUT_ACCEL_REG(R300_US_CODE_ADDR_3, (R300_ALU_START(0) |
+						   R300_ALU_SIZE(4) |
+						   R300_TEX_START(0) |
+						   R300_TEX_SIZE(2) |
+						   R300_RGBA_OUT));
+
+		/* tex inst */
+		OUT_ACCEL_REG(R300_US_TEX_INST_0, (R300_TEX_SRC_ADDR(0) |
+						  R300_TEX_DST_ADDR(0) |
+						  R300_TEX_ID(0) |
+						  R300_TEX_INST(R300_TEX_INST_LD)));
+		OUT_ACCEL_REG(R300_US_TEX_INST_1, (R300_TEX_SRC_ADDR(0) |
+						  R300_TEX_DST_ADDR(1) |
+						  R300_TEX_ID(1) |
+						  R300_TEX_INST(R300_TEX_INST_LD)));
+		OUT_ACCEL_REG(R300_US_TEX_INST_2, (R300_TEX_SRC_ADDR(0) |
+						  R300_TEX_DST_ADDR(2) |
+						  R300_TEX_ID(2) |
+						  R300_TEX_INST(R300_TEX_INST_LD)));
+
+		/* ALU inst */
+		/* u normalized goes in r0.b */
+		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(0), (R300_ALU_RGB_ADDR0(1) |
+						   R300_ALU_RGB_ADDR1(0) |
+						   R300_ALU_RGB_ADDR2(0) |
+						   R300_ALU_RGB_ADDRD(0) |
+						   R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_B)));
+		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(0), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+						   R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+						   R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_5) |
+						   R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NEG) |
+						   R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+						   R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+		/* keep normalized y in r0.a */
+		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(0), (R300_ALU_ALPHA_ADDR0(0) |
+						   R300_ALU_ALPHA_ADDR1(R300_ALU_ALPHA_CONST(0)) |
+						   R300_ALU_ALPHA_ADDR2(0) |
+						   R300_ALU_ALPHA_ADDRD(0) |
+						   R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_A)));
+		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(0), (R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_SRC0_A) |
+						   R300_ALU_ALPHA_MOD_A(R300_ALU_ALPHA_MOD_NOP) |
+						   R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_1_0) |
+						   R300_ALU_ALPHA_MOD_B(R300_ALU_ALPHA_MOD_NOP) |
+						   R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_SRC1_A) |
+						   R300_ALU_ALPHA_MOD_C(R300_ALU_ALPHA_MOD_NOP) |
+						   R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+						   R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE)));
+
+		/* v normalized in r0.g */
+		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(1), (R300_ALU_RGB_ADDR0(2) |
+						   R300_ALU_RGB_ADDR1(0) |
+						   R300_ALU_RGB_ADDR2(0) |
+						   R300_ALU_RGB_ADDRD(0) |
+						   R300_ALU_RGB_WMASK(R300_ALU_RGB_MASK_G)));
+		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(1), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_RGB) |
+						   R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_SEL_B(R300_ALU_RGB_1_0) |
+						   R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_SEL_C(R300_ALU_RGB_0_5) |
+						   R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NEG) |
+						   R300_ALU_RGB_OP(R300_ALU_RGB_OP_MAD) |
+						   R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE)));
+		/* alpha nop */
+		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(1), (R300_ALU_ALPHA_ADDRD(0) |
+						   R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(1), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+						   R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+						   R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+						   R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+		/* dp3 r */
+		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(2), (R300_ALU_RGB_ADDR0(0) |
+						   R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(1)) |
+						   R300_ALU_RGB_ADDR2(0) |
+						   R300_ALU_RGB_ADDRD(0) |
+						   R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_R)));
+		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(2), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_ABG) |
+						   R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+						   R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_SEL_C(R300_ALU_RGB_1_0) |
+						   R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_OP(R300_ALU_RGB_OP_DP3) |
+						   R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
+						   R300_ALU_RGB_CLAMP));
+		/* alpha nop */
+		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(2), (R300_ALU_ALPHA_ADDRD(0) |
+						   R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(2), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+						   R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+						   R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+						   R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+		/* dp3 g */
+		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(3), (R300_ALU_RGB_ADDR0(0) |
+						   R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(2)) |
+						   R300_ALU_RGB_ADDR2(0) |
+						   R300_ALU_RGB_ADDRD(0) |
+						   R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_G)));
+		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(3), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_ABG) |
+						   R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+						   R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_SEL_C(R300_ALU_RGB_1_0) |
+						   R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_OP(R300_ALU_RGB_OP_DP3) |
+						   R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
+						   R300_ALU_RGB_CLAMP));
+		/* alpha nop */
+		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(3), (R300_ALU_ALPHA_ADDRD(0) |
+						   R300_ALU_ALPHA_WMASK(R300_ALU_ALPHA_MASK_NONE)));
+		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(3), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+						   R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+						   R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+						   R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_0_0)));
+
+		/* dp3 b */
+		OUT_ACCEL_REG(R300_US_ALU_RGB_ADDR(4), (R300_ALU_RGB_ADDR0(0) |
+						   R300_ALU_RGB_ADDR1(R300_ALU_RGB_CONST(3)) |
+						   R300_ALU_RGB_ADDR2(0) |
+						   R300_ALU_RGB_ADDRD(0) |
+						   R300_ALU_RGB_OMASK(R300_ALU_RGB_MASK_B)));
+		OUT_ACCEL_REG(R300_US_ALU_RGB_INST(4), (R300_ALU_RGB_SEL_A(R300_ALU_RGB_SRC0_ABG) |
+						   R300_ALU_RGB_MOD_A(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_SEL_B(R300_ALU_RGB_SRC1_RGB) |
+						   R300_ALU_RGB_MOD_B(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_SEL_C(R300_ALU_RGB_1_0) |
+						   R300_ALU_RGB_MOD_C(R300_ALU_RGB_MOD_NOP) |
+						   R300_ALU_RGB_OP(R300_ALU_RGB_OP_DP3) |
+						   R300_ALU_RGB_OMOD(R300_ALU_RGB_OMOD_NONE) |
+						   R300_ALU_RGB_CLAMP));
+		/* write alpha 1 */
+		OUT_ACCEL_REG(R300_US_ALU_ALPHA_ADDR(4), (R300_ALU_ALPHA_ADDRD(0) |
+						   R300_ALU_ALPHA_OMASK(R300_ALU_ALPHA_MASK_A) |
+						   R300_ALU_ALPHA_TARGET_A));
+		OUT_ACCEL_REG(R300_US_ALU_ALPHA_INST(4), (R300_ALU_ALPHA_OP(R300_ALU_ALPHA_OP_MAD) |
+						   R300_ALU_ALPHA_SEL_A(R300_ALU_ALPHA_0_0) |
+						   R300_ALU_ALPHA_SEL_B(R300_ALU_ALPHA_0_0) |
+						   R300_ALU_ALPHA_SEL_C(R300_ALU_ALPHA_1_0)));
+
+		/* Shader constants. */
+      /* constant 0: normalization offset for y, brightness/contrast */
+		OUT_ACCEL_REG(R300_US_ALU_CONST_R(0), F_TO_24((float)pPriv->brightness / 1000.0));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_G(0), F_TO_24((float)pPriv->contrast / 255.0));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_B(0), F_TO_24(0.0));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_A(0), F_TO_24(-0.0625));
+      /* constant 1: r coefficients */
+		OUT_ACCEL_REG(R300_US_ALU_CONST_R(1), F_TO_24(1.1643));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_G(1), F_TO_24(0.0));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_B(1), F_TO_24(1.5958));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_A(1), F_TO_24(0.0));
+      /* constant 2: g coefficients */
+		OUT_ACCEL_REG(R300_US_ALU_CONST_R(2), F_TO_24(1.1643));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_G(2), F_TO_24(-0.39173));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_B(2), F_TO_24(-0.81290));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_A(2), F_TO_24(0.0));
+      /* constant 3: b coefficients */
+		OUT_ACCEL_REG(R300_US_ALU_CONST_R(3), F_TO_24(1.1643));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_G(3), F_TO_24(2.017));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_B(3), F_TO_24(0.0));
+		OUT_ACCEL_REG(R300_US_ALU_CONST_A(3), F_TO_24(0.0));
+
+		FINISH_ACCEL();
+    
 	    } else {
 		BEGIN_ACCEL(11);
 		/* 2 components: 2 for tex0 */
@@ -760,7 +985,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 						   R300_ALU_ALPHA_OMOD(R300_ALU_ALPHA_OMOD_NONE) |
 						   R300_ALU_ALPHA_CLAMP));
 		FINISH_ACCEL();
-		}
+	    }
 	} else {
 	    if (pPriv->bicubic_enabled) {
 		BEGIN_ACCEL(7);
diff --git a/src/radeon_video.h b/src/radeon_video.h
index 7f1891e..b59720f 100644
--- a/src/radeon_video.h
+++ b/src/radeon_video.h
@@ -89,6 +89,10 @@ typedef struct {
 
    void         *video_memory;
    int           video_offset;
+   
+   Bool          planar_hw;	/* when set, YV12/I420 frames are uploaded as three separate planes */
+   int           planeu_offset;	/* byte offset of the U plane from the start of the uploaded frame */
+   int           planev_offset;	/* byte offset of the V plane from the start of the uploaded frame */
 
    /* bicubic filtering */
    void         *bicubic_memory;
