|
E:优化内部循环的判断函数PixelsIsInPic,将判断展开到内部循环之外,跳过不需要处理的目标像素;
TRotaryClipData类用于寻找旋转需要处理的边界范围;算法思路是首先寻找原图片中心点对应的;
那条扫描线,然后依次向上和向下寻找边界; 如果想要更快速的边界寻找算法,可以考虑利用像素的直线
绘制原理来自动生成边界(有机会的时候再来实现它);

边界寻找算法图示
struct TRotaryClipData{ public: long src_width; long src_height; long dst_width; long dst_height; long Ax_16; long Ay_16; long Bx_16; long By_16; long Cx_16; long Cy_16; public: long out_dst_up_y; long out_dst_up_x0; long out_dst_up_x1; long out_dst_down_y; long out_dst_down_x0; long out_dst_down_x1;
long out_src_x0_16; long out_src_y0_16; public: inline long get_up_x0(){ if (out_dst_up_x0<0) return 0; else return out_dst_up_x0; } inline long get_up_x1(){ if (out_dst_up_x1>=dst_width) return dst_width; else return out_dst_up_x1; } inline long get_down_x0(){ if (out_dst_down_x0<0) return 0; else return out_dst_down_x0; } inline long get_down_x1(){ if (out_dst_down_x1>=dst_width) return dst_width; else return out_dst_down_x1; }
inline bool is_in_src(long src_x_16,long src_y_16) { return ( ( (src_x_16>=0)&&((src_x_16>>16) && ( (src_y_16>=0)&&((src_y_16>>16) } void find_begin_in(long dst_y,long& out_dst_x,long& src_x_16,long& src_y_16) { src_x_16-=Ax_16; src_y_16-=Ay_16; while (is_in_src(src_x_16,src_y_16)) { --out_dst_x; src_x_16-=Ax_16; src_y_16-=Ay_16; } } bool find_begin(long dst_y,long& out_dst_x0,long dst_x1) { long test_dst_x0=out_dst_x0-1; long src_x_16=Ax_16*test_dst_x0 + Bx_16*dst_y + Cx_16; long src_y_16=Ay_16*test_dst_x0 + By_16*dst_y + Cy_16; for (long i=test_dst_x0;i<=dst_x1;++i) { if (is_in_src(src_x_16,src_y_16)) { out_dst_x0=i; if (i==test_dst_x0) { find_begin_in(dst_y,out_dst_x0,src_x_16,src_y_16); out_src_x0_16=src_x_16+Ax_16; out_src_y0_16=src_y_16+Ay_16; } else { out_src_x0_16=src_x_16; out_src_y0_16=src_y_16; } return true; } else { src_x_16+=Ax_16; src_y_16+=Ay_16; } } return false; } void find_end(long dst_y,long dst_x0,long& out_dst_x1) { long test_dst_x1=out_dst_x1; if (test_dst_x1 long src_x_16=Ax_16*test_dst_x1 + Bx_16*dst_y + Cx_16; long src_y_16=Ay_16*test_dst_x1 + By_16*dst_y + Cy_16; if (is_in_src(src_x_16,src_y_16)) { ++test_dst_x1; src_x_16+=Ax_16; src_y_16+=Ay_16; while (is_in_src(src_x_16,src_y_16)) { ++test_dst_x1; src_x_16+=Ax_16; src_y_16+=Ay_16; } out_dst_x1=test_dst_x1; } else { src_x_16-=Ax_16; src_y_16-=Ay_16; while (!is_in_src(src_x_16,src_y_16)) { --test_dst_x1; src_x_16-=Ax_16; src_y_16-=Ay_16; } out_dst_x1=test_dst_x1; } }
bool inti_clip(double move_x,double move_y) { //计算src中心点映射到dst后的坐标 out_dst_down_y=(long)(src_height*0.5+move_y); out_dst_down_x0=(long)(src_width*0.5+move_x); out_dst_down_x1=out_dst_down_x0; //得到初始out_? if (find_begin(out_dst_down_y,out_dst_down_x0,out_dst_down_x1)) find_end(out_dst_down_y,out_dst_down_x0,out_dst_down_x1); out_dst_up_y=out_dst_down_y; out_dst_up_x0=out_dst_down_x0; out_dst_up_x1=out_dst_down_x1; return (out_dst_down_x0 } bool next_clip_line_down() { ++out_dst_down_y; if (!find_begin(out_dst_down_y,out_dst_down_x0,out_dst_down_x1)) return false; find_end(out_dst_down_y,out_dst_down_x0,out_dst_down_x1); return (out_dst_down_x0 } bool next_clip_line_up() { --out_dst_up_y; if (!find_begin(out_dst_up_y,out_dst_up_x0,out_dst_up_x1)) return false; find_end(out_dst_up_y,out_dst_up_x0,out_dst_up_x1); return (out_dst_up_x0 } };
void PicRotary3(const TPicRegion& Dst,const TPicRegion& Src,double RotaryAngle,double ZoomX,double ZoomY,double move_x,double move_y) { if ( (fabs(ZoomX*Src.width)<1.0e-4) || (fabs(ZoomY*Src.height)<1.0e-4) ) return; //太小的缩放比例认为已经不可见 double tmprZoomXY=1.0/(ZoomX*ZoomY); double rZoomX=tmprZoomXY*ZoomY; double rZoomY=tmprZoomXY*ZoomX; double sinA,cosA; SinCos(RotaryAngle,sinA,cosA); long Ax_16=(long)(rZoomX*cosA*(1<<16)); long Ay_16=(long)(rZoomX*sinA*(1<<16)); long Bx_16=(long)(-rZoomY*sinA*(1<<16)); long By_16=(long)(rZoomY*cosA*(1<<16)); double rx0=Src.width*0.5; //(rx0,ry0)为旋转中心 double ry0=Src.height*0.5; long Cx_16=(long)((-(rx0+move_x)*rZoomX*cosA+(ry0+move_y)*rZoomY*sinA+rx0)*(1<<16)); long Cy_16=(long)((-(rx0+move_x)*rZoomX*sinA-(ry0+move_y)*rZoomY*cosA+ry0)*(1<<16));
TRotaryClipData rcData; rcData.Ax_16=Ax_16; rcData.Bx_16=Bx_16; rcData.Cx_16=Cx_16; rcData.Ay_16=Ay_16; rcData.By_16=By_16; rcData.Cy_16=Cy_16; rcData.dst_width=Dst.width; rcData.dst_height=Dst.height; rcData.src_width=Src.width; rcData.src_height=Src.height; if (!rcData.inti_clip(move_x,move_y)) return;
TARGB32* pDstLine=Dst.pdata; ((TUInt8*&)pDstLine)+=(Dst.byte_width*rcData.out_dst_down_y); while (true) //to down { long y=rcData.out_dst_down_y; if (y>=Dst.height) break; if (y>=0) { long srcx_16=rcData.out_src_x0_16; long srcy_16=rcData.out_src_y0_16; long x1=rcData.get_down_x1(); for (long x=rcData.get_down_x0();x { pDstLine[x]=Pixels(Src,(srcx_16>>16),(srcy_16>>16)); srcx_16+=Ax_16; srcy_16+=Ay_16; } } if (!rcData.next_clip_line_down()) break; ((TUInt8*&)pDstLine)+=Dst.byte_width; }
pDstLine=Dst.pdata; ((TUInt8*&)pDstLine)+=(Dst.byte_width*rcData.out_dst_up_y); while (rcData.next_clip_line_up()) //to up { long y=rcData.out_dst_up_y; if (y<0) break; if (y { long srcx_16=rcData.out_src_x0_16; long srcy_16=rcData.out_src_y0_16; long x1=rcData.get_up_x1(); for (long x=rcData.get_up_x0();x { pDstLine[x]=Pixels(Src,(srcx_16>>16),(srcy_16>>16)); srcx_16+=Ax_16; srcy_16+=Ay_16; } ((TUInt8*&)pDstLine)-=Dst.byte_width; } } }
|
////////////////////////////////////////////////////////////////////////////////
//速度测试:
//==============================================================================
// PicRotary3 141.1 fps
////////////////////////////////////////////////////////////////////////////////
F:使用SSE的MOVNTQ指令优化CPU写缓冲 (现在的代码还有一点bug没有修复,使用的时候请留意!)
struct TRotaryCopyLineData { TARGB32* psrc; long src_byte_width; long Ax_16; long Ay_16; TARGB32* pDstLine; long xCount; long srcx0_16; long srcy0_16; };
void PicRotarySSE_copyLine(TRotaryCopyLineData* rclData) { /* //汇编实现的功能等价的对应高级语言代码示例 for (long x=0;xxCount;++x) { rclData->pDstLine[x]=((TARGB32*)((TUInt8*)rclData->psrc +(rclData->srcy0_16>>16)*rclData->src_byte_width))[rclData->srcx0_16>>16]; rclData->srcx0_16+=rclData->Ax_16; rclData->srcy0_16+=rclData->Ay_16; } //*/ asm { mov esi,rclData mov edi,[esi]TRotaryCopyLineData.pDstLine mov ecx,[esi]TRotaryCopyLineData.xCount mov edx,[esi]TRotaryCopyLineData.srcx0_16 mov ebx,[esi]TRotaryCopyLineData.srcy0_16 push ebp
push ecx and ecx, (not 1) //循环2次展开
test ecx,ecx //nop jle EndWriteLoop2
lea edi,[edi+ecx*4] neg ecx
//todo: 尝试显式预读在旋转中的效果
loop2_start: mov eax,ebx mov ebp,edx shr eax,16 imul eax,[esi]TRotaryCopyLineData.src_byte_width add ebx,[esi]TRotaryCopyLineData.Ay_16 add edx,[esi]TRotaryCopyLineData.Ax_16 shr ebp,16 lea eax,[eax+ebp*4] mov ebp,[esi]TRotaryCopyLineData.psrc MOVD mm0,[eax+ebp]
mov eax,ebx mov ebp,edx shr eax,16 imul eax,[esi]TRotaryCopyLineData.src_byte_width add ebx,[esi]TRotaryCopyLineData.Ay_16 add edx,[esi]TRotaryCopyLineData.Ax_16 shr ebp,16 lea eax,[eax+ebp*4] mov ebp,[esi]TRotaryCopyLineData.psrc PUNPCKlDQ mm0,[eax+ebp]
add ecx,2 // MOVNTQ qword ptr [edi+ecx*4], mm0 //不使用缓存的写入指令 asm _emit 0x0F asm _emit 0xE7 asm _emit 0x04 asm _emit 0x8F
jnz loop2_start
emms
EndWriteLoop2:
pop ecx and ecx,1 test ecx,ecx jle EndWriteLoop
lea edi,[edi+ecx*4] neg ecx
loop_start: mov eax,ebx mov ebp,edx shr eax,16 imul eax,[esi]TRotaryCopyLineData.src_byte_width add ebx,[esi]TRotaryCopyLineData.Ay_16 add edx,[esi]TRotaryCopyLineData.Ax_16 shr ebp,16 lea eax,[eax+ebp*4] mov ebp,[esi]TRotaryCopyLineData.psrc mov eax,[eax+ebp] mov [edi+ecx*4],eax
inc ecx jnz loop_start
EndWriteLoop:
pop ebp
} }
void PicRotarySSE(const TPicRegion& Dst,const TPicRegion& Src,double RotaryAngle,double ZoomX,double ZoomY,double move_x,double move_y) { if ( (fabs(ZoomX*Src.width)<1.0e-4) || (fabs(ZoomY*Src.height)<1.0e-4) ) return; //太小的缩放比例认为已经不可见 double tmprZoomXY=1.0/(ZoomX*ZoomY); double rZoomX=tmprZoomXY*ZoomY; double rZoomY=tmprZoomXY*ZoomX; double sinA,cosA; SinCos(RotaryAngle,sinA,cosA); long Ax_16=(long)(rZoomX*cosA*(1<<16)); long Ay_16=(long)(rZoomX*sinA*(1<<16)); long Bx_16=(long)(-rZoomY*sinA*(1<<16)); long By_16=(long)(rZoomY*cosA*(1<<16)); double rx0=Src.width*0.5; //(rx0,ry0)为旋转中心 double ry0=Src.height*0.5; long Cx_16=(long)((-(rx0+move_x)*rZoomX*cosA+(ry0+move_y)*rZoomY*sinA+rx0)*(1<<16)); long Cy_16=(long)((-(rx0+move_x)*rZoomX*sinA-(ry0+move_y)*rZoomY*cosA+ry0)*(1<<16));
TRotaryClipData rcData; rcData.Ax_16=Ax_16; rcData.Bx_16=Bx_16; rcData.Cx_16=Cx_16; rcData.Ay_16=Ay_16; rcData.By_16=By_16; rcData.Cy_16=Cy_16; rcData.dst_width=Dst.width; rcData.dst_height=Dst.height; rcData.src_width=Src.width; rcData.src_height=Src.height; if (!rcData.inti_clip(move_x,move_y)) return;
TRotaryCopyLineData rclData; rclData.Ax_16=rcData.Ax_16; rclData.Ay_16=rcData.Ay_16; rclData.psrc=Src.pdata; rclData.src_byte_width=Src.byte_width;
TARGB32* pDstLine=Dst.pdata; ((TUInt8*&)pDstLine)+=(Dst.byte_width*rcData.out_dst_down_y); while (true) //to down { long y=rcData.out_dst_down_y; if (y>=Dst.height) break; if (y>=0) { rclData.srcx0_16=rcData.out_src_x0_16; rclData.srcy0_16=rcData.out_src_y0_16; long x0=rcData.get_down_x0(); rclData.pDstLine=&pDstLine[x0]; rclData.xCount=rcData.get_down_x1()-x0; PicRotarySSE_copyLine(&rclData); } if (!rcData.next_clip_line_down()) break; ((TUInt8*&)pDstLine)+=Dst.byte_width; }
pDstLine=Dst.pdata; ((TUInt8*&)pDstLine)+=(Dst.byte_width*rcData.out_dst_up_y); while (rcData.next_clip_line_up()) //to up { long y=rcData.out_dst_up_y; if (y<0) break; if (y { rclData.srcx0_16=rcData.out_src_x0_16; rclData.srcy0_16=rcData.out_src_y0_16; long x0=rcData.get_up_x0(); rclData.pDstLine=&pDstLine[x0]; rclData.xCount=rcData.get_up_x1()-x0; PicRotarySSE_copyLine(&rclData); ((TUInt8*&)pDstLine)-=Dst.byte_width; } } }
|
////////////////////////////////////////////////////////////////////////////////
//速度测试:
//==============================================================================
// PicRotarySEE 154.4 fps
////////////////////////////////////////////////////////////////////////////////
上一页 [1] [2] [3] [4] [5] [6] [7] 下一页
 【责编:Kittoy】
|