LINUX.ORG.RU

История изменений

Исправление Novell-ch, (текущая версия) :

помогло

int64_t c = ((int64_t) convert->cmatrix[0][3] << 32)
			+ ((int64_t) convert->cmatrix[1][3] << 16)
			+ ((int64_t) convert->cmatrix[2][3] << 0);

Performance counter stats for process id '17523':

      18862.102342 task-clock                #    0.272 CPUs utilized          
            30,875 context-switches          #    0.002 M/sec                  
             4,253 cpu-migrations            #    0.225 K/sec                  
             1,858 page-faults               #    0.099 K/sec                  
    56,563,008,115 cycles                    #    2.999 GHz                    
    13,637,059,458 stalled-cycles-frontend   #   24.11% frontend cycles idle   
   <not supported> stalled-cycles-backend  
   149,019,018,585 instructions              #    2.63  insns per cycle        
                                             #    0.09  stalled cycles per insn
     8,822,857,677 branches                  #  467.756 M/sec                  
        32,225,169 branch-misses             #    0.37% of all branches        
   <not supported> L1-dcache-loads:HG      
       860,977,224 L1-dcache-load-misses:HG  #    0.00% of all L1-dcache hits  
       368,779,426 LLC-loads:HG              #   19.551 M/sec                  
   <not supported> LLC-load-misses:HG      

      69.386963059 seconds time elapsed

Финальный результат такой; может, примут в апстрим. Завтра протестирую на 32 битах.

/* Table state flag: videoconvert_convert_matrix8 treats any value below 2
 * as "lookup tables not filled yet" and stores 2 once they are. */
static int op = 0;

/* Per-channel lookup tables, one 64-bit entry for each possible 8-bit
 * input value (0..255).  Filled by videoconvert_convert_matrix8. */
static int64_t t_r3[256];
static int64_t t_g3[256];
static int64_t t_b3[256];

/*
 * Apply the colour matrix convert->cmatrix in place to one line of 4-byte
 * pixels starting at p.  Bytes 1, 2 and 3 of every pixel are read as the
 * three input channels and overwritten with the three results; byte 0 is
 * left untouched.  convert->width * 4 bytes are processed.
 *
 * The three output rows are computed at once by packing them into 16-bit
 * wide lanes of a single int64_t (row 0 at bit 32, row 1 at bit 16, row 2
 * at bit 0).  For every possible input byte the file-scope tables t_r3,
 * t_g3 and t_b3 hold that channel's packed contribution to all three rows,
 * so a pixel costs three table loads and three additions.  The low 8 bits
 * of each lane are dropped when the result is stored.
 *
 * The tables are shared file-scope state.  They are rebuilt whenever they
 * have not been filled yet (op < 2) or the matrix coefficients differ from
 * the ones they were built from, so a changed or different cmatrix no
 * longer reuses stale tables.
 *
 * NOTE(review): results are not clamped to 0..255 and lanes can carry or
 * borrow into their neighbours when a row result falls outside the 16-bit
 * lane range -- confirm this is acceptable for the matrices in use.
 * NOTE(review): the shared tables are updated without any locking --
 * confirm callers never run this concurrently.
 */
static void
videoconvert_convert_matrix8 (VideoConvert * convert, gpointer p)
{
	/* Coefficients the shared tables were last built from. */
	static int cached[3][3];
	int coeff[3][3];
	int stale = (op < 2);
	int row, col, i, t, n;
	guint8 *pixels = p;
	int64_t c;

	for (row = 0; row < 3; row++)
	{
		for (col = 0; col < 3; col++)
		{
			coeff[row][col] = convert->cmatrix[row][col];
			if (coeff[row][col] != cached[row][col])
				stale = 1;
		}
	}

	if (stale)
	{
		for (i = 0; i <= 255; i++)
		{
			int64_t r = 0, g = 0, b = 0;

			/* Multiply by 65536 instead of shifting left by 16: the
			 * accumulators may be negative and left-shifting a negative
			 * signed value is undefined behaviour. */
			for (row = 0; row < 3; row++)
			{
				r = r * 65536 + (int64_t) coeff[row][0] * i;
				g = g * 65536 + (int64_t) coeff[row][1] * i;
				b = b * 65536 + (int64_t) coeff[row][2] * i;
			}
			t_r3[i] = r;
			t_g3[i] = g;
			t_b3[i] = b;
		}

		for (row = 0; row < 3; row++)
			for (col = 0; col < 3; col++)
				cached[row][col] = coeff[row][col];
		op = 2;
	}

	/* Constant column of the matrix, packed into the same three lanes
	 * (multiplication rather than << for the same reason as above). */
	c = (int64_t) convert->cmatrix[0][3] * ((int64_t) 1 << 32)
			+ (int64_t) convert->cmatrix[1][3] * ((int64_t) 1 << 16)
			+ (int64_t) convert->cmatrix[2][3];

	n = convert->width * 4;
	for (t = 0; t < n; t += 4)
	{
		uint8_t r = pixels[t + 1];
		uint8_t g = pixels[t + 2];
		uint8_t b = pixels[t + 3];
		int64_t x3 = t_r3[r] + t_g3[g] + t_b3[b] + c;

		/* Lane base (32/16/0) plus 8 discarded low bits. */
		pixels[t + 1] = x3 >> 40;
		pixels[t + 2] = x3 >> 24;
		pixels[t + 3] = x3 >> 8;
	}
}

Исходная версия Novell-ch, :

помогло

int64_t c = ((int64_t) convert->cmatrix[0][3] << 32)
			+ ((int64_t) convert->cmatrix[1][3] << 16)
			+ ((int64_t) convert->cmatrix[2][3] << 0);

Performance counter stats for process id '17523':

      18862.102342 task-clock                #    0.272 CPUs utilized          
            30,875 context-switches          #    0.002 M/sec                  
             4,253 cpu-migrations            #    0.225 K/sec                  
             1,858 page-faults               #    0.099 K/sec                  
    56,563,008,115 cycles                    #    2.999 GHz                    
    13,637,059,458 stalled-cycles-frontend   #   24.11% frontend cycles idle   
   <not supported> stalled-cycles-backend  
   149,019,018,585 instructions              #    2.63  insns per cycle        
                                             #    0.09  stalled cycles per insn
     8,822,857,677 branches                  #  467.756 M/sec                  
        32,225,169 branch-misses             #    0.37% of all branches        
   <not supported> L1-dcache-loads:HG      
       860,977,224 L1-dcache-load-misses:HG  #    0.00% of all L1-dcache hits  
       368,779,426 LLC-loads:HG              #   19.551 M/sec                  
   <not supported> LLC-load-misses:HG      

      69.386963059 seconds time elapsed
/* Table state flag: videoconvert_convert_matrix8 treats any value below 2
 * as "lookup tables not filled yet" and stores 2 once they are. */
static int op = 0;

/* Per-channel lookup tables, one 64-bit entry for each possible 8-bit
 * input value (0..255).  Filled by videoconvert_convert_matrix8. */
static int64_t t_r3[256];
static int64_t t_g3[256];
static int64_t t_b3[256];

Финальный результат такой; может, примут в апстрим. Завтра протестирую на 32 битах.
/*
 * Apply the colour matrix convert->cmatrix in place to one line of 4-byte
 * pixels starting at p.  Bytes 1, 2 and 3 of every pixel are read as the
 * three input channels and overwritten with the three results; byte 0 is
 * left untouched.  convert->width * 4 bytes are processed.
 *
 * The three output rows are computed at once by packing them into 16-bit
 * wide lanes of a single int64_t (row 0 at bit 32, row 1 at bit 16, row 2
 * at bit 0).  For every possible input byte the file-scope tables t_r3,
 * t_g3 and t_b3 hold that channel's packed contribution to all three rows,
 * so a pixel costs three table loads and three additions.  The low 8 bits
 * of each lane are dropped when the result is stored.
 *
 * The tables are shared file-scope state.  They are rebuilt whenever they
 * have not been filled yet (op < 2) or the matrix coefficients differ from
 * the ones they were built from, so a changed or different cmatrix no
 * longer reuses stale tables.
 *
 * NOTE(review): results are not clamped to 0..255 and lanes can carry or
 * borrow into their neighbours when a row result falls outside the 16-bit
 * lane range -- confirm this is acceptable for the matrices in use.
 * NOTE(review): the shared tables are updated without any locking --
 * confirm callers never run this concurrently.
 */
static void
videoconvert_convert_matrix8 (VideoConvert * convert, gpointer p)
{
	/* Coefficients the shared tables were last built from. */
	static int cached[3][3];
	int coeff[3][3];
	int stale = (op < 2);
	int row, col, i, t, n;
	guint8 *pixels = p;
	int64_t c;

	for (row = 0; row < 3; row++)
	{
		for (col = 0; col < 3; col++)
		{
			coeff[row][col] = convert->cmatrix[row][col];
			if (coeff[row][col] != cached[row][col])
				stale = 1;
		}
	}

	if (stale)
	{
		for (i = 0; i <= 255; i++)
		{
			int64_t r = 0, g = 0, b = 0;

			/* Multiply by 65536 instead of shifting left by 16: the
			 * accumulators may be negative and left-shifting a negative
			 * signed value is undefined behaviour. */
			for (row = 0; row < 3; row++)
			{
				r = r * 65536 + (int64_t) coeff[row][0] * i;
				g = g * 65536 + (int64_t) coeff[row][1] * i;
				b = b * 65536 + (int64_t) coeff[row][2] * i;
			}
			t_r3[i] = r;
			t_g3[i] = g;
			t_b3[i] = b;
		}

		for (row = 0; row < 3; row++)
			for (col = 0; col < 3; col++)
				cached[row][col] = coeff[row][col];
		op = 2;
	}

	/* Constant column of the matrix, packed into the same three lanes
	 * (multiplication rather than << for the same reason as above). */
	c = (int64_t) convert->cmatrix[0][3] * ((int64_t) 1 << 32)
			+ (int64_t) convert->cmatrix[1][3] * ((int64_t) 1 << 16)
			+ (int64_t) convert->cmatrix[2][3];

	n = convert->width * 4;
	for (t = 0; t < n; t += 4)
	{
		uint8_t r = pixels[t + 1];
		uint8_t g = pixels[t + 2];
		uint8_t b = pixels[t + 3];
		int64_t x3 = t_r3[r] + t_g3[g] + t_b3[b] + c;

		/* Lane base (32/16/0) plus 8 discarded low bits. */
		pixels[t + 1] = x3 >> 40;
		pixels[t + 2] = x3 >> 24;
		pixels[t + 3] = x3 >> 8;
	}
}