История изменений
Исправление Novell-ch (текущая версия):
они такие
/*
 * Fixed-point RGB -> Y/U/V helpers.
 *
 * Each macro forms a weighted sum of the R, G and B arguments, adds a
 * constant and shifts the result right by SCALE bits.  SCALE is not defined
 * in this snippet and must be provided by the including code.
 *
 * The additive constants are 4096 == 16 << 8 and 32768 == 128 << 8, so with
 * SCALE == 8 they contribute offsets of 16 (Y) and 128 (U, V).
 * NOTE(review): presumably SCALE is 8 and the weights are BT.709
 * limited-range coefficients scaled by 256 -- confirm against the definition
 * of SCALE.
 *
 * Every argument is evaluated exactly once and the whole expansion is
 * parenthesized so the macros can be used inside larger expressions.
 */
#define RGB2Y(R, G, B) ((47 * (R) + 157 * (G) + 16 * (B) + 4096) >> SCALE)
#define RGB2U(R, G, B) ((-26 * (R) + -87 * (G) + 112 * (B) + 32768) >> SCALE)
#define RGB2V(R, G, B) ((112 * (R) + -102 * (G) + -10 * (B) + 32768) >> SCALE)
/*
 * Packed lookup tables, one per input channel, indexed by the 8-bit channel
 * value.  Each entry holds three 16-bit lanes in a single int64_t:
 * (k[0] * i) at bit 32, (k[1] * i) at bit 16 and (k[2] * i) at bit 0, where k
 * is that channel's coefficient triple.
 */
static int64_t t_r3[256], t_g3[256], t_b3[256];
/* Table state: 0 until the tables above have been filled, 2 afterwards. */
static int op=0;

/*
 * Fill t_r3, t_g3 and t_b3.
 *
 * The first coefficient of every triple is positive, so the accumulator is
 * never negative at the moment it is shifted left.
 * NOTE(review): the coefficients look like BT.601 limited-range weights
 * scaled by 256 -- confirm.
 */
static void
videoconvert_matrix8_init_tables (void)
{
  static const int k_r[3] = { 66, -38, 112 };
  static const int k_g[3] = { 129, -74, -94 };
  static const int k_b[3] = { 25, 112, -18 };
  int i, j;

  for (i = 0; i <= 255; i++)
    {
      int64_t r = 0, g = 0, b = 0;

      for (j = 0; j <= 2; j++)
        {
          r = (r << 16) + k_r[j] * i;
          g = (g << 16) + k_g[j] * i;
          b = (b << 16) + k_b[j] * i;
        }
      t_r3[i] = r;
      t_g3[i] = g;
      t_b3[i] = b;
    }
}

/*
 * Convert one line of 4-byte pixels in place.
 *
 * convert: supplies the line width in pixels (convert->width).
 * p:       points to convert->width * 4 bytes; bytes 1, 2 and 3 of every
 *          pixel are read as the three input channels and overwritten with
 *          the three converted values.  Byte 0 of each pixel is not touched.
 *
 * All three output channels are computed with one 64-bit addition: the
 * per-channel table entries are summed lane-wise and the result bytes are
 * taken from bits 40..47, 24..31 and 8..15, i.e. each 16-bit lane >> 8.
 *
 * The tables are built lazily on the first call; this is not synchronised,
 * so the first call must not race with other callers.
 */
static void
videoconvert_convert_matrix8 (VideoConvert * convert, gpointer p)
{
  /*
   * Per-lane constants: +128 rounds each lane before the >> 8, and the
   * 16 << 8 / 128 << 8 / 128 << 8 terms add offsets of 16, 128 and 128 to
   * the three outputs.  They also keep every lane non-negative so no borrow
   * leaks into the neighbouring lane.
   */
  const int64_t c1 = (128LL << 32) + (128 << 16) + 128 +
      (16LL << 40) + (128LL << 24) + (128 << 8);
  guint8 *pixels = p;
  int t, n_bytes;

  if (op < 2)
    {
      videoconvert_matrix8_init_tables ();
      /* Mark the tables ready only once they are completely filled. */
      op = 2;
    }

  /* The line length does not change while the line is converted. */
  n_bytes = convert->width * 4;
  for (t = 0; t < n_bytes; t += 4)
    {
      int64_t x3 = t_r3[pixels[t + 1]] + t_g3[pixels[t + 2]]
          + t_b3[pixels[t + 3]] + c1;

      /* Storing into a guint8 keeps only the low 8 bits of each shift. */
      pixels[t + 1] = x3 >> 40;
      pixels[t + 2] = x3 >> 24;
      pixels[t + 3] = x3 >> 8;
    }
}
и при этом
Performance counter stats for process id '25765':
16838.452312 task-clock # 0.260 CPUs utilized
25,274 context-switches # 0.002 M/sec
2,386 cpu-migrations # 0.142 K/sec
1,774 page-faults # 0.105 K/sec
51,113,181,950 cycles # 3.036 GHz
11,444,873,761 stalled-cycles-frontend # 22.39% frontend cycles idle
<not supported> stalled-cycles-backend
137,791,947,677 instructions # 2.70 insns per cycle
# 0.08 stalled cycles per insn
7,999,685,222 branches # 475.084 M/sec
29,238,725 branch-misses # 0.37% of all branches
<not supported> L1-dcache-loads:HG
820,831,013 L1-dcache-load-misses:HG # 0.00% of all L1-dcache hits
343,732,325 LLC-loads:HG # 20.414 M/sec
<not supported> LLC-load-misses:HG
64.883041228 seconds time elapsed
Исходная версия Novell-ch:
они такие
/*
 * Fixed-point RGB -> Y/U/V helpers.
 *
 * Each macro forms a weighted sum of the R, G and B arguments, adds a
 * constant and shifts the result right by SCALE bits.  SCALE is not defined
 * in this snippet and must be provided by the including code.
 *
 * The additive constants are 4096 == 16 << 8 and 32768 == 128 << 8, so with
 * SCALE == 8 they contribute offsets of 16 (Y) and 128 (U, V).
 * NOTE(review): presumably SCALE is 8 and the weights are BT.709
 * limited-range coefficients scaled by 256 -- confirm against the definition
 * of SCALE.
 *
 * Every argument is evaluated exactly once and the whole expansion is
 * parenthesized so the macros can be used inside larger expressions.
 */
#define RGB2Y(R, G, B) ((47 * (R) + 157 * (G) + 16 * (B) + 4096) >> SCALE)
#define RGB2U(R, G, B) ((-26 * (R) + -87 * (G) + 112 * (B) + 32768) >> SCALE)
#define RGB2V(R, G, B) ((112 * (R) + -102 * (G) + -10 * (B) + 32768) >> SCALE)
/*
 * Packed lookup tables, one per input channel, indexed by the 8-bit channel
 * value.  Each entry holds three 16-bit lanes in a single int64_t:
 * (k[0] * i) at bit 32, (k[1] * i) at bit 16 and (k[2] * i) at bit 0, where k
 * is that channel's coefficient triple.
 */
static int64_t t_r3[256], t_g3[256], t_b3[256];
/* Table state: 0 until the tables above have been filled, 2 afterwards. */
static int op=0;

/*
 * Fill t_r3, t_g3 and t_b3.
 *
 * The first coefficient of every triple is positive, so the accumulator is
 * never negative at the moment it is shifted left.
 * NOTE(review): the coefficients look like BT.601 limited-range weights
 * scaled by 256 -- confirm.
 */
static void
videoconvert_matrix8_init_tables (void)
{
  static const int k_r[3] = { 66, -38, 112 };
  static const int k_g[3] = { 129, -74, -94 };
  static const int k_b[3] = { 25, 112, -18 };
  int i, j;

  for (i = 0; i <= 255; i++)
    {
      int64_t r = 0, g = 0, b = 0;

      for (j = 0; j <= 2; j++)
        {
          r = (r << 16) + k_r[j] * i;
          g = (g << 16) + k_g[j] * i;
          b = (b << 16) + k_b[j] * i;
        }
      t_r3[i] = r;
      t_g3[i] = g;
      t_b3[i] = b;
    }
}

/*
 * Convert one line of 4-byte pixels in place.
 *
 * convert: supplies the line width in pixels (convert->width).
 * p:       points to convert->width * 4 bytes; bytes 1, 2 and 3 of every
 *          pixel are read as the three input channels and overwritten with
 *          the three converted values.  Byte 0 of each pixel is not touched.
 *
 * All three output channels are computed with one 64-bit addition: the
 * per-channel table entries are summed lane-wise and the result bytes are
 * taken from bits 40..47, 24..31 and 8..15, i.e. each 16-bit lane >> 8.
 *
 * The tables are built lazily on the first call; this is not synchronised,
 * so the first call must not race with other callers.
 */
static void
videoconvert_convert_matrix8 (VideoConvert * convert, gpointer p)
{
  /*
   * Per-lane constants: +128 rounds each lane before the >> 8, and the
   * 16 << 8 / 128 << 8 / 128 << 8 terms add offsets of 16, 128 and 128 to
   * the three outputs.  They also keep every lane non-negative so no borrow
   * leaks into the neighbouring lane.
   */
  const int64_t c1 = (128LL << 32) + (128 << 16) + 128 +
      (16LL << 40) + (128LL << 24) + (128 << 8);
  guint8 *pixels = p;
  int t, n_bytes;

  if (op < 2)
    {
      videoconvert_matrix8_init_tables ();
      /* Mark the tables ready only once they are completely filled. */
      op = 2;
    }

  /* The line length does not change while the line is converted. */
  n_bytes = convert->width * 4;
  for (t = 0; t < n_bytes; t += 4)
    {
      int64_t x3 = t_r3[pixels[t + 1]] + t_g3[pixels[t + 2]]
          + t_b3[pixels[t + 3]] + c1;

      /* Storing into a guint8 keeps only the low 8 bits of each shift. */
      pixels[t + 1] = x3 >> 40;
      pixels[t + 2] = x3 >> 24;
      pixels[t + 3] = x3 >> 8;
    }
}
и при этом
Performance counter stats for process id '25765':
16838.452312 task-clock # 0.260 CPUs utilized
25,274 context-switches # 0.002 M/sec
2,386 cpu-migrations # 0.142 K/sec
1,774 page-faults # 0.105 K/sec
51,113,181,950 cycles # 3.036 GHz
11,444,873,761 stalled-cycles-frontend # 22.39% frontend cycles idle
<not supported> stalled-cycles-backend
137,791,947,677 instructions # 2.70 insns per cycle
# 0.08 stalled cycles per insn
7,999,685,222 branches # 475.084 M/sec
29,238,725 branch-misses # 0.37% of all branches
<not supported> L1-dcache-loads:HG
820,831,013 L1-dcache-load-misses:HG # 0.00% of all L1-dcache hits
343,732,325 LLC-loads:HG # 20.414 M/sec
<not supported> LLC-load-misses:HG
64.883041228 seconds time elapsed