sharpening.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480

/**
* @file			sharp.c
* @brief		sharpening algorithm
* @author		Patrick Roth - roth@stettbacher.ch
* @copyright	Stettbacher Signal Processing AG
* 
* @remarks
*
* <PRE>
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
* </PRE>
*
*/

#include <stdio.h>
#include <string.h>

#if (WITH_SIMD == 1)
#include <immintrin.h>		// see /usr/lib64/gcc/x86_64-suse-linux/4.7/include/immintrin.h
#endif // WITH_SIMD

#include "color_pipe_private.h"
#include "color.h"
#include "filter.h"


/**
 * Expand a monochrome image into an interleaved YUV image.
 *
 * Every input pixel becomes one Y/U/V triple: Y receives the monochrome value,
 * U and V are set to 0. Input samples are read as uint8_t if bit_channel <= 8
 * and as uint16_t if bit_channel <= 16. For any larger bit_channel nothing is
 * written to img_yuv.
 *
 * @param img_yuv On return: image in YUV color space (3 values per pixel)
 * @param img_mono input monochrome image (1 value per pixel)
 * @param height image height in number of pixels
 * @param width image width in number of pixels
 * @param bit_channel monochrome bit resolution
 */
static void mono_to_yuv(int16_t *img_yuv, const void *img_mono, const int height, const int width, const int bit_channel) {

	const uint8_t *src8 = img_mono;
	const uint16_t *src16 = img_mono;
	int row, col;
	int pix = 0;		// running index into the monochrome input
	int out = 0;		// running index into the interleaved YUV output

	for(row = 0; row < height; row++) {
		for(col = 0; col < width; col++, pix++, out += 3) {
			if(bit_channel > 16) {
				// unsupported resolution: nothing is written for this pixel
				continue;
			}
			img_yuv[out] = (bit_channel <= 8) ? src8[pix] : src16[pix];
			img_yuv[out+1] = 0;
			img_yuv[out+2] = 0;
		}
	}
}


/**
 * Transform YUV to monochrome image.
 *
 * Only the Y-channel of every Y/U/V triple is used. The value is clamped to
 * the range 0 ... (2^bit_channel - 1) and stored as uint8_t if bit_channel <= 8
 * or as uint16_t if bit_channel <= 16. For any other bit resolution nothing is
 * written to img_mono.
 *
 * @param img_mono On return: monochrome image (1 value per pixel)
 * @param img_yuv input YUV image (3 values per pixel)
 * @param height image height in number of pixels
 * @param width image width in number of pixels
 * @param bit_channel monochrome bit resolution
 */
static void yuv_to_mono(void *img_mono, const int16_t *img_yuv, const int height, const int width, const int bit_channel) {
	int y, x, index_mono, index_yuv;
	// pixel data is unsigned: store through unsigned types so that values above
	// 127 (8 bit) are written without an implementation-defined narrowing
	uint8_t *out8 = img_mono;
	uint16_t *out16 = img_mono;
	int pix_max;
	int value;

	if(bit_channel < 0 || bit_channel > 16) {
		// unsupported resolution: no output format defined (and the shift below would be invalid)
		return;
	}
	pix_max = (1<<bit_channel)-1;

	index_mono = 0;
	index_yuv = 0;

	for(y = 0; y < height; y++) {
		for(x = 0; x < width; x++) {
			value = img_yuv[index_yuv];

			// sharpening may have pushed Y outside of the valid pixel range
			if(value < 0) {
				value = 0;
			}
			else if(value > pix_max) {
				value = pix_max;
			}

			if(bit_channel <= 8) {
				out8[index_mono] = (uint8_t)value;
			}
			else {
				out16[index_mono] = (uint16_t)value;
			}

			index_mono++;
			index_yuv += 3;
		}
	}
}


/**
 * Sharpen one pixel by applying a 3x3 filter kernel. Fixed-point is used.
 * The kernel weights around the center are equal.
 *
 * The weighted sum is accumulated in 64 bit, so it cannot overflow for any
 * combination of int16_t weights and pixel values. The result is shifted back
 * by shift_fact and saturated to the int16_t range instead of being truncated.
 *
 * @param a_other kernel weight around center (already shifted left by shift_fact)
 * @param a_center kernel weight at center position (already shifted left by shift_fact)
 * @param p11 pixel value at position 1/1
 * @param p12 pixel value at position 1/2
 * @param p13 pixel value at position 1/3
 * @param p21 pixel value at position 2/1
 * @param p22 pixel value at position 2/2
 * @param p23 pixel value at position 2/3
 * @param p31 pixel value at position 3/1
 * @param p32 pixel value at position 3/2
 * @param p33 pixel value at position 3/3
 * @param shift_fact The shifting factor defines by how many bits the kernel weights were shifted to the left.
 * @return filtered pixel value, saturated to -32768 ... 32767
 */
static inline int16_t do_sharp(const int16_t a_other, const int16_t a_center,
							   const int16_t p11, const int16_t p12, const int16_t p13,
							   const int16_t p21, const int16_t p22, const int16_t p23,
							   const int16_t p31, const int16_t p32, const int16_t p33,
							   const int shift_fact) {

	// all eight outer weights are equal: sum the neighbours first, multiply once
	const int32_t neighbours = (int32_t)p11 + p12 + p13 + p21 + p23 + p31 + p32 + p33;
	const int64_t weighted = (int64_t)a_other*neighbours + (int64_t)a_center*p22;

	// same right shift as before (arithmetic shift for negative sums on the supported compilers)
	const int64_t scaled = weighted >> shift_fact;

	if(scaled > 32767) {
		return 32767;
	}
	if(scaled < -32768) {
		return -32768;
	}
	return (int16_t)scaled;
}

#if 0
/*
 * Sharpening algorithm by using SSE instructions.
 * It's slower than the scalar algorithm above!!
 *
 * This function is excluded from the build by the surrounding "#if 0".
 *
 * Data flow: every pixel line is shuffled with the given byte mask, multiplied
 * lane-wise with the matching coefficient line (_mm_madd_epi16) and the partial
 * sums of all three lines are reduced with horizontal adds. The lowest 32 bit
 * lane of the reduced vector is shifted right by shift_fact and returned.
 *
 * NOTE(review): the coefficient lines are presumably expected to hold the three
 * kernel weights of one row in the lowest three 16 bit lanes (zero elsewhere)
 * and the mask to gather the three Y samples of a row into those lanes - confirm
 * before re-enabling.
 *
 * coeff_line0..2: kernel weights of the upper, center and lower kernel row
 * px_line0..2: pixel data of the upper, center and lower image row
 * mask: byte shuffle mask applied to every pixel line
 * shift_fact: number of bits the result is shifted to the right
 * returns res[0] >> shift_fact, converted to int16_t
 */
static int16_t do_sharp_sse(__m128i coeff_line0, __m128i coeff_line1, __m128i coeff_line2,
							__m128i px_line0, __m128i px_line1, __m128i px_line2, __m128i mask,
							int shift_fact) {
	
	__m128i y_line0, y_line1, y_line2, madd_line0, madd_line1, madd_line2;
	int32_t res[2];
	
	
	// bring the samples of interest into the lanes matching the coefficients
	y_line0 = _mm_shuffle_epi8(px_line0, mask);
	y_line1 = _mm_shuffle_epi8(px_line1, mask);
	y_line2 = _mm_shuffle_epi8(px_line2, mask);
	
	// multiply samples with kernel weights (one vector per kernel row)
	madd_line0 = _mm_madd_epi16(y_line0, coeff_line0);
	madd_line1 = _mm_madd_epi16(y_line1, coeff_line1);
	madd_line2 = _mm_madd_epi16(y_line2, coeff_line2);
	
	// fold the three rows into one vector ...
	madd_line0 = _mm_hadd_epi32(madd_line0, madd_line1);
	madd_line0 = _mm_hadd_epi32(madd_line0, madd_line2);
	
	// ... and reduce it until the lowest lane holds the sum of all products
	madd_line0 = _mm_hadd_epi32(madd_line0, madd_line0);
	madd_line0 = _mm_hadd_epi32(madd_line0, madd_line0);
	
	// store the lower 64 bit; only res[0] is used
	_mm_storel_epi64((__m128i*)&res, madd_line0);
	return (res[0] >> shift_fact);
}
#endif

/**
 * Saturate an intermediate result to the value range of int16_t.
 *
 * @param value intermediate result
 * @return value limited to -32768 ... 32767
 */
static inline int16_t sharp_saturate_s16(const int value) {
	if(value > 32767) {
		return 32767;
	}
	if(value < -32768) {
		return -32768;
	}
	return (int16_t)value;
}


/**
 * Sharpen the Y value of one pixel and copy its U and V values unchanged.
 *
 * All positions are element indices into the interleaved YUV buffers. The 3x3
 * neighbourhood is addressed by the Y index of the pixel in the upper, center
 * and lower row plus the signed offsets to the left and right neighbour. Border
 * pixels mirror the missing neighbours by passing the same row or offset twice.
 *
 * @param img_out On return: sharpened pixel at index center
 * @param img_in input YUV image
 * @param mask sharpening mask or NULL to sharpen unconditionally; a mask value of 0 at index center leaves the pixel unsharpened
 * @param a_other kernel weight around center
 * @param a_center kernel weight at center position
 * @param shift_fact fixed-point shifting factor of the kernel weights
 * @param upper Y index of the pixel above
 * @param center Y index of the pixel to sharpen
 * @param lower Y index of the pixel below
 * @param left offset to the left neighbour
 * @param right offset to the right neighbour
 */
static inline void sharp_apply_pixel(int16_t *img_out, const int16_t *img_in, const int8_t *mask,
									 const int16_t a_other, const int16_t a_center, const int shift_fact,
									 const int upper, const int center, const int lower,
									 const int left, const int right) {

	// computed in int: high-pass output plus original value may exceed int16_t
	int y_sharp = img_in[center];

	if(mask == NULL || mask[center] != 0) {
		y_sharp += do_sharp(a_other, a_center,
							img_in[upper+left], img_in[upper], img_in[upper+right],
							img_in[center+left], img_in[center], img_in[center+right],
							img_in[lower+left], img_in[lower], img_in[lower+right],
							shift_fact);
	}

	img_out[center] = sharp_saturate_s16(y_sharp);
	img_out[center+1] = img_in[center+1];
	img_out[center+2] = img_in[center+2];
}


/**
 * Make given YUV image sharper. Use the given filter strength to tune the sharpening strength.
 * If local sharpening is set, only those pixels are sharpened which are set in the sharpening mask.
 *
 * This sharpening algorithm high-pass filters the input image and adds the result to the input. Therefore all edges become sharper.
 * The sharpening is done on the Y-channel only. The brightness is of interest. The U and V channels are copied unchanged to avoid
 * color shifts.
 *
 * Border pixels are filtered with a mirrored neighbourhood. The four image corners are copied without sharpening.
 * Images with a height or width below 2 pixels are copied without sharpening, because no neighbourhood exists.
 * The sharpened Y value is saturated to the int16_t range; it is not limited to max_y here.
 *
 * img_out and img_in must be different buffers.
 *
 * @param img_out On return: sharpened YUV image
 * @param img_in YUV image to sharpen
 * @param height image height in number of pixels
 * @param width image width in number of pixels
 * @param sharp_strength sharpening strength factor (limited internally so that the kernel weights fit into int16_t)
 * @param max_y maximum Y-channel value (depends on bit per pixel); currently not used
 * @param local_flag not 0 if local sharpening must be done based on sharpening mask
 * @param sharp_mask sharpening mask (binary image), indexed like the Y values of the YUV image; only read if local_flag is not 0
 */
static void make_sharper(int16_t *img_out, const int16_t * img_in, const int height, const int width, const float sharp_strength, const int max_y,
						 const int local_flag, const int8_t *sharp_mask) {

	int y, x, c, i, center;
	int corner[4];
	int16_t a_other, a_center;
	float strength = sharp_strength;
	const int row_len = width*3;
	const int8_t *mask = (local_flag != 0) ? sharp_mask : NULL;

	/*
	 * don't touch it or check high-pass filter coefficient a_center for overflow!!
	 */
	const int shift_fact = 10;

	/*
	 * Largest/smallest strength whose center coefficient still fits into int16_t.
	 */
	const float strength_max = 32767.0f/(1<<shift_fact);
	const float strength_min = -32768.0f/(1<<shift_fact);

	(void)max_y;

	if(height <= 0 || width <= 0) {
		return;
	}

	if(height < 2 || width < 2) {
		// too small for a (mirrored) 3x3 neighbourhood: pass the image through
		memmove(img_out, img_in, (size_t)height*(size_t)width*3*sizeof(*img_out));
		return;
	}

	/*
	 * High-pass filter coefficients
	 *
	 * e. g. shift_fact = 10 and sharp_strength = 4
	 * 		--> a_other = -4/8.0 * 2^10 = -2^9 	--> no overflow
	 * 		--> a_center = 4*2^10 = 2^12		--> no overflow
	 *
	 * e. g. shift_fact = 10 and sharp_strength = 32
	 * 		--> a_other = -32/8.0 * 2^10 = -2^12	--> no overflow
	 * 		--> a_center = 32*2^10 = 2^15		--> overflow because this value is not possible with int16_t datatype
	 *
	 * The strength is therefore limited before the coefficients are derived. A NaN strength disables sharpening.
	 */
	if(strength != strength) {
		strength = 0.0f;
	}
	else if(strength > strength_max) {
		strength = strength_max;
	}
	else if(strength < strength_min) {
		strength = strength_min;
	}
	a_other = (int16_t)(-1.0*strength/8.0*(1<<shift_fact));
	a_center = (int16_t)(strength*(1<<shift_fact));

	/*
	 * The scalar kernel is used on purpose: the SSE variant do_sharp_sse is reported to be slower.
	 */

	// inner pixels: complete 3x3 neighbourhood is available
	for(y = 1; y < (height-1); y++) {
		for(x = 1; x < (width-1); x++) {
			center = y*row_len + x*3;
			sharp_apply_pixel(img_out, img_in, mask, a_other, a_center, shift_fact,
							  center-row_len, center, center+row_len, -3, 3);
		}
	}

	// handle horizontal upper border line (without corners): row below is mirrored
	for(x = 1; x < (width-1); x++) {
		center = x*3;
		sharp_apply_pixel(img_out, img_in, mask, a_other, a_center, shift_fact,
						  center+row_len, center, center+row_len, -3, 3);
	}

	// handle horizontal lower border line (without corners): row above is mirrored
	for(x = 1; x < (width-1); x++) {
		center = (height-1)*row_len + x*3;
		sharp_apply_pixel(img_out, img_in, mask, a_other, a_center, shift_fact,
						  center-row_len, center, center-row_len, -3, 3);
	}

	// handle vertical left border line (without corners): right neighbour is mirrored
	for(y = 1; y < (height-1); y++) {
		center = y*row_len;
		sharp_apply_pixel(img_out, img_in, mask, a_other, a_center, shift_fact,
						  center-row_len, center, center+row_len, 3, 3);
	}

	// handle vertical right border line (without corners): left neighbour is mirrored
	for(y = 1; y < (height-1); y++) {
		center = (y+1)*row_len - 3;
		sharp_apply_pixel(img_out, img_in, mask, a_other, a_center, shift_fact,
						  center-row_len, center, center+row_len, -3, -3);
	}

	/*
	 * Image corners are not sharpened!!
	 */
	corner[0] = 0;							// upper left
	corner[1] = row_len-3;					// upper right
	corner[2] = (height-1)*row_len;			// lower left
	corner[3] = height*row_len-3;			// lower right
	for(i = 0; i < 4; i++) {
		for(c = 0; c < 3; c++) {
			img_out[corner[i]+c] = img_in[corner[i]+c];
		}
	}
}


/**
 * Sharpening algorithm.
 * 
 * @param sharp_data required sharpening data
 * @return 0 on success otherwise -1
 */
int sharpening(struct sharp_data_t *sharp_data) {
	
	void *img_sharp, *img_unsharp;
	int is_color, bit_channel, width, height;
	int16_t *img_yuv, *img_yuv_sharp, *img_sobel, *img_gauss;
	float sharp_factor;
	enum sharp_alg_t sharp_alg;
	int8_t *sharp_mask;
	int local_sens;
	
	
	// put variables on stack
	is_color = sharp_data->is_color;
	img_sharp = sharp_data->img_sharp;
	img_unsharp = sharp_data->img_in;
	bit_channel = sharp_data->bit_channel;
	width = sharp_data->width;
	height = sharp_data->height;
	img_yuv = sharp_data->img_yuv;
	sharp_factor = sharp_data->sharp_factor;
	sharp_alg = sharp_data->sharp_alg;
	local_sens = sharp_data->local_sens;
	img_yuv_sharp = sharp_data->img_yuv_sharp;
	img_sobel = sharp_data->img_sobel;
	img_gauss = sharp_data->img_gauss;
	sharp_mask = sharp_data->sharp_mask;
	
	
	/*
	 * Sharpening is done on Y-channel.
	 * In case of color image, the RGB is transformed to YUV. In case of monochrom image,
	 * the Y-channel is used only.
	 */
	if(is_color) {
		// RGB to YUV transformation
		color_rgb_to_yuv(img_yuv, img_unsharp, height, width, bit_channel);
	}
	else {
		mono_to_yuv(img_yuv, img_unsharp, height, width, bit_channel);
	}
	
	/*
	 * In case of local sharpening, do calculate sharpening mask.
	 */
	if(sharp_alg == SHARP_ALG_LOCAL) {
		filter_sobel_3s16(img_sobel, img_yuv, height, width, 0, 1, 1);
		filter_gauss_3s16(img_gauss, img_sobel, height, width, 3, 1.0, 0, 1, 1);		// incresing the kernel size need more computing performance
		local_sens = (int)((1.0-local_sens/100.0)*(1<<bit_channel));
		filter_binary_3s16(sharp_mask, img_gauss, height, width, local_sens, (1<<bit_channel)-1, 0, 1, 1);
	}
	
	/*
	 * Y-channel is sharpened only to avoid color shifting
	 */
	make_sharper(img_yuv_sharp, img_yuv, height, width, sharp_factor, (1<<bit_channel)-1, sharp_alg == SHARP_ALG_LOCAL ? 1:0, sharp_mask);
	
	// YUV to RGB transformation
	if(is_color) {
		color_yuv_to_rgb(img_sharp, img_yuv_sharp, height, width, bit_channel);
	}
	else {
		yuv_to_mono(img_sharp, img_yuv_sharp, height, width, bit_channel);
	}
	return 0;
}