You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

405 lines
14KB

  1. /*
  2. * Copyright (c) 2012-2014 Clément Bœsch <u pkh me>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. /**
  21. * @file
  22. * Edge detection filter
  23. *
  24. * @see https://en.wikipedia.org/wiki/Canny_edge_detector
  25. */
  26. #include "libavutil/avassert.h"
  27. #include "libavutil/opt.h"
  28. #include "avfilter.h"
  29. #include "formats.h"
  30. #include "internal.h"
  31. #include "video.h"
/* Output modes selectable through the "mode" option. */
enum FilterMode {
    MODE_WIRES,     // white/gray edge wires on a black background (gray8 output)
    MODE_COLORMIX,  // blend the detected edges with the source plane (color_mix())
    MODE_CANNY,     // run the detector independently on every plane
    NB_MODE         // number of modes, used as the option range upper bound
};
  38. struct plane_info {
  39. uint8_t *tmpbuf;
  40. uint16_t *gradients;
  41. char *directions;
  42. };
typedef struct EdgeDetectContext {
    const AVClass *class;          // AVOptions class (see AVFILTER_DEFINE_CLASS below)
    struct plane_info planes[3];   // per-plane scratch buffers
    int nb_planes;                 // 1 for gray input, 3 otherwise (set in config_props())
    double low, high;              // user thresholds as ratios in [0;1] (options)
    uint8_t low_u8, high_u8;       // the same thresholds scaled to [0;255] in init()
    int mode;                      // one of enum FilterMode
} EdgeDetectContext;
#define OFFSET(x) offsetof(EdgeDetectContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
static const AVOption edgedetect_options[] = {
    /* thresholds are expressed as ratios of the 8-bit range; defaults match
     * the classic Canny values 50/255 and 20/255 */
    { "high", "set high threshold", OFFSET(high), AV_OPT_TYPE_DOUBLE, {.dbl=50/255.}, 0, 1, FLAGS },
    { "low",  "set low threshold",  OFFSET(low),  AV_OPT_TYPE_DOUBLE, {.dbl=20/255.}, 0, 1, FLAGS },
    { "mode", "set mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=MODE_WIRES}, 0, NB_MODE-1, FLAGS, "mode" },
    { "wires",    "white/gray wires on black", 0, AV_OPT_TYPE_CONST, {.i64=MODE_WIRES},    INT_MIN, INT_MAX, FLAGS, "mode" },
    { "colormix", "mix colors",                0, AV_OPT_TYPE_CONST, {.i64=MODE_COLORMIX}, INT_MIN, INT_MAX, FLAGS, "mode" },
    { "canny",    "detect edges on planes",    0, AV_OPT_TYPE_CONST, {.i64=MODE_CANNY},    INT_MIN, INT_MAX, FLAGS, "mode" },
    { NULL }
};

AVFILTER_DEFINE_CLASS(edgedetect);
  63. static av_cold int init(AVFilterContext *ctx)
  64. {
  65. EdgeDetectContext *edgedetect = ctx->priv;
  66. edgedetect->low_u8 = edgedetect->low * 255. + .5;
  67. edgedetect->high_u8 = edgedetect->high * 255. + .5;
  68. return 0;
  69. }
  70. static int query_formats(AVFilterContext *ctx)
  71. {
  72. const EdgeDetectContext *edgedetect = ctx->priv;
  73. static const enum AVPixelFormat wires_pix_fmts[] = {AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE};
  74. static const enum AVPixelFormat canny_pix_fmts[] = {AV_PIX_FMT_YUV444P, AV_PIX_FMT_GBRP, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE};
  75. static const enum AVPixelFormat colormix_pix_fmts[] = {AV_PIX_FMT_GBRP, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE};
  76. AVFilterFormats *fmts_list;
  77. const enum AVPixelFormat *pix_fmts = NULL;
  78. if (edgedetect->mode == MODE_WIRES) {
  79. pix_fmts = wires_pix_fmts;
  80. } else if (edgedetect->mode == MODE_COLORMIX) {
  81. pix_fmts = colormix_pix_fmts;
  82. } else if (edgedetect->mode == MODE_CANNY) {
  83. pix_fmts = canny_pix_fmts;
  84. } else {
  85. av_assert0(0);
  86. }
  87. fmts_list = ff_make_format_list(pix_fmts);
  88. if (!fmts_list)
  89. return AVERROR(ENOMEM);
  90. return ff_set_common_formats(ctx, fmts_list);
  91. }
  92. static int config_props(AVFilterLink *inlink)
  93. {
  94. int p;
  95. AVFilterContext *ctx = inlink->dst;
  96. EdgeDetectContext *edgedetect = ctx->priv;
  97. edgedetect->nb_planes = inlink->format == AV_PIX_FMT_GRAY8 ? 1 : 3;
  98. for (p = 0; p < edgedetect->nb_planes; p++) {
  99. struct plane_info *plane = &edgedetect->planes[p];
  100. plane->tmpbuf = av_malloc(inlink->w * inlink->h);
  101. plane->gradients = av_calloc(inlink->w * inlink->h, sizeof(*plane->gradients));
  102. plane->directions = av_malloc(inlink->w * inlink->h);
  103. if (!plane->tmpbuf || !plane->gradients || !plane->directions)
  104. return AVERROR(ENOMEM);
  105. }
  106. return 0;
  107. }
/**
 * 5x5 Gaussian blur (sigma = 1.4) of one 8-bit plane, integer approximation
 * normalized by 159. The two outermost rows and columns cannot be covered by
 * the kernel and are copied from the source unchanged.
 *
 * NOTE(review): assumes w and h are at least ~5; with a smaller plane the
 * border copies below could write past dst — TODO confirm the minimum frame
 * size enforced upstream.
 */
static void gaussian_blur(AVFilterContext *ctx, int w, int h,
                          uint8_t *dst, int dst_linesize,
                          const uint8_t *src, int src_linesize)
{
    int i, j;

    /* first two rows: out of kernel reach, copy verbatim */
    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
    for (j = 2; j < h - 2; j++) {
        /* two leftmost columns copied as-is */
        dst[0] = src[0];
        dst[1] = src[1];
        for (i = 2; i < w - 2; i++) {
            /* Gaussian mask of size 5x5 with sigma = 1.4 */
            dst[i] = ((src[-2*src_linesize + i-2] + src[2*src_linesize + i-2]) * 2
                    + (src[-2*src_linesize + i-1] + src[2*src_linesize + i-1]) * 4
                    + (src[-2*src_linesize + i  ] + src[2*src_linesize + i  ]) * 5
                    + (src[-2*src_linesize + i+1] + src[2*src_linesize + i+1]) * 4
                    + (src[-2*src_linesize + i+2] + src[2*src_linesize + i+2]) * 2

                    + (src[  -src_linesize + i-2] + src[  src_linesize + i-2]) *  4
                    + (src[  -src_linesize + i-1] + src[  src_linesize + i-1]) *  9
                    + (src[  -src_linesize + i  ] + src[  src_linesize + i  ]) * 12
                    + (src[  -src_linesize + i+1] + src[  src_linesize + i+1]) *  9
                    + (src[  -src_linesize + i+2] + src[  src_linesize + i+2]) *  4

                    + src[i-2] *  5
                    + src[i-1] * 12
                    + src[i  ] * 15
                    + src[i+1] * 12
                    + src[i+2] *  5) / 159;
        }
        /* here i == w - 2: copy the two rightmost columns as-is */
        dst[i    ] = src[i    ];
        dst[i + 1] = src[i + 1];
        dst += dst_linesize;
        src += src_linesize;
    }
    /* last two rows: copy verbatim */
    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
    memcpy(dst, src, w);
}
  144. enum {
  145. DIRECTION_45UP,
  146. DIRECTION_45DOWN,
  147. DIRECTION_HORIZONTAL,
  148. DIRECTION_VERTICAL,
  149. };
  150. static int get_rounded_direction(int gx, int gy)
  151. {
  152. /* reference angles:
  153. * tan( pi/8) = sqrt(2)-1
  154. * tan(3pi/8) = sqrt(2)+1
  155. * Gy/Gx is the tangent of the angle (theta), so Gy/Gx is compared against
  156. * <ref-angle>, or more simply Gy against <ref-angle>*Gx
  157. *
  158. * Gx and Gy bounds = [-1020;1020], using 16-bit arithmetic:
  159. * round((sqrt(2)-1) * (1<<16)) = 27146
  160. * round((sqrt(2)+1) * (1<<16)) = 158218
  161. */
  162. if (gx) {
  163. int tanpi8gx, tan3pi8gx;
  164. if (gx < 0)
  165. gx = -gx, gy = -gy;
  166. gy <<= 16;
  167. tanpi8gx = 27146 * gx;
  168. tan3pi8gx = 158218 * gx;
  169. if (gy > -tan3pi8gx && gy < -tanpi8gx) return DIRECTION_45UP;
  170. if (gy > -tanpi8gx && gy < tanpi8gx) return DIRECTION_HORIZONTAL;
  171. if (gy > tanpi8gx && gy < tan3pi8gx) return DIRECTION_45DOWN;
  172. }
  173. return DIRECTION_VERTICAL;
  174. }
  175. static void sobel(int w, int h,
  176. uint16_t *dst, int dst_linesize,
  177. int8_t *dir, int dir_linesize,
  178. const uint8_t *src, int src_linesize)
  179. {
  180. int i, j;
  181. for (j = 1; j < h - 1; j++) {
  182. dst += dst_linesize;
  183. dir += dir_linesize;
  184. src += src_linesize;
  185. for (i = 1; i < w - 1; i++) {
  186. const int gx =
  187. -1*src[-src_linesize + i-1] + 1*src[-src_linesize + i+1]
  188. -2*src[ i-1] + 2*src[ i+1]
  189. -1*src[ src_linesize + i-1] + 1*src[ src_linesize + i+1];
  190. const int gy =
  191. -1*src[-src_linesize + i-1] + 1*src[ src_linesize + i-1]
  192. -2*src[-src_linesize + i ] + 2*src[ src_linesize + i ]
  193. -1*src[-src_linesize + i+1] + 1*src[ src_linesize + i+1];
  194. dst[i] = FFABS(gx) + FFABS(gy);
  195. dir[i] = get_rounded_direction(gx, gy);
  196. }
  197. }
  198. }
  199. static void non_maximum_suppression(int w, int h,
  200. uint8_t *dst, int dst_linesize,
  201. const int8_t *dir, int dir_linesize,
  202. const uint16_t *src, int src_linesize)
  203. {
  204. int i, j;
  205. #define COPY_MAXIMA(ay, ax, by, bx) do { \
  206. if (src[i] > src[(ay)*src_linesize + i+(ax)] && \
  207. src[i] > src[(by)*src_linesize + i+(bx)]) \
  208. dst[i] = av_clip_uint8(src[i]); \
  209. } while (0)
  210. for (j = 1; j < h - 1; j++) {
  211. dst += dst_linesize;
  212. dir += dir_linesize;
  213. src += src_linesize;
  214. for (i = 1; i < w - 1; i++) {
  215. switch (dir[i]) {
  216. case DIRECTION_45UP: COPY_MAXIMA( 1, -1, -1, 1); break;
  217. case DIRECTION_45DOWN: COPY_MAXIMA(-1, -1, 1, 1); break;
  218. case DIRECTION_HORIZONTAL: COPY_MAXIMA( 0, -1, 0, 1); break;
  219. case DIRECTION_VERTICAL: COPY_MAXIMA(-1, 0, 1, 0); break;
  220. }
  221. }
  222. }
  223. }
  224. static void double_threshold(int low, int high, int w, int h,
  225. uint8_t *dst, int dst_linesize,
  226. const uint8_t *src, int src_linesize)
  227. {
  228. int i, j;
  229. for (j = 0; j < h; j++) {
  230. for (i = 0; i < w; i++) {
  231. if (src[i] > high) {
  232. dst[i] = src[i];
  233. continue;
  234. }
  235. if ((!i || i == w - 1 || !j || j == h - 1) &&
  236. src[i] > low &&
  237. (src[-src_linesize + i-1] > high ||
  238. src[-src_linesize + i ] > high ||
  239. src[-src_linesize + i+1] > high ||
  240. src[ i-1] > high ||
  241. src[ i+1] > high ||
  242. src[ src_linesize + i-1] > high ||
  243. src[ src_linesize + i ] > high ||
  244. src[ src_linesize + i+1] > high))
  245. dst[i] = src[i];
  246. else
  247. dst[i] = 0;
  248. }
  249. dst += dst_linesize;
  250. src += src_linesize;
  251. }
  252. }
  253. static void color_mix(int w, int h,
  254. uint8_t *dst, int dst_linesize,
  255. const uint8_t *src, int src_linesize)
  256. {
  257. int i, j;
  258. for (j = 0; j < h; j++) {
  259. for (i = 0; i < w; i++)
  260. dst[i] = (dst[i] + src[i]) >> 1;
  261. dst += dst_linesize;
  262. src += src_linesize;
  263. }
  264. }
  265. static int filter_frame(AVFilterLink *inlink, AVFrame *in)
  266. {
  267. AVFilterContext *ctx = inlink->dst;
  268. EdgeDetectContext *edgedetect = ctx->priv;
  269. AVFilterLink *outlink = ctx->outputs[0];
  270. int p, direct = 0;
  271. AVFrame *out;
  272. if (edgedetect->mode != MODE_COLORMIX && av_frame_is_writable(in)) {
  273. direct = 1;
  274. out = in;
  275. } else {
  276. out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
  277. if (!out) {
  278. av_frame_free(&in);
  279. return AVERROR(ENOMEM);
  280. }
  281. av_frame_copy_props(out, in);
  282. }
  283. for (p = 0; p < edgedetect->nb_planes; p++) {
  284. struct plane_info *plane = &edgedetect->planes[p];
  285. uint8_t *tmpbuf = plane->tmpbuf;
  286. uint16_t *gradients = plane->gradients;
  287. int8_t *directions = plane->directions;
  288. /* gaussian filter to reduce noise */
  289. gaussian_blur(ctx, inlink->w, inlink->h,
  290. tmpbuf, inlink->w,
  291. in->data[p], in->linesize[p]);
  292. /* compute the 16-bits gradients and directions for the next step */
  293. sobel(inlink->w, inlink->h,
  294. gradients, inlink->w,
  295. directions,inlink->w,
  296. tmpbuf, inlink->w);
  297. /* non_maximum_suppression() will actually keep & clip what's necessary and
  298. * ignore the rest, so we need a clean output buffer */
  299. memset(tmpbuf, 0, inlink->w * inlink->h);
  300. non_maximum_suppression(inlink->w, inlink->h,
  301. tmpbuf, inlink->w,
  302. directions,inlink->w,
  303. gradients, inlink->w);
  304. /* keep high values, or low values surrounded by high values */
  305. double_threshold(edgedetect->low_u8, edgedetect->high_u8,
  306. inlink->w, inlink->h,
  307. out->data[p], out->linesize[p],
  308. tmpbuf, inlink->w);
  309. if (edgedetect->mode == MODE_COLORMIX) {
  310. color_mix(inlink->w, inlink->h,
  311. out->data[p], out->linesize[p],
  312. in->data[p], in->linesize[p]);
  313. }
  314. }
  315. if (!direct)
  316. av_frame_free(&in);
  317. return ff_filter_frame(outlink, out);
  318. }
  319. static av_cold void uninit(AVFilterContext *ctx)
  320. {
  321. int p;
  322. EdgeDetectContext *edgedetect = ctx->priv;
  323. for (p = 0; p < edgedetect->nb_planes; p++) {
  324. struct plane_info *plane = &edgedetect->planes[p];
  325. av_freep(&plane->tmpbuf);
  326. av_freep(&plane->gradients);
  327. av_freep(&plane->directions);
  328. }
  329. }
/* Single video input: buffers are sized in config_props, frames processed in
 * filter_frame. */
static const AVFilterPad edgedetect_inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_VIDEO,
        .config_props = config_props,
        .filter_frame = filter_frame,
    },
    { NULL }
};

/* Single pass-through video output. */
static const AVFilterPad edgedetect_outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
    },
    { NULL }
};
/* Filter registration: ties the options class, lifecycle callbacks and pads
 * together under the "edgedetect" name. */
AVFilter ff_vf_edgedetect = {
    .name          = "edgedetect",
    .description   = NULL_IF_CONFIG_SMALL("Detect and draw edge."),
    .priv_size     = sizeof(EdgeDetectContext),
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .inputs        = edgedetect_inputs,
    .outputs       = edgedetect_outputs,
    .priv_class    = &edgedetect_class,
    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
};