You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

773 lines
28KB

  /*
   * Copyright (c) 2017 Gerion Entrup
   *
   * This file is part of FFmpeg.
   *
   * FFmpeg is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 2 of the License, or
   * (at your option) any later version.
   *
   * FFmpeg is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   * GNU General Public License for more details.
   *
   * You should have received a copy of the GNU General Public License along
   * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
   * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
   */
  /**
   * @file
   * MPEG-7 video signature calculation and lookup filter
   * @see http://epubs.surrey.ac.uk/531590/1/MPEG-7%20Video%20Signature%20Author%27s%20Copy.pdf
   */
  #include <errno.h>
  #include <float.h>
  #include <limits.h>

  #include "libavcodec/put_bits.h"
  #include "libavformat/avformat.h"
  #include "libavutil/opt.h"
  #include "libavutil/avstring.h"
  #include "libavutil/intreadwrite.h"
  #include "libavutil/timestamp.h"
  #include "avfilter.h"
  #include "internal.h"
  #include "signature.h"
  #include "signature_lookup.c"
  /* Byte offset of option field x inside the filter's private context. */
  #define OFFSET(x) offsetof(SignatureContext, x)
  /* Flags shared by all options; parenthesized so the expansion is safe
   * inside any surrounding expression. */
  #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
  /* 64-bit factor used as the precision multiplier in filter_frame() and in
   * the overflow test of config_input(); presumably the least common multiple
   * of the possible block sizes (judging by the name). */
  #define BLOCK_LCM ((int64_t) 476985600)
  39. static const AVOption signature_options[] = {
  40. { "detectmode", "set the detectmode",
  41. OFFSET(mode), AV_OPT_TYPE_INT, {.i64 = MODE_OFF}, 0, NB_LOOKUP_MODE-1, FLAGS, "mode" },
  42. { "off", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_OFF}, 0, 0, .flags = FLAGS, "mode" },
  43. { "full", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FULL}, 0, 0, .flags = FLAGS, "mode" },
  44. { "fast", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = MODE_FAST}, 0, 0, .flags = FLAGS, "mode" },
  45. { "nb_inputs", "number of inputs",
  46. OFFSET(nb_inputs), AV_OPT_TYPE_INT, {.i64 = 1}, 1, INT_MAX, FLAGS },
  47. { "filename", "filename for output files",
  48. OFFSET(filename), AV_OPT_TYPE_STRING, {.str = ""}, 0, NB_FORMATS-1, FLAGS },
  49. { "format", "set output format",
  50. OFFSET(format), AV_OPT_TYPE_INT, {.i64 = FORMAT_BINARY}, 0, 1, FLAGS , "format" },
  51. { "binary", 0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_BINARY}, 0, 0, FLAGS, "format" },
  52. { "xml", 0, 0, AV_OPT_TYPE_CONST, {.i64=FORMAT_XML}, 0, 0, FLAGS, "format" },
  53. { "th_d", "threshold to detect one word as similar",
  54. OFFSET(thworddist), AV_OPT_TYPE_INT, {.i64 = 9000}, 1, INT_MAX, FLAGS },
  55. { "th_dc", "threshold to detect all words as similar",
  56. OFFSET(thcomposdist), AV_OPT_TYPE_INT, {.i64 = 60000}, 1, INT_MAX, FLAGS },
  57. { "th_xh", "threshold to detect frames as similar",
  58. OFFSET(thl1), AV_OPT_TYPE_INT, {.i64 = 116}, 1, INT_MAX, FLAGS },
  59. { "th_di", "minimum length of matching sequence in frames",
  60. OFFSET(thdi), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
  61. { "th_it", "threshold for relation of good to all frames",
  62. OFFSET(thit), AV_OPT_TYPE_DOUBLE, {.dbl = 0.5}, 0.0, 1.0, FLAGS },
  63. { NULL }
  64. };
  65. AVFILTER_DEFINE_CLASS(signature);
  66. static int query_formats(AVFilterContext *ctx)
  67. {
  68. /* all formats with a separate gray value */
  69. static const enum AVPixelFormat pix_fmts[] = {
  70. AV_PIX_FMT_GRAY8,
  71. AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
  72. AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
  73. AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
  74. AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUVJ420P,
  75. AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ444P,
  76. AV_PIX_FMT_YUVJ440P,
  77. AV_PIX_FMT_NV12, AV_PIX_FMT_NV21,
  78. AV_PIX_FMT_NONE
  79. };
  80. return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
  81. }
  82. static int config_input(AVFilterLink *inlink)
  83. {
  84. AVFilterContext *ctx = inlink->dst;
  85. SignatureContext *sic = ctx->priv;
  86. StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]);
  87. sc->time_base = inlink->time_base;
  88. /* test for overflow */
  89. sc->divide = (((uint64_t) inlink->w/32) * (inlink->w/32 + 1) * (inlink->h/32 * inlink->h/32 + 1) > INT64_MAX / (BLOCK_LCM * 255));
  90. if (sc->divide) {
  91. av_log(ctx, AV_LOG_WARNING, "Input dimension too high for precise calculation, numbers will be rounded.\n");
  92. }
  93. sc->w = inlink->w;
  94. sc->h = inlink->h;
  95. return 0;
  96. }
  97. static int get_block_size(const Block *b)
  98. {
  99. return (b->to.y - b->up.y + 1) * (b->to.x - b->up.x + 1);
  100. }
  101. static uint64_t get_block_sum(StreamContext *sc, uint64_t intpic[32][32], const Block *b)
  102. {
  103. uint64_t sum = 0;
  104. int x0, y0, x1, y1;
  105. x0 = b->up.x;
  106. y0 = b->up.y;
  107. x1 = b->to.x;
  108. y1 = b->to.y;
  109. if (x0-1 >= 0 && y0-1 >= 0) {
  110. sum = intpic[y1][x1] + intpic[y0-1][x0-1] - intpic[y1][x0-1] - intpic[y0-1][x1];
  111. } else if (x0-1 >= 0) {
  112. sum = intpic[y1][x1] - intpic[y1][x0-1];
  113. } else if (y0-1 >= 0) {
  114. sum = intpic[y1][x1] - intpic[y0-1][x1];
  115. } else {
  116. sum = intpic[y1][x1];
  117. }
  118. return sum;
  119. }
  /**
   * qsort() comparator ordering uint64_t values ascending.
   *
   * @return -1, 0 or 1 if *x is smaller than, equal to or greater than *y
   */
  static int cmp(const void *x, const void *y)
  {
      const uint64_t lhs = *(const uint64_t *) x;
      const uint64_t rhs = *(const uint64_t *) y;

      return (lhs > rhs) - (lhs < rhs);
  }
  /**
   * Set a single bit of a bit field to 1.
   * Bits are numbered starting at the most significant bit of data[0].
   *
   * @param data byte array holding the bit field
   * @param pos  index of the bit to set
   */
  static void set_bit(uint8_t* data, size_t pos)
  {
      const size_t   byte_idx = pos / 8;
      const unsigned shift    = 7 - (unsigned) (pos % 8);

      data[byte_idx] |= (uint8_t) (1u << shift);
  }
  133. static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
  134. {
  135. AVFilterContext *ctx = inlink->dst;
  136. SignatureContext *sic = ctx->priv;
  137. StreamContext *sc = &(sic->streamcontexts[FF_INLINK_IDX(inlink)]);
  138. FineSignature* fs;
  139. static const uint8_t pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
  140. /* indexes of words : 210,217,219,274,334 44,175,233,270,273 57,70,103,237,269 100,285,295,337,354 101,102,111,275,296
  141. s2usw = sorted to unsorted wordvec: 44 is at index 5, 57 at index 10...
  142. */
  143. static const unsigned int wordvec[25] = {44,57,70,100,101,102,103,111,175,210,217,219,233,237,269,270,273,274,275,285,295,296,334,337,354};
  144. static const uint8_t s2usw[25] = { 5,10,11, 15, 20, 21, 12, 22, 6, 0, 1, 2, 7, 13, 14, 8, 9, 3, 23, 16, 17, 24, 4, 18, 19};
  145. uint8_t wordt2b[5] = { 0, 0, 0, 0, 0 }; /* word ternary to binary */
  146. uint64_t intpic[32][32];
  147. uint64_t rowcount;
  148. uint8_t *p = picref->data[0];
  149. int inti, intj;
  150. int *intjlut;
  151. uint64_t conflist[DIFFELEM_SIZE];
  152. int f = 0, g = 0, w = 0;
  153. int32_t dh1 = 1, dh2 = 1, dw1 = 1, dw2 = 1, a, b;
  154. int64_t denom;
  155. int i, j, k, ternary;
  156. uint64_t blocksum;
  157. int blocksize;
  158. int64_t th; /* threshold */
  159. int64_t sum;
  160. int64_t precfactor = (sc->divide) ? 65536 : BLOCK_LCM;
  161. /* initialize fs */
  162. if (sc->curfinesig) {
  163. fs = av_mallocz(sizeof(FineSignature));
  164. if (!fs)
  165. return AVERROR(ENOMEM);
  166. sc->curfinesig->next = fs;
  167. fs->prev = sc->curfinesig;
  168. sc->curfinesig = fs;
  169. } else {
  170. fs = sc->curfinesig = sc->finesiglist;
  171. sc->curcoarsesig1->first = fs;
  172. }
  173. fs->pts = picref->pts;
  174. fs->index = sc->lastindex++;
  175. memset(intpic, 0, sizeof(uint64_t)*32*32);
  176. intjlut = av_malloc_array(inlink->w, sizeof(int));
  177. if (!intjlut)
  178. return AVERROR(ENOMEM);
  179. for (i = 0; i < inlink->w; i++) {
  180. intjlut[i] = (i*32)/inlink->w;
  181. }
  182. for (i = 0; i < inlink->h; i++) {
  183. inti = (i*32)/inlink->h;
  184. for (j = 0; j < inlink->w; j++) {
  185. intj = intjlut[j];
  186. intpic[inti][intj] += p[j];
  187. }
  188. p += picref->linesize[0];
  189. }
  190. av_freep(&intjlut);
  191. /* The following calculates a summed area table (intpic) and brings the numbers
  192. * in intpic to the same denominator.
  193. * So you only have to handle the numinator in the following sections.
  194. */
  195. dh1 = inlink->h / 32;
  196. if (inlink->h % 32)
  197. dh2 = dh1 + 1;
  198. dw1 = inlink->w / 32;
  199. if (inlink->w % 32)
  200. dw2 = dw1 + 1;
  201. denom = (sc->divide) ? dh1 * dh2 * dw1 * dw2 : 1;
  202. for (i = 0; i < 32; i++) {
  203. rowcount = 0;
  204. a = 1;
  205. if (dh2 > 1) {
  206. a = ((inlink->h*(i+1))%32 == 0) ? (inlink->h*(i+1))/32 - 1 : (inlink->h*(i+1))/32;
  207. a -= ((inlink->h*i)%32 == 0) ? (inlink->h*i)/32 - 1 : (inlink->h*i)/32;
  208. a = (a == dh1)? dh2 : dh1;
  209. }
  210. for (j = 0; j < 32; j++) {
  211. b = 1;
  212. if (dw2 > 1) {
  213. b = ((inlink->w*(j+1))%32 == 0) ? (inlink->w*(j+1))/32 - 1 : (inlink->w*(j+1))/32;
  214. b -= ((inlink->w*j)%32 == 0) ? (inlink->w*j)/32 - 1 : (inlink->w*j)/32;
  215. b = (b == dw1)? dw2 : dw1;
  216. }
  217. rowcount += intpic[i][j] * a * b * precfactor / denom;
  218. if (i > 0) {
  219. intpic[i][j] = intpic[i-1][j] + rowcount;
  220. } else {
  221. intpic[i][j] = rowcount;
  222. }
  223. }
  224. }
  225. denom = (sc->divide) ? 1 : dh1 * dh2 * dw1 * dw2;
  226. for (i = 0; i < ELEMENT_COUNT; i++) {
  227. const ElemCat* elemcat = elements[i];
  228. int64_t* elemsignature;
  229. uint64_t* sortsignature;
  230. elemsignature = av_malloc_array(elemcat->elem_count, sizeof(int64_t));
  231. if (!elemsignature)
  232. return AVERROR(ENOMEM);
  233. sortsignature = av_malloc_array(elemcat->elem_count, sizeof(int64_t));
  234. if (!sortsignature) {
  235. av_freep(&elemsignature);
  236. return AVERROR(ENOMEM);
  237. }
  238. for (j = 0; j < elemcat->elem_count; j++) {
  239. blocksum = 0;
  240. blocksize = 0;
  241. for (k = 0; k < elemcat->left_count; k++) {
  242. blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);
  243. blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);
  244. }
  245. sum = blocksum / blocksize;
  246. if (elemcat->av_elem) {
  247. sum -= 128 * precfactor * denom;
  248. } else {
  249. blocksum = 0;
  250. blocksize = 0;
  251. for (; k < elemcat->block_count; k++) {
  252. blocksum += get_block_sum(sc, intpic, &elemcat->blocks[j*elemcat->block_count+k]);
  253. blocksize += get_block_size(&elemcat->blocks[j*elemcat->block_count+k]);
  254. }
  255. sum -= blocksum / blocksize;
  256. conflist[g++] = FFABS(sum * 8 / (precfactor * denom));
  257. }
  258. elemsignature[j] = sum;
  259. sortsignature[j] = FFABS(sum);
  260. }
  261. /* get threshold */
  262. qsort(sortsignature, elemcat->elem_count, sizeof(uint64_t), cmp);
  263. th = sortsignature[(int) (elemcat->elem_count*0.333)];
  264. /* ternarize */
  265. for (j = 0; j < elemcat->elem_count; j++) {
  266. if (elemsignature[j] < -th) {
  267. ternary = 0;
  268. } else if (elemsignature[j] <= th) {
  269. ternary = 1;
  270. } else {
  271. ternary = 2;
  272. }
  273. fs->framesig[f/5] += ternary * pot3[f%5];
  274. if (f == wordvec[w]) {
  275. fs->words[s2usw[w]/5] += ternary * pot3[wordt2b[s2usw[w]/5]++];
  276. if (w < 24)
  277. w++;
  278. }
  279. f++;
  280. }
  281. av_freep(&elemsignature);
  282. av_freep(&sortsignature);
  283. }
  284. /* confidence */
  285. qsort(conflist, DIFFELEM_SIZE, sizeof(uint64_t), cmp);
  286. fs->confidence = FFMIN(conflist[DIFFELEM_SIZE/2], 255);
  287. /* coarsesignature */
  288. if (sc->coarsecount == 0) {
  289. if (sc->curcoarsesig2) {
  290. sc->curcoarsesig1 = av_mallocz(sizeof(CoarseSignature));
  291. if (!sc->curcoarsesig1)
  292. return AVERROR(ENOMEM);
  293. sc->curcoarsesig1->first = fs;
  294. sc->curcoarsesig2->next = sc->curcoarsesig1;
  295. sc->coarseend = sc->curcoarsesig1;
  296. }
  297. }
  298. if (sc->coarsecount == 45) {
  299. sc->midcoarse = 1;
  300. sc->curcoarsesig2 = av_mallocz(sizeof(CoarseSignature));
  301. if (!sc->curcoarsesig2)
  302. return AVERROR(ENOMEM);
  303. sc->curcoarsesig2->first = fs;
  304. sc->curcoarsesig1->next = sc->curcoarsesig2;
  305. sc->coarseend = sc->curcoarsesig2;
  306. }
  307. for (i = 0; i < 5; i++) {
  308. set_bit(sc->curcoarsesig1->data[i], fs->words[i]);
  309. }
  310. /* assuming the actual frame is the last */
  311. sc->curcoarsesig1->last = fs;
  312. if (sc->midcoarse) {
  313. for (i = 0; i < 5; i++) {
  314. set_bit(sc->curcoarsesig2->data[i], fs->words[i]);
  315. }
  316. sc->curcoarsesig2->last = fs;
  317. }
  318. sc->coarsecount = (sc->coarsecount+1)%90;
  319. /* debug printing finesignature */
  320. if (av_log_get_level() == AV_LOG_DEBUG) {
  321. av_log(ctx, AV_LOG_DEBUG, "input %d, confidence: %d\n", FF_INLINK_IDX(inlink), fs->confidence);
  322. av_log(ctx, AV_LOG_DEBUG, "words:");
  323. for (i = 0; i < 5; i++) {
  324. av_log(ctx, AV_LOG_DEBUG, " %d:", fs->words[i] );
  325. av_log(ctx, AV_LOG_DEBUG, " %d", fs->words[i] / pot3[0] );
  326. for (j = 1; j < 5; j++)
  327. av_log(ctx, AV_LOG_DEBUG, ",%d", fs->words[i] % pot3[j-1] / pot3[j] );
  328. av_log(ctx, AV_LOG_DEBUG, ";");
  329. }
  330. av_log(ctx, AV_LOG_DEBUG, "\n");
  331. av_log(ctx, AV_LOG_DEBUG, "framesignature:");
  332. for (i = 0; i < SIGELEM_SIZE/5; i++) {
  333. av_log(ctx, AV_LOG_DEBUG, " %d", fs->framesig[i] / pot3[0] );
  334. for (j = 1; j < 5; j++)
  335. av_log(ctx, AV_LOG_DEBUG, ",%d", fs->framesig[i] % pot3[j-1] / pot3[j] );
  336. }
  337. av_log(ctx, AV_LOG_DEBUG, "\n");
  338. }
  339. if (FF_INLINK_IDX(inlink) == 0)
  340. return ff_filter_frame(inlink->dst->outputs[0], picref);
  341. return 1;
  342. }
  343. static int xml_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
  344. {
  345. FineSignature* fs;
  346. CoarseSignature* cs;
  347. int i, j;
  348. FILE* f;
  349. unsigned int pot3[5] = { 3*3*3*3, 3*3*3, 3*3, 3, 1 };
  350. f = fopen(filename, "w");
  351. if (!f) {
  352. int err = AVERROR(EINVAL);
  353. char buf[128];
  354. av_strerror(err, buf, sizeof(buf));
  355. av_log(ctx, AV_LOG_ERROR, "cannot open xml file %s: %s\n", filename, buf);
  356. return err;
  357. }
  358. /* header */
  359. fprintf(f, "<?xml version='1.0' encoding='ASCII' ?>\n");
  360. fprintf(f, "<Mpeg7 xmlns=\"urn:mpeg:mpeg7:schema:2001\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"urn:mpeg:mpeg7:schema:2001 schema/Mpeg7-2001.xsd\">\n");
  361. fprintf(f, " <DescriptionUnit xsi:type=\"DescriptorCollectionType\">\n");
  362. fprintf(f, " <Descriptor xsi:type=\"VideoSignatureType\">\n");
  363. fprintf(f, " <VideoSignatureRegion>\n");
  364. fprintf(f, " <VideoSignatureSpatialRegion>\n");
  365. fprintf(f, " <Pixel>0 0 </Pixel>\n");
  366. fprintf(f, " <Pixel>%d %d </Pixel>\n", sc->w - 1, sc->h - 1);
  367. fprintf(f, " </VideoSignatureSpatialRegion>\n");
  368. fprintf(f, " <StartFrameOfSpatialRegion>0</StartFrameOfSpatialRegion>\n");
  369. /* hoping num is 1, other values are vague */
  370. fprintf(f, " <MediaTimeUnit>%d</MediaTimeUnit>\n", sc->time_base.den / sc->time_base.num);
  371. fprintf(f, " <MediaTimeOfSpatialRegion>\n");
  372. fprintf(f, " <StartMediaTimeOfSpatialRegion>0</StartMediaTimeOfSpatialRegion>\n");
  373. fprintf(f, " <EndMediaTimeOfSpatialRegion>%" PRIu64 "</EndMediaTimeOfSpatialRegion>\n", sc->coarseend->last->pts);
  374. fprintf(f, " </MediaTimeOfSpatialRegion>\n");
  375. /* coarsesignatures */
  376. for (cs = sc->coarsesiglist; cs; cs = cs->next) {
  377. fprintf(f, " <VSVideoSegment>\n");
  378. fprintf(f, " <StartFrameOfSegment>%" PRIu32 "</StartFrameOfSegment>\n", cs->first->index);
  379. fprintf(f, " <EndFrameOfSegment>%" PRIu32 "</EndFrameOfSegment>\n", cs->last->index);
  380. fprintf(f, " <MediaTimeOfSegment>\n");
  381. fprintf(f, " <StartMediaTimeOfSegment>%" PRIu64 "</StartMediaTimeOfSegment>\n", cs->first->pts);
  382. fprintf(f, " <EndMediaTimeOfSegment>%" PRIu64 "</EndMediaTimeOfSegment>\n", cs->last->pts);
  383. fprintf(f, " </MediaTimeOfSegment>\n");
  384. for (i = 0; i < 5; i++) {
  385. fprintf(f, " <BagOfWords>");
  386. for (j = 0; j < 31; j++) {
  387. uint8_t n = cs->data[i][j];
  388. if (j < 30) {
  389. fprintf(f, "%d %d %d %d %d %d %d %d ", (n & 0x80) >> 7,
  390. (n & 0x40) >> 6,
  391. (n & 0x20) >> 5,
  392. (n & 0x10) >> 4,
  393. (n & 0x08) >> 3,
  394. (n & 0x04) >> 2,
  395. (n & 0x02) >> 1,
  396. (n & 0x01));
  397. } else {
  398. /* print only 3 bit in last byte */
  399. fprintf(f, "%d %d %d ", (n & 0x80) >> 7,
  400. (n & 0x40) >> 6,
  401. (n & 0x20) >> 5);
  402. }
  403. }
  404. fprintf(f, "</BagOfWords>\n");
  405. }
  406. fprintf(f, " </VSVideoSegment>\n");
  407. }
  408. /* finesignatures */
  409. for (fs = sc->finesiglist; fs; fs = fs->next) {
  410. fprintf(f, " <VideoFrame>\n");
  411. fprintf(f, " <MediaTimeOfFrame>%" PRIu64 "</MediaTimeOfFrame>\n", fs->pts);
  412. /* confidence */
  413. fprintf(f, " <FrameConfidence>%d</FrameConfidence>\n", fs->confidence);
  414. /* words */
  415. fprintf(f, " <Word>");
  416. for (i = 0; i < 5; i++) {
  417. fprintf(f, "%d ", fs->words[i]);
  418. if (i < 4) {
  419. fprintf(f, " ");
  420. }
  421. }
  422. fprintf(f, "</Word>\n");
  423. /* framesignature */
  424. fprintf(f, " <FrameSignature>");
  425. for (i = 0; i< SIGELEM_SIZE/5; i++) {
  426. if (i > 0) {
  427. fprintf(f, " ");
  428. }
  429. fprintf(f, "%d ", fs->framesig[i] / pot3[0]);
  430. for (j = 1; j < 5; j++)
  431. fprintf(f, " %d ", fs->framesig[i] % pot3[j-1] / pot3[j] );
  432. }
  433. fprintf(f, "</FrameSignature>\n");
  434. fprintf(f, " </VideoFrame>\n");
  435. }
  436. fprintf(f, " </VideoSignatureRegion>\n");
  437. fprintf(f, " </Descriptor>\n");
  438. fprintf(f, " </DescriptionUnit>\n");
  439. fprintf(f, "</Mpeg7>\n");
  440. fclose(f);
  441. return 0;
  442. }
  443. static int binary_export(AVFilterContext *ctx, StreamContext *sc, const char* filename)
  444. {
  445. FILE* f;
  446. FineSignature* fs;
  447. CoarseSignature* cs;
  448. uint32_t numofsegments = (sc->lastindex + 44)/45;
  449. int i, j;
  450. PutBitContext buf;
  451. /* buffer + header + coarsesignatures + finesignature */
  452. int len = (512 + 6 * 32 + 3*16 + 2 +
  453. numofsegments * (4*32 + 1 + 5*243) +
  454. sc->lastindex * (2 + 32 + 6*8 + 608)) / 8;
  455. uint8_t* buffer = av_malloc_array(len, sizeof(uint8_t));
  456. if (!buffer)
  457. return AVERROR(ENOMEM);
  458. f = fopen(filename, "wb");
  459. if (!f) {
  460. int err = AVERROR(EINVAL);
  461. char buf[128];
  462. av_strerror(err, buf, sizeof(buf));
  463. av_log(ctx, AV_LOG_ERROR, "cannot open file %s: %s\n", filename, buf);
  464. av_freep(&buffer);
  465. return err;
  466. }
  467. init_put_bits(&buf, buffer, len);
  468. put_bits32(&buf, 1); /* NumOfSpatial Regions, only 1 supported */
  469. put_bits(&buf, 1, 1); /* SpatialLocationFlag, always the whole image */
  470. put_bits32(&buf, 0); /* PixelX,1 PixelY,1, 0,0 */
  471. put_bits(&buf, 16, sc->w-1 & 0xFFFF); /* PixelX,2 */
  472. put_bits(&buf, 16, sc->h-1 & 0xFFFF); /* PixelY,2 */
  473. put_bits32(&buf, 0); /* StartFrameOfSpatialRegion */
  474. put_bits32(&buf, sc->lastindex); /* NumOfFrames */
  475. /* hoping num is 1, other values are vague */
  476. /* den/num might be greater than 16 bit, so cutting it */
  477. put_bits(&buf, 16, 0xFFFF & (sc->time_base.den / sc->time_base.num)); /* MediaTimeUnit */
  478. put_bits(&buf, 1, 1); /* MediaTimeFlagOfSpatialRegion */
  479. put_bits32(&buf, 0); /* StartMediaTimeOfSpatialRegion */
  480. put_bits32(&buf, 0xFFFFFFFF & sc->coarseend->last->pts); /* EndMediaTimeOfSpatialRegion */
  481. put_bits32(&buf, numofsegments); /* NumOfSegments */
  482. /* coarsesignatures */
  483. for (cs = sc->coarsesiglist; cs; cs = cs->next) {
  484. put_bits32(&buf, cs->first->index); /* StartFrameOfSegment */
  485. put_bits32(&buf, cs->last->index); /* EndFrameOfSegment */
  486. put_bits(&buf, 1, 1); /* MediaTimeFlagOfSegment */
  487. put_bits32(&buf, 0xFFFFFFFF & cs->first->pts); /* StartMediaTimeOfSegment */
  488. put_bits32(&buf, 0xFFFFFFFF & cs->last->pts); /* EndMediaTimeOfSegment */
  489. for (i = 0; i < 5; i++) {
  490. /* put 243 bits ( = 7 * 32 + 19 = 8 * 28 + 19) into buffer */
  491. for (j = 0; j < 30; j++) {
  492. put_bits(&buf, 8, cs->data[i][j]);
  493. }
  494. put_bits(&buf, 3, cs->data[i][30] >> 5);
  495. }
  496. }
  497. /* finesignatures */
  498. put_bits(&buf, 1, 0); /* CompressionFlag, only 0 supported */
  499. for (fs = sc->finesiglist; fs; fs = fs->next) {
  500. put_bits(&buf, 1, 1); /* MediaTimeFlagOfFrame */
  501. put_bits32(&buf, 0xFFFFFFFF & fs->pts); /* MediaTimeOfFrame */
  502. put_bits(&buf, 8, fs->confidence); /* FrameConfidence */
  503. for (i = 0; i < 5; i++) {
  504. put_bits(&buf, 8, fs->words[i]); /* Words */
  505. }
  506. /* framesignature */
  507. for (i = 0; i < SIGELEM_SIZE/5; i++) {
  508. put_bits(&buf, 8, fs->framesig[i]);
  509. }
  510. }
  511. flush_put_bits(&buf);
  512. fwrite(buffer, 1, put_bits_count(&buf)/8, f);
  513. fclose(f);
  514. av_freep(&buffer);
  515. return 0;
  516. }
  517. static int export(AVFilterContext *ctx, StreamContext *sc, int input)
  518. {
  519. SignatureContext* sic = ctx->priv;
  520. char filename[1024];
  521. if (sic->nb_inputs > 1) {
  522. /* error already handled */
  523. av_assert0(av_get_frame_filename(filename, sizeof(filename), sic->filename, input) == 0);
  524. } else {
  525. if (av_strlcpy(filename, sic->filename, sizeof(filename)) >= sizeof(filename))
  526. return AVERROR(EINVAL);
  527. }
  528. if (sic->format == FORMAT_XML) {
  529. return xml_export(ctx, sc, filename);
  530. } else {
  531. return binary_export(ctx, sc, filename);
  532. }
  533. }
  534. static int request_frame(AVFilterLink *outlink)
  535. {
  536. AVFilterContext *ctx = outlink->src;
  537. SignatureContext *sic = ctx->priv;
  538. StreamContext *sc, *sc2;
  539. MatchingInfo match;
  540. int i, j, ret;
  541. int lookup = 1; /* indicates wheather EOF of all files is reached */
  542. /* process all inputs */
  543. for (i = 0; i < sic->nb_inputs; i++){
  544. sc = &(sic->streamcontexts[i]);
  545. ret = ff_request_frame(ctx->inputs[i]);
  546. /* return if unexpected error occurs in input stream */
  547. if (ret < 0 && ret != AVERROR_EOF)
  548. return ret;
  549. /* export signature at EOF */
  550. if (ret == AVERROR_EOF && !sc->exported) {
  551. /* export if wanted */
  552. if (strlen(sic->filename) > 0) {
  553. if (export(ctx, sc, i) < 0)
  554. return ret;
  555. }
  556. sc->exported = 1;
  557. }
  558. lookup &= sc->exported;
  559. }
  560. /* signature lookup */
  561. if (lookup && sic->mode != MODE_OFF) {
  562. /* iterate over every pair */
  563. for (i = 0; i < sic->nb_inputs; i++) {
  564. sc = &(sic->streamcontexts[i]);
  565. for (j = i+1; j < sic->nb_inputs; j++) {
  566. sc2 = &(sic->streamcontexts[j]);
  567. match = lookup_signatures(ctx, sic, sc, sc2, sic->mode);
  568. if (match.score != 0) {
  569. av_log(ctx, AV_LOG_INFO, "matching of video %d at %f and %d at %f, %d frames matching\n",
  570. i, ((double) match.first->pts * sc->time_base.num) / sc->time_base.den,
  571. j, ((double) match.second->pts * sc2->time_base.num) / sc2->time_base.den,
  572. match.matchframes);
  573. if (match.whole)
  574. av_log(ctx, AV_LOG_INFO, "whole video matching\n");
  575. } else {
  576. av_log(ctx, AV_LOG_INFO, "no matching of video %d and %d\n", i, j);
  577. }
  578. }
  579. }
  580. }
  581. return ret;
  582. }
  583. static av_cold int init(AVFilterContext *ctx)
  584. {
  585. SignatureContext *sic = ctx->priv;
  586. StreamContext *sc;
  587. int i, ret;
  588. char tmp[1024];
  589. sic->streamcontexts = av_mallocz(sic->nb_inputs * sizeof(StreamContext));
  590. if (!sic->streamcontexts)
  591. return AVERROR(ENOMEM);
  592. for (i = 0; i < sic->nb_inputs; i++) {
  593. AVFilterPad pad = {
  594. .type = AVMEDIA_TYPE_VIDEO,
  595. .name = av_asprintf("in%d", i),
  596. .config_props = config_input,
  597. .filter_frame = filter_frame,
  598. };
  599. if (!pad.name)
  600. return AVERROR(ENOMEM);
  601. if ((ret = ff_insert_inpad(ctx, i, &pad)) < 0) {
  602. av_freep(&pad.name);
  603. return ret;
  604. }
  605. sc = &(sic->streamcontexts[i]);
  606. sc->lastindex = 0;
  607. sc->finesiglist = av_mallocz(sizeof(FineSignature));
  608. if (!sc->finesiglist)
  609. return AVERROR(ENOMEM);
  610. sc->curfinesig = NULL;
  611. sc->coarsesiglist = av_mallocz(sizeof(CoarseSignature));
  612. if (!sc->coarsesiglist)
  613. return AVERROR(ENOMEM);
  614. sc->curcoarsesig1 = sc->coarsesiglist;
  615. sc->coarseend = sc->coarsesiglist;
  616. sc->coarsecount = 0;
  617. sc->midcoarse = 0;
  618. }
  619. /* check filename */
  620. if (sic->nb_inputs > 1 && strlen(sic->filename) > 0 && av_get_frame_filename(tmp, sizeof(tmp), sic->filename, 0) == -1) {
  621. av_log(ctx, AV_LOG_ERROR, "The filename must contain %%d or %%0nd, if you have more than one input.\n");
  622. return AVERROR(EINVAL);
  623. }
  624. return 0;
  625. }
  626. static av_cold void uninit(AVFilterContext *ctx)
  627. {
  628. SignatureContext *sic = ctx->priv;
  629. StreamContext *sc;
  630. void* tmp;
  631. FineSignature* finsig;
  632. CoarseSignature* cousig;
  633. int i;
  634. /* free the lists */
  635. if (sic->streamcontexts != NULL) {
  636. for (i = 0; i < sic->nb_inputs; i++) {
  637. sc = &(sic->streamcontexts[i]);
  638. finsig = sc->finesiglist;
  639. cousig = sc->coarsesiglist;
  640. while (finsig) {
  641. tmp = finsig;
  642. finsig = finsig->next;
  643. av_freep(&tmp);
  644. }
  645. sc->finesiglist = NULL;
  646. while (cousig) {
  647. tmp = cousig;
  648. cousig = cousig->next;
  649. av_freep(&tmp);
  650. }
  651. sc->coarsesiglist = NULL;
  652. }
  653. av_freep(&sic->streamcontexts);
  654. }
  655. for (unsigned i = 0; i < ctx->nb_inputs; i++)
  656. av_freep(&ctx->input_pads[i].name);
  657. }
  658. static int config_output(AVFilterLink *outlink)
  659. {
  660. AVFilterContext *ctx = outlink->src;
  661. AVFilterLink *inlink = ctx->inputs[0];
  662. outlink->time_base = inlink->time_base;
  663. outlink->frame_rate = inlink->frame_rate;
  664. outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
  665. outlink->w = inlink->w;
  666. outlink->h = inlink->h;
  667. return 0;
  668. }
  669. static const AVFilterPad signature_outputs[] = {
  670. {
  671. .name = "default",
  672. .type = AVMEDIA_TYPE_VIDEO,
  673. .request_frame = request_frame,
  674. .config_props = config_output,
  675. },
  676. { NULL }
  677. };
  678. AVFilter ff_vf_signature = {
  679. .name = "signature",
  680. .description = NULL_IF_CONFIG_SMALL("Calculate the MPEG-7 video signature"),
  681. .priv_size = sizeof(SignatureContext),
  682. .priv_class = &signature_class,
  683. .init = init,
  684. .uninit = uninit,
  685. .query_formats = query_formats,
  686. .outputs = signature_outputs,
  687. .inputs = NULL,
  688. .flags = AVFILTER_FLAG_DYNAMIC_INPUTS,
  689. };