patch by Anton Khirnov, wyskas gmail com. Originally committed as revision 20006 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.6)
| @@ -81,6 +81,7 @@ static void read_ttag(AVFormatContext *s, int taglen, const char *key) | |||||
| char *q, dst[512]; | char *q, dst[512]; | ||||
| int len, dstlen = sizeof(dst) - 1; | int len, dstlen = sizeof(dst) - 1; | ||||
| unsigned genre; | unsigned genre; | ||||
| unsigned int (*get)(ByteIOContext*) = get_be16; | |||||
| dst[0] = 0; | dst[0] = 0; | ||||
| if (taglen < 1) | if (taglen < 1) | ||||
| @@ -99,11 +100,38 @@ static void read_ttag(AVFormatContext *s, int taglen, const char *key) | |||||
| *q = 0; | *q = 0; | ||||
| break; | break; | ||||
| case 1: /* UTF-16 with BOM */ | |||||
| taglen -= 2; | |||||
| switch (get_be16(s->pb)) { | |||||
| case 0xfffe: | |||||
| get = get_le16; | |||||
| case 0xfeff: | |||||
| break; | |||||
| default: | |||||
| av_log(s, AV_LOG_ERROR, "Incorrect BOM value in tag %s.\n", key); | |||||
| return; | |||||
| } | |||||
| // fall-through | |||||
| case 2: /* UTF-16BE without BOM */ | |||||
| q = dst; | |||||
| while (taglen > 1 && q - dst < dstlen - 7) { | |||||
| uint32_t ch; | |||||
| uint8_t tmp; | |||||
| GET_UTF16(ch, ((taglen -= 2) >= 0 ? get(s->pb) : 0), break;) | |||||
| PUT_UTF8(ch, tmp, *q++ = tmp;) | |||||
| } | |||||
| *q = 0; | |||||
| break; | |||||
| case 3: /* UTF-8 */ | case 3: /* UTF-8 */ | ||||
| len = FFMIN(taglen, dstlen - 1); | len = FFMIN(taglen, dstlen - 1); | ||||
| get_buffer(s->pb, dst, len); | get_buffer(s->pb, dst, len); | ||||
| dst[len] = 0; | dst[len] = 0; | ||||
| break; | break; | ||||
| default: | |||||
| av_log(s, AV_LOG_WARNING, "Unknown encoding in tag %s\n.", key); | |||||
| } | } | ||||
| if (!strcmp(key, "genre") | if (!strcmp(key, "genre") | ||||
| @@ -265,6 +265,30 @@ static inline av_const int av_ceil_log2(int x) | |||||
| }\ | }\ | ||||
| } | } | ||||
/*!
 * \def GET_UTF16(val, GET_16BIT, ERROR)
 * Converts a UTF-16 character (2 or 4 bytes) to its 32-bit UCS-4 encoded form.
 *
 * \param val is the output and should be of type uint32_t. It holds the converted
 * UCS-4 character and should be a left value. It is evaluated several times, so
 * it must not have side effects.
 * \param GET_16BIT gets two bytes of UTF-16 encoded data converted to native endianness.
 * It can be a function or a statement whose return value or evaluated value is of type
 * uint16_t. It will be executed up to 2 times: once for the first code unit and,
 * whenever that unit lies in the surrogate range 0xD800..0xDFFF, a second time
 * (even if the first unit later turns out to be an invalid leading surrogate).
 * The macro parenthesizes it, so a conditional expression may be passed as is.
 * \param ERROR action that should be taken when an invalid UTF-16 surrogate is
 * returned from GET_16BIT. It should be a statement that jumps out of the macro,
 * like exit(), goto, return, break, or continue.
 *
 * \note The expansion is intentionally a plain "statement; { block }" and not a
 * do { } while (0) wrapper: callers pass break/continue as ERROR and expect it
 * to act on their own enclosing loop, and they invoke the macro without a
 * trailing semicolon.
 */
#define GET_UTF16(val, GET_16BIT, ERROR)\
    (val) = (GET_16BIT);\
    {\
        unsigned int hi = (val) - 0xD800;\
        if (hi < 0x800) {\
            (val) = (GET_16BIT) - 0xDC00;\
            if ((val) > 0x3FFU || hi > 0x3FFU)\
                ERROR\
            (val) += (hi << 10) + 0x10000;\
        }\
    }
| /*! | /*! | ||||
| * \def PUT_UTF8(val, tmp, PUT_BYTE) | * \def PUT_UTF8(val, tmp, PUT_BYTE) | ||||
| * Converts a 32-bit Unicode character to its UTF-8 encoded form (up to 4 bytes long). | * Converts a 32-bit Unicode character to its UTF-8 encoded form (up to 4 bytes long). | ||||