00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #define DEBUG_EXTRACT_ID3v24 0
00023
00024 #include "platform.h"
00025 #include "extractor.h"
00026 #include <string.h>
00027 #include <stdio.h>
00028 #include <sys/types.h>
00029 #include <sys/stat.h>
00030 #include <unistd.h>
00031 #include <stdlib.h>
00032 #include <fcntl.h>
00033 #ifndef MINGW
00034 #include <sys/mman.h>
00035 #endif
00036 #include "convert.h"
00037
00038
00039 static struct EXTRACTOR_Keywords *
00040 addKeyword (EXTRACTOR_KeywordList * oldhead,
00041 char *phrase, EXTRACTOR_KeywordType type)
00042 {
00043 EXTRACTOR_KeywordList *keyword;
00044
00045 keyword = (EXTRACTOR_KeywordList *) malloc (sizeof (EXTRACTOR_KeywordList));
00046 keyword->next = oldhead;
00047 keyword->keyword = phrase;
00048 keyword->keywordType = type;
00049 return keyword;
00050 }
00051
00052 typedef struct
00053 {
00054 char *text;
00055 EXTRACTOR_KeywordType type;
00056 } Matches;
00057
00058 static Matches tmap[] = {
00059 {"COMM", EXTRACTOR_COMMENT},
00060 {"IPLS", EXTRACTOR_CONTRIBUTOR},
00061 {"TIPL", EXTRACTOR_CONTRIBUTOR},
00062 {"TMOO", EXTRACTOR_MOOD},
00063 {"TMCL", EXTRACTOR_MUSICIAN_CREDITS_LIST},
00064 {"LINK", EXTRACTOR_LINK},
00065 {"MCDI", EXTRACTOR_MUSIC_CD_IDENTIFIER},
00066 {"PCNT", EXTRACTOR_PLAY_COUNTER},
00067 {"POPM", EXTRACTOR_POPULARITY_METER},
00068 {"TCOP", EXTRACTOR_COPYRIGHT},
00069 {"TDRC", EXTRACTOR_DATE},
00070 {"TCON", EXTRACTOR_GENRE},
00071 {"TIT1", EXTRACTOR_GENRE},
00072 {"TENC", EXTRACTOR_ENCODED_BY},
00073 {"TEXT", EXTRACTOR_LYRICS},
00074 {"TOLY", EXTRACTOR_CONTRIBUTOR},
00075 {"TOPE", EXTRACTOR_CONTRIBUTOR},
00076 {"TOWN", EXTRACTOR_OWNER},
00077 {"TPE1", EXTRACTOR_ARTIST},
00078 {"TPE2", EXTRACTOR_ARTIST},
00079 {"TPE3", EXTRACTOR_CONDUCTOR},
00080 {"TPE4", EXTRACTOR_INTERPRET},
00081 {"TIME", EXTRACTOR_TIME},
00082 {"TMED", EXTRACTOR_MEDIA_TYPE},
00083 {"TCOM", EXTRACTOR_CREATOR},
00084 {"TOFN", EXTRACTOR_FILENAME},
00085 {"TOPE", EXTRACTOR_ARTIST},
00086 {"TPUB", EXTRACTOR_PUBLISHER},
00087 {"TRCK", EXTRACTOR_TRACK_NUMBER},
00088 {"TRSC", EXTRACTOR_ISRC},
00089 {"TRSN", EXTRACTOR_SOURCE},
00090 {"TRSO", EXTRACTOR_CREATED_FOR},
00091 {"TSRC", EXTRACTOR_RESOURCE_IDENTIFIER},
00092 {"TYER", EXTRACTOR_YEAR},
00093 {"TOAL", EXTRACTOR_ALBUM},
00094 {"TALB", EXTRACTOR_ALBUM},
00095 {"TLAN", EXTRACTOR_LANGUAGE},
00096 {"TIT2", EXTRACTOR_TITLE},
00097 {"TIT3", EXTRACTOR_DESCRIPTION},
00098 {"WCOM", EXTRACTOR_RELEASE},
00099 {"WCOP", EXTRACTOR_DISCLAIMER},
00100 {"", EXTRACTOR_KEYWORDS},
00101 {NULL, 0},
00102 };
00103
00104
00105
00106 struct EXTRACTOR_Keywords *
00107 libextractor_id3v24_extract (const char *filename,
00108 const unsigned char *data,
00109 const size_t size,
00110 struct EXTRACTOR_Keywords *prev)
00111 {
00112 int unsync;
00113 int extendedHdr;
00114 int experimental;
00115 int footer;
00116 unsigned int tsize;
00117 unsigned int pos;
00118 unsigned int ehdrSize;
00119 unsigned int padding;
00120
00121 if ((size < 16) ||
00122 (data[0] != 0x49) ||
00123 (data[1] != 0x44) ||
00124 (data[2] != 0x33) || (data[3] != 0x04) || (data[4] != 0x00))
00125 return prev;
00126 unsync = (data[5] & 0x80) > 0;
00127 extendedHdr = (data[5] & 0x40) > 0;
00128 experimental = (data[5] & 0x20) > 0;
00129 footer = (data[5] & 0x10) > 0;
00130 tsize = (((data[6] & 0x7F) << 21) |
00131 ((data[7] & 0x7F) << 14) |
00132 ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0));
00133 if ((tsize + 10 > size) || (experimental))
00134 return prev;
00135 pos = 10;
00136 padding = 0;
00137 if (extendedHdr)
00138 {
00139 ehdrSize = (((data[10] & 0x7F) << 21) |
00140 ((data[11] & 0x7F) << 14) |
00141 ((data[12] & 0x7F) << 7) | ((data[13] & 0x7F) << 0));
00142 pos += ehdrSize;
00143 }
00144
00145
00146 while (pos < tsize)
00147 {
00148 size_t csize;
00149 int i;
00150 unsigned short flags;
00151
00152 if (pos + 10 > tsize)
00153 return prev;
00154
00155 csize = (((data[pos + 4] & 0x7F) << 21) |
00156 ((data[pos + 5] & 0x7F) << 14) |
00157 ((data[pos + 6] & 0x7F) << 7) | ((data[pos + 7] & 0x7F) << 0));
00158
00159 if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0))
00160 break;
00161 flags = (data[pos + 8] << 8) + data[pos + 9];
00162 if (((flags & 0x80) > 0) ||
00163 ((flags & 0x40) > 0) )
00164 {
00165 pos += 10 + csize;
00166 continue;
00167 }
00168 i = 0;
00169 while (tmap[i].text != NULL)
00170 {
00171 if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4))
00172 {
00173 char *word;
00174 if ((flags & 0x20) > 0)
00175 {
00176
00177 pos++;
00178 csize--;
00179 }
00180
00181
00182
00183
00184 switch (data[pos + 10])
00185 {
00186 case 0x00:
00187 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
00188 csize, "ISO-8859-1");
00189 break;
00190 case 0x01:
00191 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
00192 csize, "UTF-16");
00193 break;
00194 case 0x02:
00195 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
00196 csize, "UTF-16BE");
00197 break;
00198 case 0x03:
00199 word = malloc (csize + 1);
00200 memcpy (word, &data[pos + 11], csize);
00201 word[csize] = '\0';
00202 break;
00203 default:
00204
00205
00206 word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
00207 csize, "ISO-8859-1");
00208 break;
00209 }
00210 pos++;
00211 csize--;
00212 if ((word != NULL) && (strlen (word) > 0))
00213 {
00214 prev = addKeyword (prev, word, tmap[i].type);
00215 }
00216 else
00217 {
00218 free (word);
00219 }
00220 break;
00221 }
00222 i++;
00223 }
00224 pos += 10 + csize;
00225 }
00226 return prev;
00227 }
00228
00229