id3v24extractor.c

Go to the documentation of this file.
00001 /*
00002      This file is part of libextractor.
00003      (C) 2002, 2003, 2004, 2006 Vidyut Samanta and Christian Grothoff
00004 
00005      libextractor is free software; you can redistribute it and/or modify
00006      it under the terms of the GNU General Public License as published
00007      by the Free Software Foundation; either version 2, or (at your
00008      option) any later version.
00009 
00010      libextractor is distributed in the hope that it will be useful, but
00011      WITHOUT ANY WARRANTY; without even the implied warranty of
00012      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013      General Public License for more details.
00014 
00015      You should have received a copy of the GNU General Public License
00016      along with libextractor; see the file COPYING.  If not, write to the
00017      Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00018      Boston, MA 02111-1307, USA.
00019 
00020  */
00021 
00022 #define DEBUG_EXTRACT_ID3v24 0
00023 
00024 #include "platform.h"
00025 #include "extractor.h"
00026 #include <string.h>
00027 #include <stdio.h>
00028 #include <sys/types.h>
00029 #include <sys/stat.h>
00030 #include <unistd.h>
00031 #include <stdlib.h>
00032 #include <fcntl.h>
00033 #ifndef MINGW
00034 #include <sys/mman.h>
00035 #endif
00036 #include "convert.h"
00037 
00038 
00039 static struct EXTRACTOR_Keywords *
00040 addKeyword (EXTRACTOR_KeywordList * oldhead,
00041             char *phrase, EXTRACTOR_KeywordType type)
00042 {
00043   EXTRACTOR_KeywordList *keyword;
00044 
00045   keyword = (EXTRACTOR_KeywordList *) malloc (sizeof (EXTRACTOR_KeywordList));
00046   keyword->next = oldhead;
00047   keyword->keyword = phrase;
00048   keyword->keywordType = type;
00049   return keyword;
00050 }
00051 
00052 typedef struct
00053 {
00054   char *text;
00055   EXTRACTOR_KeywordType type;
00056 } Matches;
00057 
00058 static Matches tmap[] = {
00059   {"COMM", EXTRACTOR_COMMENT},
00060   {"IPLS", EXTRACTOR_CONTRIBUTOR},
00061   {"TIPL", EXTRACTOR_CONTRIBUTOR},
00062   {"TMOO", EXTRACTOR_MOOD},
00063   {"TMCL", EXTRACTOR_MUSICIAN_CREDITS_LIST},
00064   {"LINK", EXTRACTOR_LINK},
00065   {"MCDI", EXTRACTOR_MUSIC_CD_IDENTIFIER},
00066   {"PCNT", EXTRACTOR_PLAY_COUNTER},
00067   {"POPM", EXTRACTOR_POPULARITY_METER},
00068   {"TCOP", EXTRACTOR_COPYRIGHT},
00069   {"TDRC", EXTRACTOR_DATE},
00070   {"TCON", EXTRACTOR_GENRE},
00071   {"TIT1", EXTRACTOR_GENRE},
00072   {"TENC", EXTRACTOR_ENCODED_BY},
00073   {"TEXT", EXTRACTOR_LYRICS},
00074   {"TOLY", EXTRACTOR_CONTRIBUTOR},
00075   {"TOPE", EXTRACTOR_CONTRIBUTOR},
00076   {"TOWN", EXTRACTOR_OWNER},
00077   {"TPE1", EXTRACTOR_ARTIST},
00078   {"TPE2", EXTRACTOR_ARTIST},
00079   {"TPE3", EXTRACTOR_CONDUCTOR},
00080   {"TPE4", EXTRACTOR_INTERPRET},
00081   {"TIME", EXTRACTOR_TIME},
00082   {"TMED", EXTRACTOR_MEDIA_TYPE},
00083   {"TCOM", EXTRACTOR_CREATOR},
00084   {"TOFN", EXTRACTOR_FILENAME},
00085   {"TOPE", EXTRACTOR_ARTIST},
00086   {"TPUB", EXTRACTOR_PUBLISHER},
00087   {"TRCK", EXTRACTOR_TRACK_NUMBER},
00088   {"TRSC", EXTRACTOR_ISRC},
00089   {"TRSN", EXTRACTOR_SOURCE},
00090   {"TRSO", EXTRACTOR_CREATED_FOR},
00091   {"TSRC", EXTRACTOR_RESOURCE_IDENTIFIER},
00092   {"TYER", EXTRACTOR_YEAR},
00093   {"TOAL", EXTRACTOR_ALBUM},
00094   {"TALB", EXTRACTOR_ALBUM},
00095   {"TLAN", EXTRACTOR_LANGUAGE},
00096   {"TIT2", EXTRACTOR_TITLE},
00097   {"TIT3", EXTRACTOR_DESCRIPTION},
00098   {"WCOM", EXTRACTOR_RELEASE},
00099   {"WCOP", EXTRACTOR_DISCLAIMER},
00100   {"", EXTRACTOR_KEYWORDS},
00101   {NULL, 0},
00102 };
00103 
00104 
00105 /* mimetype = audio/mpeg */
00106 struct EXTRACTOR_Keywords *
00107 libextractor_id3v24_extract (const char *filename,
00108                              const unsigned char *data,
00109                              const size_t size,
00110                              struct EXTRACTOR_Keywords *prev)
00111 {
00112   int unsync;
00113   int extendedHdr;
00114   int experimental;
00115   int footer;
00116   unsigned int tsize;
00117   unsigned int pos;
00118   unsigned int ehdrSize;
00119   unsigned int padding;
00120 
00121   if ((size < 16) ||
00122       (data[0] != 0x49) ||
00123       (data[1] != 0x44) ||
00124       (data[2] != 0x33) || (data[3] != 0x04) || (data[4] != 0x00))
00125     return prev;
00126   unsync = (data[5] & 0x80) > 0;
00127   extendedHdr = (data[5] & 0x40) > 0;
00128   experimental = (data[5] & 0x20) > 0;
00129   footer = (data[5] & 0x10) > 0;
00130   tsize = (((data[6] & 0x7F) << 21) |
00131            ((data[7] & 0x7F) << 14) |
00132            ((data[8] & 0x7F) << 7) | ((data[9] & 0x7F) << 0));
00133   if ((tsize + 10 > size) || (experimental))
00134     return prev;
00135   pos = 10;
00136   padding = 0;
00137   if (extendedHdr)
00138     {
00139       ehdrSize = (((data[10] & 0x7F) << 21) |
00140                   ((data[11] & 0x7F) << 14) |
00141                   ((data[12] & 0x7F) << 7) | ((data[13] & 0x7F) << 0));
00142       pos += ehdrSize;
00143     }
00144 
00145 
00146   while (pos < tsize)
00147     {
00148       size_t csize;
00149       int i;
00150       unsigned short flags;
00151 
00152       if (pos + 10 > tsize)
00153         return prev;
00154 
00155       csize = (((data[pos + 4] & 0x7F) << 21) |
00156                ((data[pos + 5] & 0x7F) << 14) |
00157                ((data[pos + 6] & 0x7F) << 7) | ((data[pos + 7] & 0x7F) << 0));
00158 
00159       if ((pos + 10 + csize > tsize) || (csize > tsize) || (csize == 0))
00160         break;
00161       flags = (data[pos + 8] << 8) + data[pos + 9];
00162       if (((flags & 0x80) > 0) /* compressed, not yet supported */  ||
00163           ((flags & 0x40) > 0) /* encrypted, not supported */ )
00164         {
00165           pos += 10 + csize;
00166           continue;
00167         }
00168       i = 0;
00169       while (tmap[i].text != NULL)
00170         {
00171           if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 4))
00172             {
00173               char *word;
00174               if ((flags & 0x20) > 0)
00175                 {
00176                   /* "group" identifier, skip a byte */
00177                   pos++;
00178                   csize--;
00179                 }
00180 
00181               /* this byte describes the encoding
00182                  try to convert strings to UTF-8
00183                  if it fails, then forget it */
00184               switch (data[pos + 10])
00185                 {
00186                 case 0x00:
00187                   word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
00188                                         csize, "ISO-8859-1");
00189                   break;
00190                 case 0x01:
00191                   word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
00192                                         csize, "UTF-16");
00193                   break;
00194                 case 0x02:
00195                   word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
00196                                         csize, "UTF-16BE");
00197                   break;
00198                 case 0x03:
00199                   word = malloc (csize + 1);
00200                   memcpy (word, &data[pos + 11], csize);
00201                   word[csize] = '\0';
00202                   break;
00203                 default:
00204                   /* bad encoding byte,
00205                      try to convert from iso-8859-1 */
00206                   word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 11],
00207                                         csize, "ISO-8859-1");
00208                   break;
00209                 }
00210               pos++;
00211               csize--;
00212               if ((word != NULL) && (strlen (word) > 0))
00213                 {
00214                   prev = addKeyword (prev, word, tmap[i].type);
00215                 }
00216               else
00217                 {
00218                   free (word);
00219                 }
00220               break;
00221             }
00222           i++;
00223         }
00224       pos += 10 + csize;
00225     }
00226   return prev;
00227 }
00228 
00229 /* end of id3v24extractor.c */

Generated on Thu Nov 20 10:44:42 2008 for libextractor by  doxygen 1.5.1