id3v2extractor.c

Go to the documentation of this file.
00001 /*
00002      This file is part of libextractor.
00003      (C) 2002, 2003, 2004, 2006 Vidyut Samanta and Christian Grothoff
00004 
00005      libextractor is free software; you can redistribute it and/or modify
00006      it under the terms of the GNU General Public License as published
00007      by the Free Software Foundation; either version 2, or (at your
00008      option) any later version.
00009 
00010      libextractor is distributed in the hope that it will be useful, but
00011      WITHOUT ANY WARRANTY; without even the implied warranty of
00012      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013      General Public License for more details.
00014 
00015      You should have received a copy of the GNU General Public License
00016      along with libextractor; see the file COPYING.  If not, write to the
00017      Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00018      Boston, MA 02111-1307, USA.
00019 
00020  */
00021 
00022 #include "platform.h"
00023 #include "extractor.h"
00024 #ifndef MINGW
00025 #include <sys/mman.h>
00026 #endif
00027 #include "convert.h"
00028 
00029 #define DEBUG_EXTRACT_ID3v2 0
00030 
00031 
00032 static struct EXTRACTOR_Keywords *
00033 addKeyword (EXTRACTOR_KeywordList * oldhead,
00034             char *phrase, EXTRACTOR_KeywordType type)
00035 {
00036   EXTRACTOR_KeywordList *keyword;
00037 
00038   keyword = (EXTRACTOR_KeywordList *) malloc (sizeof (EXTRACTOR_KeywordList));
00039   keyword->next = oldhead;
00040   keyword->keyword = phrase;
00041   keyword->keywordType = type;
00042   return keyword;
00043 }
00044 
00045 typedef struct
00046 {
00047   char *text;
00048   EXTRACTOR_KeywordType type;
00049 } Matches;
00050 
00051 static Matches tmap[] = {
00052   {"TAL", EXTRACTOR_TITLE},
00053   {"TT1", EXTRACTOR_GROUP},
00054   {"TT2", EXTRACTOR_TITLE},
00055   {"TT3", EXTRACTOR_TITLE},
00056   {"TXT", EXTRACTOR_DESCRIPTION},
00057   {"TPB", EXTRACTOR_PUBLISHER},
00058   {"WAF", EXTRACTOR_LOCATION},
00059   {"WAR", EXTRACTOR_LOCATION},
00060   {"WAS", EXTRACTOR_LOCATION},
00061   {"WCP", EXTRACTOR_COPYRIGHT},
00062   {"WAF", EXTRACTOR_LOCATION},
00063   {"WCM", EXTRACTOR_DISCLAIMER},
00064   {"TSS", EXTRACTOR_FORMAT},
00065   {"TYE", EXTRACTOR_DATE},
00066   {"TLA", EXTRACTOR_LANGUAGE},
00067   {"TP1", EXTRACTOR_ARTIST},
00068   {"TP2", EXTRACTOR_ARTIST},
00069   {"TP3", EXTRACTOR_CONDUCTOR},
00070   {"TP4", EXTRACTOR_INTERPRET},
00071   {"IPL", EXTRACTOR_CONTRIBUTOR},
00072   {"TOF", EXTRACTOR_FILENAME},
00073   {"TEN", EXTRACTOR_PRODUCER},
00074   {"TCO", EXTRACTOR_SUBJECT},
00075   {"TCR", EXTRACTOR_COPYRIGHT},
00076   {"SLT", EXTRACTOR_LYRICS},
00077   {"TOA", EXTRACTOR_ARTIST},
00078   {"TRC", EXTRACTOR_ISRC},
00079   {"TRK", EXTRACTOR_TRACK_NUMBER},
00080   {"TCM", EXTRACTOR_CREATOR},
00081   {"TOT", EXTRACTOR_ALBUM},
00082   {"TOL", EXTRACTOR_AUTHOR},
00083   {"COM", EXTRACTOR_COMMENT},
00084   {"", EXTRACTOR_KEYWORDS},
00085   {NULL, 0},
00086 };
00087 
00088 
00089 /* mimetype = audio/mpeg */
00090 struct EXTRACTOR_Keywords *
00091 libextractor_id3v2_extract (const char *filename,
00092                             const unsigned char *data,
00093                             size_t size, struct EXTRACTOR_Keywords *prev)
00094 {
00095   int unsync;
00096   unsigned int tsize;
00097   unsigned int pos;
00098 
00099   if ((size < 16) ||
00100       (data[0] != 0x49) ||
00101       (data[1] != 0x44) ||
00102       (data[2] != 0x33) || (data[3] != 0x02) || (data[4] != 0x00))
00103     return prev;
00104   unsync = (data[5] & 0x80) > 0;
00105   tsize = (((data[6] & 0x7F) << 21) |
00106            ((data[7] & 0x7F) << 14) |
00107            ((data[8] & 0x7F) << 07) | ((data[9] & 0x7F) << 00));
00108 
00109   if (tsize + 10 > size)
00110     return prev;
00111   pos = 10;
00112   while (pos < tsize)
00113     {
00114       size_t csize;
00115       int i;
00116 
00117       if (pos + 6 > tsize)
00118         return prev;
00119       csize = (data[pos + 3] << 16) + (data[pos + 4] << 8) + data[pos + 5];
00120       if ((pos + 6 + csize > tsize) || (csize > tsize) || (csize == 0))
00121         break;
00122       i = 0;
00123       while (tmap[i].text != NULL)
00124         {
00125           if (0 == strncmp (tmap[i].text, (const char *) &data[pos], 3))
00126             {
00127               char *word;
00128               /* this byte describes the encoding
00129                  try to convert strings to UTF-8
00130                  if it fails, then forget it */
00131               switch (data[pos + 6])
00132                 {
00133                 case 0x00:
00134                   word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
00135                                         csize, "ISO-8859-1");
00136                   break;
00137                 case 0x01:
00138                   word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
00139                                         csize, "UCS-2");
00140                   break;
00141                 default:
00142                   /* bad encoding byte,
00143                      try to convert from iso-8859-1 */
00144                   word = EXTRACTOR_common_convert_to_utf8 ((const char *) &data[pos + 7],
00145                                         csize, "ISO-8859-1");
00146                   break;
00147                 }
00148               pos++;
00149               csize--;
00150               if ((word != NULL) && (strlen (word) > 0))
00151                 {
00152                   prev = addKeyword (prev, word, tmap[i].type);
00153                 }
00154               else
00155                 {
00156                   free (word);
00157                 }
00158               break;
00159             }
00160           i++;
00161         }
00162       pos += 6 + csize;
00163     }
00164   return prev;
00165 }
00166 
00167 /* end of id3v2extractor.c */

Generated on Thu Nov 20 09:44:42 2008 for libextractor by  doxygen 1.5.1