debextractor.c

Go to the documentation of this file.
00001 /*
00002      This file is part of libextractor.
00003      (C) 2002, 2003, 2004 Vidyut Samanta and Christian Grothoff
00004 
00005      libextractor is free software; you can redistribute it and/or modify
00006      it under the terms of the GNU General Public License as published
00007      by the Free Software Foundation; either version 2, or (at your
00008      option) any later version.
00009 
00010      libextractor is distributed in the hope that it will be useful, but
00011      WITHOUT ANY WARRANTY; without even the implied warranty of
00012      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013      General Public License for more details.
00014 
00015      You should have received a copy of the GNU General Public License
00016      along with libextractor; see the file COPYING.  If not, write to the
00017      Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00018      Boston, MA 02111-1307, USA.
00019  */
00020 
00021 #include "platform.h"
00022 #include "extractor.h"
00023 #include <zlib.h>
00024 
00025 /*
00026  * The .deb is an ar-chive file.  It contains a tar.gz file
00027  * named "control.tar.gz" which then contains a file 'control'
00028  * that has the meta-data.  And which variant of the various
00029  * ar file formats is used is also not quite certain. Yuck.
00030  *
00031  * References:
00032  * http://www.mkssoftware.com/docs/man4/tar.4.asp
00033  * http://lists.debian.org/debian-policy/2003/12/msg00000.html
00034  * http://www.opengroup.org/onlinepubs/009695399/utilities/ar.html
00035  */
00036 
00037 static EXTRACTOR_KeywordList *
00038 addKeyword (EXTRACTOR_KeywordType type,
00039             char *keyword, EXTRACTOR_KeywordList * next)
00040 {
00041   EXTRACTOR_KeywordList *result;
00042 
00043   if (keyword == NULL)
00044     return next;
00045   result = malloc (sizeof (EXTRACTOR_KeywordList));
00046   result->next = next;
00047   result->keyword = keyword;
00048   result->keywordType = type;
00049   return result;
00050 }
00051 
/**
 * Duplicate exactly 'n' bytes of 'str' into a new NUL-terminated buffer.
 *
 * Unlike strndup(), this does not stop at an embedded NUL byte: it always
 * copies 'n' bytes, so 'str' must point to at least 'n' readable bytes.
 *
 * @param str source bytes
 * @param n   number of bytes to copy
 * @return newly allocated string of length 'n' (caller must free),
 *         or NULL if the allocation fails or 'n + 1' would overflow
 */
static char *
stndup (const char *str, size_t n)
{
  char *tmp;

  if (n == (size_t) -1)
    return NULL;                /* n + 1 would wrap around to 0 */
  tmp = malloc (n + 1);
  if (tmp == NULL)
    return NULL;
  memcpy (tmp, str, n);
  tmp[n] = '\0';
  return tmp;
}
00061 
00062 
00063 
00064 typedef struct
00065 {
00066   char *text;
00067   EXTRACTOR_KeywordType type;
00068 } Matches;
00069 
00070 /* see also: "man 5 deb-control" */
00071 static Matches tmap[] = {
00072   {"Package: ", EXTRACTOR_SOFTWARE},
00073   {"Version: ", EXTRACTOR_VERSIONNUMBER},
00074   {"Section: ", EXTRACTOR_GENRE},
00075   {"Priority: ", EXTRACTOR_PRIORITY},
00076   {"Architecture: ", EXTRACTOR_CREATED_FOR},
00077   {"Depends: ", EXTRACTOR_DEPENDENCY},
00078   {"Recommends: ", EXTRACTOR_RELATION},
00079   {"Suggests: ", EXTRACTOR_RELATION},
00080   {"Installed-Size: ", EXTRACTOR_SIZE},
00081   {"Maintainer: ", EXTRACTOR_PACKAGER},
00082   {"Description: ", EXTRACTOR_DESCRIPTION},
00083   {"Source: ", EXTRACTOR_SOURCE},
00084   {"Pre-Depends: ", EXTRACTOR_DEPENDENCY},
00085   {"Conflicts: ", EXTRACTOR_CONFLICTS},
00086   {"Replaces: ", EXTRACTOR_REPLACES},
00087   {"Provides: ", EXTRACTOR_PROVIDES},
00088   {NULL, 0},
00089   {"Essential: ", EXTRACTOR_UNKNOWN}
00090 };
00091 
00092 
00093 /**
00094  * Process the control file.
00095  */
00096 static struct EXTRACTOR_Keywords *
00097 processControl (const char *data,
00098                 const size_t size, struct EXTRACTOR_Keywords *prev)
00099 {
00100   size_t pos;
00101   char *key;
00102 
00103   pos = 0;
00104   while (pos < size)
00105     {
00106       size_t colon;
00107       size_t eol;
00108       int i;
00109 
00110       colon = pos;
00111       while (data[colon] != ':')
00112         {
00113           if ((colon > size) || (data[colon] == '\n'))
00114             return prev;
00115           colon++;
00116         }
00117       colon++;
00118       while ((colon < size) && (isspace (data[colon])))
00119         colon++;
00120       eol = colon;
00121       while ((eol < size) &&
00122              ((data[eol] != '\n') ||
00123               ((eol + 1 < size) && (data[eol + 1] == ' '))))
00124         eol++;
00125       if ((eol == colon) || (eol > size))
00126         return prev;
00127       key = stndup (&data[pos], colon - pos);
00128       i = 0;
00129       while (tmap[i].text != NULL)
00130         {
00131           if (0 == strcmp (key, tmap[i].text))
00132             {
00133               char *val;
00134 
00135               val = stndup (&data[colon], eol - colon);
00136               prev = addKeyword (tmap[i].type, val, prev);
00137               break;
00138             }
00139           i++;
00140         }
00141       free (key);
00142       pos = eol + 1;
00143     }
00144   return prev;
00145 }
00146 
00147 
/**
 * Leading part of a tar member header, overlaid directly onto the raw
 * archive bytes.  All members are char arrays, so the structure is
 * 257 bytes long with no padding.
 */
typedef struct
{
  char name[100];               /* member name, compared against "./control" */
  char mode[8];
  char userId[8];
  char groupId[8];
  char filesize[12];            /* member size, parsed as octal text */
  char lastModTime[12];
  char chksum[8];
  char link;
  char linkName[100];
} TarHeader;
00160 
00161 typedef struct
00162 {
00163   TarHeader tar;
00164   char magic[6];
00165   char version[2];
00166   char uname[32];
00167   char gname[32];
00168   char devmajor[8];
00169   char devminor[8];
00170   char prefix[155];
00171 } USTarHeader;
00172 
00173 /**
00174  * Process the control.tar file.
00175  */
00176 static struct EXTRACTOR_Keywords *
00177 processControlTar (const char *data,
00178                    const size_t size, struct EXTRACTOR_Keywords *prev)
00179 {
00180   TarHeader *tar;
00181   USTarHeader *ustar;
00182   size_t pos;
00183 
00184   pos = 0;
00185   while (pos + sizeof (TarHeader) < size)
00186     {
00187       unsigned long long fsize;
00188       char buf[13];
00189 
00190       tar = (TarHeader *) & data[pos];
00191       if (pos + sizeof (USTarHeader) < size)
00192         {
00193           ustar = (USTarHeader *) & data[pos];
00194           if (0 == strncmp ("ustar", &ustar->magic[0], strlen ("ustar")))
00195             pos += 512;         /* sizeof(USTarHeader); */
00196           else
00197             pos += 257;         /* sizeof(TarHeader); minus gcc alignment... */
00198         }
00199       else
00200         {
00201           pos += 257;           /* sizeof(TarHeader); minus gcc alignment... */
00202         }
00203 
00204       memcpy (buf, &tar->filesize[0], 12);
00205       buf[12] = '\0';
00206       if (1 != sscanf (buf, "%12llo", &fsize))  /* octal! Yuck yuck! */
00207         return prev;
00208       if ((pos + fsize > size) || (fsize > size) || (pos + fsize < pos))
00209         return prev;
00210 
00211       if (0 == strncmp (&tar->name[0], "./control", strlen ("./control")))
00212         {
00213           return processControl (&data[pos], fsize, prev);
00214         }
00215       if ((fsize & 511) != 0)
00216         fsize = (fsize | 511) + 1;      /* round up! */
00217       if (pos + fsize < pos)
00218         return prev;
00219       pos += fsize;
00220     }
00221   return prev;
00222 }
00223 
/* Largest uncompressed size (in bytes, 1 MiB) accepted for control.tar.gz;
   archives announcing a larger size are not decompressed. */
#define MAX_CONTROL_SIZE (1024 * 1024)
00225 
00226 static voidpf
00227 Emalloc (voidpf opaque, uInt items, uInt size)
00228 {
00229   return malloc (size * items);
00230 }
00231 
00232 static void
00233 Efree (voidpf opaque, voidpf ptr)
00234 {
00235   free (ptr);
00236 }
00237 
00238 /**
00239  * Process the control.tar.gz file.
00240  */
00241 static struct EXTRACTOR_Keywords *
00242 processControlTGZ (const unsigned char *data,
00243                    size_t size, struct EXTRACTOR_Keywords *prev)
00244 {
00245   size_t bufSize;
00246   char *buf;
00247   z_stream strm;
00248 
00249   bufSize =
00250     data[size - 4] + 256 * data[size - 3] + 65536 * data[size - 2] +
00251     256 * 65536 * data[size - 1];
00252   if (bufSize > MAX_CONTROL_SIZE)
00253     return prev;
00254 
00255   memset (&strm, 0, sizeof (z_stream));
00256 
00257   strm.next_in = (Bytef *) data;
00258   strm.avail_in = size;
00259   strm.total_in = 0;
00260   strm.zalloc = &Emalloc;
00261   strm.zfree = &Efree;
00262   strm.opaque = NULL;
00263 
00264   if (Z_OK == inflateInit2 (&strm, 15 + 32))
00265     {
00266       buf = malloc (bufSize);
00267       if (buf == NULL)
00268         {
00269           inflateEnd (&strm);
00270           return prev;
00271         }
00272       strm.next_out = (Bytef *) buf;
00273       strm.avail_out = bufSize;
00274       inflate (&strm, Z_FINISH);
00275       if (strm.total_out > 0)
00276         {
00277           prev = processControlTar (buf, strm.total_out, prev);
00278           inflateEnd (&strm);
00279           free (buf);
00280           return prev;
00281         }
00282       free (buf);
00283       inflateEnd (&strm);
00284     }
00285   return prev;
00286 }
00287 
/**
 * Header preceding every member of the ar archive, overlaid directly
 * onto the raw file bytes.  All members are char arrays, so the
 * structure is 60 bytes long with no padding.
 */
typedef struct
{
  char name[16];                /* member name, e.g. "control.tar.gz" */
  char lastModTime[12];
  char userId[6];
  char groupId[6];
  char modeInOctal[8];
  char filesize[10];            /* member size, parsed as decimal text */
  char trailer[2];              /* must be "`\n" for a valid header */
} ObjectHeader;
00298 
00299 struct EXTRACTOR_Keywords *
00300 libextractor_deb_extract (const char *filename,
00301                           const char *data,
00302                           const size_t size, struct EXTRACTOR_Keywords *prev)
00303 {
00304   size_t pos;
00305   int done = 0;
00306 
00307   if (size < 128)
00308     return prev;
00309   if (0 != strncmp ("!<arch>\n", data, strlen ("!<arch>\n")))
00310     return prev;
00311   pos = strlen ("!<arch>\n");
00312   while (pos + sizeof (ObjectHeader) < size)
00313     {
00314       ObjectHeader *hdr;
00315       unsigned long long fsize;
00316       char buf[11];
00317 
00318       hdr = (ObjectHeader *) & data[pos];
00319       if (0 != strncmp (&hdr->trailer[0], "`\n", 2))
00320         return prev;
00321 
00322       memcpy (buf, &hdr->filesize[0], 10);
00323       buf[10] = '\0';
00324       if (1 != sscanf (buf, "%10llu", &fsize))
00325         return prev;
00326       pos += sizeof (ObjectHeader);
00327       if ((pos + fsize > size) || (fsize > size) || (pos + fsize < pos))
00328         return prev;
00329       if (0 == strncmp (&hdr->name[0],
00330                         "control.tar.gz", strlen ("control.tar.gz")))
00331         {
00332           prev = processControlTGZ ((const unsigned char *) &data[pos],
00333                                     fsize, prev);
00334           done++;
00335         }
00336       if (0 == strncmp (&hdr->name[0],
00337                         "debian-binary", strlen ("debian-binary")))
00338         {
00339           prev = addKeyword (EXTRACTOR_MIMETYPE,
00340                              strdup ("application/x-debian-package"), prev);
00341           done++;
00342         }
00343       pos += fsize;
00344       if (done == 2)
00345         break;                  /* no need to process the rest of the archive */
00346     }
00347   return prev;
00348 }

Generated on Fri Jan 9 15:44:26 2009 for libextractor by  doxygen 1.5.1