extractor.c File Reference

#include "platform.h"
#include "extractor.h"
#include <pthread.h>
#include <../../libltdl/ltdl.h>
#include "iconv.c"

Go to the source code of this file.

Defines

#define DEBUG   0
#define HIGHEST_TYPE_NUMBER   137
#define EXSO   ""
#define OGGSO   ""
#define FLACSO   ""
#define QTSO   ""
#define OLESO   ""
#define MPEGSO   ""
#define DEFSO   "libextractor_html:\libextractor_man:\libextractor_ps:\libextractor_pdf:\libextractor_mp3:\libextractor_id3v2:\libextractor_id3v23:\libextractor_id3v24:\libextractor_mime:\libextractor_tar:\libextractor_dvi:\libextractor_deb:\libextractor_png:\libextractor_gif:\libextractor_wav:\libextractor_flv:\libextractor_real:\libextractor_jpeg:\libextractor_tiff:\libextractor_zip:\libextractor_rpm:\libextractor_riff:\libextractor_applefile:\libextractor_elf:\libextractor_oo:\libextractor_asf:\libextractor_sid:\libextractor_nsfe:\libextractor_nsf:\libextractor_it:\libextractor_xm:\libextractor_s3m"
#define DEFAULT_LIBRARIES   MPEGSO EXSO OLESO OGGSO FLACSO QTSO DEFSO
#define LTDL_MUTEX_LOCK
#define LTDL_MUTEX_UNLOCK
#define MAX_READ   1024 * 1024 * 1024
#define MAX_DECOMPRESS   16 * 1024 * 1024

Functions

const char * EXTRACTOR_getDefaultLibraries ()
static char * cut_bin (char *in)
static char * cut_lib (char *in)
static char * get_path_from_PATH ()
static char * get_path_from_ENV_PREFIX ()
static char * os_get_installation_path ()
void __attribute__ ((constructor))
void __attribute__ ((destructor))
static int fileopen (const char *filename, int oflag,...)
EXTRACTOR_ExtractorList * EXTRACTOR_loadDefaultLibraries ()
const char * EXTRACTOR_getKeywordTypeAsString (const EXTRACTOR_KeywordType type)
static void * getSymbolWithPrefix (void *lib_handle, const char *lib_name, const char *sym_name)
static int loadLibrary (const char *name, void **libHandle, ExtractMethod *method)
static EXTRACTOR_ExtractorList * EXTRACTOR_addLibrary2 (EXTRACTOR_ExtractorList *prev, const char *library, const char *options)
EXTRACTOR_ExtractorList * EXTRACTOR_addLibrary (EXTRACTOR_ExtractorList *prev, const char *library)
static EXTRACTOR_ExtractorList * EXTRACTOR_addLibraryLast2 (EXTRACTOR_ExtractorList *prev, const char *library, const char *options)
EXTRACTOR_ExtractorList * EXTRACTOR_addLibraryLast (EXTRACTOR_ExtractorList *prev, const char *library)
EXTRACTOR_ExtractorList * EXTRACTOR_loadConfigLibraries (EXTRACTOR_ExtractorList *prev, const char *config)
EXTRACTOR_ExtractorList * EXTRACTOR_removeLibrary (EXTRACTOR_ExtractorList *prev, const char *library)
void EXTRACTOR_removeAll (EXTRACTOR_ExtractorList *libraries)
static EXTRACTOR_KeywordList * getKeywords (EXTRACTOR_ExtractorList *extractor, const char *filename, const unsigned char *data, size_t size)
EXTRACTOR_KeywordList * EXTRACTOR_getKeywords (EXTRACTOR_ExtractorList *extractor, const char *filename)
EXTRACTOR_KeywordList * EXTRACTOR_getKeywords2 (EXTRACTOR_ExtractorList *extractor, const void *data, size_t size)
static void removeKeyword (const char *keyword, const EXTRACTOR_KeywordType type, const unsigned int options, EXTRACTOR_KeywordList **list, EXTRACTOR_KeywordList *current)
EXTRACTOR_KeywordList * EXTRACTOR_removeDuplicateKeywords (EXTRACTOR_KeywordList *list, const unsigned int options)
EXTRACTOR_KeywordList * EXTRACTOR_removeEmptyKeywords (EXTRACTOR_KeywordList *list)
EXTRACTOR_KeywordList * EXTRACTOR_removeKeywordsOfType (EXTRACTOR_KeywordList *list, EXTRACTOR_KeywordType type)
void EXTRACTOR_printKeywords (FILE *handle, EXTRACTOR_KeywordList *keywords)
void EXTRACTOR_freeKeywords (EXTRACTOR_KeywordList *keywords)
EXTRACTOR_KeywordType EXTRACTOR_getHighestKeywordTypeNumber ()
const char * EXTRACTOR_extractLast (const EXTRACTOR_KeywordType type, EXTRACTOR_KeywordList *keywords)
const char * EXTRACTOR_extractLastByString (const char *type, EXTRACTOR_KeywordList *keywords)
unsigned int EXTRACTOR_countKeywords (EXTRACTOR_KeywordList *keywords)
char * EXTRACTOR_binaryEncode (const unsigned char *data, size_t size)
int EXTRACTOR_binaryDecode (const char *in, unsigned char **out, size_t *outSize)

Variables

static const char * keywordTypes []
static char * old_dlsearchpath = NULL
static pthread_mutex_t ltdl_lock = PTHREAD_MUTEX_INITIALIZER


Define Documentation

#define DEBUG   0

Definition at line 39 of file extractor.c.

Referenced by dvbsub_parse(), gif_read_extension(), libextractor_thumbnailffmpeg_extract(), and mxf_read_local_tags().

#define DEFAULT_LIBRARIES   MPEGSO EXSO OLESO OGGSO FLACSO QTSO DEFSO

Definition at line 271 of file extractor.c.

Referenced by EXTRACTOR_getDefaultLibraries(), and EXTRACTOR_loadDefaultLibraries().

#define DEFSO   "libextractor_html:\libextractor_man:\libextractor_ps:\libextractor_pdf:\libextractor_mp3:\libextractor_id3v2:\libextractor_id3v23:\libextractor_id3v24:\libextractor_mime:\libextractor_tar:\libextractor_dvi:\libextractor_deb:\libextractor_png:\libextractor_gif:\libextractor_wav:\libextractor_flv:\libextractor_real:\libextractor_jpeg:\libextractor_tiff:\libextractor_zip:\libextractor_rpm:\libextractor_riff:\libextractor_applefile:\libextractor_elf:\libextractor_oo:\libextractor_asf:\libextractor_sid:\libextractor_nsfe:\libextractor_nsf:\libextractor_it:\libextractor_xm:\libextractor_s3m"

Definition at line 237 of file extractor.c.

#define EXSO   ""

Definition at line 201 of file extractor.c.

#define FLACSO   ""

Definition at line 213 of file extractor.c.

#define HIGHEST_TYPE_NUMBER   137

Definition at line 186 of file extractor.c.

Referenced by EXTRACTOR_getHighestKeywordTypeNumber(), EXTRACTOR_getKeywordTypeAsString(), and EXTRACTOR_printKeywords().

#define LTDL_MUTEX_LOCK

Value:

if (pthread_mutex_lock (&ltdl_lock) != 0) \
    abort();

Definition at line 736 of file extractor.c.

Referenced by EXTRACTOR_removeLibrary(), getSymbolWithPrefix(), and loadLibrary().

#define LTDL_MUTEX_UNLOCK

Value:

if (pthread_mutex_unlock (&ltdl_lock) != 0) \
    abort();

Definition at line 739 of file extractor.c.

Referenced by EXTRACTOR_removeLibrary(), getSymbolWithPrefix(), and loadLibrary().

#define MAX_DECOMPRESS   16 * 1024 * 1024

How many bytes do we actually try to decompress? (from the beginning of the file). Limit to 16 MB.

Definition at line 1052 of file extractor.c.

Referenced by getKeywords().

#define MAX_READ   1024 * 1024 * 1024

How many bytes do we actually try to scan? (from the beginning of the file). Limit to 1 GB.

Definition at line 1046 of file extractor.c.

Referenced by EXTRACTOR_getKeywords().

#define MPEGSO   ""

Definition at line 231 of file extractor.c.

#define OGGSO   ""

Definition at line 207 of file extractor.c.

#define OLESO   ""

Definition at line 225 of file extractor.c.

#define QTSO   ""

Definition at line 219 of file extractor.c.


Function Documentation

void __attribute__ ( (destructor)   ) 

Definition at line 632 of file extractor.c.

References free, NULL, and ShutdownWinEnv().

00632                                                  { /* Library destructor: restores libltdl's search path and shuts libltdl down. */
00633   lt_dlsetsearchpath(old_dlsearchpath); /* put back the path saved by the constructor; still NULL if none was saved */
00634   if (old_dlsearchpath != NULL) {
00635     free(old_dlsearchpath);
00636     old_dlsearchpath = NULL; /* do not leave a dangling pointer behind */
00637   }
00638 #ifdef MINGW
00639   ShutdownWinEnv();
00640 #endif
00641   lt_dlexit ();
00642 }

void __attribute__ ( (constructor)   ) 

Definition at line 590 of file extractor.c.

References _, fprintf, free, InitWinEnv(), ISOLOCALEDIR, LOCALEDIR, malloc, NULL, os_get_installation_path(), PACKAGE, and strcat.

00590                                                   { /* Library constructor: initializes libltdl and appends the installation path to its search path. */
00591   int err;
00592   const char * opath;
00593   char * path;
00594   char * cpath;
00595 
00596 #if ENABLE_NLS
00597   BINDTEXTDOMAIN(PACKAGE, LOCALEDIR);
00598   BINDTEXTDOMAIN("iso-639", ISOLOCALEDIR); /* used by wordextractor */
00599 #endif
00600   err = lt_dlinit ();
00601   if (err > 0) {
00602 #if DEBUG
00603     fprintf(stderr,
00604             _("Initialization of plugin mechanism failed: %s!\n"),
00605             lt_dlerror());
00606 #endif
00607     return; /* note: this early exit also skips InitWinEnv() below */
00608   }
00609   opath = lt_dlgetsearchpath();
00610   if (opath != NULL)
00611     old_dlsearchpath = strdup(opath); /* remembered so the destructor can restore it; NOTE(review): strdup result is not checked */
00612   path = os_get_installation_path();
00613   if (path != NULL) {
00614     if (opath != NULL) {
00615       cpath = malloc(strlen(path) + strlen(opath) + 4); /* room for "opath:path" plus terminator; NOTE(review): malloc result is not checked before strcpy */
00616       strcpy(cpath, opath);
00617       strcat(cpath, ":");
00618       strcat(cpath, path);
00619       lt_dlsetsearchpath(cpath);
00620       free(path);
00621       free(cpath);
00622     } else {
00623       lt_dlsetsearchpath(path); /* no previous search path: use the installation path alone */
00624       free(path);
00625     }
00626   }
00627 #ifdef MINGW
00628   InitWinEnv();
00629 #endif
00630 }

static char* cut_bin ( char *  in  )  [static]

Definition at line 279 of file extractor.c.

References NULL.

Referenced by get_path_from_ENV_PREFIX(), and get_path_from_PATH().

/**
 * Strip a trailing "bin" directory component from a path, in place.
 *
 * For paths longer than four characters, a single trailing '/' or '\\'
 * is removed first.  If the path then ends in the component "bin"
 * (i.e. "bin" directly preceded by a path separator), the "bin" is cut
 * off so that the result ends with that separator, e.g.
 * "/usr/bin/" becomes "/usr/".  A final component that merely ends in
 * the letters "bin" (such as "/usr/sbin") is left untouched.
 *
 * @param in the path to modify; may be NULL
 * @return in (modified in place), or NULL if in was NULL
 */
static char *
cut_bin(char * in) {
  size_t p;

  if (in == NULL)
    return NULL;
  p = strlen(in);
  if (p > 4) {
    if ( (in[p-1] == '/') ||
         (in[p-1] == '\\') )
      in[--p] = '\0';
    /* p >= 4 here, so in[p-4] is a valid index */
    if ( (0 == strcmp(&in[p-3],
                      "bin")) &&
         ( (in[p-4] == '/') ||
           (in[p-4] == '\\') ) )
      in[p-3] = '\0';
  }
  return in;
}

static char* cut_lib ( char *  in  )  [static]

Definition at line 298 of file extractor.c.

References NULL.

Referenced by get_path_from_ENV_PREFIX().

/**
 * Strip a trailing "lib" directory component from a path, in place.
 *
 * For paths longer than four characters, a single trailing '/' or '\\'
 * is removed first.  If the path then ends in the component "lib"
 * (i.e. "lib" directly preceded by a path separator), the "lib" is cut
 * off so that the result ends with that separator, e.g.
 * "/usr/lib/" becomes "/usr/".  A final component that merely ends in
 * the letters "lib" (such as "/opt/mylib") is left untouched.
 *
 * @param in the path to modify; may be NULL
 * @return in (modified in place), or NULL if in was NULL
 */
static char *
cut_lib(char * in) {
  size_t p;

  if (in == NULL)
    return NULL;
  p = strlen(in);
  if (p > 4) {
    if ( (in[p-1] == '/') ||
         (in[p-1] == '\\') )
      in[--p] = '\0';
    /* p >= 4 here, so in[p-4] is a valid index */
    if ( (0 == strcmp(&in[p-3],
                      "lib")) &&
         ( (in[p-4] == '/') ||
           (in[p-4] == '\\') ) )
      in[p-3] = '\0';
  }
  return in;
}

EXTRACTOR_ExtractorList* EXTRACTOR_addLibrary ( EXTRACTOR_ExtractorList *  prev,
const char *  library 
)

Add a library for keyword extraction.

Parameters:
prev the previous list of libraries, may be NULL
library the name of the library
Returns:
the new list of libraries, equal to prev iff an error occurred

Definition at line 860 of file extractor.c.

References EXTRACTOR_addLibrary2(), and NULL.

Referenced by get_keywords_for_file(), main(), and test_plugins().

00862 { /* Public wrapper: adds the library at the head of the list with no options string. */
00863   return EXTRACTOR_addLibrary2(prev, library, NULL);
00864 }

static EXTRACTOR_ExtractorList* EXTRACTOR_addLibrary2 ( EXTRACTOR_ExtractorList *  prev,
const char *  library,
const char *  options 
) [static]

Definition at line 832 of file extractor.c.

References EXTRACTOR_Extractor::extractMethod, EXTRACTOR_Extractor::libname, EXTRACTOR_Extractor::libraryHandle, loadLibrary(), malloc, EXTRACTOR_Extractor::next, NULL, and EXTRACTOR_Extractor::options.

Referenced by EXTRACTOR_addLibrary(), and EXTRACTOR_loadConfigLibraries().

00834 { /* Loads `library` and prepends a new list node for it; returns prev unchanged if loading fails. */
00835   EXTRACTOR_ExtractorList *result;
00836   void *handle;
00837   ExtractMethod method;
00838 
00839   if (-1 == loadLibrary (library, &handle, &method))
00840     return prev; /* load failed: list is left as it was */
00841   result = malloc (sizeof (EXTRACTOR_ExtractorList)); /* NOTE(review): result is not checked; a failed malloc is dereferenced on the next line */
00842   result->next = prev; /* new node becomes the head */
00843   result->libraryHandle = handle;
00844   result->extractMethod = method;
00845   result->libname = strdup (library); /* node keeps its own copy of the name; NOTE(review): strdup result is not checked */
00846   if( options )
00847     result->options = strdup (options); /* node keeps its own copy of the options */
00848   else
00849     result->options = NULL;
00850   return result;
00851 }

EXTRACTOR_ExtractorList* EXTRACTOR_addLibraryLast ( EXTRACTOR_ExtractorList *  prev,
const char *  library 
)

Add a library for keyword extraction at the END of the list.

Parameters:
prev the previous list of libraries, may be NULL
library the name of the library
Returns:
the new list of libraries, always equal to prev except if prev was NULL and no error occurs

Definition at line 904 of file extractor.c.

References EXTRACTOR_addLibraryLast2(), and NULL.

Referenced by main().

00906 { /* Public wrapper: adds the library at the tail of the list with no options string. */
00907   return EXTRACTOR_addLibraryLast2(prev, library, NULL);
00908 }

static EXTRACTOR_ExtractorList* EXTRACTOR_addLibraryLast2 ( EXTRACTOR_ExtractorList *  prev,
const char *  library,
const char *  options 
) [static]

Definition at line 868 of file extractor.c.

References EXTRACTOR_Extractor::extractMethod, EXTRACTOR_Extractor::libname, EXTRACTOR_Extractor::libraryHandle, loadLibrary(), malloc, EXTRACTOR_Extractor::next, NULL, and EXTRACTOR_Extractor::options.

Referenced by EXTRACTOR_addLibraryLast(), and EXTRACTOR_loadConfigLibraries().

00870 { /* Loads `library` and appends a new list node for it; returns prev unchanged if loading fails. */
00871   EXTRACTOR_ExtractorList *result;
00872   EXTRACTOR_ExtractorList *pos;
00873   void *handle;
00874   ExtractMethod method;
00875 
00876   if (-1 == loadLibrary (library, &handle, &method))
00877     return prev; /* load failed: list is left as it was */
00878   result = malloc (sizeof (EXTRACTOR_ExtractorList)); /* NOTE(review): result is not checked; a failed malloc is dereferenced on the next line */
00879   result->next = NULL; /* new node will be the tail */
00880   result->libraryHandle = handle;
00881   result->extractMethod = method;
00882   result->libname = strdup (library); /* node keeps its own copy of the name; NOTE(review): strdup result is not checked */
00883   if( options )
00884     result->options = strdup (options); /* node keeps its own copy of the options */
00885   else
00886     result->options = NULL;
00887   if (prev == NULL)
00888     return result; /* empty list: the new node is the whole list */
00889   pos = prev;
00890   while (pos->next != NULL) /* walk to the current tail */
00891     pos = pos->next;
00892   pos->next = result;
00893   return prev; /* head is unchanged when appending */
00894 }

int EXTRACTOR_binaryDecode ( const char *  in,
unsigned char **  out,
size_t *  outSize 
)

This function can be used to decode the binary data encoded in the libextractor metadata (i.e. for the thumbnails).

Parameters:
in 0-terminated string from the meta-data
Returns:
1 on error, 0 on success

Definition at line 1724 of file extractor.c.

References malloc, and NULL.

Referenced by test().

/**
 * This function can be used to decode the binary data
 * encoded in the libextractor metadata (i.e. for
 * the thumbnails).
 *
 * The input is a sequence of chunks of at most 255 characters.  The
 * first character of each chunk is a marker; every later character of
 * the chunk that equals the marker decodes to a 0 byte, every other
 * character decodes to itself.
 *
 * @param in 0-terminated string from the meta-data
 * @param out set to a malloc'ed buffer with the decoded bytes (to be
 *        freed by the caller), or to NULL if the input was empty or an
 *        error occurred
 * @param outSize set to the number of bytes stored in *out
 * @return 1 on error (NULL argument or out of memory), 0 on success
 */
int
EXTRACTOR_binaryDecode(const char * in,
                       unsigned char ** out,
                       size_t * outSize) {
  unsigned char * buf;
  size_t pos;
  size_t wpos;
  unsigned char marker;
  size_t i;
  size_t end;
  size_t inSize;

  if ( (out == NULL) || (outSize == NULL) )
    return 1; /* nowhere to store the result */
  /* give the caller defined values on every exit path */
  *out = NULL;
  *outSize = 0;
  if (in == NULL)
    return 1;
  inSize = strlen(in);
  if (inSize == 0)
    return 0; /* empty encoding decodes to "no data" */

  buf = malloc(inSize); /* slightly more than needed ;-) */
  if (buf == NULL)
    return 1; /* error */

  pos = 0;
  wpos = 0;
  while (pos < inSize) {
    /* 255 here: count the marker!  Written so that pos + 255
       is only computed when it cannot exceed inSize. */
    end = (inSize - pos > 255) ? pos + 255 : inSize;
    marker = (unsigned char) in[pos++];
    for (i=pos;i<end;i++)
      buf[wpos++] = ((unsigned char) in[i] == marker)
        ? 0
        : (unsigned char) in[i];
    pos = end;
  }
  *out = buf;
  *outSize = wpos;
  return 0;
}

char* EXTRACTOR_binaryEncode ( const unsigned char *  data,
size_t  size 
)

Encode the given binary data object as a 0-terminated C-string according to the LE binary data encoding standard.

Returns:
NULL on error, the 0-terminated encoding otherwise

Definition at line 1665 of file extractor.c.

References free, malloc, and NULL.

Referenced by flac_metadata(), libextractor_thumbnailgtk_extract(), and test().

/**
 * Encode the given binary data object
 * as a 0-terminated C-string according
 * to the LE binary data encoding standard.
 *
 * The data is processed in chunks of at most 254 bytes.  For each
 * chunk the smallest value in [1,255] that does not occur in the chunk
 * is chosen as marker and emitted first; the chunk follows with every
 * 0 byte replaced by the marker.  Since a chunk holds at most 254
 * bytes, one of the 255 candidate values is always unused.
 *
 * @param data the bytes to encode; may be NULL only if size is 0
 * @param size number of bytes in data
 * @return NULL on error, the 0-terminated encoding otherwise
 *         (allocated with malloc; the caller must free it)
 */
char *
EXTRACTOR_binaryEncode(const unsigned char * data,
                       size_t size) {
  char * binary;
  size_t pos;
  size_t end;
  size_t wpos;
  size_t i;
  size_t need;
  unsigned char used[256]; /* used[v] != 0 iff byte value v occurs in the chunk */
  unsigned int marker;

  if ( (data == NULL) && (size > 0) )
    return NULL;
  /* payload + one marker per started 254-byte chunk + terminator */
  need = size / 254 + 2;
  if (size > ((size_t) -1) - need)
    return NULL; /* allocation size would overflow */
  need += size;
  binary = malloc(need);
  if (binary == NULL)
    return NULL;

  pos = 0;
  wpos = 0;
  while (pos < size) {
    /* written so that pos + 254 is only computed when it fits */
    end = (size - pos > 254) ? pos + 254 : size;
    memset(used,
           0,
           sizeof(used));
    for (i=pos;i<end;i++)
      used[data[i]] = 1;
    /* find unused value between 1 and 255 in this chunk */
    marker = 1;
    while ( (marker < 256) && (used[marker] != 0) )
      marker++;
    if (marker == 256) {
      /* assertion failed... */
      free(binary);
      return NULL;
    }
    /* recode */
    binary[wpos++] = (char) marker;
    for (i=pos;i<end;i++)
      binary[wpos++] = (char) ((data[i] == 0) ? marker : data[i]);
    pos = end;
  }
  binary[wpos] = '\0'; /* 0-termination! */
  return binary;
}

unsigned int EXTRACTOR_countKeywords ( EXTRACTOR_KeywordList *  keywords  ) 

Count the number of keywords in the keyword list.

Parameters:
keywords the keyword list
Returns:
the number of keywords in the list

Definition at line 1646 of file extractor.c.

References EXTRACTOR_Keywords::next, and NULL.

Referenced by compare_keywords_to_ref(), and main().

01647 { /* Walks the list once and returns the number of nodes; 0 for a NULL list. */
01648   int count = 0; /* NOTE(review): counted as int although the function returns unsigned int */
01649   while (keywords != NULL)
01650     {
01651       count++;
01652       keywords = keywords->next;
01653     }
01654   return count;
01655 }

const char* EXTRACTOR_extractLast ( const EXTRACTOR_KeywordType  type,
EXTRACTOR_KeywordList *  keywords 
)

Extract the last keyword of the given type from the keyword list.

Parameters:
type the type of the keyword
keywords the keyword list
Returns:
the last matching keyword, or NULL if none matches

Definition at line 1604 of file extractor.c.

References EXTRACTOR_Keywords::keyword, EXTRACTOR_Keywords::keywordType, EXTRACTOR_Keywords::next, and NULL.

Referenced by EXTRACT_NAME(), libextractor_html_extract(), libextractor_mime_extract(), libextractor_ole2_extract(), libextractor_ps_extract(), libextractor_thumbnailffmpeg_extract(), libextractor_thumbnailgtk_extract(), libextractor_zip_extract(), and main().

01606 { /* Scans the whole list and returns the keyword string of the last node whose type equals `type`. */
01607   char *result = NULL; /* stays NULL when no node matches */
01608   while (keywords != NULL)
01609     {
01610       if (keywords->keywordType == type)
01611         result = keywords->keyword; /* later matches overwrite earlier ones; the string is not copied */
01612       keywords = keywords->next;
01613     }
01614   return result;
01615 }

const char* EXTRACTOR_extractLastByString ( const char *  type,
EXTRACTOR_KeywordList *  keywords 
)

Extract the last keyword whose type name (translated or untranslated) equals the given string from the keyword list.

Parameters:
type the string describing the type of the keyword
keywords the keyword list
Returns:
the last matching keyword, or NULL if none matches

Definition at line 1624 of file extractor.c.

References _, EXTRACTOR_Keywords::keyword, EXTRACTOR_Keywords::keywordType, keywordTypes, EXTRACTOR_Keywords::next, and NULL.

Referenced by main(), and printSelectedKeywordsBibtex().

01626 { /* Returns the keyword string of the last node whose type name equals `type`; NULL if `type` is NULL or nothing matches. */
01627   char * result = NULL;
01628 
01629   if (type == NULL)
01630     return NULL;
01631   while (keywords != NULL) {
01632     if ( (0 == strcmp(_(keywordTypes[keywords->keywordType]), type)) || /* translated type name; NOTE(review): keywordType is not range-checked before indexing keywordTypes - confirm it is always below HIGHEST_TYPE_NUMBER */
01633          (0 == strcmp(keywordTypes[keywords->keywordType], type) ) ) /* untranslated type name */
01634       result = keywords->keyword; /* later matches overwrite earlier ones; the string is not copied */
01635     keywords = keywords->next;
01636   }
01637   return result;
01638 }

void EXTRACTOR_freeKeywords ( EXTRACTOR_KeywordList *  keywords  ) 

Free the memory occupied by the keyword list (and the keyword strings in it!)

Parameters:
keywords the list to free

Definition at line 1576 of file extractor.c.

References free, EXTRACTOR_Keywords::keyword, EXTRACTOR_Keywords::next, and NULL.

Referenced by main(), processSentences(), and test_plugins().

01577 { /* Frees every node of the list together with its keyword string; a NULL list is a no-op. */
01578   EXTRACTOR_KeywordList *prev;
01579   while (keywords != NULL)
01580     {
01581       prev = keywords;
01582       keywords = keywords->next; /* advance before freeing so the freed node is never read */
01583       free (prev->keyword);
01584       free (prev);
01585     }
01586 }

const char* EXTRACTOR_getDefaultLibraries ( void   ) 

Definition at line 273 of file extractor.c.

References DEFAULT_LIBRARIES.

00273                                              { /* Returns the compile-time DEFAULT_LIBRARIES string (a string literal; not to be freed or modified). */
00274   return DEFAULT_LIBRARIES;
00275 }

EXTRACTOR_KeywordType EXTRACTOR_getHighestKeywordTypeNumber ( void   ) 

Return the highest type number, exclusive as in [0,highest).

Definition at line 1592 of file extractor.c.

References HIGHEST_TYPE_NUMBER.

Referenced by main().

01593 { /* Returns HIGHEST_TYPE_NUMBER, the exclusive upper bound of the keyword type range. */
01594   return HIGHEST_TYPE_NUMBER;
01595 }

EXTRACTOR_KeywordList* EXTRACTOR_getKeywords ( EXTRACTOR_ExtractorList *  extractor,
const char *  filename 
)

Extract keywords from a file using the available extractors.

Parameters:
extractor the list of extractor libraries
filename the name of the file
Returns:
the list of keywords found in the file, NULL if none were found (or other errors)

Definition at line 1297 of file extractor.c.

References fileopen(), free, getKeywords(), malloc, MAX_READ, MMAP, MUNMAP, NULL, and STAT.

Referenced by get_keywords_for_file(), and main().

01298                                               { /* Maps (at most MAX_READ bytes of) the named file and runs getKeywords() on it; directories are passed with size 0. */
01299   EXTRACTOR_KeywordList *result;
01300   int file;
01301   void * buffer;
01302   struct stat fstatbuf;
01303   size_t size;
01304   int eno, dir;
01305 
01306   if (-1 == STAT(filename, &fstatbuf))
01307     return NULL;
01308 
01309   if (!S_ISDIR(fstatbuf.st_mode)) {
01310     dir = 0;
01311       
01312 #ifdef O_LARGEFILE
01313     file = fileopen(filename, O_RDONLY | O_LARGEFILE);
01314 #else
01315     file = fileopen(filename, O_RDONLY);
01316 #endif
01317     if (-1 == file)
01318       return NULL;
01319   
01320     size = fstatbuf.st_size;
01321     if (size == 0) { /* empty file: nothing to extract */
01322       close(file);
01323       return NULL;
01324     }
01325   
01326     if (size > MAX_READ)
01327       size = MAX_READ; /* do not mmap/read more than 1 GB! */
01328     buffer = MMAP(NULL, size, PROT_READ, MAP_PRIVATE, file, 0);
01329     if ( (buffer == NULL) || (buffer == (void *) -1) ) {
01330       eno = errno; /* keep the mapping error: close() may overwrite errno */
01331       close(file);
01332       errno = eno;
01333       return NULL;
01334     }
01335   }
01336   else {
01337     dir = 1;
01338     
01339     size = 0; /* directories carry no data; extractors only get the filename */
01340     buffer = malloc(1); /* placeholder buffer, freed below; NOTE(review): result is not checked, so getKeywords may receive NULL with size 0 */
01341   }
01342   
01343   result = getKeywords(extractor,
01344                        filename,
01345                        buffer,
01346                        size);
01347   
01348   if (dir)
01349     free(buffer);
01350   else {
01351     MUNMAP (buffer, size); /* release the mapping with the same (possibly capped) size used for MMAP */
01352     close(file);
01353   }
01354   return result;
01355 }

EXTRACTOR_KeywordList* EXTRACTOR_getKeywords2 ( EXTRACTOR_ExtractorList *  extractor,
const void *  data,
size_t  size 
)

Extract keywords from a buffer in memory using the available extractors.

Parameters:
extractor the list of extractor libraries
data the data of the file
size the number of bytes in data
Returns:
the list of keywords found in the file, NULL if none were found (or other errors)

Definition at line 1370 of file extractor.c.

References getKeywords(), and NULL.

01372                                     { /* Runs getKeywords() on an in-memory buffer; no filename is available to the extractors. */
01373   if (data == NULL)
01374     return NULL; /* NULL data is rejected regardless of size */
01375   return getKeywords(extractor,
01376                      NULL,
01377                      data,
01378                      size);
01379 }

const char* EXTRACTOR_getKeywordTypeAsString ( const EXTRACTOR_KeywordType  type  ) 

Get the textual name of the keyword.

Returns:
NULL if the type is not known

Definition at line 726 of file extractor.c.

References HIGHEST_TYPE_NUMBER, keywordTypes, and NULL.

Referenced by main(), printSelectedKeywords(), and printSelectedKeywordsGrepFriendly().

00727 { /* Looks up the name for `type` in keywordTypes; the entry is returned as stored, without passing it through _(). */
00728   if ((type >= 0) && (type < HIGHEST_TYPE_NUMBER)) /* valid indices are [0, HIGHEST_TYPE_NUMBER) */
00729     return keywordTypes[type];
00730   else
00731     return NULL;
00732 }

EXTRACTOR_ExtractorList* EXTRACTOR_loadConfigLibraries ( EXTRACTOR_ExtractorList *  prev,
const char *  config 
)

Load multiple libraries as specified by the user.

Parameters:
config a string given by the user that defines which libraries should be loaded. Has the format "[[-]LIBRARYNAME[:[-]LIBRARYNAME]*]". For example, libextractor_mp3.so:libextractor_ogg.so loads the mp3 and the ogg library. The '-' before the LIBRARYNAME indicates that the library should be added to the end of the library list (addLibraryLast).
prev the previous list of libraries, may be NULL
Returns:
the new list of libraries, equal to prev iff an error occurred or if config was empty (or NULL).

Definition at line 924 of file extractor.c.

References EXTRACTOR_addLibrary2(), EXTRACTOR_addLibraryLast2(), free, len, and NULL.

Referenced by EXTRACTOR_loadDefaultLibraries(), get_keywords_for_file(), main(), and test_plugins().

00926 { /* Splits a ':'-separated config string into entries of the form [-]NAME[(OPTIONS)] and adds each library to the list. */
00927   char *cpy;
00928   int pos;
00929   int last;
00930   int lastconf;
00931   int len;
00932 
00933   if (config == NULL)
00934     return prev;
00935   len = strlen(config);
00936   cpy = strdup(config); /* working copy: separators are overwritten with '\0' below; NOTE(review): strdup result is not checked */
00937   pos = 0;
00938   last = 0; /* start index of the current entry's name */
00939   lastconf = 0; /* start index of the current entry's options, or -1 if it has none */
00940   while (pos < len)
00941     {
00942       while ((cpy[pos] != ':') && (cpy[pos] != '\0') &&
00943              (cpy[pos] != '('))
00944         pos++;
00945       if( cpy[pos] == '(' ) {
00946         cpy[pos++] = '\0';      /* replace '(' by termination */
00947         lastconf = pos;         /* start config from here, after (. */
00948         while ((cpy[pos] != '\0') && (cpy[pos] != ')'))
00949           pos++; /* config until ) or EOS. */
00950         if( cpy[pos] == ')' ) {
00951           cpy[pos++] = '\0'; /* write end of config here. */
00952           while ((cpy[pos] != ':') && (cpy[pos] != '\0'))
00953             pos++; /* forward until real end of string found. */
00954           cpy[pos++] = '\0';
00955         } else {
00956           cpy[pos++] = '\0'; /* end of string. */
00957         }
00958       } else {
00959         lastconf = -1;         /* NULL config when no (). */
00960         cpy[pos++] = '\0';      /* replace ':' by termination */
00961       }
00962       if (cpy[last] == '-') /* leading '-' selects "add at the end of the list" */
00963         {
00964           last++; /* skip the '-' itself */
00965           if( lastconf != -1 )
00966             prev = EXTRACTOR_addLibraryLast2 (prev, &cpy[last],
00967                                               &cpy[lastconf]);
00968           else
00969             prev = EXTRACTOR_addLibraryLast2 (prev, &cpy[last], NULL);
00970         }
00971       else
00972         if( lastconf != -1 )
00973           prev = EXTRACTOR_addLibrary2 (prev, &cpy[last], &cpy[lastconf]);
00974         else
00975           prev = EXTRACTOR_addLibrary2 (prev, &cpy[last], NULL);
00976 
00977       last = pos; /* next entry starts right after the terminator just written */
00978     }
00979   free (cpy);
00980   return prev;
00981 }

EXTRACTOR_ExtractorList* EXTRACTOR_loadDefaultLibraries ( void   ) 

Load the default set of libraries. The default set of libraries consists of the libraries that are part of the libextractor distribution (except split and filename extractor) plus the extractors that are specified in the environment variable "LIBEXTRACTOR_LIBRARIES".

Returns:
the default set of libraries.

Definition at line 700 of file extractor.c.

References DEFAULT_LIBRARIES, EXTRACTOR_loadConfigLibraries(), free, getenv(), malloc, NULL, and strcat.

Referenced by get_keywords_for_file(), main(), test_plugins(), and testLoadPlugins().

00701 { /* Loads DEFAULT_LIBRARIES, preceded by the entries of $LIBEXTRACTOR_LIBRARIES when that variable is set. */
00702   char *env;
00703   char *tmp;
00704   EXTRACTOR_ExtractorList *res;
00705 
00706 
00707   env = getenv ("LIBEXTRACTOR_LIBRARIES");
00708   if (env == NULL)
00709     {
00710       return EXTRACTOR_loadConfigLibraries (NULL, DEFAULT_LIBRARIES);
00711     }
00712   tmp = malloc (strlen (env) + strlen (DEFAULT_LIBRARIES) + 2); /* +2: the ':' separator and the terminator; NOTE(review): result is not checked before strcpy */
00713   strcpy (tmp, env);
00714   strcat (tmp, ":");
00715   strcat (tmp, DEFAULT_LIBRARIES);
00716   res = EXTRACTOR_loadConfigLibraries (NULL, tmp);
00717   free (tmp);
00718   return res;
00719 }

void EXTRACTOR_printKeywords ( FILE *  handle,
EXTRACTOR_KeywordList *  keywords 
)

Print a keyword list to a file. For debugging.

Parameters:
handle the file to write to (stdout, stderr), may NOT be NULL
keywords the list of keywords to print, may be NULL

Definition at line 1532 of file extractor.c.

References _, EXTRACTOR_THUMBNAIL_DATA, fprintf, free, HIGHEST_TYPE_NUMBER, iconvHelper(), EXTRACTOR_Keywords::keyword, EXTRACTOR_Keywords::keywordType, keywordTypes, EXTRACTOR_Keywords::next, and NULL.

Referenced by main().

01534 { /* Prints one "type - keyword" line per list node to `handle`, converting keywords from UTF-8 to the locale's codeset when possible. */
01535   iconv_t cd;
01536   char * buf;
01537 
01538   cd = iconv_open(
01539     nl_langinfo(CODESET)
01540     , "UTF-8");
01541   while (keywords != NULL)
01542     {
01543       if (cd == (iconv_t) -1)
01544         buf = strdup(keywords->keyword); /* no converter available: print the keyword unconverted */
01545       else
01546         buf = iconvHelper(cd,
01547                           keywords->keyword);
01548       if (keywords->keywordType == EXTRACTOR_THUMBNAIL_DATA) { /* thumbnail payload is not printed, only its type name */
01549         fprintf(handle,
01550                 _("%s - (binary)\n"),
01551                 _(keywordTypes[keywords->keywordType]));
01552       } else {
01553         if (keywords->keywordType >= HIGHEST_TYPE_NUMBER) /* type outside the keywordTypes table */
01554           fprintf(handle,
01555                   _("INVALID TYPE - %s\n"),
01556                   buf); /* NOTE(review): buf is not checked for NULL before being printed with %s - confirm strdup/iconvHelper cannot fail here */
01557         else
01558           fprintf(handle,
01559                   "%s - %s\n",
01560                   _(keywordTypes[keywords->keywordType]),
01561                   buf);
01562       }
01563       free(buf);
01564       keywords = keywords->next;
01565     }
01566   if (cd != (iconv_t) -1)
01567     iconv_close(cd);
01568 }

void EXTRACTOR_removeAll ( EXTRACTOR_ExtractorList *  libraries  ) 

Remove all extractors.

Parameters:
libraries the list of extractors

Definition at line 1034 of file extractor.c.

References EXTRACTOR_removeLibrary(), EXTRACTOR_Extractor::libname, and NULL.

Referenced by get_keywords_for_file(), main(), test_plugins(), and testLoadPlugins().

01035 { /* Empties the list by repeatedly removing the library named by the current head node. */
01036   while (libraries != NULL) /* NOTE(review): terminates only if EXTRACTOR_removeLibrary shrinks the list on every call - confirm against its implementation */
01037     libraries = EXTRACTOR_removeLibrary (libraries, libraries->libname);
01038 }

EXTRACTOR_KeywordList* EXTRACTOR_removeDuplicateKeywords ( EXTRACTOR_KeywordList *  list,
const unsigned int  options 
)

Remove duplicate keywords from the list.

Parameters:
list the original keyword list (destroyed in the process!)
options a set of options (DUPLICATES_XXXX)
Returns:
a list of keywords without duplicates

Definition at line 1433 of file extractor.c.

References EXTRACTOR_Keywords::keyword, EXTRACTOR_Keywords::keywordType, EXTRACTOR_Keywords::next, NULL, and removeKeyword().

Referenced by main().

01434                                                                { /* For each node in turn, asks removeKeyword() to drop the other nodes that duplicate it under `options`. */
01435   EXTRACTOR_KeywordList *pos;
01436 
01437   pos = list;
01438   while (pos != NULL) {
01439     removeKeyword(pos->keyword,
01440                   pos->keywordType,
01441                   options,
01442                   &list, /* passed by address, so removeKeyword can change the list head */
01443                   pos); /* NOTE(review): pos is read again below, so this presumably tells removeKeyword which node to keep - confirm */
01444     pos = pos->next;
01445   }
01446   return list;
01447 }

EXTRACTOR_KeywordList* EXTRACTOR_removeEmptyKeywords ( EXTRACTOR_KeywordList *  list  ) 

Remove empty (all-whitespace) keywords from the list.

Parameters:
list the original keyword list (destroyed in the process!)
Returns:
a list of keywords without empty (all-whitespace) keywords

Definition at line 1455 of file extractor.c.

References free, EXTRACTOR_Keywords::keyword, EXTRACTOR_Keywords::next, and NULL.

Referenced by main().

01455                                                              { /* Unlinks and frees every node whose keyword is empty or consists only of whitespace; returns the new head. */
01456   EXTRACTOR_KeywordList * pos;
01457   EXTRACTOR_KeywordList * last;
01458 
01459   last = NULL; /* most recent node that was kept; NULL while no node has been kept yet */
01460   pos = list;
01461   while (pos != NULL)
01462     {
01463       int allWhite;
01464       int i;
01465       allWhite = 1; /* an empty keyword never enters the loop below and so counts as all-whitespace */
01466       for (i=strlen(pos->keyword)-1;i>=0;i--)
01467         if (! isspace(pos->keyword[i])) /* NOTE(review): plain char is passed to isspace; a negative value is undefined behaviour - cast to unsigned char */
01468           allWhite = 0;
01469       if (allWhite)
01470         {
01471           EXTRACTOR_KeywordList * next;
01472           next = pos->next;
01473           if (last == NULL)
01474             list = next; /* removing the head: the list now starts at the successor */
01475           else
01476             last->next = next; /* bypass the removed node */
01477           free(pos->keyword);
01478           free(pos);
01479           pos = next;
01480         }
01481       else
01482         {
01483           last = pos;
01484           pos = pos->next;
01485         }
01486     }
01487   return list;
01488 }

EXTRACTOR_KeywordList* EXTRACTOR_removeKeywordsOfType ( EXTRACTOR_KeywordList list,
EXTRACTOR_KeywordType  type 
)

Remove keywords of a particular type from the