extractor.h File Reference

#include <stdio.h>

Go to the source code of this file.

Data Structures

struct  EXTRACTOR_Keywords
struct  EXTRACTOR_Extractor

Defines

#define EXTRACTOR_VERSION   0x00052003
#define EXTRACTOR_DUPLICATES_TYPELESS   1
#define EXTRACTOR_DUPLICATES_REMOVE_UNKNOWN   2
#define EXTRACTOR_DEFAULT_LIBRARIES   EXTRACTOR_getDefaultLibraries()
#define EXTRACTOR_isBinaryType(type)   (type == EXTRACTOR_THUMBNAIL_DATA)

Typedefs

typedef struct EXTRACTOR_Keywords EXTRACTOR_KeywordList
typedef EXTRACTOR_KeywordList *(* ExtractMethod )(const char *filename, char *data, size_t filesize, EXTRACTOR_KeywordList *next, const char *options)
typedef struct EXTRACTOR_Extractor EXTRACTOR_ExtractorList

Enumerations

enum  EXTRACTOR_KeywordType {
  EXTRACTOR_UNKNOWN = 0, EXTRACTOR_FILENAME = 1, EXTRACTOR_MIMETYPE = 2, EXTRACTOR_TITLE = 3,
  EXTRACTOR_AUTHOR = 4, EXTRACTOR_ARTIST = 5, EXTRACTOR_DESCRIPTION = 6, EXTRACTOR_COMMENT = 7,
  EXTRACTOR_DATE = 8, EXTRACTOR_PUBLISHER = 9, EXTRACTOR_LANGUAGE = 10, EXTRACTOR_ALBUM = 11,
  EXTRACTOR_GENRE = 12, EXTRACTOR_LOCATION = 13, EXTRACTOR_VERSIONNUMBER = 14, EXTRACTOR_ORGANIZATION = 15,
  EXTRACTOR_COPYRIGHT = 16, EXTRACTOR_SUBJECT = 17, EXTRACTOR_KEYWORDS = 18, EXTRACTOR_CONTRIBUTOR = 19,
  EXTRACTOR_RESOURCE_TYPE = 20, EXTRACTOR_FORMAT = 21, EXTRACTOR_RESOURCE_IDENTIFIER = 22, EXTRACTOR_SOURCE = 23,
  EXTRACTOR_RELATION = 24, EXTRACTOR_COVERAGE = 25, EXTRACTOR_SOFTWARE = 26, EXTRACTOR_DISCLAIMER = 27,
  EXTRACTOR_WARNING = 28, EXTRACTOR_TRANSLATED = 29, EXTRACTOR_CREATION_DATE = 30, EXTRACTOR_MODIFICATION_DATE = 31,
  EXTRACTOR_CREATOR = 32, EXTRACTOR_PRODUCER = 33, EXTRACTOR_PAGE_COUNT = 34, EXTRACTOR_PAGE_ORIENTATION = 35,
  EXTRACTOR_PAPER_SIZE = 36, EXTRACTOR_USED_FONTS = 37, EXTRACTOR_PAGE_ORDER = 38, EXTRACTOR_CREATED_FOR = 39,
  EXTRACTOR_MAGNIFICATION = 40, EXTRACTOR_RELEASE = 41, EXTRACTOR_GROUP = 42, EXTRACTOR_SIZE = 43,
  EXTRACTOR_SUMMARY = 44, EXTRACTOR_PACKAGER = 45, EXTRACTOR_VENDOR = 46, EXTRACTOR_LICENSE = 47,
  EXTRACTOR_DISTRIBUTION = 48, EXTRACTOR_BUILDHOST = 49, EXTRACTOR_OS = 50, EXTRACTOR_DEPENDENCY = 51,
  EXTRACTOR_HASH_MD4 = 52, EXTRACTOR_HASH_MD5 = 53, EXTRACTOR_HASH_SHA0 = 54, EXTRACTOR_HASH_SHA1 = 55,
  EXTRACTOR_HASH_RMD160 = 56, EXTRACTOR_RESOLUTION = 57, EXTRACTOR_CATEGORY = 58, EXTRACTOR_BOOKTITLE = 59,
  EXTRACTOR_PRIORITY = 60, EXTRACTOR_CONFLICTS = 61, EXTRACTOR_REPLACES = 62, EXTRACTOR_PROVIDES = 63,
  EXTRACTOR_CONDUCTOR = 64, EXTRACTOR_INTERPRET = 65, EXTRACTOR_OWNER = 66, EXTRACTOR_LYRICS = 67,
  EXTRACTOR_MEDIA_TYPE = 68, EXTRACTOR_CONTACT = 69, EXTRACTOR_THUMBNAIL_DATA = 70, EXTRACTOR_PUBLICATION_DATE = 71,
  EXTRACTOR_CAMERA_MAKE = 72, EXTRACTOR_CAMERA_MODEL = 73, EXTRACTOR_EXPOSURE = 74, EXTRACTOR_APERTURE = 75,
  EXTRACTOR_EXPOSURE_BIAS = 76, EXTRACTOR_FLASH = 77, EXTRACTOR_FLASH_BIAS = 78, EXTRACTOR_FOCAL_LENGTH = 79,
  EXTRACTOR_FOCAL_LENGTH_35MM = 80, EXTRACTOR_ISO_SPEED = 81, EXTRACTOR_EXPOSURE_MODE = 82, EXTRACTOR_METERING_MODE = 83,
  EXTRACTOR_MACRO_MODE = 84, EXTRACTOR_IMAGE_QUALITY = 85, EXTRACTOR_WHITE_BALANCE = 86, EXTRACTOR_ORIENTATION = 87,
  EXTRACTOR_TEMPLATE = 88, EXTRACTOR_SPLIT = 89, EXTRACTOR_PRODUCTVERSION = 90, EXTRACTOR_LAST_SAVED_BY = 91,
  EXTRACTOR_LAST_PRINTED = 92, EXTRACTOR_WORD_COUNT = 93, EXTRACTOR_CHARACTER_COUNT = 94, EXTRACTOR_TOTAL_EDITING_TIME = 95,
  EXTRACTOR_THUMBNAILS = 96, EXTRACTOR_SECURITY = 97, EXTRACTOR_CREATED_BY_SOFTWARE = 98, EXTRACTOR_MODIFIED_BY_SOFTWARE = 99,
  EXTRACTOR_REVISION_HISTORY = 100, EXTRACTOR_LOWERCASE = 101, EXTRACTOR_COMPANY = 102, EXTRACTOR_GENERATOR = 103,
  EXTRACTOR_CHARACTER_SET = 104, EXTRACTOR_LINE_COUNT = 105, EXTRACTOR_PARAGRAPH_COUNT = 106, EXTRACTOR_EDITING_CYCLES = 107,
  EXTRACTOR_SCALE = 108, EXTRACTOR_MANAGER = 109, EXTRACTOR_MOVIE_DIRECTOR = 110, EXTRACTOR_DURATION = 111,
  EXTRACTOR_INFORMATION = 112, EXTRACTOR_FULL_NAME = 113, EXTRACTOR_CHAPTER = 114, EXTRACTOR_YEAR = 115,
  EXTRACTOR_LINK = 116, EXTRACTOR_MUSIC_CD_IDENTIFIER = 117, EXTRACTOR_PLAY_COUNTER = 118, EXTRACTOR_POPULARITY_METER = 119,
  EXTRACTOR_CONTENT_TYPE = 120, EXTRACTOR_ENCODED_BY = 121, EXTRACTOR_TIME = 122, EXTRACTOR_MUSICIAN_CREDITS_LIST = 123,
  EXTRACTOR_MOOD = 124, EXTRACTOR_FORMAT_VERSION = 125, EXTRACTOR_TELEVISION_SYSTEM = 126, EXTRACTOR_SONG_COUNT = 127,
  EXTRACTOR_STARTING_SONG = 128, EXTRACTOR_HARDWARE_DEPENDENCY = 129, EXTRACTOR_RIPPER = 130, EXTRACTOR_FILE_SIZE = 131,
  EXTRACTOR_TRACK_NUMBER = 132, EXTRACTOR_ISRC = 133, EXTRACTOR_DISC_NUMBER = 134, EXTRACTOR_GNUNET_DISPLAY_TYPE = 135,
  EXTRACTOR_GNUNET_ECBC_URI = 136
}

Functions

const char * EXTRACTOR_getDefaultLibraries (void)
EXTRACTOR_ExtractorList * EXTRACTOR_loadDefaultLibraries (void)
const char * EXTRACTOR_getKeywordTypeAsString (EXTRACTOR_KeywordType type)
EXTRACTOR_KeywordType EXTRACTOR_getHighestKeywordTypeNumber (void)
EXTRACTOR_ExtractorList * EXTRACTOR_loadConfigLibraries (EXTRACTOR_ExtractorList *prev, const char *config)
EXTRACTOR_ExtractorList * EXTRACTOR_addLibrary (EXTRACTOR_ExtractorList *prev, const char *library)
EXTRACTOR_ExtractorList * EXTRACTOR_addLibraryLast (EXTRACTOR_ExtractorList *prev, const char *library)
EXTRACTOR_ExtractorList * EXTRACTOR_removeLibrary (EXTRACTOR_ExtractorList *prev, const char *library)
void EXTRACTOR_removeAll (EXTRACTOR_ExtractorList *libraries)
EXTRACTOR_KeywordList * EXTRACTOR_getKeywords (EXTRACTOR_ExtractorList *extractor, const char *filename)
EXTRACTOR_KeywordList * EXTRACTOR_getKeywords2 (EXTRACTOR_ExtractorList *extractor, const void *data, size_t size)
EXTRACTOR_KeywordList * EXTRACTOR_removeDuplicateKeywords (EXTRACTOR_KeywordList *list, unsigned int options)
EXTRACTOR_KeywordList * EXTRACTOR_removeEmptyKeywords (EXTRACTOR_KeywordList *list)
EXTRACTOR_KeywordList * EXTRACTOR_removeKeywordsOfType (EXTRACTOR_KeywordList *list, EXTRACTOR_KeywordType type)
void EXTRACTOR_printKeywords (FILE *handle, EXTRACTOR_KeywordList *keywords)
void EXTRACTOR_freeKeywords (EXTRACTOR_KeywordList *keywords)
const char * EXTRACTOR_extractLast (EXTRACTOR_KeywordType type, EXTRACTOR_KeywordList *keywords)
const char * EXTRACTOR_extractLastByString (const char *type, EXTRACTOR_KeywordList *keywords)
unsigned int EXTRACTOR_countKeywords (EXTRACTOR_KeywordList *keywords)
int EXTRACTOR_binaryDecode (const char *in, unsigned char **out, size_t *outSize)
char * EXTRACTOR_binaryEncode (const unsigned char *data, size_t size)


Define Documentation

#define EXTRACTOR_DEFAULT_LIBRARIES   EXTRACTOR_getDefaultLibraries()

Definition at line 46 of file extractor.h.

#define EXTRACTOR_DUPLICATES_REMOVE_UNKNOWN   2

Definition at line 44 of file extractor.h.

Referenced by main(), and removeKeyword().

#define EXTRACTOR_DUPLICATES_TYPELESS   1

Definition at line 40 of file extractor.h.

Referenced by main(), and removeKeyword().

#define EXTRACTOR_isBinaryType ( type   )     (type == EXTRACTOR_THUMBNAIL_DATA)

Test if a given LE type contains binary data.

Definition at line 198 of file extractor.h.

Referenced by printSelectedKeywords(), and printSelectedKeywordsGrepFriendly().

#define EXTRACTOR_VERSION   0x00052003

Version number encoding, for example: 0.2.6-1 => 0x00020601; 4.5.2-0 => 0x04050200.

Definition at line 35 of file extractor.h.


Typedef Documentation

typedef EXTRACTOR_KeywordList*(* ExtractMethod)(const char *filename, char *data, size_t filesize, EXTRACTOR_KeywordList *next, const char *options)

Signature of the extract method that each plugin must provide.

Parameters:
filename may be NULL (!)
data must not be modified (!)

Definition at line 226 of file extractor.h.

typedef struct EXTRACTOR_Extractor EXTRACTOR_ExtractorList

Linked list of extractor helper-libraries. An application builds this list by telling libextractor to load various keyword-extraction libraries. Libraries can also be unloaded (removed from this list, see removeLibrary).

Client code should never be concerned with the internals of this struct.

typedef struct EXTRACTOR_Keywords EXTRACTOR_KeywordList

A linked list of keywords. This structure is passed around in libExtractor and is typically the result of any keyword extraction operation.

Each entry in the keyword list consists of a string (the keyword) and the keyword type (of type KeywordType) describing how/from where the keyword was obtained.


Enumeration Type Documentation

enum EXTRACTOR_KeywordType

Enumeration defining various sources of keywords. See also http://dublincore.org/documents/1998/09/dces/

Enumerator:
EXTRACTOR_UNKNOWN 
EXTRACTOR_FILENAME 
EXTRACTOR_MIMETYPE 
EXTRACTOR_TITLE 
EXTRACTOR_AUTHOR 
EXTRACTOR_ARTIST 
EXTRACTOR_DESCRIPTION 
EXTRACTOR_COMMENT 
EXTRACTOR_DATE 
EXTRACTOR_PUBLISHER 
EXTRACTOR_LANGUAGE 
EXTRACTOR_ALBUM 
EXTRACTOR_GENRE 
EXTRACTOR_LOCATION 
EXTRACTOR_VERSIONNUMBER 
EXTRACTOR_ORGANIZATION 
EXTRACTOR_COPYRIGHT 
EXTRACTOR_SUBJECT 
EXTRACTOR_KEYWORDS 
EXTRACTOR_CONTRIBUTOR 
EXTRACTOR_RESOURCE_TYPE 
EXTRACTOR_FORMAT 
EXTRACTOR_RESOURCE_IDENTIFIER 
EXTRACTOR_SOURCE 
EXTRACTOR_RELATION 
EXTRACTOR_COVERAGE 
EXTRACTOR_SOFTWARE 
EXTRACTOR_DISCLAIMER 
EXTRACTOR_WARNING 
EXTRACTOR_TRANSLATED 
EXTRACTOR_CREATION_DATE 
EXTRACTOR_MODIFICATION_DATE 
EXTRACTOR_CREATOR 
EXTRACTOR_PRODUCER 
EXTRACTOR_PAGE_COUNT 
EXTRACTOR_PAGE_ORIENTATION 
EXTRACTOR_PAPER_SIZE 
EXTRACTOR_USED_FONTS 
EXTRACTOR_PAGE_ORDER 
EXTRACTOR_CREATED_FOR 
EXTRACTOR_MAGNIFICATION 
EXTRACTOR_RELEASE 
EXTRACTOR_GROUP 
EXTRACTOR_SIZE 
EXTRACTOR_SUMMARY 
EXTRACTOR_PACKAGER 
EXTRACTOR_VENDOR 
EXTRACTOR_LICENSE 
EXTRACTOR_DISTRIBUTION 
EXTRACTOR_BUILDHOST 
EXTRACTOR_OS 
EXTRACTOR_DEPENDENCY 
EXTRACTOR_HASH_MD4 
EXTRACTOR_HASH_MD5 
EXTRACTOR_HASH_SHA0 
EXTRACTOR_HASH_SHA1 
EXTRACTOR_HASH_RMD160 
EXTRACTOR_RESOLUTION 
EXTRACTOR_CATEGORY 
EXTRACTOR_BOOKTITLE 
EXTRACTOR_PRIORITY 
EXTRACTOR_CONFLICTS 
EXTRACTOR_REPLACES 
EXTRACTOR_PROVIDES 
EXTRACTOR_CONDUCTOR 
EXTRACTOR_INTERPRET 
EXTRACTOR_OWNER 
EXTRACTOR_LYRICS 
EXTRACTOR_MEDIA_TYPE 
EXTRACTOR_CONTACT 
EXTRACTOR_THUMBNAIL_DATA 
EXTRACTOR_PUBLICATION_DATE 
EXTRACTOR_CAMERA_MAKE 
EXTRACTOR_CAMERA_MODEL 
EXTRACTOR_EXPOSURE 
EXTRACTOR_APERTURE 
EXTRACTOR_EXPOSURE_BIAS 
EXTRACTOR_FLASH 
EXTRACTOR_FLASH_BIAS 
EXTRACTOR_FOCAL_LENGTH 
EXTRACTOR_FOCAL_LENGTH_35MM 
EXTRACTOR_ISO_SPEED 
EXTRACTOR_EXPOSURE_MODE 
EXTRACTOR_METERING_MODE 
EXTRACTOR_MACRO_MODE 
EXTRACTOR_IMAGE_QUALITY 
EXTRACTOR_WHITE_BALANCE 
EXTRACTOR_ORIENTATION 
EXTRACTOR_TEMPLATE 
EXTRACTOR_SPLIT 
EXTRACTOR_PRODUCTVERSION 
EXTRACTOR_LAST_SAVED_BY 
EXTRACTOR_LAST_PRINTED 
EXTRACTOR_WORD_COUNT 
EXTRACTOR_CHARACTER_COUNT 
EXTRACTOR_TOTAL_EDITING_TIME 
EXTRACTOR_THUMBNAILS 
EXTRACTOR_SECURITY 
EXTRACTOR_CREATED_BY_SOFTWARE 
EXTRACTOR_MODIFIED_BY_SOFTWARE 
EXTRACTOR_REVISION_HISTORY 
EXTRACTOR_LOWERCASE 
EXTRACTOR_COMPANY 
EXTRACTOR_GENERATOR 
EXTRACTOR_CHARACTER_SET 
EXTRACTOR_LINE_COUNT 
EXTRACTOR_PARAGRAPH_COUNT 
EXTRACTOR_EDITING_CYCLES 
EXTRACTOR_SCALE 
EXTRACTOR_MANAGER 
EXTRACTOR_MOVIE_DIRECTOR 
EXTRACTOR_DURATION 
EXTRACTOR_INFORMATION 
EXTRACTOR_FULL_NAME 
EXTRACTOR_CHAPTER 
EXTRACTOR_YEAR 
EXTRACTOR_LINK 
EXTRACTOR_MUSIC_CD_IDENTIFIER 
EXTRACTOR_PLAY_COUNTER 
EXTRACTOR_POPULARITY_METER 
EXTRACTOR_CONTENT_TYPE 
EXTRACTOR_ENCODED_BY 
EXTRACTOR_TIME 
EXTRACTOR_MUSICIAN_CREDITS_LIST 
EXTRACTOR_MOOD 
EXTRACTOR_FORMAT_VERSION 
EXTRACTOR_TELEVISION_SYSTEM 
EXTRACTOR_SONG_COUNT 
EXTRACTOR_STARTING_SONG 
EXTRACTOR_HARDWARE_DEPENDENCY 
EXTRACTOR_RIPPER 
EXTRACTOR_FILE_SIZE 
EXTRACTOR_TRACK_NUMBER 
EXTRACTOR_ISRC 
EXTRACTOR_DISC_NUMBER 
EXTRACTOR_GNUNET_DISPLAY_TYPE 
EXTRACTOR_GNUNET_ECBC_URI 

Definition at line 55 of file extractor.h.

00055              {
00056   EXTRACTOR_UNKNOWN = 0,
00057   EXTRACTOR_FILENAME = 1,
00058   EXTRACTOR_MIMETYPE = 2,
00059   EXTRACTOR_TITLE = 3,
00060   EXTRACTOR_AUTHOR = 4,
00061   EXTRACTOR_ARTIST = 5,
00062   EXTRACTOR_DESCRIPTION = 6,
00063   EXTRACTOR_COMMENT = 7,
00064   EXTRACTOR_DATE = 8,
00065   EXTRACTOR_PUBLISHER = 9,
00066   EXTRACTOR_LANGUAGE = 10,
00067   EXTRACTOR_ALBUM = 11,
00068   EXTRACTOR_GENRE = 12,
00069   EXTRACTOR_LOCATION = 13,
00070   EXTRACTOR_VERSIONNUMBER = 14,
00071   EXTRACTOR_ORGANIZATION = 15,
00072   EXTRACTOR_COPYRIGHT = 16,
00073   EXTRACTOR_SUBJECT = 17,
00074   EXTRACTOR_KEYWORDS = 18,
00075   EXTRACTOR_CONTRIBUTOR = 19,
00076   EXTRACTOR_RESOURCE_TYPE = 20,
00077   EXTRACTOR_FORMAT = 21,
00078   EXTRACTOR_RESOURCE_IDENTIFIER = 22,
00079   EXTRACTOR_SOURCE = 23,
00080   EXTRACTOR_RELATION = 24,
00081   EXTRACTOR_COVERAGE = 25,
00082   EXTRACTOR_SOFTWARE = 26,
00083   EXTRACTOR_DISCLAIMER = 27,
00084   EXTRACTOR_WARNING = 28,
00085   EXTRACTOR_TRANSLATED = 29,
00086   EXTRACTOR_CREATION_DATE = 30,
00087   EXTRACTOR_MODIFICATION_DATE = 31,
00088   EXTRACTOR_CREATOR = 32,
00089   EXTRACTOR_PRODUCER = 33,
00090   EXTRACTOR_PAGE_COUNT = 34,
00091   EXTRACTOR_PAGE_ORIENTATION = 35,
00092   EXTRACTOR_PAPER_SIZE = 36,
00093   EXTRACTOR_USED_FONTS = 37,
00094   EXTRACTOR_PAGE_ORDER = 38,
00095   EXTRACTOR_CREATED_FOR = 39,
00096   EXTRACTOR_MAGNIFICATION = 40,
00097   EXTRACTOR_RELEASE = 41,
00098   EXTRACTOR_GROUP = 42,
00099   EXTRACTOR_SIZE = 43,
00100   EXTRACTOR_SUMMARY = 44,
00101   EXTRACTOR_PACKAGER = 45,
00102   EXTRACTOR_VENDOR = 46,
00103   EXTRACTOR_LICENSE = 47,
00104   EXTRACTOR_DISTRIBUTION = 48,
00105   EXTRACTOR_BUILDHOST = 49,
00106   EXTRACTOR_OS = 50,
00107   EXTRACTOR_DEPENDENCY = 51,
00108   EXTRACTOR_HASH_MD4 = 52,
00109   EXTRACTOR_HASH_MD5 = 53,
00110   EXTRACTOR_HASH_SHA0 = 54,
00111   EXTRACTOR_HASH_SHA1 = 55,
00112   EXTRACTOR_HASH_RMD160 = 56,
00113   EXTRACTOR_RESOLUTION = 57,
00114   EXTRACTOR_CATEGORY = 58,
00115   EXTRACTOR_BOOKTITLE = 59,
00116   EXTRACTOR_PRIORITY = 60,
00117   EXTRACTOR_CONFLICTS = 61,
00118   EXTRACTOR_REPLACES = 62,
00119   EXTRACTOR_PROVIDES = 63,
00120   EXTRACTOR_CONDUCTOR = 64,
00121   EXTRACTOR_INTERPRET = 65,
00122   EXTRACTOR_OWNER = 66,
00123   EXTRACTOR_LYRICS = 67,
00124   EXTRACTOR_MEDIA_TYPE = 68,
00125   EXTRACTOR_CONTACT = 69,
00126   EXTRACTOR_THUMBNAIL_DATA = 70,
00127   EXTRACTOR_PUBLICATION_DATE = 71,
00128   EXTRACTOR_CAMERA_MAKE = 72,
00129   EXTRACTOR_CAMERA_MODEL = 73,
00130   EXTRACTOR_EXPOSURE = 74,
00131   EXTRACTOR_APERTURE = 75,
00132   EXTRACTOR_EXPOSURE_BIAS = 76,
00133   EXTRACTOR_FLASH = 77,
00134   EXTRACTOR_FLASH_BIAS = 78,
00135   EXTRACTOR_FOCAL_LENGTH = 79,
00136   EXTRACTOR_FOCAL_LENGTH_35MM = 80,
00137   EXTRACTOR_ISO_SPEED = 81,
00138   EXTRACTOR_EXPOSURE_MODE = 82,
00139   EXTRACTOR_METERING_MODE = 83,
00140   EXTRACTOR_MACRO_MODE = 84,
00141   EXTRACTOR_IMAGE_QUALITY = 85,
00142   EXTRACTOR_WHITE_BALANCE = 86,
00143   EXTRACTOR_ORIENTATION = 87,
00144   EXTRACTOR_TEMPLATE = 88,
00145   EXTRACTOR_SPLIT = 89,
00146   EXTRACTOR_PRODUCTVERSION = 90,
00147   EXTRACTOR_LAST_SAVED_BY = 91,
00148   EXTRACTOR_LAST_PRINTED = 92,
00149   EXTRACTOR_WORD_COUNT = 93,
00150   EXTRACTOR_CHARACTER_COUNT = 94,
00151   EXTRACTOR_TOTAL_EDITING_TIME = 95,
00152   EXTRACTOR_THUMBNAILS = 96,
00153   EXTRACTOR_SECURITY = 97,
00154   EXTRACTOR_CREATED_BY_SOFTWARE = 98,
00155   EXTRACTOR_MODIFIED_BY_SOFTWARE = 99,
00156   EXTRACTOR_REVISION_HISTORY = 100,
00157   EXTRACTOR_LOWERCASE = 101,
00158   EXTRACTOR_COMPANY = 102,
00159   EXTRACTOR_GENERATOR = 103,
00160   EXTRACTOR_CHARACTER_SET = 104,
00161   EXTRACTOR_LINE_COUNT = 105,
00162   EXTRACTOR_PARAGRAPH_COUNT = 106,
00163   EXTRACTOR_EDITING_CYCLES = 107,
00164   EXTRACTOR_SCALE = 108,
00165   EXTRACTOR_MANAGER = 109,
00166   EXTRACTOR_MOVIE_DIRECTOR = 110,
00167   EXTRACTOR_DURATION = 111,
00168   EXTRACTOR_INFORMATION = 112,
00169   EXTRACTOR_FULL_NAME = 113,
00170   EXTRACTOR_CHAPTER = 114,
00171   EXTRACTOR_YEAR = 115,
00172   EXTRACTOR_LINK = 116,
00173   EXTRACTOR_MUSIC_CD_IDENTIFIER = 117,
00174   EXTRACTOR_PLAY_COUNTER = 118,
00175   EXTRACTOR_POPULARITY_METER = 119,
00176   EXTRACTOR_CONTENT_TYPE = 120,
00177   EXTRACTOR_ENCODED_BY = 121,
00178   EXTRACTOR_TIME = 122,
00179   EXTRACTOR_MUSICIAN_CREDITS_LIST = 123,
00180   EXTRACTOR_MOOD = 124, 
00181   EXTRACTOR_FORMAT_VERSION = 125,
00182   EXTRACTOR_TELEVISION_SYSTEM = 126,
00183   EXTRACTOR_SONG_COUNT = 127,
00184   EXTRACTOR_STARTING_SONG = 128,
00185   EXTRACTOR_HARDWARE_DEPENDENCY = 129,
00186   EXTRACTOR_RIPPER = 130,
00187   EXTRACTOR_FILE_SIZE = 131,
00188   EXTRACTOR_TRACK_NUMBER = 132,
00189   EXTRACTOR_ISRC = 133,
00190   EXTRACTOR_DISC_NUMBER = 134,
00191   EXTRACTOR_GNUNET_DISPLAY_TYPE = 135,
00192   EXTRACTOR_GNUNET_ECBC_URI = 136,
00193 } EXTRACTOR_KeywordType;


Function Documentation

EXTRACTOR_ExtractorList* EXTRACTOR_addLibrary ( EXTRACTOR_ExtractorList *  prev,
const char *  library 
)

Add a library for keyword extraction.

Parameters:
prev the previous list of libraries, may be NULL
library the name of the library
Returns:
the new list of libraries, equal to prev iff an error occurred

Definition at line 860 of file extractor.c.

References EXTRACTOR_addLibrary2(), and NULL.

Referenced by get_keywords_for_file(), main(), and test_plugins().

00862 {
00863   return EXTRACTOR_addLibrary2(prev, library, NULL);
00864 }

EXTRACTOR_ExtractorList* EXTRACTOR_addLibraryLast ( EXTRACTOR_ExtractorList *  prev,
const char *  library 
)

Add a library for keyword extraction at the END of the list.

Parameters:
prev the previous list of libraries, may be NULL
library the name of the library
Returns:
the new list of libraries, always equal to prev except if prev was NULL and no error occurs

Definition at line 904 of file extractor.c.

References EXTRACTOR_addLibraryLast2(), and NULL.

Referenced by main().

00906 {
00907   return EXTRACTOR_addLibraryLast2(prev, library, NULL);
00908 }

int EXTRACTOR_binaryDecode ( const char *  in,
unsigned char **  out,
size_t *  outSize 
)

This function can be used to decode the binary data encoded in the libextractor metadata (i.e. for the thumbnails).

Parameters:
in 0-terminated string from the meta-data
Returns:
1 on error, 0 on success

Definition at line 1724 of file extractor.c.

References malloc, and NULL.

Referenced by test().

01726                                              {
01727   unsigned char * buf;
01728   size_t pos;
01729   size_t wpos;
01730   unsigned char marker;
01731   size_t i;
01732   size_t end;
01733   size_t inSize;
01734 
01735   inSize = strlen(in);
01736   if (inSize == 0) {
01737     *out = NULL;
01738     *outSize = 0;
01739     return 0;
01740   }
01741 
01742   buf = malloc(inSize); /* slightly more than needed ;-) */
01743   if (buf == NULL)
01744     return 1; /* error */
01745   *out = buf;
01746 
01747   pos = 0;
01748   wpos = 0;
01749   while (pos < inSize) {
01750     end = pos + 255; /* 255 here: count the marker! */
01751     if (end > inSize)
01752       end = inSize;
01753     marker = in[pos++];
01754     for (i=pos;i<end;i++)
01755       buf[wpos++] = (in[i] == (char) marker) ? 0 : in[i];
01756     pos = end;
01757   }
01758   *outSize = wpos;
01759   return 0;
01760 }

char* EXTRACTOR_binaryEncode ( const unsigned char *  data,
size_t  size 
)

Encode the given binary data object as a 0-terminated C-string according to the LE binary data encoding standard.

Returns:
NULL on error, the 0-terminated encoding otherwise

Definition at line 1665 of file extractor.c.

References free, malloc, and NULL.

Referenced by flac_metadata(), libextractor_thumbnailgtk_extract(), and test().

01666                                            {
01667 
01668   char * binary;
01669   size_t pos;
01670   size_t end;
01671   size_t wpos;
01672   size_t i;
01673   unsigned int markers[8]; /* 256 bits */
01674   unsigned char marker;
01675 
01676  /* encode! */
01677   binary = malloc(2 + size + (size+256) / 254);
01678   if (binary == NULL)
01679     return NULL;
01680 
01681   pos = 0;
01682   wpos = 0;
01683   while (pos < size) {
01684     /* find unused value between 1 and 255 in
01685        the next 254 bytes */
01686     end = pos + 254;
01687     if (end < pos)
01688       break; /* integer overflow! */
01689     if (end > size)
01690       end = size;
01691     memset(markers,
01692            0,
01693            sizeof(markers));
01694     for (i=pos;i<end;i++)
01695       markers[data[i]&7] |= 1 << (data[i] >> 3);
01696     marker = 1;
01697     while (markers[marker&7] & (1 << (marker >> 3))) {
01698       marker++;
01699       if (marker == 0) {
01700         /* assertion failed... */
01701         free(binary);
01702         return NULL;
01703       }
01704     }
01705     /* recode */
01706     binary[wpos++] = marker;
01707     for (i=pos;i<end;i++)
01708       binary[wpos++] = data[i] == 0 ? marker : data[i];
01709     pos = end;
01710   }
01711   binary[wpos++] = 0; /* 0-termination! */
01712   return binary;
01713 }

unsigned int EXTRACTOR_countKeywords ( EXTRACTOR_KeywordList *  keywords  ) 

Count the number of keywords in the keyword list.

Parameters:
keywords the keyword list
Returns:
the number of keywords in the list

Definition at line 1646 of file extractor.c.

References EXTRACTOR_Keywords::next, and NULL.

Referenced by compare_keywords_to_ref(), and main().

01647 {
01648   int count = 0;
01649   while (keywords != NULL)
01650     {
01651       count++;
01652       keywords = keywords->next;
01653     }
01654   return count;
01655 }

const char* EXTRACTOR_extractLast ( const EXTRACTOR_KeywordType  type,
EXTRACTOR_KeywordList *  keywords 
)

Extract the last keyword of the given type from the keyword list.

Parameters:
type the type of the keyword
keywords the keyword list
Returns:
the last matching keyword, or NULL if none matches

Definition at line 1604 of file extractor.c.

References EXTRACTOR_Keywords::keyword, EXTRACTOR_Keywords::keywordType, EXTRACTOR_Keywords::next, and NULL.

Referenced by EXTRACT_NAME(), libextractor_html_extract(), libextractor_mime_extract(), libextractor_ole2_extract(), libextractor_ps_extract(), libextractor_thumbnailffmpeg_extract(), libextractor_thumbnailgtk_extract(), libextractor_zip_extract(), and main().

01606 {
01607   char *result = NULL;
01608   while (keywords != NULL)
01609     {
01610       if (keywords->keywordType == type)
01611         result = keywords->keyword;
01612       keywords = keywords->next;
01613     }
01614   return result;
01615 }

const char* EXTRACTOR_extractLastByString ( const char *  type,
EXTRACTOR_KeywordList *  keywords 
)

Extract the last keyword whose type, given by its name as a string, matches from the keyword list.

Parameters:
type the string describing the type of the keyword
keywords the keyword list
Returns:
the last matching keyword, or NULL if none matches

Definition at line 1624 of file extractor.c.

References _, EXTRACTOR_Keywords::keyword, EXTRACTOR_Keywords::keywordType, keywordTypes, EXTRACTOR_Keywords::next, and NULL.

Referenced by main(), and printSelectedKeywordsBibtex().

01626 {
01627   char * result = NULL;
01628 
01629   if (type == NULL)
01630     return NULL;
01631   while (keywords != NULL) {
01632     if ( (0 == strcmp(_(keywordTypes[keywords->keywordType]), type)) ||
01633          (0 == strcmp(keywordTypes[keywords->keywordType], type) ) )
01634       result = keywords->keyword;
01635     keywords = keywords->next;
01636   }
01637   return result;
01638 }