#include <stdio.h>
Go to the source code of this file.
| #define EXTRACTOR_DEFAULT_LIBRARIES EXTRACTOR_getDefaultLibraries() |
Definition at line 46 of file extractor.h.
| #define EXTRACTOR_DUPLICATES_REMOVE_UNKNOWN 2 |
| #define EXTRACTOR_DUPLICATES_TYPELESS 1 |
Test if a given libextractor (LE) keyword type contains binary data.
Definition at line 198 of file extractor.h.
Referenced by printSelectedKeywords(), and printSelectedKeywordsGrepFriendly().
| #define EXTRACTOR_VERSION 0x00052003 |
Examples of the version encoding: 0.2.6-1 => 0x00020601; 4.5.2-0 => 0x04050200.
Definition at line 35 of file extractor.h.
| typedef EXTRACTOR_KeywordList*(* ExtractMethod)(const char *filename, char *data, size_t filesize, EXTRACTOR_KeywordList *next, const char *options) |
Signature of the extract method that each plugin must provide.
| filename | may be NULL (!) | |
| data | must not be modified (!) |
Definition at line 226 of file extractor.h.
| typedef struct EXTRACTOR_Extractor EXTRACTOR_ExtractorList |
Linked list of extractor helper-libraries. An application builds this list by telling libextractor to load various keyword-extraction libraries. Libraries can also be unloaded (removed from this list, see removeLibrary).
Client code should never be concerned with the internals of this struct.
| typedef struct EXTRACTOR_Keywords EXTRACTOR_KeywordList |
A linked list of keywords. This structure is passed around in libextractor and is typically the result of any keyword extraction operation.
Each entry in the keyword list consists of a string (the keyword) and the keyword type (of type EXTRACTOR_KeywordType) describing how/from where the keyword was obtained.
Enumeration defining various sources of keywords. See also http://dublincore.org/documents/1998/09/dces/
Definition at line 55 of file extractor.h.
00055 { 00056 EXTRACTOR_UNKNOWN = 0, 00057 EXTRACTOR_FILENAME = 1, 00058 EXTRACTOR_MIMETYPE = 2, 00059 EXTRACTOR_TITLE = 3, 00060 EXTRACTOR_AUTHOR = 4, 00061 EXTRACTOR_ARTIST = 5, 00062 EXTRACTOR_DESCRIPTION = 6, 00063 EXTRACTOR_COMMENT = 7, 00064 EXTRACTOR_DATE = 8, 00065 EXTRACTOR_PUBLISHER = 9, 00066 EXTRACTOR_LANGUAGE = 10, 00067 EXTRACTOR_ALBUM = 11, 00068 EXTRACTOR_GENRE = 12, 00069 EXTRACTOR_LOCATION = 13, 00070 EXTRACTOR_VERSIONNUMBER = 14, 00071 EXTRACTOR_ORGANIZATION = 15, 00072 EXTRACTOR_COPYRIGHT = 16, 00073 EXTRACTOR_SUBJECT = 17, 00074 EXTRACTOR_KEYWORDS = 18, 00075 EXTRACTOR_CONTRIBUTOR = 19, 00076 EXTRACTOR_RESOURCE_TYPE = 20, 00077 EXTRACTOR_FORMAT = 21, 00078 EXTRACTOR_RESOURCE_IDENTIFIER = 22, 00079 EXTRACTOR_SOURCE = 23, 00080 EXTRACTOR_RELATION = 24, 00081 EXTRACTOR_COVERAGE = 25, 00082 EXTRACTOR_SOFTWARE = 26, 00083 EXTRACTOR_DISCLAIMER = 27, 00084 EXTRACTOR_WARNING = 28, 00085 EXTRACTOR_TRANSLATED = 29, 00086 EXTRACTOR_CREATION_DATE = 30, 00087 EXTRACTOR_MODIFICATION_DATE = 31, 00088 EXTRACTOR_CREATOR = 32, 00089 EXTRACTOR_PRODUCER = 33, 00090 EXTRACTOR_PAGE_COUNT = 34, 00091 EXTRACTOR_PAGE_ORIENTATION = 35, 00092 EXTRACTOR_PAPER_SIZE = 36, 00093 EXTRACTOR_USED_FONTS = 37, 00094 EXTRACTOR_PAGE_ORDER = 38, 00095 EXTRACTOR_CREATED_FOR = 39, 00096 EXTRACTOR_MAGNIFICATION = 40, 00097 EXTRACTOR_RELEASE = 41, 00098 EXTRACTOR_GROUP = 42, 00099 EXTRACTOR_SIZE = 43, 00100 EXTRACTOR_SUMMARY = 44, 00101 EXTRACTOR_PACKAGER = 45, 00102 EXTRACTOR_VENDOR = 46, 00103 EXTRACTOR_LICENSE = 47, 00104 EXTRACTOR_DISTRIBUTION = 48, 00105 EXTRACTOR_BUILDHOST = 49, 00106 EXTRACTOR_OS = 50, 00107 EXTRACTOR_DEPENDENCY = 51, 00108 EXTRACTOR_HASH_MD4 = 52, 00109 EXTRACTOR_HASH_MD5 = 53, 00110 EXTRACTOR_HASH_SHA0 = 54, 00111 EXTRACTOR_HASH_SHA1 = 55, 00112 EXTRACTOR_HASH_RMD160 = 56, 00113 EXTRACTOR_RESOLUTION = 57, 00114 EXTRACTOR_CATEGORY = 58, 00115 EXTRACTOR_BOOKTITLE = 59, 00116 EXTRACTOR_PRIORITY = 60, 00117 EXTRACTOR_CONFLICTS = 61, 00118 EXTRACTOR_REPLACES = 62, 
00119 EXTRACTOR_PROVIDES = 63, 00120 EXTRACTOR_CONDUCTOR = 64, 00121 EXTRACTOR_INTERPRET = 65, 00122 EXTRACTOR_OWNER = 66, 00123 EXTRACTOR_LYRICS = 67, 00124 EXTRACTOR_MEDIA_TYPE = 68, 00125 EXTRACTOR_CONTACT = 69, 00126 EXTRACTOR_THUMBNAIL_DATA = 70, 00127 EXTRACTOR_PUBLICATION_DATE = 71, 00128 EXTRACTOR_CAMERA_MAKE = 72, 00129 EXTRACTOR_CAMERA_MODEL = 73, 00130 EXTRACTOR_EXPOSURE = 74, 00131 EXTRACTOR_APERTURE = 75, 00132 EXTRACTOR_EXPOSURE_BIAS = 76, 00133 EXTRACTOR_FLASH = 77, 00134 EXTRACTOR_FLASH_BIAS = 78, 00135 EXTRACTOR_FOCAL_LENGTH = 79, 00136 EXTRACTOR_FOCAL_LENGTH_35MM = 80, 00137 EXTRACTOR_ISO_SPEED = 81, 00138 EXTRACTOR_EXPOSURE_MODE = 82, 00139 EXTRACTOR_METERING_MODE = 83, 00140 EXTRACTOR_MACRO_MODE = 84, 00141 EXTRACTOR_IMAGE_QUALITY = 85, 00142 EXTRACTOR_WHITE_BALANCE = 86, 00143 EXTRACTOR_ORIENTATION = 87, 00144 EXTRACTOR_TEMPLATE = 88, 00145 EXTRACTOR_SPLIT = 89, 00146 EXTRACTOR_PRODUCTVERSION = 90, 00147 EXTRACTOR_LAST_SAVED_BY = 91, 00148 EXTRACTOR_LAST_PRINTED = 92, 00149 EXTRACTOR_WORD_COUNT = 93, 00150 EXTRACTOR_CHARACTER_COUNT = 94, 00151 EXTRACTOR_TOTAL_EDITING_TIME = 95, 00152 EXTRACTOR_THUMBNAILS = 96, 00153 EXTRACTOR_SECURITY = 97, 00154 EXTRACTOR_CREATED_BY_SOFTWARE = 98, 00155 EXTRACTOR_MODIFIED_BY_SOFTWARE = 99, 00156 EXTRACTOR_REVISION_HISTORY = 100, 00157 EXTRACTOR_LOWERCASE = 101, 00158 EXTRACTOR_COMPANY = 102, 00159 EXTRACTOR_GENERATOR = 103, 00160 EXTRACTOR_CHARACTER_SET = 104, 00161 EXTRACTOR_LINE_COUNT = 105, 00162 EXTRACTOR_PARAGRAPH_COUNT = 106, 00163 EXTRACTOR_EDITING_CYCLES = 107, 00164 EXTRACTOR_SCALE = 108, 00165 EXTRACTOR_MANAGER = 109, 00166 EXTRACTOR_MOVIE_DIRECTOR = 110, 00167 EXTRACTOR_DURATION = 111, 00168 EXTRACTOR_INFORMATION = 112, 00169 EXTRACTOR_FULL_NAME = 113, 00170 EXTRACTOR_CHAPTER = 114, 00171 EXTRACTOR_YEAR = 115, 00172 EXTRACTOR_LINK = 116, 00173 EXTRACTOR_MUSIC_CD_IDENTIFIER = 117, 00174 EXTRACTOR_PLAY_COUNTER = 118, 00175 EXTRACTOR_POPULARITY_METER = 119, 00176 EXTRACTOR_CONTENT_TYPE = 120, 00177 
EXTRACTOR_ENCODED_BY = 121, 00178 EXTRACTOR_TIME = 122, 00179 EXTRACTOR_MUSICIAN_CREDITS_LIST = 123, 00180 EXTRACTOR_MOOD = 124, 00181 EXTRACTOR_FORMAT_VERSION = 125, 00182 EXTRACTOR_TELEVISION_SYSTEM = 126, 00183 EXTRACTOR_SONG_COUNT = 127, 00184 EXTRACTOR_STARTING_SONG = 128, 00185 EXTRACTOR_HARDWARE_DEPENDENCY = 129, 00186 EXTRACTOR_RIPPER = 130, 00187 EXTRACTOR_FILE_SIZE = 131, 00188 EXTRACTOR_TRACK_NUMBER = 132, 00189 EXTRACTOR_ISRC = 133, 00190 EXTRACTOR_DISC_NUMBER = 134, 00191 EXTRACTOR_GNUNET_DISPLAY_TYPE = 135, 00192 EXTRACTOR_GNUNET_ECBC_URI = 136, 00193 } EXTRACTOR_KeywordType;
| EXTRACTOR_ExtractorList* EXTRACTOR_addLibrary | ( | EXTRACTOR_ExtractorList * | prev, | |
| const char * | library | |||
| ) |
Add a library for keyword extraction.
| prev | the previous list of libraries, may be NULL | |
| library | the name of the library |
Definition at line 860 of file extractor.c.
References EXTRACTOR_addLibrary2(), and NULL.
Referenced by get_keywords_for_file(), main(), and test_plugins().
00862 { 00863 return EXTRACTOR_addLibrary2(prev, library, NULL); 00864 }
| EXTRACTOR_ExtractorList* EXTRACTOR_addLibraryLast | ( | EXTRACTOR_ExtractorList * | prev, | |
| const char * | library | |||
| ) |
Add a library for keyword extraction at the END of the list.
| prev | the previous list of libraries, may be NULL | |
| library | the name of the library |
Definition at line 904 of file extractor.c.
References EXTRACTOR_addLibraryLast2(), and NULL.
Referenced by main().
00906 { 00907 return EXTRACTOR_addLibraryLast2(prev, library, NULL); 00908 }
| int EXTRACTOR_binaryDecode | ( | const char * | in, | |
| unsigned char ** | out, | |||
| size_t * | outSize | |||
| ) |
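This function can be used to decode the binary data encoded in the libextractor metadata (e.g. for the thumbnails). On success it returns 0 and stores a malloc'ed buffer in *out and its length in *outSize (NULL and 0 if the input string is empty); it returns 1 if the buffer cannot be allocated.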
| in | 0-terminated string from the meta-data |
Definition at line 1724 of file extractor.c.
Referenced by test().
01726 { 01727 unsigned char * buf; 01728 size_t pos; 01729 size_t wpos; 01730 unsigned char marker; 01731 size_t i; 01732 size_t end; 01733 size_t inSize; 01734 01735 inSize = strlen(in); 01736 if (inSize == 0) { 01737 *out = NULL; 01738 *outSize = 0; 01739 return 0; 01740 } 01741 01742 buf = malloc(inSize); /* slightly more than needed ;-) */ 01743 if (buf == NULL) 01744 return 1; /* error */ 01745 *out = buf; 01746 01747 pos = 0; 01748 wpos = 0; 01749 while (pos < inSize) { 01750 end = pos + 255; /* 255 here: count the marker! */ 01751 if (end > inSize) 01752 end = inSize; 01753 marker = in[pos++]; 01754 for (i=pos;i<end;i++) 01755 buf[wpos++] = (in[i] == (char) marker) ? 0 : in[i]; 01756 pos = end; 01757 } 01758 *outSize = wpos; 01759 return 0; 01760 }
| char* EXTRACTOR_binaryEncode | ( | const unsigned char * | data, | |
| size_t | size | |||
| ) |
Encode the given binary data object as a 0-terminated C-string according to the LE binary data encoding standard.
Definition at line 1665 of file extractor.c.
References free, malloc, and NULL.
Referenced by flac_metadata(), libextractor_thumbnailgtk_extract(), and test().
01666 { 01667 01668 char * binary; 01669 size_t pos; 01670 size_t end; 01671 size_t wpos; 01672 size_t i; 01673 unsigned int markers[8]; /* 256 bits */ 01674 unsigned char marker; 01675 01676 /* encode! */ 01677 binary = malloc(2 + size + (size+256) / 254); 01678 if (binary == NULL) 01679 return NULL; 01680 01681 pos = 0; 01682 wpos = 0; 01683 while (pos < size) { 01684 /* find unused value between 1 and 255 in 01685 the next 254 bytes */ 01686 end = pos + 254; 01687 if (end < pos) 01688 break; /* integer overflow! */ 01689 if (end > size) 01690 end = size; 01691 memset(markers, 01692 0, 01693 sizeof(markers)); 01694 for (i=pos;i<end;i++) 01695 markers[data[i]&7] |= 1 << (data[i] >> 3); 01696 marker = 1; 01697 while (markers[marker&7] & (1 << (marker >> 3))) { 01698 marker++; 01699 if (marker == 0) { 01700 /* assertion failed... */ 01701 free(binary); 01702 return NULL; 01703 } 01704 } 01705 /* recode */ 01706 binary[wpos++] = marker; 01707 for (i=pos;i<end;i++) 01708 binary[wpos++] = data[i] == 0 ? marker : data[i]; 01709 pos = end; 01710 } 01711 binary[wpos++] = 0; /* 0-termination! */ 01712 return binary; 01713 }
| unsigned int EXTRACTOR_countKeywords | ( | EXTRACTOR_KeywordList * | keywords | ) |
Count the number of keywords in the keyword list.
| keywords | the keyword list |
Definition at line 1646 of file extractor.c.
References EXTRACTOR_Keywords::next, and NULL.
Referenced by compare_keywords_to_ref(), and main().
01647 { 01648 int count = 0; 01649 while (keywords != NULL) 01650 { 01651 count++; 01652 keywords = keywords->next; 01653 } 01654 return count; 01655 }
| const char* EXTRACTOR_extractLast | ( | const EXTRACTOR_KeywordType | type, | |
| EXTRACTOR_KeywordList * | keywords | |||
| ) |
Extract the last keyword of the given type from the keyword list.
| type | the type of the keyword | |
| keywords | the keyword list |
Definition at line 1604 of file extractor.c.
References EXTRACTOR_Keywords::keyword, EXTRACTOR_Keywords::keywordType, EXTRACTOR_Keywords::next, and NULL.
Referenced by EXTRACT_NAME(), libextractor_html_extract(), libextractor_mime_extract(), libextractor_ole2_extract(), libextractor_ps_extract(), libextractor_thumbnailffmpeg_extract(), libextractor_thumbnailgtk_extract(), libextractor_zip_extract(), and main().
01606 { 01607 char *result = NULL; 01608 while (keywords != NULL) 01609 { 01610 if (keywords->keywordType == type) 01611 result = keywords->keyword; 01612 keywords = keywords->next; 01613 } 01614 return result; 01615 }
| const char* EXTRACTOR_extractLastByString | ( | const char * | type, | |
| EXTRACTOR_KeywordList * | keywords | |||
| ) |
Extract the last keyword whose type is described by the given string from the keyword list.
| type | the string describing the type of the keyword | |
| keywords | the keyword list |
Definition at line 1624 of file extractor.c.
References _, EXTRACTOR_Keywords::keyword, EXTRACTOR_Keywords::keywordType, keywordTypes, EXTRACTOR_Keywords::next, and NULL.
Referenced by main(), and printSelectedKeywordsBibtex().
01626 { 01627 char * result = NULL; 01628 01629 if (type == NULL) 01630 return NULL; 01631 while (keywords != NULL) { 01632 if ( (0 == strcmp(_(keywordTypes[keywords->keywordType]), type)) || 01633 (0 == strcmp(keywordTypes[keywords->keywordType], type) ) ) 01634 result = keywords->keyword; 01635 keywords = keywords->next; 01636 } 01637 return result; 01638 }