#include "platform.h"
#include "extractor.h"
#include <pthread.h>
#include <../../libltdl/ltdl.h>
#include "iconv.c"
Go to the source code of this file.
Defines | |
| #define | DEBUG 0 |
| #define | HIGHEST_TYPE_NUMBER 137 |
| #define | EXSO "" |
| #define | OGGSO "" |
| #define | FLACSO "" |
| #define | QTSO "" |
| #define | OLESO "" |
| #define | MPEGSO "" |
| #define | DEFSO "libextractor_html:libextractor_man:libextractor_ps:libextractor_pdf:libextractor_mp3:libextractor_id3v2:libextractor_id3v23:libextractor_id3v24:libextractor_mime:libextractor_tar:libextractor_dvi:libextractor_deb:libextractor_png:libextractor_gif:libextractor_wav:libextractor_flv:libextractor_real:libextractor_jpeg:libextractor_tiff:libextractor_zip:libextractor_rpm:libextractor_riff:libextractor_applefile:libextractor_elf:libextractor_oo:libextractor_asf:libextractor_sid:libextractor_nsfe:libextractor_nsf:libextractor_it:libextractor_xm:libextractor_s3m" |
| #define | DEFAULT_LIBRARIES MPEGSO EXSO OLESO OGGSO FLACSO QTSO DEFSO |
| #define | LTDL_MUTEX_LOCK |
| #define | LTDL_MUTEX_UNLOCK |
| #define | MAX_READ 1024 * 1024 * 1024 |
| #define | MAX_DECOMPRESS 16 * 1024 * 1024 |
Functions | |
| const char * | EXTRACTOR_getDefaultLibraries () |
| static char * | cut_bin (char *in) |
| static char * | cut_lib (char *in) |
| static char * | get_path_from_PATH () |
| static char * | get_path_from_ENV_PREFIX () |
| static char * | os_get_installation_path () |
| void | __attribute__ ((constructor)) |
| void | __attribute__ ((destructor)) |
| static int | fileopen (const char *filename, int oflag,...) |
| EXTRACTOR_ExtractorList * | EXTRACTOR_loadDefaultLibraries () |
| const char * | EXTRACTOR_getKeywordTypeAsString (const EXTRACTOR_KeywordType type) |
| static void * | getSymbolWithPrefix (void *lib_handle, const char *lib_name, const char *sym_name) |
| static int | loadLibrary (const char *name, void **libHandle, ExtractMethod *method) |
| static EXTRACTOR_ExtractorList * | EXTRACTOR_addLibrary2 (EXTRACTOR_ExtractorList *prev, const char *library, const char *options) |
| EXTRACTOR_ExtractorList * | EXTRACTOR_addLibrary (EXTRACTOR_ExtractorList *prev, const char *library) |
| static EXTRACTOR_ExtractorList * | EXTRACTOR_addLibraryLast2 (EXTRACTOR_ExtractorList *prev, const char *library, const char *options) |
| EXTRACTOR_ExtractorList * | EXTRACTOR_addLibraryLast (EXTRACTOR_ExtractorList *prev, const char *library) |
| EXTRACTOR_ExtractorList * | EXTRACTOR_loadConfigLibraries (EXTRACTOR_ExtractorList *prev, const char *config) |
| EXTRACTOR_ExtractorList * | EXTRACTOR_removeLibrary (EXTRACTOR_ExtractorList *prev, const char *library) |
| void | EXTRACTOR_removeAll (EXTRACTOR_ExtractorList *libraries) |
| static EXTRACTOR_KeywordList * | getKeywords (EXTRACTOR_ExtractorList *extractor, const char *filename, const unsigned char *data, size_t size) |
| EXTRACTOR_KeywordList * | EXTRACTOR_getKeywords (EXTRACTOR_ExtractorList *extractor, const char *filename) |
| EXTRACTOR_KeywordList * | EXTRACTOR_getKeywords2 (EXTRACTOR_ExtractorList *extractor, const void *data, size_t size) |
| static void | removeKeyword (const char *keyword, const EXTRACTOR_KeywordType type, const unsigned int options, EXTRACTOR_KeywordList **list, EXTRACTOR_KeywordList *current) |
| EXTRACTOR_KeywordList * | EXTRACTOR_removeDuplicateKeywords (EXTRACTOR_KeywordList *list, const unsigned int options) |
| EXTRACTOR_KeywordList * | EXTRACTOR_removeEmptyKeywords (EXTRACTOR_KeywordList *list) |
| EXTRACTOR_KeywordList * | EXTRACTOR_removeKeywordsOfType (EXTRACTOR_KeywordList *list, EXTRACTOR_KeywordType type) |
| void | EXTRACTOR_printKeywords (FILE *handle, EXTRACTOR_KeywordList *keywords) |
| void | EXTRACTOR_freeKeywords (EXTRACTOR_KeywordList *keywords) |
| EXTRACTOR_KeywordType | EXTRACTOR_getHighestKeywordTypeNumber () |
| const char * | EXTRACTOR_extractLast (const EXTRACTOR_KeywordType type, EXTRACTOR_KeywordList *keywords) |
| const char * | EXTRACTOR_extractLastByString (const char *type, EXTRACTOR_KeywordList *keywords) |
| unsigned int | EXTRACTOR_countKeywords (EXTRACTOR_KeywordList *keywords) |
| char * | EXTRACTOR_binaryEncode (const unsigned char *data, size_t size) |
| int | EXTRACTOR_binaryDecode (const char *in, unsigned char **out, size_t *outSize) |
Variables | |
| static const char * | keywordTypes [] |
| static char * | old_dlsearchpath = NULL |
| static pthread_mutex_t | ltdl_lock = PTHREAD_MUTEX_INITIALIZER |
| #define DEBUG 0 |
Definition at line 39 of file extractor.c.
Referenced by dvbsub_parse(), gif_read_extension(), libextractor_thumbnailffmpeg_extract(), and mxf_read_local_tags().
| #define DEFAULT_LIBRARIES MPEGSO EXSO OLESO OGGSO FLACSO QTSO DEFSO |
Definition at line 271 of file extractor.c.
Referenced by EXTRACTOR_getDefaultLibraries(), and EXTRACTOR_loadDefaultLibraries().
| #define DEFSO "libextractor_html:libextractor_man:libextractor_ps:libextractor_pdf:libextractor_mp3:libextractor_id3v2:libextractor_id3v23:libextractor_id3v24:libextractor_mime:libextractor_tar:libextractor_dvi:libextractor_deb:libextractor_png:libextractor_gif:libextractor_wav:libextractor_flv:libextractor_real:libextractor_jpeg:libextractor_tiff:libextractor_zip:libextractor_rpm:libextractor_riff:libextractor_applefile:libextractor_elf:libextractor_oo:libextractor_asf:libextractor_sid:libextractor_nsfe:libextractor_nsf:libextractor_it:libextractor_xm:libextractor_s3m" |
Definition at line 237 of file extractor.c.
| #define EXSO "" |
Definition at line 201 of file extractor.c.
| #define FLACSO "" |
Definition at line 213 of file extractor.c.
| #define HIGHEST_TYPE_NUMBER 137 |
Definition at line 186 of file extractor.c.
Referenced by EXTRACTOR_getHighestKeywordTypeNumber(), EXTRACTOR_getKeywordTypeAsString(), and EXTRACTOR_printKeywords().
| #define LTDL_MUTEX_LOCK |
Value:
if (pthread_mutex_lock (&ltdl_lock) != 0) \
    abort();
Definition at line 736 of file extractor.c.
Referenced by EXTRACTOR_removeLibrary(), getSymbolWithPrefix(), and loadLibrary().
| #define LTDL_MUTEX_UNLOCK |
Value:
if (pthread_mutex_unlock (&ltdl_lock) != 0) \
    abort();
Definition at line 739 of file extractor.c.
Referenced by EXTRACTOR_removeLibrary(), getSymbolWithPrefix(), and loadLibrary().
| #define MAX_DECOMPRESS 16 * 1024 * 1024 |
How many bytes do we actually try to decompress? (from the beginning of the file). Limit to 16 MB.
Definition at line 1052 of file extractor.c.
Referenced by getKeywords().
| #define MAX_READ 1024 * 1024 * 1024 |
How many bytes do we actually try to scan? (from the beginning of the file). Limit to 1 GB.
Definition at line 1046 of file extractor.c.
Referenced by EXTRACTOR_getKeywords().
| #define MPEGSO "" |
Definition at line 231 of file extractor.c.
| #define OGGSO "" |
Definition at line 207 of file extractor.c.
| #define OLESO "" |
Definition at line 225 of file extractor.c.
| #define QTSO "" |
Definition at line 219 of file extractor.c.
| void __attribute__ | ( | (destructor) | ) |
Definition at line 632 of file extractor.c.
References free, NULL, and ShutdownWinEnv().
00632 { 00633 lt_dlsetsearchpath(old_dlsearchpath); 00634 if (old_dlsearchpath != NULL) { 00635 free(old_dlsearchpath); 00636 old_dlsearchpath = NULL; 00637 } 00638 #ifdef MINGW 00639 ShutdownWinEnv(); 00640 #endif 00641 lt_dlexit (); 00642 }
| void __attribute__ | ( | (constructor) | ) |
Definition at line 590 of file extractor.c.
References _, fprintf, free, InitWinEnv(), ISOLOCALEDIR, LOCALEDIR, malloc, NULL, os_get_installation_path(), PACKAGE, and strcat.
00590 { 00591 int err; 00592 const char * opath; 00593 char * path; 00594 char * cpath; 00595 00596 #if ENABLE_NLS 00597 BINDTEXTDOMAIN(PACKAGE, LOCALEDIR); 00598 BINDTEXTDOMAIN("iso-639", ISOLOCALEDIR); /* used by wordextractor */ 00599 #endif 00600 err = lt_dlinit (); 00601 if (err > 0) { 00602 #if DEBUG 00603 fprintf(stderr, 00604 _("Initialization of plugin mechanism failed: %s!\n"), 00605 lt_dlerror()); 00606 #endif 00607 return; 00608 } 00609 opath = lt_dlgetsearchpath(); 00610 if (opath != NULL) 00611 old_dlsearchpath = strdup(opath); 00612 path = os_get_installation_path(); 00613 if (path != NULL) { 00614 if (opath != NULL) { 00615 cpath = malloc(strlen(path) + strlen(opath) + 4); 00616 strcpy(cpath, opath); 00617 strcat(cpath, ":"); 00618 strcat(cpath, path); 00619 lt_dlsetsearchpath(cpath); 00620 free(path); 00621 free(cpath); 00622 } else { 00623 lt_dlsetsearchpath(path); 00624 free(path); 00625 } 00626 } 00627 #ifdef MINGW 00628 InitWinEnv(); 00629 #endif 00630 }
| static char* cut_bin | ( | char * | in | ) | [static] |
Definition at line 279 of file extractor.c.
References NULL.
Referenced by get_path_from_ENV_PREFIX(), and get_path_from_PATH().
/**
 * Strip a trailing "bin" directory component from a path, in place.
 *
 * Strings of four characters or fewer are returned untouched.  For longer
 * strings one trailing '/' or '\\' is removed first; if the remainder ends
 * in a separator followed by "bin", the "bin" part is cut so that the
 * result ends with that separator.
 *
 * "bin" is only removed when it is a whole path component.  Comparing just
 * the last three characters would turn "/usr/sbin" into "/usr/s".
 *
 * @param in writable 0-terminated path, may be NULL
 * @return in (modified in place), or NULL if in was NULL
 */
static char * cut_bin(char * in) {
  size_t len;

  if (in == NULL)
    return NULL;
  len = strlen(in);
  if (len <= 4)
    return in;
  if ( (in[len-1] == '/') ||
       (in[len-1] == '\\') )
    in[--len] = '\0';
  /* len >= 4 at this point, so in[len-4] is inside the string */
  if ( (0 == strcmp(&in[len-3], "bin")) &&
       ( (in[len-4] == '/') ||
         (in[len-4] == '\\') ) )
    in[len-3] = '\0';
  return in;
}
| static char* cut_lib | ( | char * | in | ) | [static] |
Definition at line 298 of file extractor.c.
References NULL.
Referenced by get_path_from_ENV_PREFIX().
/**
 * Strip a trailing "lib" directory component from a path, in place.
 *
 * Strings of four characters or fewer are returned untouched.  For longer
 * strings one trailing '/' or '\\' is removed first; if the remainder ends
 * in a separator followed by "lib", the "lib" part is cut so that the
 * result ends with that separator.
 *
 * "lib" is only removed when it is a whole path component.  Comparing just
 * the last three characters would turn "/opt/mylib" into "/opt/my".
 *
 * @param in writable 0-terminated path, may be NULL
 * @return in (modified in place), or NULL if in was NULL
 */
static char * cut_lib(char * in) {
  size_t len;

  if (in == NULL)
    return NULL;
  len = strlen(in);
  if (len <= 4)
    return in;
  if ( (in[len-1] == '/') ||
       (in[len-1] == '\\') )
    in[--len] = '\0';
  /* len >= 4 at this point, so in[len-4] is inside the string */
  if ( (0 == strcmp(&in[len-3], "lib")) &&
       ( (in[len-4] == '/') ||
         (in[len-4] == '\\') ) )
    in[len-3] = '\0';
  return in;
}
| EXTRACTOR_ExtractorList* EXTRACTOR_addLibrary | ( | EXTRACTOR_ExtractorList * | prev, | |
| const char * | library | |||
| ) |
Add a library for keyword extraction.
| prev | the previous list of libraries, may be NULL | |
| library | the name of the library |
Definition at line 860 of file extractor.c.
References EXTRACTOR_addLibrary2(), and NULL.
Referenced by get_keywords_for_file(), main(), and test_plugins().
00862 { 00863 return EXTRACTOR_addLibrary2(prev, library, NULL); 00864 }
| static EXTRACTOR_ExtractorList* EXTRACTOR_addLibrary2 | ( | EXTRACTOR_ExtractorList * | prev, | |
| const char * | library, | |||
| const char * | options | |||
| ) | [static] |
Definition at line 832 of file extractor.c.
References EXTRACTOR_Extractor::extractMethod, EXTRACTOR_Extractor::libname, EXTRACTOR_Extractor::libraryHandle, loadLibrary(), malloc, EXTRACTOR_Extractor::next, NULL, and EXTRACTOR_Extractor::options.
Referenced by EXTRACTOR_addLibrary(), and EXTRACTOR_loadConfigLibraries().
00834 { 00835 EXTRACTOR_ExtractorList *result; 00836 void *handle; 00837 ExtractMethod method; 00838 00839 if (-1 == loadLibrary (library, &handle, &method)) 00840 return prev; 00841 result = malloc (sizeof (EXTRACTOR_ExtractorList)); 00842 result->next = prev; 00843 result->libraryHandle = handle; 00844 result->extractMethod = method; 00845 result->libname = strdup (library); 00846 if( options ) 00847 result->options = strdup (options); 00848 else 00849 result->options = NULL; 00850 return result; 00851 }
| EXTRACTOR_ExtractorList* EXTRACTOR_addLibraryLast | ( | EXTRACTOR_ExtractorList * | prev, | |
| const char * | library | |||
| ) |
Add a library for keyword extraction at the END of the list.
| prev | the previous list of libraries, may be NULL | |
| library | the name of the library |
Definition at line 904 of file extractor.c.
References EXTRACTOR_addLibraryLast2(), and NULL.
Referenced by main().
00906 { 00907 return EXTRACTOR_addLibraryLast2(prev, library, NULL); 00908 }
| static EXTRACTOR_ExtractorList* EXTRACTOR_addLibraryLast2 | ( | EXTRACTOR_ExtractorList * | prev, | |
| const char * | library, | |||
| const char * | options | |||
| ) | [static] |
Definition at line 868 of file extractor.c.
References EXTRACTOR_Extractor::extractMethod, EXTRACTOR_Extractor::libname, EXTRACTOR_Extractor::libraryHandle, loadLibrary(), malloc, EXTRACTOR_Extractor::next, NULL, and EXTRACTOR_Extractor::options.
Referenced by EXTRACTOR_addLibraryLast(), and EXTRACTOR_loadConfigLibraries().
00870 { 00871 EXTRACTOR_ExtractorList *result; 00872 EXTRACTOR_ExtractorList *pos; 00873 void *handle; 00874 ExtractMethod method; 00875 00876 if (-1 == loadLibrary (library, &handle, &method)) 00877 return prev; 00878 result = malloc (sizeof (EXTRACTOR_ExtractorList)); 00879 result->next = NULL; 00880 result->libraryHandle = handle; 00881 result->extractMethod = method; 00882 result->libname = strdup (library); 00883 if( options ) 00884 result->options = strdup (options); 00885 else 00886 result->options = NULL; 00887 if (prev == NULL) 00888 return result; 00889 pos = prev; 00890 while (pos->next != NULL) 00891 pos = pos->next; 00892 pos->next = result; 00893 return prev; 00894 }
| int EXTRACTOR_binaryDecode | ( | const char * | in, | |
| unsigned char ** | out, | |||
| size_t * | outSize | |||
| ) |
This function can be used to decode the binary data encoded in the libextractor metadata (i.e. for the thumbnails).
| in | 0-terminated string from the meta-data |
Definition at line 1724 of file extractor.c.
Referenced by test().
/**
 * Decode binary data that was encoded into a 0-terminated string in the
 * libextractor metadata (i.e. for the thumbnails).
 *
 * The input is a sequence of chunks of at most 255 characters.  The first
 * character of each chunk is a marker; every later character of the chunk
 * that equals the marker decodes to a 0 byte, all others are copied.
 *
 * @param in 0-terminated string from the meta-data
 * @param out set to a malloc'ed buffer with the decoded bytes (caller
 *        frees), or to NULL if in is the empty string
 * @param outSize set to the number of decoded bytes
 * @return 0 on success, 1 on error (NULL argument or out of memory);
 *         on error *out and *outSize are left untouched
 */
int EXTRACTOR_binaryDecode(const char * in,
                           unsigned char ** out,
                           size_t * outSize) {
  unsigned char * buf;
  size_t inSize;
  size_t pos;
  size_t wpos;

  if ( (in == NULL) || (out == NULL) || (outSize == NULL) )
    return 1; /* error */
  inSize = strlen(in);
  if (inSize == 0) {
    *out = NULL;
    *outSize = 0;
    return 0;
  }

  buf = malloc(inSize); /* slightly more than needed ;-) */
  if (buf == NULL)
    return 1; /* error */

  pos = 0;
  wpos = 0;
  while (pos < inSize) {
    /* 255 here: the chunk length counts the marker; computed from the
       remaining length so that the addition cannot wrap */
    const size_t end = (inSize - pos > 255) ? pos + 255 : inSize;
    const char marker = in[pos++];

    for (; pos < end; pos++)
      buf[wpos++] = (in[pos] == marker) ? 0 : (unsigned char) in[pos];
  }
  *out = buf;
  *outSize = wpos;
  return 0;
}
| char* EXTRACTOR_binaryEncode | ( | const unsigned char * | data, | |
| size_t | size | |||
| ) |
Encode the given binary data object as a 0-terminated C-string according to the LE binary data encoding standard.
Definition at line 1665 of file extractor.c.
References free, malloc, and NULL.
Referenced by flac_metadata(), libextractor_thumbnailgtk_extract(), and test().
/**
 * Encode the given binary data object as a 0-terminated C-string
 * according to the LE binary data encoding standard.
 *
 * The data is processed in chunks of up to 254 bytes.  For each chunk a
 * marker value between 1 and 255 that does not occur in the chunk is
 * written first, followed by the chunk with every 0 byte replaced by the
 * marker.
 *
 * The set of byte values seen in a chunk is kept in a plain 256-entry
 * table.  A bitmask built with "1 << (byte >> 3)" would shift into the
 * sign bit of int for bytes >= 248, which is undefined behaviour.
 *
 * @param data the bytes to encode; may be NULL only if size is 0
 * @param size number of bytes in data
 * @return malloc'ed 0-terminated string (caller frees), NULL on error
 *         (invalid arguments, size too large, or out of memory)
 */
char * EXTRACTOR_binaryEncode(const unsigned char * data,
                              size_t size) {
  char * binary;
  size_t pos;
  size_t end;
  size_t wpos;
  size_t i;
  unsigned char used[256]; /* used[v] != 0: value v occurs in the chunk */
  unsigned char marker;

  if ( (data == NULL) && (size > 0) )
    return NULL;
  if (size > ((size_t) -1) / 2)
    return NULL; /* buffer size computation below would overflow */

  /* encode! */
  binary = malloc(2 + size + (size+256) / 254);
  if (binary == NULL)
    return NULL;

  pos = 0;
  wpos = 0;
  while (pos < size) {
    /* find unused value between 1 and 255 in
       the next 254 bytes */
    end = (size - pos > 254) ? pos + 254 : size;
    memset(used, 0, sizeof(used));
    for (i=pos;i<end;i++)
      used[data[i]] = 1;
    marker = 1;
    while (used[marker]) {
      marker++;
      if (marker == 0) {
        /* cannot happen: at most 254 of the 255 candidates are taken */
        free(binary);
        return NULL;
      }
    }
    /* recode */
    binary[wpos++] = (char) marker;
    for (i=pos;i<end;i++)
      binary[wpos++] = (char) ((data[i] == 0) ? marker : data[i]);
    pos = end;
  }
  binary[wpos] = '\0'; /* 0-termination! */
  return binary;
}
| unsigned int EXTRACTOR_countKeywords | ( | EXTRACTOR_KeywordList * | keywords | ) |
Count the number of keywords in the keyword list.
| keywords | the keyword list |
Definition at line 1646 of file extractor.c.
References EXTRACTOR_Keywords::next, and NULL.
Referenced by compare_keywords_to_ref(), and main().
01647 { 01648 int count = 0; 01649 while (keywords != NULL) 01650 { 01651 count++; 01652 keywords = keywords->next; 01653 } 01654 return count; 01655 }
| const char* EXTRACTOR_extractLast | ( | const EXTRACTOR_KeywordType | type, | |
| EXTRACTOR_KeywordList * | keywords | |||
| ) |
Extract the last keyword of the given type from the keyword list.
| type | the type of the keyword | |
| keywords | the keyword list |
Definition at line 1604 of file extractor.c.
References EXTRACTOR_Keywords::keyword, EXTRACTOR_Keywords::keywordType, EXTRACTOR_Keywords::next, and NULL.
Referenced by EXTRACT_NAME(), libextractor_html_extract(), libextractor_mime_extract(), libextractor_ole2_extract(), libextractor_ps_extract(), libextractor_thumbnailffmpeg_extract(), libextractor_thumbnailgtk_extract(), libextractor_zip_extract(), and main().
01606 { 01607 char *result = NULL; 01608 while (keywords != NULL) 01609 { 01610 if (keywords->keywordType == type) 01611 result = keywords->keyword; 01612 keywords = keywords->next; 01613 } 01614 return result; 01615 }
| const char* EXTRACTOR_extractLastByString | ( | const char * | type, | |
| EXTRACTOR_KeywordList * | keywords | |||
| ) |
Extract the last keyword whose type name matches the given string from the keyword list.
| type | the string describing the type of the keyword | |
| keywords | the keyword list |
Definition at line 1624 of file extractor.c.
References _, EXTRACTOR_Keywords::keyword, EXTRACTOR_Keywords::keywordType, keywordTypes, EXTRACTOR_Keywords::next, and NULL.
Referenced by main(), and printSelectedKeywordsBibtex().
01626 { 01627 char * result = NULL; 01628 01629 if (type == NULL) 01630 return NULL; 01631 while (keywords != NULL) { 01632 if ( (0 == strcmp(_(keywordTypes[keywords->keywordType]), type)) || 01633 (0 == strcmp(keywordTypes[keywords->keywordType], type) ) ) 01634 result = keywords->keyword; 01635 keywords = keywords->next; 01636 } 01637 return result; 01638 }
| void EXTRACTOR_freeKeywords | ( | EXTRACTOR_KeywordList * | keywords | ) |
Free the memory occupied by the keyword list (and the keyword strings in it!)
| keywords | the list to free |
Definition at line 1576 of file extractor.c.
References free, EXTRACTOR_Keywords::keyword, EXTRACTOR_Keywords::next, and NULL.
Referenced by main(), processSentences(), and test_plugins().
01577 { 01578 EXTRACTOR_KeywordList *prev; 01579 while (keywords != NULL) 01580 { 01581 prev = keywords; 01582 keywords = keywords->next; 01583 free (prev->keyword); 01584 free (prev); 01585 } 01586 }
| const char* EXTRACTOR_getDefaultLibraries | ( | void | ) |
Definition at line 273 of file extractor.c.
References DEFAULT_LIBRARIES.
00273 { 00274 return DEFAULT_LIBRARIES; 00275 }
| EXTRACTOR_KeywordType EXTRACTOR_getHighestKeywordTypeNumber | ( | void | ) |
Return the highest type number, exclusive as in [0,highest).
Definition at line 1592 of file extractor.c.
References HIGHEST_TYPE_NUMBER.
Referenced by main().
01593 { 01594 return HIGHEST_TYPE_NUMBER; 01595 }
| EXTRACTOR_KeywordList* EXTRACTOR_getKeywords | ( | EXTRACTOR_ExtractorList * | extractor, | |
| const char * | filename | |||
| ) |
Extract keywords from a file using the available extractors.
| extractor | the list of extractor libraries | |
| filename | the name of the file |
Definition at line 1297 of file extractor.c.
References fileopen(), free, getKeywords(), malloc, MAX_READ, MMAP, MUNMAP, NULL, and STAT.
Referenced by get_keywords_for_file(), and main().
01298 { 01299 EXTRACTOR_KeywordList *result; 01300 int file; 01301 void * buffer; 01302 struct stat fstatbuf; 01303 size_t size; 01304 int eno, dir; 01305 01306 if (-1 == STAT(filename, &fstatbuf)) 01307 return NULL; 01308 01309 if (!S_ISDIR(fstatbuf.st_mode)) { 01310 dir = 0; 01311 01312 #ifdef O_LARGEFILE 01313 file = fileopen(filename, O_RDONLY | O_LARGEFILE); 01314 #else 01315 file = fileopen(filename, O_RDONLY); 01316 #endif 01317 if (-1 == file) 01318 return NULL; 01319 01320 size = fstatbuf.st_size; 01321 if (size == 0) { 01322 close(file); 01323 return NULL; 01324 } 01325 01326 if (size > MAX_READ) 01327 size = MAX_READ; /* do not mmap/read more than 1 GB! */ 01328 buffer = MMAP(NULL, size, PROT_READ, MAP_PRIVATE, file, 0); 01329 if ( (buffer == NULL) || (buffer == (void *) -1) ) { 01330 eno = errno; 01331 close(file); 01332 errno = eno; 01333 return NULL; 01334 } 01335 } 01336 else { 01337 dir = 1; 01338 01339 size = 0; 01340 buffer = malloc(1); 01341 } 01342 01343 result = getKeywords(extractor, 01344 filename, 01345 buffer, 01346 size); 01347 01348 if (dir) 01349 free(buffer); 01350 else { 01351 MUNMAP (buffer, size); 01352 close(file); 01353 } 01354 return result; 01355 }
| EXTRACTOR_KeywordList* EXTRACTOR_getKeywords2 | ( | EXTRACTOR_ExtractorList * | extractor, | |
| const void * | data, | |||
| size_t | size | |||
| ) |
Extract keywords from a buffer in memory using the available extractors.
| extractor | the list of extractor libraries | |
| data | the data of the file | |
| size | the number of bytes in data |
Definition at line 1370 of file extractor.c.
References getKeywords(), and NULL.
01372 { 01373 if (data == NULL) 01374 return NULL; 01375 return getKeywords(extractor, 01376 NULL, 01377 data, 01378 size); 01379 }
| const char* EXTRACTOR_getKeywordTypeAsString | ( | const EXTRACTOR_KeywordType | type | ) |
Get the textual name of the keyword.
Definition at line 726 of file extractor.c.
References HIGHEST_TYPE_NUMBER, keywordTypes, and NULL.
Referenced by main(), printSelectedKeywords(), and printSelectedKeywordsGrepFriendly().
00727 { 00728 if ((type >= 0) && (type < HIGHEST_TYPE_NUMBER)) 00729 return keywordTypes[type]; 00730 else 00731 return NULL; 00732 }
| EXTRACTOR_ExtractorList* EXTRACTOR_loadConfigLibraries | ( | EXTRACTOR_ExtractorList * | prev, | |
| const char * | config | |||
| ) |
Load multiple libraries as specified by the user.
| config | a string given by the user that defines which libraries should be loaded. Has the format "[[-]LIBRARYNAME[:[-]LIBRARYNAME]*]". For example, libextractor_mp3.so:libextractor_ogg.so loads the mp3 and the ogg library. The '-' before the LIBRARYNAME indicates that the library should be added to the end of the library list (addLibraryLast). | |
| prev | the previous list of libraries, may be NULL |
Definition at line 924 of file extractor.c.
References EXTRACTOR_addLibrary2(), EXTRACTOR_addLibraryLast2(), free, len, and NULL.
Referenced by EXTRACTOR_loadDefaultLibraries(), get_keywords_for_file(), main(), and test_plugins().
00926 { 00927 char *cpy; 00928 int pos; 00929 int last; 00930 int lastconf; 00931 int len; 00932 00933 if (config == NULL) 00934 return prev; 00935 len = strlen(config); 00936 cpy = strdup(config); 00937 pos = 0; 00938 last = 0; 00939 lastconf = 0; 00940 while (pos < len) 00941 { 00942 while ((cpy[pos] != ':') && (cpy[pos] != '\0') && 00943 (cpy[pos] != '(')) 00944 pos++; 00945 if( cpy[pos] == '(' ) { 00946 cpy[pos++] = '\0'; /* replace '(' by termination */ 00947 lastconf = pos; /* start config from here, after (. */ 00948 while ((cpy[pos] != '\0') && (cpy[pos] != ')')) 00949 pos++; /* config until ) or EOS. */ 00950 if( cpy[pos] == ')' ) { 00951 cpy[pos++] = '\0'; /* write end of config here. */ 00952 while ((cpy[pos] != ':') && (cpy[pos] != '\0')) 00953 pos++; /* forward until real end of string found. */ 00954 cpy[pos++] = '\0'; 00955 } else { 00956 cpy[pos++] = '\0'; /* end of string. */ 00957 } 00958 } else { 00959 lastconf = -1; /* NULL config when no (). */ 00960 cpy[pos++] = '\0'; /* replace ':' by termination */ 00961 } 00962 if (cpy[last] == '-') 00963 { 00964 last++; 00965 if( lastconf != -1 ) 00966 prev = EXTRACTOR_addLibraryLast2 (prev, &cpy[last], 00967 &cpy[lastconf]); 00968 else 00969 prev = EXTRACTOR_addLibraryLast2 (prev, &cpy[last], NULL); 00970 } 00971 else 00972 if( lastconf != -1 ) 00973 prev = EXTRACTOR_addLibrary2 (prev, &cpy[last], &cpy[lastconf]); 00974 else 00975 prev = EXTRACTOR_addLibrary2 (prev, &cpy[last], NULL); 00976 00977 last = pos; 00978 } 00979 free (cpy); 00980 return prev; 00981 }
| EXTRACTOR_ExtractorList* EXTRACTOR_loadDefaultLibraries | ( | void | ) |
Load the default set of libraries. The default set of libraries consists of the libraries that are part of the libextractor distribution (except split and filename extractor) plus the extractors that are specified in the environment variable "LIBEXTRACTOR_LIBRARIES".
Definition at line 700 of file extractor.c.
References DEFAULT_LIBRARIES, EXTRACTOR_loadConfigLibraries(), free, getenv(), malloc, NULL, and strcat.
Referenced by get_keywords_for_file(), main(), test_plugins(), and testLoadPlugins().
00701 { 00702 char *env; 00703 char *tmp; 00704 EXTRACTOR_ExtractorList *res; 00705 00706 00707 env = getenv ("LIBEXTRACTOR_LIBRARIES"); 00708 if (env == NULL) 00709 { 00710 return EXTRACTOR_loadConfigLibraries (NULL, DEFAULT_LIBRARIES); 00711 } 00712 tmp = malloc (strlen (env) + strlen (DEFAULT_LIBRARIES) + 2); 00713 strcpy (tmp, env); 00714 strcat (tmp, ":"); 00715 strcat (tmp, DEFAULT_LIBRARIES); 00716 res = EXTRACTOR_loadConfigLibraries (NULL, tmp); 00717 free (tmp); 00718 return res; 00719 }
| void EXTRACTOR_printKeywords | ( | FILE * | handle, | |
| EXTRACTOR_KeywordList * | keywords | |||
| ) |
Print a keyword list to a file. For debugging.
| handle | the file to write to (stdout, stderr), may NOT be NULL | |
| keywords | the list of keywords to print, may be NULL |
Definition at line 1532 of file extractor.c.
References _, EXTRACTOR_THUMBNAIL_DATA, fprintf, free, HIGHEST_TYPE_NUMBER, iconvHelper(), EXTRACTOR_Keywords::keyword, EXTRACTOR_Keywords::keywordType, keywordTypes, EXTRACTOR_Keywords::next, and NULL.
Referenced by main().
01534 { 01535 iconv_t cd; 01536 char * buf; 01537 01538 cd = iconv_open( 01539 nl_langinfo(CODESET) 01540 , "UTF-8"); 01541 while (keywords != NULL) 01542 { 01543 if (cd == (iconv_t) -1) 01544 buf = strdup(keywords->keyword); 01545 else 01546 buf = iconvHelper(cd, 01547 keywords->keyword); 01548 if (keywords->keywordType == EXTRACTOR_THUMBNAIL_DATA) { 01549 fprintf(handle, 01550 _("%s - (binary)\n"), 01551 _(keywordTypes[keywords->keywordType])); 01552 } else { 01553 if (keywords->keywordType >= HIGHEST_TYPE_NUMBER) 01554 fprintf(handle, 01555 _("INVALID TYPE - %s\n"), 01556 buf); 01557 else 01558 fprintf(handle, 01559 "%s - %s\n", 01560 _(keywordTypes[keywords->keywordType]), 01561 buf); 01562 } 01563 free(buf); 01564 keywords = keywords->next; 01565 } 01566 if (cd != (iconv_t) -1) 01567 iconv_close(cd); 01568 }
| void EXTRACTOR_removeAll | ( | EXTRACTOR_ExtractorList * | libraries | ) |
Remove all extractors.
| libraries | the list of extractors |
Definition at line 1034 of file extractor.c.
References EXTRACTOR_removeLibrary(), EXTRACTOR_Extractor::libname, and NULL.
Referenced by get_keywords_for_file(), main(), test_plugins(), and testLoadPlugins().
01035 { 01036 while (libraries != NULL) 01037 libraries = EXTRACTOR_removeLibrary (libraries, libraries->libname); 01038 }
| EXTRACTOR_KeywordList* EXTRACTOR_removeDuplicateKeywords | ( | EXTRACTOR_KeywordList * | list, | |
| const unsigned int | options | |||
| ) |
Remove duplicate keywords from the list.
| list | the original keyword list (destroyed in the process!) | |
| options | a set of options (DUPLICATES_XXXX) |
Definition at line 1433 of file extractor.c.
References EXTRACTOR_Keywords::keyword, EXTRACTOR_Keywords::keywordType, EXTRACTOR_Keywords::next, NULL, and removeKeyword().
Referenced by main().
01434 { 01435 EXTRACTOR_KeywordList *pos; 01436 01437 pos = list; 01438 while (pos != NULL) { 01439 removeKeyword(pos->keyword, 01440 pos->keywordType, 01441 options, 01442 &list, 01443 pos); 01444 pos = pos->next; 01445 } 01446 return list; 01447 }
| EXTRACTOR_KeywordList* EXTRACTOR_removeEmptyKeywords | ( | EXTRACTOR_KeywordList * | list | ) |
Remove empty (all-whitespace) keywords from the list.
| list | the original keyword list (destroyed in the process!) |
Definition at line 1455 of file extractor.c.
References free, EXTRACTOR_Keywords::keyword, EXTRACTOR_Keywords::next, and NULL.
Referenced by main().
01455 { 01456 EXTRACTOR_KeywordList * pos; 01457 EXTRACTOR_KeywordList * last; 01458 01459 last = NULL; 01460 pos = list; 01461 while (pos != NULL) 01462 { 01463 int allWhite; 01464 int i; 01465 allWhite = 1; 01466 for (i=strlen(pos->keyword)-1;i>=0;i--) 01467 if (! isspace(pos->keyword[i])) 01468 allWhite = 0; 01469 if (allWhite) 01470 { 01471 EXTRACTOR_KeywordList * next; 01472 next = pos->next; 01473 if (last == NULL) 01474 list = next; 01475 else 01476 last->next = next; 01477 free(pos->keyword); 01478 free(pos); 01479 pos = next; 01480 } 01481 else 01482 { 01483 last = pos; 01484 pos = pos->next; 01485 } 01486 } 01487 return list; 01488 }
| EXTRACTOR_KeywordList* EXTRACTOR_removeKeywordsOfType | ( | EXTRACTOR_KeywordList * | list, | |
| EXTRACTOR_KeywordType | type | |||
| ) |
Remove keywords of a particular type from the