00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "platform.h"
00022 #include "extractor.h"
00023
00024 static EXTRACTOR_KeywordList *
00025 addKeyword (EXTRACTOR_KeywordType type,
00026 char *keyword, EXTRACTOR_KeywordList * next)
00027 {
00028 EXTRACTOR_KeywordList *result;
00029
00030 if (keyword == NULL)
00031 return next;
00032 result = malloc (sizeof (EXTRACTOR_KeywordList));
00033 result->next = next;
00034 result->keyword = keyword;
00035 result->keywordType = type;
00036 return result;
00037 }
00038
00039 typedef struct
00040 {
00041 char *text;
00042 EXTRACTOR_KeywordType type;
00043 } Matches;
00044
00045 static Matches tmap[] = {
00046 {"/Title (", EXTRACTOR_TITLE},
00047 {"/Subject (", EXTRACTOR_SUBJECT},
00048 {"/Author (", EXTRACTOR_AUTHOR},
00049 {"/Keywords (", EXTRACTOR_KEYWORDS},
00050 {"/Creator (", EXTRACTOR_CREATOR},
00051 {"/Producer (", EXTRACTOR_PRODUCER},
00052 {NULL, 0},
00053 };
00054
00055 static struct EXTRACTOR_Keywords *
00056 parseZZZ (const char *data,
00057 size_t pos, size_t len, struct EXTRACTOR_Keywords *prev)
00058 {
00059 size_t slen;
00060 size_t end;
00061 int i;
00062 char *value;
00063
00064 end = pos + len;
00065 slen = strlen ("ps:SDict begin [");
00066 if (len <= slen)
00067 return prev;
00068 if (0 != strncmp ("ps:SDict begin [ ", &data[pos], slen))
00069 return prev;
00070 pos += slen;
00071 while (pos < end)
00072 {
00073 i = 0;
00074 while (tmap[i].text != NULL)
00075 {
00076 slen = strlen (tmap[i].text);
00077 if (pos + slen < end)
00078 {
00079 if (0 == strncmp (&data[pos], tmap[i].text, slen))
00080 {
00081 pos += slen;
00082 slen = pos;
00083 while ((slen < end) && (data[slen] != ')'))
00084 slen++;
00085 slen = slen - pos;
00086 value = malloc (slen + 1);
00087 value[slen] = '\0';
00088 memcpy (value, &data[pos], slen);
00089 prev = addKeyword (tmap[i].type, value, prev);
00090 pos += slen + 1;
00091 }
00092 }
00093 i++;
00094 }
00095 pos++;
00096 }
00097 return prev;
00098 }
00099
/**
 * Read an unsigned int in host byte order from a possibly unaligned
 * address.
 *
 * The bytes are copied with memcpy straight into an `unsigned int`, which
 * avoids the aliasing/alignment problems of casting a char buffer.
 * Callers in this file assume that this is a 4-byte read.
 *
 * @param data address to read from; sizeof (unsigned int) bytes must be
 *             readable there
 * @return the raw value, with no byte-order conversion applied
 */
static unsigned int
getIntAt (const void *data)
{
  unsigned int value;

  memcpy (&value, data, sizeof (value));
  return value;
}
00108
/**
 * Read an unsigned short in host byte order from a possibly unaligned
 * address.
 *
 * The bytes are copied with memcpy straight into an `unsigned short`,
 * which avoids the aliasing/alignment problems of casting a char buffer.
 * Callers in this file assume that this is a 2-byte read.
 *
 * @param data address to read from; sizeof (unsigned short) bytes must be
 *             readable there
 * @return the raw value widened to unsigned int, with no byte-order
 *         conversion applied
 */
static unsigned int
getShortAt (const void *data)
{
  unsigned short value;

  memcpy (&value, data, sizeof (value));
  return value;
}
00117
00118 struct EXTRACTOR_Keywords *
00119 libextractor_dvi_extract (const char *filename,
00120 const unsigned char *data,
00121 size_t size, struct EXTRACTOR_Keywords *prev)
00122 {
00123 unsigned int klen;
00124 char *comment;
00125 unsigned int pos;
00126 unsigned int opos;
00127 unsigned int len;
00128 unsigned int pageCount;
00129 char *pages;
00130
00131 if (size < 40)
00132 return prev;
00133 if ((data[0] != 247) || (data[1] != 2))
00134 return prev;
00135 klen = data[14];
00136
00137 pos = size - 1;
00138 while ((data[pos] == 223) && (pos > 0))
00139 pos--;
00140 if ((data[pos] != 2) || (pos < 40))
00141 return prev;
00142 pos--;
00143 pos -= 4;
00144
00145 if (data[pos] != 249)
00146 return prev;
00147 opos = pos;
00148 pos = ntohl (getIntAt (&data[opos + 1]));
00149 if (pos + 25 > size)
00150 return prev;
00151
00152 if (data[pos] != 248)
00153 return prev;
00154 pageCount = 0;
00155 opos = pos;
00156 pos = ntohl (getIntAt (&data[opos + 1]));
00157 while (1)
00158 {
00159 if (pos == (unsigned int) -1)
00160 break;
00161 if (pos + 45 > size)
00162 return prev;
00163 if (data[pos] != 139)
00164 return prev;
00165 pageCount++;
00166 opos = pos;
00167 pos = ntohl (getIntAt (&data[opos + 41]));
00168 if (pos == (unsigned int) -1)
00169 break;
00170 if (pos >= opos)
00171 return prev;
00172 }
00173
00174 pages = malloc (16);
00175 snprintf (pages, 16, "%u", pageCount);
00176 comment = malloc (klen + 1);
00177 comment[klen] = '\0';
00178 memcpy (comment, &data[15], klen);
00179 prev = addKeyword (EXTRACTOR_MIMETYPE, strdup ("application/x-dvi"), prev);
00180 prev = addKeyword (EXTRACTOR_COMMENT, comment, prev);
00181 prev = addKeyword (EXTRACTOR_PAGE_COUNT, pages, prev);
00182
00183 pos = opos;
00184 while (pos < size - 100)
00185 {
00186 switch (data[pos])
00187 {
00188 case 139:
00189
00190 pos += 45;
00191 break;
00192 case 239:
00193 len = data[pos + 1];
00194 if (pos + 2 + len < size)
00195 prev = parseZZZ ((const char *) data, pos + 2, len, prev);
00196 pos += len + 2;
00197 break;
00198 case 240:
00199 len = ntohs (getShortAt (&data[pos + 1]));
00200 if (pos + 3 + len < size)
00201 prev = parseZZZ ((const char *) data, pos + 3, len, prev);
00202 pos += len + 3;
00203 break;
00204 case 241:
00205 len = (ntohs (getShortAt (&data[pos + 1]))) + 65536 * data[pos + 3];
00206 if (pos + 4 + len < size)
00207 prev = parseZZZ ((const char *) data, pos + 4, len, prev);
00208 pos += len + 4;
00209 break;
00210 case 242:
00211 len = ntohl (getIntAt (&data[pos + 1]));
00212 if (pos + 1 + len < size)
00213 prev = parseZZZ ((const char *) data, pos + 5, len, prev);
00214 pos += len + 5;
00215 break;
00216 default:
00217 return prev;
00218 }
00219 }
00220 return prev;
00221 }