SILVERCODERS DocToText  4.0.1512
Converts DOC, XLS, XLSB, PPT, RTF, ODF (ODT, ODS, ODP), OOXML (DOCX, XLSX, PPTX), iWork (PAGES, NUMBERS, KEYNOTE), ODFXML (FODP, FODS, FODT), PDF, EML and HTML documents to plain text. Extracts metadata and annotations.
 All Classes Functions Enumerations Pages
doctotext_c_api.h
1 #include <stdio.h>
2 #include <time.h>
3 
/*
 * When this header is compiled as C++, open an extern "C" block so that the
 * declarations that follow get C linkage. The block is closed by the matching
 * #ifdef __cplusplus section at the end of the header.
 *
 * NOTE(review): no include guard or #pragma once is visible in this listing,
 * so the typedefs in this header would be repeated if it were included twice
 * in one translation unit - confirm whether a guard should be added.
 */
4 #ifdef __cplusplus
5 extern "C"
6 {
7 #endif
8 
/**
 * Calling-convention macro placed between the return type and the name of
 * every function declared in this header.
 *
 * If the including code has not defined it already, it expands to __cdecl
 * when _WIN32 is defined and _WIN64 is not, and to nothing in every other
 * case.
 */
9 #ifndef DOCTOTEXT_CALL
10  #if defined(_WIN32) && !defined(_WIN64)
11  #define DOCTOTEXT_CALL __cdecl
12  #else
13  #define DOCTOTEXT_CALL
14  #endif
15 #endif
16 
/**
 * Integer type that carries one of the DOCTOTEXT_PARSER_* codes defined below.
 * It is accepted by doctotext_extractor_params_set_parser_type() and written
 * through the out-parameter of the doctotext_parser_type_by_* functions.
 *
 * NOTE(review): the constant names suggest one parser per document format,
 * with DOCTOTEXT_PARSER_AUTO presumably requesting automatic detection -
 * confirm against the library documentation.
 */
17 typedef int doctotext_parser_type;
18 
19 #define DOCTOTEXT_PARSER_AUTO 0
20 #define DOCTOTEXT_PARSER_RTF 1
21 #define DOCTOTEXT_PARSER_ODF_OOXML 2
22 #define DOCTOTEXT_PARSER_XLS 3
23 #define DOCTOTEXT_PARSER_DOC 4
24 #define DOCTOTEXT_PARSER_PPT 5
25 #define DOCTOTEXT_PARSER_HTML 6
26 #define DOCTOTEXT_PARSER_IWORK 7
27 #define DOCTOTEXT_PARSER_XLSB 8
28 #define DOCTOTEXT_PARSER_PDF 9
29 #define DOCTOTEXT_PARSER_TXT 10
30 
/**
 * Integer type that carries one of the three XML handling codes defined
 * below. It is accepted by doctotext_extractor_params_set_xml_parse_mode().
 *
 * NOTE(review): judging by the names, PARSE_XML presumably parses the XML as
 * it is, FIX_XML attempts a repair first and STRIP_XML removes the markup -
 * TODO confirm against the implementation.
 */
31 typedef int doctotext_xml_parse_mode;
32 
33 #define DOCTOTEXT_PARSE_XML 0
34 #define DOCTOTEXT_FIX_XML 1
35 #define DOCTOTEXT_STRIP_XML 2
36 
/**
 * Integer type that carries one of the DOCTOTEXT_TABLE_STYLE_* codes defined
 * below. It is accepted by doctotext_formatting_style_set_table_style().
 *
 * NOTE(review): the exact text layout each code produces is not visible in
 * this header - confirm against the library documentation.
 */
37 typedef int doctotext_table_style;
38 
39 #define DOCTOTEXT_TABLE_STYLE_TABLE_LOOK 0
40 #define DOCTOTEXT_TABLE_STYLE_ONE_ROW 1
41 #define DOCTOTEXT_TABLE_STYLE_ONE_COL 2
42 
/**
 * Integer type that carries one of the DOCTOTEXT_URL_STYLE_* codes defined
 * below. It is accepted by doctotext_formatting_style_set_url_style().
 *
 * NOTE(review): how each code renders a hyperlink in the output text is not
 * visible in this header - confirm against the library documentation.
 */
43 typedef int doctotext_url_style;
44 
45 #define DOCTOTEXT_URL_STYLE_TEXT_ONLY 0
46 #define DOCTOTEXT_URL_STYLE_EXTENDED 1
47 #define DOCTOTEXT_URL_STYLE_UNDERSCORED 2
48 
/**
 * Integer type that carries one of the DOCTOTEXT_METADATA_TYPE_* codes defined
 * below. It is the return type of the doctotext_metadata_*_type() functions
 * (author, last_modify_by, creation_date, last_modification_date,
 * pages_count and words_count).
 *
 * NOTE(review): the names suggest NONE = value not available, EXTRACTED =
 * read from the document, ESTIMATED = computed by the library - confirm.
 */
49 typedef int doctotext_metadata_type;
50 
51 #define DOCTOTEXT_METADATA_TYPE_NONE 0
52 #define DOCTOTEXT_METADATA_TYPE_EXTRACTED 1
53 #define DOCTOTEXT_METADATA_TYPE_ESTIMATED 2
54 
55 /*-------------------------------------------------------------------------------------------------------------------------*/
56 
/**
 * Opaque handle for a formatting style. Only the struct tag is declared in
 * this header, so client code can hold pointers to it but cannot access its
 * members.
 */
57 typedef struct DocToTextFormattingStyle DocToTextFormattingStyle;
58 
/**
 * Returns a pointer to a DocToTextFormattingStyle.
 *
 * The parameter list is spelled (void) so that C compilers see a real
 * prototype and reject calls that pass arguments; an empty () in C (before
 * C23) leaves the parameters unspecified. In C++ both spellings are
 * equivalent, so existing callers are unaffected.
 *
 * NOTE(review): presumably the result is a newly created object that the
 * caller releases with doctotext_free_formatting_style(); the failure value
 * is not visible in this header - confirm against the implementation.
 */
65 DocToTextFormattingStyle* DOCTOTEXT_CALL doctotext_create_formatting_style(void);
66 
/**
 * Takes a pointer to a DocToTextFormattingStyle and returns nothing.
 *
 * NOTE(review): presumably destroys an object obtained from
 * doctotext_create_formatting_style(); whether NULL is accepted is not
 * visible here - confirm.
 */
73 void DOCTOTEXT_CALL doctotext_free_formatting_style(DocToTextFormattingStyle* formatting_style);
74 
/**
 * Takes a formatting style and a doctotext_table_style value (one of the
 * DOCTOTEXT_TABLE_STYLE_* codes). Returns nothing.
 */
80 void DOCTOTEXT_CALL doctotext_formatting_style_set_table_style(DocToTextFormattingStyle* formatting_style, doctotext_table_style table_style);
81 
/**
 * Takes a formatting style and a doctotext_url_style value (one of the
 * DOCTOTEXT_URL_STYLE_* codes). Returns nothing.
 */
87 void DOCTOTEXT_CALL doctotext_formatting_style_set_url_style(DocToTextFormattingStyle* formatting_style, doctotext_url_style url_style);
88 
/**
 * Takes a formatting style and a read-only C string named prefix. Returns
 * nothing.
 *
 * NOTE(review): presumably the string is emitted in front of list items in
 * the extracted text; whether it is copied or must outlive the style is not
 * visible here - confirm.
 */
94 void DOCTOTEXT_CALL doctotext_formatting_style_set_list_prefix(DocToTextFormattingStyle* formatting_style, const char* prefix);
95 
96 /*-------------------------------------------------------------------------------------------------------------------------*/
97 
/**
 * Opaque handle for extraction parameters. Only the struct tag is declared
 * in this header, so client code can hold pointers to it but cannot access
 * its members. Pointers of this type are accepted by the
 * doctotext_process_file*() and doctotext_extract_metadata*() functions.
 */
98 typedef struct DocToTextExtractorParams DocToTextExtractorParams;
99 
/**
 * Returns a pointer to a DocToTextExtractorParams.
 *
 * The parameter list is spelled (void) so that C compilers see a real
 * prototype and reject calls that pass arguments; an empty () in C (before
 * C23) leaves the parameters unspecified. In C++ both spellings are
 * equivalent, so existing callers are unaffected.
 *
 * NOTE(review): presumably the result is a newly created object that the
 * caller releases with doctotext_free_extractor_params(); the failure value
 * is not visible in this header - confirm against the implementation.
 */
106 DocToTextExtractorParams* DOCTOTEXT_CALL doctotext_create_extractor_params(void);
107 
/**
 * Takes a pointer to a DocToTextExtractorParams and returns nothing.
 *
 * NOTE(review): presumably destroys an object obtained from
 * doctotext_create_extractor_params() - confirm.
 */
114 void DOCTOTEXT_CALL doctotext_free_extractor_params(DocToTextExtractorParams* extractor_params);
115 
/**
 * Takes extractor parameters and an int flag named verbose. Returns nothing.
 *
 * NOTE(review): presumably a non-zero value turns verbose logging on - confirm.
 */
121 void DOCTOTEXT_CALL doctotext_extractor_params_set_verbose_logging(DocToTextExtractorParams* extractor_params, int verbose);
122 
/**
 * Takes extractor parameters and a read-only C string naming a log file.
 * Returns nothing.
 */
128 void DOCTOTEXT_CALL doctotext_extractor_params_set_log_file(DocToTextExtractorParams* extractor_params, const char* log_file_name);
129 
/**
 * Takes extractor parameters and a doctotext_parser_type value (one of the
 * DOCTOTEXT_PARSER_* codes). Returns nothing.
 */
135 void DOCTOTEXT_CALL doctotext_extractor_params_set_parser_type(DocToTextExtractorParams* extractor_params, doctotext_parser_type parser_type);
136 
/**
 * Takes extractor parameters and a doctotext_xml_parse_mode value
 * (DOCTOTEXT_PARSE_XML, DOCTOTEXT_FIX_XML or DOCTOTEXT_STRIP_XML).
 * Returns nothing.
 */
142 void DOCTOTEXT_CALL doctotext_extractor_params_set_xml_parse_mode(DocToTextExtractorParams* extractor_params, doctotext_xml_parse_mode xml_parse_mode);
143 
/**
 * Takes extractor parameters and an int flag named manage_xml_parser.
 * Returns nothing.
 *
 * NOTE(review): presumably controls whether the library initialises and
 * cleans up its XML parser itself - the exact meaning is not visible in
 * this header; confirm.
 */
150 void DOCTOTEXT_CALL doctotext_extractor_params_set_manage_xml_parser(DocToTextExtractorParams* extractor_params, int manage_xml_parser);
151 
/**
 * Takes extractor parameters and a pointer to a DocToTextFormattingStyle.
 * Returns nothing.
 *
 * NOTE(review): whether the style is copied or only referenced is not
 * visible here - confirm before freeing the style while the parameters are
 * still in use.
 */
157 void DOCTOTEXT_CALL doctotext_extractor_params_set_formatting_style(DocToTextExtractorParams* extractor_params, DocToTextFormattingStyle* formatting_style);
158 
159 /*-------------------------------------------------------------------------------------------------------------------------*/
160 
/**
 * Opaque handle for an error report. Only the struct tag is declared here.
 * The processing and metadata functions at the end of this header take a
 * DocToTextException** out-parameter through which such a handle can be
 * handed back to the caller.
 */
161 typedef struct DocToTextException DocToTextException;
162 
/**
 * Takes a pointer to a DocToTextException and returns nothing.
 *
 * NOTE(review): presumably releases an exception object received through an
 * exception out-parameter - confirm.
 */
169 void DOCTOTEXT_CALL doctotext_free_exception(DocToTextException* exception);
170 
/**
 * Returns a size_t count for the given exception; by its name, the number of
 * error messages it carries.
 */
177 size_t DOCTOTEXT_CALL doctotext_exception_error_messages_count(DocToTextException* exception);
178 
/**
 * Returns a read-only C string for the given exception and message index.
 *
 * NOTE(review): presumably index_message must be smaller than
 * doctotext_exception_error_messages_count() and the string stays owned by
 * the exception - confirm.
 */
185 const char* DOCTOTEXT_CALL doctotext_exception_get_error_message(DocToTextException* exception, size_t index_message);
186 
187 /*-------------------------------------------------------------------------------------------------------------------------*/
188 
/**
 * Opaque handle for a link. Only the struct tag is declared here. An array
 * of pointers to such handles is returned by
 * doctotext_extracted_data_get_links().
 */
189 typedef struct DocToTextLink DocToTextLink;
190 
/**
 * Returns a read-only C string for the given link; by its name, the URL.
 */
196 const char* DOCTOTEXT_CALL doctotext_link_get_url(DocToTextLink* link);
197 
/**
 * Returns a read-only C string for the given link; by its name, the text of
 * the link.
 */
204 const char* DOCTOTEXT_CALL doctotext_link_get_link_text(DocToTextLink* link);
205 
/**
 * Returns a size_t position for the given link.
 *
 * NOTE(review): presumably an offset into the string returned by
 * doctotext_extracted_data_get_text(); the unit (bytes or characters) is
 * not visible here - confirm.
 */
211 size_t DOCTOTEXT_CALL doctotext_link_get_link_position(DocToTextLink* link);
212 
213 /*-------------------------------------------------------------------------------------------------------------------------*/
214 
/**
 * Opaque handle for a field value. Only the struct tag is declared here.
 * Pointers to it are returned by doctotext_attachment_get_field() and
 * doctotext_metadata_get_field(). The functions below offer four type
 * queries (null, string, number, date/time) and three typed getters.
 */
215 typedef struct DocToTextVariant DocToTextVariant;
216 
/**
 * Type query returning an int.
 *
 * NOTE(review): presumably non-zero when the variant holds no value - confirm.
 */
222 int DOCTOTEXT_CALL doctotext_variant_is_null(DocToTextVariant* variant);
223 
/**
 * Type query returning an int.
 *
 * NOTE(review): presumably non-zero when the variant holds a string - confirm.
 */
229 int DOCTOTEXT_CALL doctotext_variant_is_string(DocToTextVariant* variant);
230 
/**
 * Type query returning an int.
 *
 * NOTE(review): presumably non-zero when the variant holds a number - confirm.
 */
236 int DOCTOTEXT_CALL doctotext_variant_is_number(DocToTextVariant* variant);
237 
/**
 * Type query returning an int.
 *
 * NOTE(review): presumably non-zero when the variant holds a date/time -
 * confirm.
 */
243 int DOCTOTEXT_CALL doctotext_variant_is_date_time(DocToTextVariant* variant);
244 
/**
 * Returns the variant's value as a read-only C string.
 *
 * NOTE(review): the result for a variant that is not a string is not visible
 * here - check doctotext_variant_is_string() first until confirmed.
 */
251 const char* DOCTOTEXT_CALL doctotext_variant_get_string(DocToTextVariant* variant);
252 
/**
 * Returns the variant's value as a size_t, i.e. an unsigned quantity.
 *
 * NOTE(review): the result for a variant that is not a number is not visible
 * here - check doctotext_variant_is_number() first until confirmed.
 */
258 size_t DOCTOTEXT_CALL doctotext_variant_get_number(DocToTextVariant* variant);
259 
/**
 * Returns the variant's value as a pointer to a read-only struct tm.
 *
 * NOTE(review): the result for a variant that is not a date/time, and the
 * lifetime of the pointed-to struct, are not visible here - confirm.
 */
265 const struct tm* DOCTOTEXT_CALL doctotext_variant_get_date_time(DocToTextVariant* variant);
266 
267 /*-------------------------------------------------------------------------------------------------------------------------*/
268 
/**
 * Opaque handle for an attachment. Only the struct tag is declared here. An
 * array of pointers to such handles is returned by
 * doctotext_extracted_data_get_attachments().
 */
269 typedef struct DocToTextAttachment DocToTextAttachment;
270 
/**
 * Returns a read-only C string for the given attachment; by its name, the
 * attachment's file name.
 */
276 const char* DOCTOTEXT_CALL doctotext_attachment_get_file_name(DocToTextAttachment* attachment);
277 
/**
 * Returns a read-only char pointer to the attachment's content.
 *
 * NOTE(review): as the content is called binary it presumably may contain
 * zero bytes, so use doctotext_attachment_get_binary_content_size() for its
 * length rather than strlen() - confirm.
 */
283 const char* DOCTOTEXT_CALL doctotext_attachment_get_binary_content(DocToTextAttachment* attachment);
284 
/**
 * Returns a size_t for the given attachment; by its name, the size of the
 * buffer returned by doctotext_attachment_get_binary_content().
 */
290 size_t DOCTOTEXT_CALL doctotext_attachment_get_binary_content_size(DocToTextAttachment* attachment);
291 
/**
 * Takes an attachment and a read-only key string; returns an int.
 *
 * NOTE(review): presumably non-zero when a field with that key exists -
 * confirm.
 */
299 int DOCTOTEXT_CALL doctotext_attachment_has_field(DocToTextAttachment* attachment, const char* key);
300 
/**
 * Takes an attachment and a read-only key string; returns a pointer to a
 * DocToTextVariant.
 *
 * NOTE(review): the result for an unknown key and the ownership of the
 * returned variant are not visible here - confirm.
 */
308 DocToTextVariant* DOCTOTEXT_CALL doctotext_attachment_get_field(DocToTextAttachment* attachment, const char* key);
309 
/**
 * Returns a size_t count for the given attachment; by its name, the number
 * of fields.
 */
315 size_t DOCTOTEXT_CALL doctotext_attachment_fields_count(DocToTextAttachment* attachment);
316 
/**
 * Returns an array of C strings (char**) for the given attachment.
 *
 * NOTE(review): presumably holds doctotext_attachment_fields_count() keys and
 * stays owned by the attachment - confirm.
 */
322 char** DOCTOTEXT_CALL doctotext_attachment_get_keys(DocToTextAttachment* attachment);
323 
324 /*-------------------------------------------------------------------------------------------------------------------------*/
325 
/**
 * Opaque handle for an extraction result. Only the struct tag is declared
 * here. Pointers to it are returned by doctotext_process_file() and
 * doctotext_process_file_from_buffer().
 */
326 typedef struct DocToTextExtractedData DocToTextExtractedData;
327 
/**
 * Takes a pointer to a DocToTextExtractedData and returns nothing.
 *
 * NOTE(review): presumably releases a result returned by one of the
 * doctotext_process_file*() functions, together with the text, links and
 * attachments reachable through it - confirm.
 */
334 void DOCTOTEXT_CALL doctotext_free_extracted_data(DocToTextExtractedData* extracted_data);
335 
/**
 * Returns a read-only C string for the given result; by its name, the
 * extracted text.
 */
341 const char* DOCTOTEXT_CALL doctotext_extracted_data_get_text(DocToTextExtractedData* extracted_data);
342 
/**
 * Returns an array of DocToTextLink pointers for the given result.
 *
 * NOTE(review): presumably holds doctotext_extracted_data_get_links_count()
 * entries - confirm.
 */
349 DocToTextLink** DOCTOTEXT_CALL doctotext_extracted_data_get_links(DocToTextExtractedData* extracted_data);
350 
/**
 * Returns a size_t count for the given result; by its name, the number of
 * links.
 */
356 size_t DOCTOTEXT_CALL doctotext_extracted_data_get_links_count(DocToTextExtractedData* extracted_data);
357 
/**
 * Returns an array of DocToTextAttachment pointers for the given result.
 *
 * NOTE(review): presumably holds
 * doctotext_extracted_data_get_attachments_count() entries - confirm.
 */
364 DocToTextAttachment** DOCTOTEXT_CALL doctotext_extracted_data_get_attachments(DocToTextExtractedData* extracted_data);
365 
/**
 * Returns a size_t count for the given result; by its name, the number of
 * attachments.
 */
371 size_t DOCTOTEXT_CALL doctotext_extracted_data_get_attachments_count(DocToTextExtractedData* extracted_data);
372 
373 /*-------------------------------------------------------------------------------------------------------------------------*/
374 
/**
 * Opaque handle for document metadata. Only the struct tag is declared here.
 * Pointers to it are returned by doctotext_extract_metadata() and
 * doctotext_extract_metadata_from_buffer().
 *
 * Six properties (author, last_modify_by, creation_date,
 * last_modification_date, pages_count, words_count) each have a value getter
 * and a companion *_type() getter returning a doctotext_metadata_type code.
 * The remaining functions give keyed access to fields as DocToTextVariant
 * values.
 */
375 typedef struct DocToTextMetadata DocToTextMetadata;
376 
/**
 * Takes a pointer to a DocToTextMetadata and returns nothing.
 *
 * NOTE(review): presumably releases an object returned by one of the
 * doctotext_extract_metadata*() functions - confirm.
 */
385 void DOCTOTEXT_CALL doctotext_free_metadata(DocToTextMetadata* metadata);
386 
/** Returns the author as a read-only C string. */
392 const char* DOCTOTEXT_CALL doctotext_metadata_author(DocToTextMetadata* metadata);
393 
/** Returns the doctotext_metadata_type code associated with the author value. */
399 doctotext_metadata_type DOCTOTEXT_CALL doctotext_metadata_author_type(DocToTextMetadata* metadata);
400 
/**
 * Returns a read-only C string; by its name, the person who last modified
 * the document.
 */
406 const char* DOCTOTEXT_CALL doctotext_metadata_last_modify_by(DocToTextMetadata* metadata);
407 
/** Returns the doctotext_metadata_type code associated with the last_modify_by value. */
413 doctotext_metadata_type DOCTOTEXT_CALL doctotext_metadata_last_modify_by_type(DocToTextMetadata* metadata);
414 
/**
 * Returns the creation date as a pointer to a read-only struct tm.
 *
 * NOTE(review): time zone and lifetime of the pointed-to struct are not
 * visible here - confirm.
 */
420 const struct tm* DOCTOTEXT_CALL doctotext_metadata_creation_date(DocToTextMetadata* metadata);
421 
/** Returns the doctotext_metadata_type code associated with the creation date. */
427 doctotext_metadata_type DOCTOTEXT_CALL doctotext_metadata_creation_date_type(DocToTextMetadata* metadata);
428 
/**
 * Returns the last modification date as a pointer to a read-only struct tm.
 *
 * NOTE(review): time zone and lifetime of the pointed-to struct are not
 * visible here - confirm.
 */
434 const struct tm* DOCTOTEXT_CALL doctotext_metadata_last_modification_date(DocToTextMetadata* metadata);
435 
/** Returns the doctotext_metadata_type code associated with the last modification date. */
441 doctotext_metadata_type DOCTOTEXT_CALL doctotext_metadata_last_modification_date_type(DocToTextMetadata* metadata);
442 
/** Returns the page count as a size_t. */
448 size_t DOCTOTEXT_CALL doctotext_metadata_pages_count(DocToTextMetadata* metadata);
449 
/** Returns the doctotext_metadata_type code associated with the page count. */
455 doctotext_metadata_type DOCTOTEXT_CALL doctotext_metadata_pages_count_type(DocToTextMetadata* metadata);
456 
/** Returns the word count as a size_t. */
462 size_t DOCTOTEXT_CALL doctotext_metadata_words_count(DocToTextMetadata* metadata);
463 
/** Returns the doctotext_metadata_type code associated with the word count. */
469 doctotext_metadata_type DOCTOTEXT_CALL doctotext_metadata_words_count_type(DocToTextMetadata* metadata);
470 
/**
 * Takes metadata and a read-only key string; returns an int.
 *
 * NOTE(review): presumably non-zero when a field with that key exists; the
 * set of valid keys is not visible here - confirm.
 */
479 int DOCTOTEXT_CALL doctotext_metadata_has_field(DocToTextMetadata* metadata, const char* key);
480 
/**
 * Takes metadata and a read-only key string; returns a pointer to a
 * DocToTextVariant.
 *
 * NOTE(review): the result for an unknown key and the ownership of the
 * returned variant are not visible here - confirm.
 */
491 DocToTextVariant* DOCTOTEXT_CALL doctotext_metadata_get_field(DocToTextMetadata* metadata, const char* key);
492 
/** Returns a size_t count; by its name, the number of fields in the metadata. */
500 size_t DOCTOTEXT_CALL doctotext_metadata_fields_count(DocToTextMetadata* metadata);
501 
/**
 * Returns an array of C strings (char**) for the given metadata.
 *
 * NOTE(review): presumably holds doctotext_metadata_fields_count() keys and
 * stays owned by the metadata object - confirm.
 */
509 char** DOCTOTEXT_CALL doctotext_metadata_get_keys(DocToTextMetadata* metadata);
510 
511 /*-------------------------------------------------------------------------------------------------------------------------*/
512 
/**
 * Takes a file name, extractor parameters and a DocToTextException**
 * out-parameter; returns a pointer to a DocToTextExtractedData.
 *
 * NOTE(review): presumably returns NULL and stores an exception in
 * *exception on failure, and the caller releases the result with
 * doctotext_free_extracted_data() and the exception with
 * doctotext_free_exception() - confirm.
 */
526 DocToTextExtractedData* DOCTOTEXT_CALL doctotext_process_file(const char* file_name, DocToTextExtractorParams* extractor_params, DocToTextException** exception);
527 
/**
 * Same parameter and return shape as doctotext_process_file(), except that
 * the document is supplied as a read-only memory buffer and its size
 * instead of a file name.
 *
 * NOTE(review): failure reporting and ownership are presumably the same as
 * for doctotext_process_file() - confirm.
 */
540 DocToTextExtractedData* DOCTOTEXT_CALL doctotext_process_file_from_buffer(const char* buffer, size_t size, DocToTextExtractorParams* extractor_params, DocToTextException** exception);
541 
/**
 * Takes a file name, extractor parameters and a DocToTextException**
 * out-parameter; returns a pointer to a DocToTextMetadata.
 *
 * NOTE(review): presumably returns NULL and stores an exception in
 * *exception on failure, and the caller releases the result with
 * doctotext_free_metadata() - confirm.
 */
555 DocToTextMetadata* DOCTOTEXT_CALL doctotext_extract_metadata(const char* file_name, DocToTextExtractorParams* extractor_params, DocToTextException** exception);
556 
/**
 * Same parameter and return shape as doctotext_extract_metadata(), except
 * that the document is supplied as a read-only memory buffer and its size
 * instead of a file name.
 */
569 DocToTextMetadata* DOCTOTEXT_CALL doctotext_extract_metadata_from_buffer(const char* buffer, size_t size, DocToTextExtractorParams* extractor_params, DocToTextException** exception);
570 
/**
 * Takes a file name and a pointer through which a doctotext_parser_type can
 * be written; returns an int.
 *
 * NOTE(review): presumably picks the parser from the file name's extension;
 * the meaning of the int result (success flag or error code) is not visible
 * here - confirm.
 */
578 int DOCTOTEXT_CALL doctotext_parser_type_by_file_extension(const char* file_name, doctotext_parser_type* parser_type);
579 
/**
 * Takes a file name and a pointer through which a doctotext_parser_type can
 * be written; returns an int.
 *
 * NOTE(review): presumably inspects the file's content to pick the parser;
 * the meaning of the int result is not visible here - confirm.
 */
586 int DOCTOTEXT_CALL doctotext_parser_type_by_file_content(const char *file_name, doctotext_parser_type *parser_type);
587 
/**
 * Same as doctotext_parser_type_by_file_content() in shape, except that the
 * document is supplied as a read-only memory buffer and its size instead of
 * a file name.
 */
596 int DOCTOTEXT_CALL doctotext_parser_type_by_file_content_from_buffer(const char* buffer, size_t size, doctotext_parser_type* parser_type);
597 
/* When compiled as C++, close the extern "C" block opened near the top of this header. */
598 #ifdef __cplusplus
599 }
600 #endif