/*
 * DOCTOTEXT_CALL is the calling-convention decoration placed on the function
 * declarations visible in this header. When _WIN32 is defined and _WIN64 is
 * not, it is defined as __cdecl.
 */
10 #if defined(_WIN32) && !defined(_WIN64)
11 #define DOCTOTEXT_CALL __cdecl
/*
 * Empty definition of DOCTOTEXT_CALL (the decoration expands to nothing).
 * NOTE(review): no #else / #endif is visible around these two definitions in
 * this listing; presumably this is the fallback branch for all other
 * targets - confirm against the complete header.
 */
13 #define DOCTOTEXT_CALL
/**
 * Integer type used for parser selectors. It is a plain int alias, so the
 * compiler does not restrict it to the DOCTOTEXT_PARSER_* values below.
 */
17 typedef int doctotext_parser_type;
/*
 * Parser selector constants, numbered consecutively from 0 to 10.
 * NOTE(review): judging by the names, AUTO presumably asks the library to
 * pick a parser itself and the remaining constants each name a document
 * format family - confirm against the implementation.
 */
19 #define DOCTOTEXT_PARSER_AUTO 0
20 #define DOCTOTEXT_PARSER_RTF 1
21 #define DOCTOTEXT_PARSER_ODF_OOXML 2
22 #define DOCTOTEXT_PARSER_XLS 3
23 #define DOCTOTEXT_PARSER_DOC 4
24 #define DOCTOTEXT_PARSER_PPT 5
25 #define DOCTOTEXT_PARSER_HTML 6
26 #define DOCTOTEXT_PARSER_IWORK 7
27 #define DOCTOTEXT_PARSER_XLSB 8
28 #define DOCTOTEXT_PARSER_PDF 9
29 #define DOCTOTEXT_PARSER_TXT 10
/**
 * Integer type used for the XML parse mode passed to
 * doctotext_extractor_params_set_xml_parse_mode(). Plain int alias.
 */
31 typedef int doctotext_xml_parse_mode;
/*
 * XML parse mode constants (0..2).
 * NOTE(review): the names suggest "parse as-is", "try to repair broken XML"
 * and "strip XML markup" respectively - confirm against the implementation.
 */
33 #define DOCTOTEXT_PARSE_XML 0
34 #define DOCTOTEXT_FIX_XML 1
35 #define DOCTOTEXT_STRIP_XML 2
/**
 * Integer type used for the table style passed to
 * doctotext_formatting_style_set_table_style(). Plain int alias.
 */
37 typedef int doctotext_table_style;
/*
 * Table style constants (0..2).
 * NOTE(review): the exact text layout each value produces is not visible in
 * this header - confirm against the implementation.
 */
39 #define DOCTOTEXT_TABLE_STYLE_TABLE_LOOK 0
40 #define DOCTOTEXT_TABLE_STYLE_ONE_ROW 1
41 #define DOCTOTEXT_TABLE_STYLE_ONE_COL 2
/**
 * Integer type used for the URL style passed to
 * doctotext_formatting_style_set_url_style(). Plain int alias.
 */
43 typedef int doctotext_url_style;
/*
 * URL style constants (0..2).
 * NOTE(review): how each value renders links in the extracted text is not
 * visible in this header - confirm against the implementation.
 */
45 #define DOCTOTEXT_URL_STYLE_TEXT_ONLY 0
46 #define DOCTOTEXT_URL_STYLE_EXTENDED 1
47 #define DOCTOTEXT_URL_STYLE_UNDERSCORED 2
/**
 * Integer type returned by the doctotext_metadata_*_type() functions
 * declared in this header. Plain int alias.
 */
49 typedef int doctotext_metadata_type;
/*
 * Metadata type constants (0..2).
 * NOTE(review): the names suggest "no value available", "value read from
 * the document" and "value estimated by the library" - confirm against the
 * implementation.
 */
51 #define DOCTOTEXT_METADATA_TYPE_NONE 0
52 #define DOCTOTEXT_METADATA_TYPE_EXTRACTED 1
53 #define DOCTOTEXT_METADATA_TYPE_ESTIMATED 2
/**
 * Opaque formatting-style handle. Only the struct tag is declared here, so
 * callers can hold pointers to it but cannot access its members.
 */
57 typedef struct DocToTextFormattingStyle DocToTextFormattingStyle;
/**
 * Returns a pointer to a DocToTextFormattingStyle.
 *
 * Declared with an explicit (void) parameter list so that C compilers treat
 * it as a full prototype and diagnose calls that pass arguments; an empty
 * "()" list leaves the parameters unspecified in C before C23.
 *
 * NOTE(review): presumably the returned object is newly created and must be
 * released with doctotext_free_formatting_style() - confirm against the
 * implementation.
 */
65 DocToTextFormattingStyle* DOCTOTEXT_CALL doctotext_create_formatting_style(void);
/**
 * Takes a formatting-style handle and returns nothing.
 * NOTE(review): presumably releases a handle obtained from
 * doctotext_create_formatting_style(); NULL handling is not visible here.
 */
73 void DOCTOTEXT_CALL doctotext_free_formatting_style(DocToTextFormattingStyle* formatting_style);
/**
 * Passes a doctotext_table_style value to the given formatting style.
 * @param formatting_style  style handle to modify
 * @param table_style       presumably one of DOCTOTEXT_TABLE_STYLE_* - confirm
 */
80 void DOCTOTEXT_CALL doctotext_formatting_style_set_table_style(DocToTextFormattingStyle* formatting_style, doctotext_table_style table_style);
/**
 * Passes a doctotext_url_style value to the given formatting style.
 * @param formatting_style  style handle to modify
 * @param url_style         presumably one of DOCTOTEXT_URL_STYLE_* - confirm
 */
87 void DOCTOTEXT_CALL doctotext_formatting_style_set_url_style(DocToTextFormattingStyle* formatting_style, doctotext_url_style url_style);
/**
 * Passes a list-prefix string to the given formatting style.
 * @param formatting_style  style handle to modify
 * @param prefix            C string, not modified through this pointer;
 *                          NOTE(review): presumably prepended to list items
 *                          and copied by the callee - confirm
 */
94 void DOCTOTEXT_CALL doctotext_formatting_style_set_list_prefix(DocToTextFormattingStyle* formatting_style,
const char* prefix);
/**
 * Opaque extractor-parameters handle. Only the struct tag is declared here,
 * so callers can hold pointers to it but cannot access its members.
 */
98 typedef struct DocToTextExtractorParams DocToTextExtractorParams;
/**
 * Returns a pointer to a DocToTextExtractorParams.
 *
 * Declared with an explicit (void) parameter list so that C compilers treat
 * it as a full prototype and diagnose calls that pass arguments; an empty
 * "()" list leaves the parameters unspecified in C before C23.
 *
 * NOTE(review): presumably the returned object is newly created and must be
 * released with doctotext_free_extractor_params() - confirm against the
 * implementation.
 */
106 DocToTextExtractorParams* DOCTOTEXT_CALL doctotext_create_extractor_params(void);
/**
 * Takes an extractor-parameters handle and returns nothing.
 * NOTE(review): presumably releases a handle obtained from
 * doctotext_create_extractor_params(); NULL handling is not visible here.
 */
114 void DOCTOTEXT_CALL doctotext_free_extractor_params(DocToTextExtractorParams* extractor_params);
/**
 * Passes a verbose-logging flag to the given parameters object.
 * @param extractor_params  parameters handle to modify
 * @param verbose           int flag; presumably non-zero enables verbose
 *                          logging - confirm
 */
121 void DOCTOTEXT_CALL doctotext_extractor_params_set_verbose_logging(DocToTextExtractorParams* extractor_params,
int verbose);
/**
 * Passes a log file name to the given parameters object.
 * @param extractor_params  parameters handle to modify
 * @param log_file_name     C string, not modified through this pointer
 */
128 void DOCTOTEXT_CALL doctotext_extractor_params_set_log_file(DocToTextExtractorParams* extractor_params,
const char* log_file_name);
/**
 * Passes a parser selector to the given parameters object.
 * @param extractor_params  parameters handle to modify
 * @param parser_type       presumably one of DOCTOTEXT_PARSER_* - confirm
 */
135 void DOCTOTEXT_CALL doctotext_extractor_params_set_parser_type(DocToTextExtractorParams* extractor_params, doctotext_parser_type parser_type);
/**
 * Passes an XML parse mode to the given parameters object.
 * @param extractor_params  parameters handle to modify
 * @param xml_parse_mode    presumably one of DOCTOTEXT_PARSE_XML,
 *                          DOCTOTEXT_FIX_XML, DOCTOTEXT_STRIP_XML - confirm
 */
142 void DOCTOTEXT_CALL doctotext_extractor_params_set_xml_parse_mode(DocToTextExtractorParams* extractor_params, doctotext_xml_parse_mode xml_parse_mode);
/**
 * Passes a "manage XML parser" flag to the given parameters object.
 * @param extractor_params   parameters handle to modify
 * @param manage_xml_parser  int flag; NOTE(review): presumably controls
 *                           whether the library initialises/cleans up the
 *                           underlying XML parser itself - confirm
 */
150 void DOCTOTEXT_CALL doctotext_extractor_params_set_manage_xml_parser(DocToTextExtractorParams* extractor_params,
int manage_xml_parser);
/**
 * Passes a formatting-style handle to the given parameters object.
 * @param extractor_params  parameters handle to modify
 * @param formatting_style  style handle; NOTE(review): whether the style is
 *                          copied or must outlive extractor_params is not
 *                          visible here - confirm
 */
157 void DOCTOTEXT_CALL doctotext_extractor_params_set_formatting_style(DocToTextExtractorParams* extractor_params, DocToTextFormattingStyle* formatting_style);
/**
 * Opaque exception handle. Only the struct tag is declared here. The
 * processing functions in this header take a DocToTextException** out
 * parameter of this type.
 */
161 typedef struct DocToTextException DocToTextException;
/**
 * Takes an exception handle and returns nothing.
 * NOTE(review): presumably releases an exception object handed back through
 * an "exception" out parameter - confirm against the implementation.
 */
169 void DOCTOTEXT_CALL doctotext_free_exception(DocToTextException* exception);
/**
 * Returns a size_t count for the given exception.
 * NOTE(review): presumably the number of messages retrievable with
 * doctotext_exception_get_error_message() - confirm.
 */
177 size_t DOCTOTEXT_CALL doctotext_exception_error_messages_count(DocToTextException* exception);
/**
 * Returns a read-only C string for the given exception and message index.
 * @param exception      exception handle to query
 * @param index_message  index of the requested message; NOTE(review):
 *                       presumably zero-based and below the messages count,
 *                       with the string owned by the exception - confirm
 */
185 const char* DOCTOTEXT_CALL doctotext_exception_get_error_message(DocToTextException* exception,
size_t index_message);
/**
 * Opaque link handle. Only the struct tag is declared here. Arrays of
 * pointers to it are returned by doctotext_extracted_data_get_links().
 */
189 typedef struct DocToTextLink DocToTextLink;
/**
 * Returns a read-only C string for the given link.
 * NOTE(review): presumably the link target URL, owned by the link object -
 * confirm.
 */
196 const char* DOCTOTEXT_CALL doctotext_link_get_url(DocToTextLink* link);
/**
 * Returns a read-only C string for the given link.
 * NOTE(review): presumably the visible text of the link, owned by the link
 * object - confirm.
 */
204 const char* DOCTOTEXT_CALL doctotext_link_get_link_text(DocToTextLink* link);
/**
 * Returns a size_t position for the given link.
 * NOTE(review): presumably an offset into the extracted text; the unit
 * (bytes vs characters) is not visible here - confirm.
 */
211 size_t DOCTOTEXT_CALL doctotext_link_get_link_position(DocToTextLink* link);
/**
 * Opaque variant handle. Only the struct tag is declared here. Pointers to
 * it are returned by doctotext_attachment_get_field() and
 * doctotext_metadata_get_field().
 */
215 typedef struct DocToTextVariant DocToTextVariant;
/*
 * Type predicates: each takes a variant handle and returns an int.
 * NOTE(review): presumably non-zero means the variant is empty / holds a
 * string / holds a number / holds a date-time respectively - confirm.
 */
222 int DOCTOTEXT_CALL doctotext_variant_is_null(DocToTextVariant* variant);
229 int DOCTOTEXT_CALL doctotext_variant_is_string(DocToTextVariant* variant);
236 int DOCTOTEXT_CALL doctotext_variant_is_number(DocToTextVariant* variant);
243 int DOCTOTEXT_CALL doctotext_variant_is_date_time(DocToTextVariant* variant);
/**
 * Returns a read-only C string for the given variant.
 * NOTE(review): behaviour when the variant does not hold a string, and the
 * lifetime of the returned pointer, are not visible here - confirm.
 */
251 const char* DOCTOTEXT_CALL doctotext_variant_get_string(DocToTextVariant* variant);
/**
 * Returns the variant's number as size_t (an unsigned type).
 * NOTE(review): behaviour when the variant does not hold a number is not
 * visible here - confirm.
 */
258 size_t DOCTOTEXT_CALL doctotext_variant_get_number(DocToTextVariant* variant);
/**
 * Returns a pointer to a read-only struct tm for the given variant.
 * NOTE(review): behaviour when the variant does not hold a date-time, and
 * the lifetime of the returned pointer, are not visible here - confirm.
 */
265 const struct tm* DOCTOTEXT_CALL doctotext_variant_get_date_time(DocToTextVariant* variant);
/**
 * Opaque attachment handle. Only the struct tag is declared here. Arrays of
 * pointers to it are returned by doctotext_extracted_data_get_attachments().
 */
269 typedef struct DocToTextAttachment DocToTextAttachment;
/**
 * Returns a read-only C string for the given attachment.
 * NOTE(review): presumably the attachment's file name, owned by the
 * attachment object - confirm.
 */
276 const char* DOCTOTEXT_CALL doctotext_attachment_get_file_name(DocToTextAttachment* attachment);
/**
 * Returns a read-only char pointer for the given attachment.
 * NOTE(review): presumably raw bytes that may contain embedded zeros, to be
 * read together with doctotext_attachment_get_binary_content_size() rather
 * than as a C string - confirm.
 */
283 const char* DOCTOTEXT_CALL doctotext_attachment_get_binary_content(DocToTextAttachment* attachment);
/**
 * Returns a size_t size for the given attachment.
 * NOTE(review): presumably the byte length of the buffer returned by
 * doctotext_attachment_get_binary_content() - confirm.
 */
290 size_t DOCTOTEXT_CALL doctotext_attachment_get_binary_content_size(DocToTextAttachment* attachment);
/**
 * Takes an attachment handle and a key string; returns an int.
 * NOTE(review): presumably non-zero when a field named by key exists -
 * confirm.
 */
299 int DOCTOTEXT_CALL doctotext_attachment_has_field(DocToTextAttachment* attachment,
const char* key);
/**
 * Takes an attachment handle and a key string; returns a variant handle.
 * NOTE(review): the result for a missing key and the ownership of the
 * returned variant are not visible here - confirm.
 */
308 DocToTextVariant* DOCTOTEXT_CALL doctotext_attachment_get_field(DocToTextAttachment* attachment,
const char* key);
/**
 * Returns a size_t count for the given attachment.
 * NOTE(review): presumably the number of fields, and therefore the length
 * of the array returned by doctotext_attachment_get_keys() - confirm.
 */
315 size_t DOCTOTEXT_CALL doctotext_attachment_fields_count(DocToTextAttachment* attachment);
/**
 * Returns a pointer to an array of char pointers for the given attachment.
 * NOTE(review): presumably the field keys; array length, termination and
 * ownership are not visible here - confirm.
 */
322 char** DOCTOTEXT_CALL doctotext_attachment_get_keys(DocToTextAttachment* attachment);
/**
 * Opaque extraction-result handle. Only the struct tag is declared here.
 * Pointers to it are returned by doctotext_process_file() and
 * doctotext_process_file_from_buffer().
 */
326 typedef struct DocToTextExtractedData DocToTextExtractedData;
/**
 * Takes an extraction-result handle and returns nothing.
 * NOTE(review): presumably releases the result together with the strings,
 * links and attachments reachable from it - confirm.
 */
334 void DOCTOTEXT_CALL doctotext_free_extracted_data(DocToTextExtractedData* extracted_data);
/**
 * Returns a read-only C string for the given result.
 * NOTE(review): presumably the extracted plain text, owned by the result
 * object; its encoding is not visible here - confirm.
 */
341 const char* DOCTOTEXT_CALL doctotext_extracted_data_get_text(DocToTextExtractedData* extracted_data);
/**
 * Returns a pointer to an array of link handles for the given result.
 * NOTE(review): presumably holds doctotext_extracted_data_get_links_count()
 * elements owned by the result object - confirm.
 */
349 DocToTextLink** DOCTOTEXT_CALL doctotext_extracted_data_get_links(DocToTextExtractedData* extracted_data);
/**
 * Returns a size_t link count for the given result.
 */
356 size_t DOCTOTEXT_CALL doctotext_extracted_data_get_links_count(DocToTextExtractedData* extracted_data);
/**
 * Returns a pointer to an array of attachment handles for the given result.
 * NOTE(review): presumably holds
 * doctotext_extracted_data_get_attachments_count() elements owned by the
 * result object - confirm.
 */
364 DocToTextAttachment** DOCTOTEXT_CALL doctotext_extracted_data_get_attachments(DocToTextExtractedData* extracted_data);
/**
 * Returns a size_t attachment count for the given result.
 */
371 size_t DOCTOTEXT_CALL doctotext_extracted_data_get_attachments_count(DocToTextExtractedData* extracted_data);
/**
 * Opaque metadata handle. Only the struct tag is declared here. Pointers to
 * it are returned by doctotext_extract_metadata() and
 * doctotext_extract_metadata_from_buffer().
 *
 * The accessors below come in pairs: a value getter and a matching *_type()
 * function returning a doctotext_metadata_type.
 * NOTE(review): presumably the *_type() result is one of
 * DOCTOTEXT_METADATA_TYPE_* and tells whether the paired value is absent,
 * extracted or estimated - confirm against the implementation.
 */
375 typedef struct DocToTextMetadata DocToTextMetadata;
/**
 * Takes a metadata handle and returns nothing.
 * NOTE(review): presumably releases the metadata object and everything
 * reachable from it - confirm.
 */
385 void DOCTOTEXT_CALL doctotext_free_metadata(DocToTextMetadata* metadata);
/* Author: read-only C string plus its metadata type. */
392 const char* DOCTOTEXT_CALL doctotext_metadata_author(DocToTextMetadata* metadata);
399 doctotext_metadata_type DOCTOTEXT_CALL doctotext_metadata_author_type(DocToTextMetadata* metadata);
/* "Last modify by": read-only C string plus its metadata type. */
406 const char* DOCTOTEXT_CALL doctotext_metadata_last_modify_by(DocToTextMetadata* metadata);
413 doctotext_metadata_type DOCTOTEXT_CALL doctotext_metadata_last_modify_by_type(DocToTextMetadata* metadata);
/*
 * Creation date: pointer to a read-only struct tm plus its metadata type.
 * NOTE(review): time zone and pointer lifetime are not visible here.
 */
420 const struct tm* DOCTOTEXT_CALL doctotext_metadata_creation_date(DocToTextMetadata* metadata);
427 doctotext_metadata_type DOCTOTEXT_CALL doctotext_metadata_creation_date_type(DocToTextMetadata* metadata);
/*
 * Last modification date: pointer to a read-only struct tm plus its
 * metadata type.
 * NOTE(review): time zone and pointer lifetime are not visible here.
 */
434 const struct tm* DOCTOTEXT_CALL doctotext_metadata_last_modification_date(DocToTextMetadata* metadata);
441 doctotext_metadata_type DOCTOTEXT_CALL doctotext_metadata_last_modification_date_type(DocToTextMetadata* metadata);
/* Pages count: size_t value plus its metadata type. */
448 size_t DOCTOTEXT_CALL doctotext_metadata_pages_count(DocToTextMetadata* metadata);
455 doctotext_metadata_type DOCTOTEXT_CALL doctotext_metadata_pages_count_type(DocToTextMetadata* metadata);
/* Words count: size_t value plus its metadata type. */
462 size_t DOCTOTEXT_CALL doctotext_metadata_words_count(DocToTextMetadata* metadata);
469 doctotext_metadata_type DOCTOTEXT_CALL doctotext_metadata_words_count_type(DocToTextMetadata* metadata);
/**
 * Takes a metadata handle and a key string; returns an int.
 * NOTE(review): presumably non-zero when a field named by key exists -
 * confirm.
 */
479 int DOCTOTEXT_CALL doctotext_metadata_has_field(DocToTextMetadata* metadata,
const char* key);
/**
 * Takes a metadata handle and a key string; returns a variant handle.
 * NOTE(review): the result for a missing key and the ownership of the
 * returned variant are not visible here - confirm.
 */
491 DocToTextVariant* DOCTOTEXT_CALL doctotext_metadata_get_field(DocToTextMetadata* metadata,
const char* key);
/**
 * Returns a size_t count for the given metadata object.
 * NOTE(review): presumably the number of fields, and therefore the length
 * of the array returned by doctotext_metadata_get_keys() - confirm.
 */
500 size_t DOCTOTEXT_CALL doctotext_metadata_fields_count(DocToTextMetadata* metadata);
/**
 * Returns a pointer to an array of char pointers for the given metadata.
 * NOTE(review): presumably the field keys; array length, termination and
 * ownership are not visible here - confirm.
 */
509 char** DOCTOTEXT_CALL doctotext_metadata_get_keys(DocToTextMetadata* metadata);
/**
 * Takes a file name, an extractor-parameters handle and an exception out
 * parameter; returns an extraction-result handle.
 *
 * @param file_name         C string, not modified through this pointer
 * @param extractor_params  parameters handle
 * @param exception         pointer to a DocToTextException pointer
 *
 * NOTE(review): presumably returns NULL and stores an exception object in
 * *exception on failure, with the result released via
 * doctotext_free_extracted_data() and the exception via
 * doctotext_free_exception() - confirm against the implementation.
 */
526 DocToTextExtractedData* DOCTOTEXT_CALL doctotext_process_file(
const char* file_name, DocToTextExtractorParams* extractor_params, DocToTextException** exception);
/**
 * Buffer-based counterpart of doctotext_process_file(): takes a read-only
 * buffer and its size instead of a file name, and returns an
 * extraction-result handle.
 *
 * @param buffer            read-only input bytes
 * @param size              size of buffer (presumably in bytes - confirm)
 * @param extractor_params  parameters handle
 * @param exception         pointer to a DocToTextException pointer
 *
 * NOTE(review): failure reporting and ownership presumably match
 * doctotext_process_file() - confirm against the implementation.
 */
540 DocToTextExtractedData* DOCTOTEXT_CALL doctotext_process_file_from_buffer(
const char* buffer,
size_t size, DocToTextExtractorParams* extractor_params, DocToTextException** exception);
/**
 * Takes a file name, an extractor-parameters handle and an exception out
 * parameter; returns a metadata handle.
 *
 * @param file_name         C string, not modified through this pointer
 * @param extractor_params  parameters handle
 * @param exception         pointer to a DocToTextException pointer
 *
 * NOTE(review): presumably returns NULL and stores an exception object in
 * *exception on failure, with the result released via
 * doctotext_free_metadata() - confirm against the implementation.
 */
555 DocToTextMetadata* DOCTOTEXT_CALL doctotext_extract_metadata(
const char* file_name, DocToTextExtractorParams* extractor_params, DocToTextException** exception);
/**
 * Buffer-based counterpart of doctotext_extract_metadata(): takes a
 * read-only buffer and its size instead of a file name, and returns a
 * metadata handle.
 *
 * @param buffer            read-only input bytes
 * @param size              size of buffer (presumably in bytes - confirm)
 * @param extractor_params  parameters handle
 * @param exception         pointer to a DocToTextException pointer
 *
 * NOTE(review): failure reporting and ownership presumably match
 * doctotext_extract_metadata() - confirm against the implementation.
 */
569 DocToTextMetadata* DOCTOTEXT_CALL doctotext_extract_metadata_from_buffer(
const char* buffer,
size_t size, DocToTextExtractorParams* extractor_params, DocToTextException** exception);
/**
 * Takes a file name and a pointer to a doctotext_parser_type; returns an
 * int.
 *
 * @param file_name    C string, not modified through this pointer
 * @param parser_type  output location for the parser selector
 *
 * NOTE(review): presumably chooses the parser from the file name's
 * extension without opening the file, and uses the int result as a
 * success/failure status - confirm the exact return convention.
 */
578 int DOCTOTEXT_CALL doctotext_parser_type_by_file_extension(
const char* file_name, doctotext_parser_type* parser_type);
/**
 * Takes a file name and a pointer to a doctotext_parser_type; returns an
 * int.
 *
 * @param file_name    C string, not modified through this pointer
 * @param parser_type  output location for the parser selector
 *
 * NOTE(review): presumably reads the named file and chooses the parser from
 * its content, using the int result as a success/failure status - confirm
 * the exact return convention.
 */
586 int DOCTOTEXT_CALL doctotext_parser_type_by_file_content(
const char *file_name, doctotext_parser_type *parser_type);
/**
 * Buffer-based counterpart of doctotext_parser_type_by_file_content():
 * takes a read-only buffer and its size instead of a file name.
 *
 * @param buffer       read-only input bytes
 * @param size         size of buffer (presumably in bytes - confirm)
 * @param parser_type  output location for the parser selector
 *
 * NOTE(review): presumably uses the int result as a success/failure status
 * - confirm the exact return convention.
 */
596 int DOCTOTEXT_CALL doctotext_parser_type_by_file_content_from_buffer(
const char* buffer,
size_t size, doctotext_parser_type* parser_type);