tags:

views:

16

answers:

0

I use Apache POI HWPF to extract text from a .doc file, but I found that the extracted text does not include the chapter numbers. Can POI extract the chapter numbers along with the text?

/**
 * Reads the Word document at a fixed path and prints its extracted text to
 * standard output.
 *
 * <p>If the file cannot be opened or parsed, the exception message is printed
 * and the method returns without printing any document text.
 */
public void readDocFile() {
    File docFile = new File("C:\\Documents and Settings\\Administrator\\Desktop\\Topo6.doc");

    // try-with-resources closes the stream even when parsing fails.
    try (FileInputStream fis = new FileInputStream(docFile.getAbsolutePath())) {
        // A HWPFDocument is used to read the document file from the FileInputStream.
        HWPFDocument doc = new HWPFDocument(fis);
        WordExtractor docExtractor = new WordExtractor(doc);

        // getText() yields the extracted text as a single String.
        String text = docExtractor.getText();
        System.out.println(text);
    } catch (Exception exep) {
        // Extraction happens inside the try block, so a failure here can no
        // longer lead to dereferencing a null extractor afterwards.
        System.out.println(exep.getMessage());
    }
}