/*
 * Copyright 2010-2011 Øyvind Berg (elacin@gmail.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.elacin.pdfextract.datasource.poppler;

import com.sun.jna.*;
import com.sun.jna.ptr.PointerByReference;
import org.elacin.pdfextract.datasource.DocumentContent;
import org.elacin.pdfextract.datasource.PDFSource;
import org.elacin.pdfextract.datasource.RenderedPage;
import org.jetbrains.annotations.NotNull;

import java.io.File;

/**
 * Experimental {@link PDFSource} which talks to the native {@code poppler-glib} library through
 * JNA.
 *
 * <p>At the moment {@link #readPages()} only opens the document, prints the text of the first
 * page together with the elapsed time to standard output, and returns an empty
 * {@link DocumentContent}. {@link #renderPage(int)} is not implemented.
 *
 * <p>Created 06.06.11 by elacin.
 */
public class PopplerDataSource implements PDFSource {
    // ------------------------------ FIELDS ------------------------------

    /** The PDF file this source reads from. */
    final File file;

    /** Document opened by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_DEMO_PATH =
            "/Users/elacin/projects/evaluation/docs/C02-1013.pdf";

    /** Password handed to poppler when opening the document. */
    private static final String emptyPassword = "";

    /** Byte offsets of the members of a native GError: { GQuark domain; gint code; gchar *message; }. */
    private static final long GERROR_DOMAIN_OFFSET = 0;
    private static final long GERROR_CODE_OFFSET = 4;
    private static final long GERROR_MESSAGE_OFFSET = 8;

    private final PopplerInterface poppler = PopplerInterface.INSTANCE;

    // --------------------------- CONSTRUCTORS ---------------------------

    /**
     * @param file the PDF file to read; it is not touched until {@link #readPages()} is called
     */
    public PopplerDataSource(final File file) {
        this.file = file;
    }

    // ------------------------ INTERFACE METHODS ------------------------

    // --------------------- Interface PDFSource ---------------------

    /** Does nothing; no native resources are released here. */
    public void closeSource() {}

    /**
     * Opens {@link #file} with poppler, prints the text of the first page (if the document has
     * any pages) and the elapsed time to standard output, and returns a new, empty
     * {@link DocumentContent}.
     *
     * @return a freshly created {@link DocumentContent}; nothing is added to it yet
     * @throws IllegalStateException if this source was constructed with a {@code null} file
     * @throws RuntimeException      if poppler could not open the document; the message carries
     *                               the domain, code and text of the native error
     */
    @NotNull
    public DocumentContent readPages() {
        if (file == null) {
            throw new IllegalStateException("no PDF file was supplied to this data source");
        }

        /* initialize glib */
        poppler.g_type_init();

        /* open document */
        // poppler reports failures through a GError **; the referenced pointer must start out
        // as NULL, which is what a freshly constructed PointerByReference holds.
        final PointerByReference errorRef = new PointerByReference();
        final PopplerDocument popplerDocument =
                poppler.poppler_document_new_from_file(toFileUri(file), emptyPassword, errorRef);

        if (popplerDocument == null) {
            // The native GError is not freed here; the call is failing anyway.
            throw new RuntimeException(describeError(file, errorRef.getValue()));
        }

        long t0 = System.currentTimeMillis();

        final int numPages = poppler.poppler_document_get_n_pages(popplerDocument);

        if (numPages > 0) {
            // NOTE(review): neither the page nor the document is released afterwards - confirm
            // whether a g_object_unref mapping should be added.
            final Pointer firstPage = poppler.poppler_document_get_page(popplerDocument, 0);
            PopplerRectangle selection = new PopplerRectangle(0, 0, 1000, 1000);
            final String content = poppler.poppler_page_get_text(firstPage,
                    PopplerSelectionStyle.POPPLER_SELECTION_GLYPH, selection);

            System.out.println("content = " + content);
        }

        /* start reading content */
        final DocumentContent documentContent = new DocumentContent();

        System.out.println("t = " + (System.currentTimeMillis() - t0));

        return documentContent;
    }

    /**
     * Not implemented.
     *
     * <p>NOTE(review): this always returns {@code null} although the method is annotated
     * {@link NotNull}; callers must not rely on the annotation until rendering is implemented.
     *
     * @param page ignored
     * @return always {@code null}
     */
    @NotNull
    public RenderedPage renderPage(final int page) {
        return null;
    }

    // -------------------------- OTHER METHODS --------------------------

    /**
     * Builds the {@code file://} URI which poppler expects from a local file.
     *
     * <p>The path is made absolute and percent-encoded by {@link File#toURI()}; the
     * {@code file://} prefix is added by hand so the result always has the
     * {@code file:///path} form.
     */
    private static String toFileUri(final File file) {
        return "file://" + file.toURI().getRawPath();
    }

    /**
     * Formats the native GError behind {@code error} for use in an exception message.
     *
     * @param file  the file which failed to open, used when no error details are available
     * @param error pointer to the native GError, or {@code null} if poppler reported none
     */
    private static String describeError(final File file, final Pointer error) {
        if (error == null) {
            return "poppler could not open " + file + " (no error details were reported)";
        }

        final int domain = error.getInt(GERROR_DOMAIN_OFFSET);
        final int code = error.getInt(GERROR_CODE_OFFSET);
        final Pointer messagePointer = error.getPointer(GERROR_MESSAGE_OFFSET);
        final String message = (messagePointer == null) ? null : messagePointer.getString(0);

        return "GError{" + "domain=" + domain + ", code=" + code + ", " + "message='" + message
                + '\'' + " while opening " + file + '}';
    }

    // -------------------------- INNER CLASSES --------------------------

    /** JNA mapping of the functions used from the native {@code poppler-glib} library. */
    public interface PopplerInterface extends Library {
        PopplerInterface INSTANCE = (PopplerInterface)
                Native.loadLibrary("poppler-glib", PopplerInterface.class);

        /**
         * Opens the document at {@code uri}.
         *
         * @param error receives a pointer to a native GError when the call fails; the
         *              referenced pointer must be NULL on entry
         * @return the document, or {@code null} on failure
         */
        PopplerDocument poppler_document_new_from_file(String uri, String password,
                                                       PointerByReference error);

        /**
         * @deprecated passes a {@code GError *} where the native function expects a
         *             {@code GError **}, so the structure does not contain a usable error
         *             afterwards. Kept only for source compatibility; use the
         *             {@link PointerByReference} overload instead.
         */
        @Deprecated
        PopplerDocument poppler_document_new_from_file(String uri, String password, GError error);

        int poppler_document_get_n_pages(PopplerDocument doc);

        Pointer poppler_document_get_page(PopplerDocument doc, int index);

        String poppler_page_get_text(Pointer page, int style, PopplerRectangle rect);

        Pointer poppler_page_get_text_page(Pointer page);

        void poppler_page_finalize(Pointer page);

        void g_type_init();
    }

    /** Values accepted as the {@code style} argument of {@code poppler_page_get_text}. */
    public interface PopplerSelectionStyle {
        int POPPLER_SELECTION_GLYPH = 0, POPPLER_SELECTION_WORD = 1, POPPLER_SELECTION_LINE = 2;
    }

    /** Rectangle of four doubles, passed by reference to native code. */
    public static class PopplerRectangle extends Structure implements Structure.ByReference {
        public PopplerRectangle(final double x1,
                                final double y1,
                                final double x2,
                                final double y2) {
            this.x1 = x1;
            this.y1 = y1;
            this.x2 = x2;
            this.y2 = y2;
        }

        public double x1;
        public double y1;
        public double x2;
        public double y2;
    }

    /**
     * Handle for a native poppler document. The fields are never read by this class; the
     * structure is only passed back to poppler.
     *
     * <p>NOTE(review): {@code parent_instance} is mapped as a single pointer although the
     * comment says it is an embedded GObject - verify the layout before reading any field.
     */
    public static class PopplerDocument extends Structure implements Structure.ByReference {
        // GObject parent_instance;
        public Pointer parent_instance;

        // PDFDoc *doc;
        public Pointer doc;

        // GList *layers;
        public Pointer layers;

        // GList *layers_rbgroups;
        public Pointer layers_rbgroups;

        // CairoOutputDev *output_dev;
        public Pointer output_dev;
    }

    // public static class PopplerPage extends Structure implements Structure.ByReference {}

    /**
     * Java-side mapping of glib's GError. No longer used by {@link #readPages()}, which decodes
     * the native error directly.
     *
     * <p>NOTE(review): glib documents {@code domain} as a GQuark (a 32-bit integer), not a
     * string, so this field layout looks wrong - confirm before using the class again.
     */
    public static class GError extends Structure implements Structure.ByReference {
        public String domain;
        public int code;
        public String message;

        @Override
        public String toString() {
            return "GError{" + "domain='" + domain + '\'' + ", code=" + code + ", "
                    + "message='" + message + '\'' + '}';
        }
    }

    // --------------------------- main() method ---------------------------

    /**
     * Small manual test driver.
     *
     * @param args optional: path of the PDF to open; defaults to the original test document
     */
    public static void main(String[] args) {
        // Only supply the MacPorts default when the caller has not chosen a library path.
        if (System.getProperty("jna.library.path") == null) {
            System.setProperty("jna.library.path", "/opt/local/lib");
        }

        final String path = (args.length > 0) ? args[0] : DEFAULT_DEMO_PATH;
        new PopplerDataSource(new File(path)).readPages();
    }
}