#! python2
# coding: utf-8
"""Run pdfminer layout analysis over every page of a PDF file.

The path of the PDF is taken from the first command-line argument.
Each page is fed through a ``PDFPageInterpreter`` whose device is a
``PDFPageAggregator``; the aggregator's result for the page is bound to
``page_layout``.
"""
import sys

from pdfminer import pdfparser
from pdfminer import pdfdocument
from pdfminer import pdfinterp
from pdfminer import pdfpage
from pdfminer import converter
from pdfminer import layout

# NOTE(review): the original snippet used ``file_path`` without ever defining
# it (and imported ``sys`` without using it). Reading the path from the first
# command-line argument is an assumption about the intent -- confirm.
file_path = sys.argv[1]

# ``open`` replaces the Python-2-only ``file`` builtin; it behaves the same
# here and keeps the script runnable on newer interpreters as well.
with open(file_path, 'rb') as fp:
    parser = pdfparser.PDFParser(fp)
    document = pdfdocument.PDFDocument(parser)

    # Refuse to continue when the document reports it is not extractable.
    if not document.is_extractable:
        raise pdfdocument.PDFTextExtractionNotAllowed

    # Build the interpreter pipeline: resource manager -> aggregating
    # device (with default layout-analysis parameters) -> page interpreter.
    rsrcmgr = pdfinterp.PDFResourceManager()
    laparams = layout.LAParams()
    device = converter.PDFPageAggregator(rsrcmgr, laparams=laparams)
    interpreter = pdfinterp.PDFPageInterpreter(rsrcmgr, device)

    # The loop stays inside the ``with`` block so the file handle is still
    # open while pages are being processed.
    pdf_pages = pdfpage.PDFPage.create_pages(document)
    for page in pdf_pages:
        interpreter.process_page(page)
        # ``page_layout`` is rebound on every iteration, so after the loop
        # it refers to the result obtained for the last page only.
        page_layout = device.get_result()