pip install pdfplumber
pip install pymupdf
# Print the text of a single page of a PDF using pdfplumber.
import pdfplumber

file_path = r'C:\xxxx\practice.PDF'

# The context manager closes the PDF when the block ends.
with pdfplumber.open(file_path) as pdf:
    page = pdf.pages[11]  # zero-based index, i.e. the twelfth page
    print(page.extract_text())
# Print one table found on a single page of a PDF using pdfplumber.
import pdfplumber

file_path = r'C:\xxxx\practice.PDF'

# The context manager closes the PDF when the block ends.
with pdfplumber.open(file_path) as pdf:
    page = pdf.pages[12]  # zero-based index, i.e. the thirteenth page
    # Per the pdfplumber documentation, extract_table() returns the largest
    # table on the page as a list of rows, each row being a list of cells.
    print(page.extract_table())
# Print every table found on a single page of a PDF using pdfplumber.
import pdfplumber

file_path = r'C:\xxxx\practice.PDF'

# The context manager closes the PDF when the block ends.
with pdfplumber.open(file_path) as pdf:
    page = pdf.pages[12]  # zero-based index, i.e. the thirteenth page
    # Per the pdfplumber documentation, extract_tables() returns all tables
    # on the page, structured as table -> row -> cell.
    print(page.extract_tables())
import os
import re

import fitz  # PyMuPDF

file_path = r'C:\xxx\practice.PDF'  # PDF to extract images from
dir_path = r'C:\xxx'  # folder where the extracted images are stored
def pdf2pic(path, pic_path):
    """Extract every embedded image of a PDF and save each one as a PNG.

    The function walks the PDF cross-reference table, selects the objects
    whose definition declares both ``/Type /XObject`` and ``/Subtype /Image``
    and writes them to ``pic_path`` as ``img_1.png``, ``img_2.png``, ...

    Args:
        path: Path of the PDF file to read.
        pic_path: Directory that receives the PNG files. It is created when
            it does not exist yet.

    Returns:
        The number of images written.
    """
    # Compile once; the patterns are applied to every object definition.
    xobject_re = re.compile(r"/Type(?= */XObject)")
    image_re = re.compile(r"/Subtype(?= */Image)")

    # An empty pic_path means "current directory"; makedirs("") would raise.
    if pic_path:
        os.makedirs(pic_path, exist_ok=True)

    pdf = fitz.open(path)
    try:
        # PyMuPDF renamed these accessors (_getXrefLength -> xref_length,
        # _getXrefString -> xref_object). Prefer the current names and fall
        # back to the legacy ones so both old and new releases work.
        xref_length = getattr(pdf, "xref_length", None) or pdf._getXrefLength
        xref_object = getattr(pdf, "xref_object", None) or pdf._getXrefString

        imgcount = 0
        # xref 0 is reserved, so real objects start at 1.
        for xref in range(1, xref_length()):
            definition = xref_object(xref)
            if not (xobject_re.search(definition) and image_re.search(definition)):
                continue

            imgcount += 1
            pix = fitz.Pixmap(pdf, xref)
            # PNG cannot store CMYK. Count colour components without the
            # alpha channel so that CMYK *without* alpha (n == 4) is also
            # converted to RGB before saving.
            if pix.n - pix.alpha > 3:
                pix = fitz.Pixmap(fitz.csRGB, pix)

            # Pixmap.writePNG was renamed to Pixmap.save; same fallback idea.
            save = getattr(pix, "save", None) or pix.writePNG
            save(os.path.join(pic_path, f"img_{imgcount}.png"))
            pix = None  # drop the reference so the pixmap can be freed early
    finally:
        # Always release the document, even when an image fails to convert.
        pdf.close()

    return imgcount
# Run the extraction: read the PDF at file_path and write its images to dir_path.
pdf2pic(file_path, dir_path)
更多精彩推荐
☞2020 最烂密码 TOP 200 大曝光,霸榜的竟然是它?
☞腾讯 AI 医学进展破解“秃头”难题,登 Nature 子刊!
☞小鹏汽车CEO疑似隔空回应偷窃技术传闻;苹果明年新款iPhone将使用增强版5nm芯片;Windows诞生35周年|极客头条
点分享
点点赞
点在看