Python 批量合并 pdf

"""合并多个 pdf 到一个,使用方式:放到当前文件夹下运行。 pyPdf 库有点老了"""importos.pathfrompyPdfimportPdfFileReader,PdfFileWriter# pip install pyPdfdefget_pdf_files(dst_dir):paths=[]forroot,dirs,filesinos.walk(dst_dir):forfilespathinfiles:iffilespath.endswith('.pdf'):# pdf fileabspath=os.path.join(root,filespath)paths.append(abspath)returnpaths##########################合并同一个文件夹下所有PDF文件########################defmerge_pdf(dst_dir,outfile,sort=True):output=PdfFileWriter()curpage=0pdf_paths=sorted(get_pdf_files(dst_dir))ifsortelseget_pdf_files(dst_dir)foreachinpdf_paths:print(each)reader=PdfFileReader(file(each,"rb"))# 如果pdf文件已经加密,必须首先解密才能使用pyPdfifreader.isEncrypted==True:reader.decrypt("map")# 获得源pdf文件中页面总数page_count=reader.getNumPages()curpage+=page_countprint(page_count)foriPageinrange(0,page_count):output.addPage(reader.getPage(iPage))print("All Pages Number:"+str(curpage))outputStream=file(dst_dir+outfile,"wb")output.write(outputStream)outputStream.close()defmain():merged="all.pdf"merge_pdf("./",merged)if__name__=='__main__':main()

推荐使用下面这个 pikepdf 库来批量操作 PDF,因为 pyPdf 库已经很久没有更新了。

# -*- coding: utf-8 -*-
"""Merge multiple PDF files into one with pikepdf, one outline entry per file."""
import os.path
from contextlib import ExitStack

from pikepdf import Pdf, OutlineItem  # pip install pikepdf


def get_pdf_files(dst_dir):
    """Return the paths of every PDF file found under *dst_dir*.

    The directory tree is walked recursively. Each returned path is the
    walked directory joined with the file name, so it is only absolute when
    *dst_dir* itself is absolute. The ``.pdf`` suffix is matched
    case-insensitively so that files named ``*.PDF`` are picked up as well.
    """
    paths = []
    for root, _dirs, files in os.walk(dst_dir):
        for filename in files:
            if filename.lower().endswith('.pdf'):
                paths.append(os.path.join(root, filename))
    return paths


def merge_pdf(dst_dir, outfile, sort=True):
    """Concatenate every PDF under *dst_dir* and save the result as *outfile*.

    Args:
        dst_dir: Folder that is searched (recursively) for PDF files.
        outfile: Path of the merged PDF; it is passed to ``Pdf.save`` as
            given, so a relative path is relative to the working directory.
        sort: When true, the input files are merged in sorted path order;
            otherwise in the order produced by the directory walk.

    An outline (bookmark) entry named after each source file is added,
    pointing at the first page contributed by that file.
    """
    # https://pikepdf.readthedocs.io/en/latest/topics/pages.html#merge-concatenate-pdf-from-several-pdfs
    out_key = os.path.normcase(os.path.abspath(outfile))

    # Skip the output file itself so a re-run does not merge the previous
    # result into the new one.
    pdf_paths = [
        path for path in get_pdf_files(dst_dir)
        if os.path.normcase(os.path.abspath(path)) != out_key
    ]
    if sort:
        pdf_paths.sort()
    if not pdf_paths:
        print("No PDF files found in " + dst_dir)
        return

    pdf = Pdf.new()
    page_count = 0  # pages merged so far == index of the next file's first page

    # Source documents are kept open until the merged file has been saved,
    # then all of them are closed when the stack exits.
    with ExitStack() as sources:
        with pdf.open_outline() as outline:
            for path in pdf_paths:
                src = sources.enter_context(Pdf.open(path))
                print("merging:" + path)  # progress output

                # Add a table-of-contents entry for this file.
                entry = OutlineItem(os.path.basename(path), page_count)
                outline.root.append(entry)

                page_count += len(src.pages)
                pdf.pages.extend(src.pages)

        # Saved after the outline context has exited.
        pdf.save(outfile)


def main():
    """Merge every PDF under the current directory into ``all.pdf``."""
    merged = "all.pdf"
    merge_pdf("./", merged)


if __name__ == '__main__':
    main()