|
1 | 1 | # 13.2 - Extract Pages From a PDF
|
2 | 2 | # Solutions to review exercises
|
3 | 3 |
|
4 |
| -importos |
5 |
| -importcopy |
6 |
| -frompyPDF2importPdfFileReader,PdfFileWriter |
| 4 | +# *********** |
| 5 | +# Exercise 1 |
| 6 | +# |
| 7 | +# Extract the last page from the `Pride_and_Prejudice.pdf` file and |
| 8 | +# save it to a new file called `last_page.pdf` in your home directory. |
| 9 | +# *********** |
7 | 10 |
|
| 11 | +# First import the classes and libraries needed |
| 12 | +frompathlibimportPath |
| 13 | +fromPyPDF2importPdfFileReader,PdfFileWriter |
8 | 14 |
|
9 |
| -# Exercise 1 |
10 |
| -path="C:/python-basics-exercises/ch13-interact-with-pdf-files/\ |
11 |
| - practice_files" |
| 15 | +# Get the path to the `Pride_and_Prejudice.pdf` file. We'll assume you |
| 16 | +# downloaded the solutions folder and extracted it into the home |
| 17 | +# directory on your computer. If this is not the case, you'll need to |
| 18 | +# update the path below. |
| 19 | +pdf_path=Path.home()/"python-basics-exercises/ch13-interact-with-pdf-files" \ |
| 20 | +"/practice_files/Pride_and_Prejudice.pdf" |
| 21 | + |
| 22 | +# Now you can create the PdfFileReader instance. Remember that |
| 23 | +# PdfFileReader objects can only be instantiated with path strings, not |
| 24 | +# Path objects! |
| 25 | +pdf_reader=PdfFileReader(str(pdf_path)) |
12 | 26 |
|
13 |
| -input_file_path=os.path.join(path,"Walrus.pdf") |
14 |
| -input_file=PdfFileReader(input_file_path) |
15 |
| -output_PDF=PdfFileWriter() |
| 27 | +# Use the .pages attribute to get an iterable over all pages in the |
| 28 | +# PDF. The last page can be accessed with the index -1. |
| 29 | +last_page=pdf_reader.pages[-1] |
16 | 30 |
|
17 |
| -input_file.decrypt("IamtheWalrus")# decrypt the input file |
| 31 | +# Now you can create a PdfFileWriter instance and add the last page to it. |
| 32 | +pdf_writer=PdfFileWriter() |
| 33 | +pdf_writer.addPage(last_page) |
18 | 34 |
|
| 35 | +# Finally, write the contents of pdf_writer to the file `last_page.pdf` |
| 36 | +# in your home directory. |
| 37 | +output_path=Path.home()/"last_page.pdf" |
| 38 | +withoutput_path.open(mode="wb")asoutput_file: |
| 39 | +pdf_writer.write(output_file) |
19 | 40 |
|
| 41 | + |
| 42 | +# *********** |
20 | 43 | # Exercise 2
|
21 |
| -forpage_numinrange(0,input_file.getNumPages()): |
22 |
| -# rotate pages (call everything page_left for now; will make a copy) |
23 |
| -page_left=input_file.getPage(page_num) |
24 |
| -page_left.rotateCounterClockwise(90) |
| 44 | +# |
| 45 | +# Extract all pages with even numbered _indices_ from the |
| 46 | +# `Pride_and_Prejudice.pdf` and save them to a new file called |
| 47 | +# `every_other_page.pdf` in your home directory. |
| 48 | +# *********** |
| 49 | + |
| 50 | +# There are several ways to extract pages with even numbered indices |
| 51 | +# so we'll cover a few of them here. |
| 52 | + |
| 53 | +# Solution A: Using a `for` loop |
| 54 | +# ------------------------------ |
| 55 | + |
| 56 | +# One way to do it is with a `for` loop. We'll create a new PdfFileWriter |
| 57 | +# instance, then loop over the numbers 0 up to the number of pages in the |
| 58 | +# PDF, and add the pages with even indices to the PdfFileWriter instance. |
| 59 | +pdf_writer=PdfFileWriter() |
| 60 | +num_pages=pdf_reader.getNumPages() |
| 61 | + |
| 62 | +foridxinrange(num_pages):# NOTE: idx is a common short name for "index" |
| 63 | +ifidx%2==0:# Check that the index is even |
| 64 | +page=pdf_reader.getPage(idx)# Get the page at the index |
| 65 | +pdf_writer.addPage(page)# Add the page to `pdf_writer` |
| 66 | + |
| 67 | +# Now write the contents of `pdf_writer` to the file `every_other_page.pdf` |
| 68 | +# in your home directory. |
| 69 | +output_path=Path.home()/"every_other_page.pdf" |
| 70 | +withoutput_path.open(mode="wb")asoutput_file: |
| 71 | +pdf_writer.write(output_file) |
25 | 72 |
|
26 |
| -page_right=copy.copy(page_left)# split each page in half |
27 |
| -upper_right=page_left.mediaBox.upperRight# get original page corner |
| 73 | +# Solution B: Slicing `.pages` with steps |
| 74 | +# ------------------------------ |
28 | 75 |
|
29 |
| -# crop and add left-side page |
30 |
| -page_left.mediaBox.upperRight= (upper_right[0]/2,upper_right[1]) |
31 |
| -output_PDF.addPage(page_left) |
32 |
| -# crop and add right-side page |
33 |
| -page_right.mediaBox.upperLeft= (upper_right[0]/2,upper_right[1]) |
34 |
| -output_PDF.addPage(page_right) |
| 76 | +# A more succinct, although possibly more difficult to understand, |
| 77 | +# solution involves slicing the `.pages` iterable. The indices start |
| 78 | +# with 0 and every even index can be obtained by iterating over |
| 79 | +# `.pages` in steps of size 2, so `.pages[::2]` is an iterable |
| 80 | +# containing just the pages with even indices. |
| 81 | +pdf_writer=PdfFileWriter() |
35 | 82 |
|
| 83 | +forpageinpdf_reader.pages[::2]: |
| 84 | +pdf_writer.addPage(page) |
36 | 85 |
|
| 86 | +# Now write the contents of `pdf_writer` to the file |
| 87 | +# `every_other_page.pdf` in your home directory. |
| 88 | +output_path=Path.home()/"every_other_page.pdf" |
| 89 | +withoutput_path.open(mode="wb")asoutput_file: |
| 90 | +pdf_writer.write(output_file) |
| 91 | + |
| 92 | + |
| 93 | +# *********** |
37 | 94 | # Exercise 3
|
38 |
| -# save new pages to an output file |
39 |
| -output_file_path=os.path.join(path,"Output/Updated Walrus.pdf") |
40 |
| -withopen(output_file_path,"wb")asoutput_file: |
41 |
| -output_PDF.write(output_file) |
| 95 | +# |
| 96 | +# Split the `Pride_and_Prejudice.pdf` file into two new PDF files. The |
| 97 | +# first file should contain the first 150 pages, and the second file |
| 98 | +# should contain the remaining pages. Save both files in your home |
| 99 | +# directory as `part_1.pdf` and `part_2.pdf`. |
| 100 | +# *********** |
| 101 | + |
| 102 | +# Start by creating two new PdfFileWriter instances. |
| 103 | +part1_writer=PdfFileWriter() |
| 104 | +part2_writer=PdfFileWriter() |
| 105 | + |
| 106 | +# Next, create two new iterables containing the correct pages. |
| 107 | +part1_pages=pdf_reader.pages[:150]# Contains pages 0 - 149 |
| 108 | +part2_pages=pdf_reader.pages[150:]# Contains pages 150 - last page |
| 109 | + |
| 110 | +# Add the pages to their corresponding writers. |
| 111 | +forpageinpart1_pages: |
| 112 | +part1_writer.addPage(page) |
| 113 | + |
| 114 | +forpageinpart2_pages: |
| 115 | +part2_writer.addPage(page) |
| 116 | + |
| 117 | +# Now write the contents of each writer to the files `part_1.pdf` and |
| 118 | +# `part_2.pdf` in your home directory. |
| 119 | +part1_output_path=Path.home()/"part_1.pdf" |
| 120 | +withpart1_output_path.open(mode="wb")aspart1_output_file: |
| 121 | +part1_writer.write(part1_output_file) |
| 122 | + |
| 123 | +part2_output_path=Path.home()/"part_2.pdf" |
| 124 | +withpart2_output_path.open(mode="wb")aspart2_output_file: |
| 125 | +part2_writer.write(part2_output_file) |