|
2 | 2 | # Solutions to review exercises
|
3 | 3 |
|
4 | 4 |
|
5 |
| -importos |
6 |
| -fromPyPDF2importPdfFileReader,PdfFileWriter |
7 |
| - |
8 |
| - |
| 5 | +# *********** |
9 | 6 | # Exercise 1
|
10 |
| -path="C:/python-basics-exercises/ch13-interact-with-pdf-files/\ |
11 |
| -practice_files" |
| 7 | +# |
| 8 | +# In the Chapter 13 Practice Files directory there is a PDF file called |
| 9 | +# `zen.pdf`. Create a `PdfFileReader` from this PDF. |
| 10 | +# *********** |
| 11 | + |
| 12 | +# Before you can do anything, you need to import the right objects from |
| 13 | +# the PyPDF2 and pathlib libraries |
| 14 | +frompathlibimportPath |
| 15 | +fromPyPDF2importPdfFileReader |
12 | 16 |
|
13 |
| -input_file_path=os.path.join(path,"The Whistling Gypsy.pdf") |
14 |
| -input_file=PdfFileReader(input_file_path) |
| 17 | +# To create a PdfFileReader instance, you need the path to the PDF file. |
| 18 | +# We'll assume you downloaded the solutions folder and extracted it into |
| 19 | +# the home directory on your computer. If this is not the case, you'll |
| 20 | +# need to update the path below. |
| 21 | +pdf_path=Path.home()/"python-basics-exercises/ch13-interact-with-pdf-files" \ |
| 22 | +"/practice_files/zen.pdf" |
15 | 23 |
|
16 |
| -#Display meta-data about file |
17 |
| -print("Title:",input_file.getDocumentInfo().title) |
18 |
| -print("Author:",input_file.getDocumentInfo().author) |
19 |
| -print("Number of pages:",input_file.getNumPages()) |
| 24 | +# Now you can create the PdfFileReader instance. Remember that |
| 25 | +# PdfFileReader objects can only be instantiated with path strings, not |
| 26 | +# Path objects! |
| 27 | +pdf_reader=PdfFileReader(str(pdf_path)) |
20 | 28 |
|
21 | 29 |
|
| 30 | +# *********** |
22 | 31 | # Exercise 2
|
23 |
| -# Specify and open output text file |
24 |
| -output_file_path=os.path.join(path,"Output/The Whistling Gypsy.txt") |
25 |
| -withopen(output_file_path,"w")asoutput_file: |
26 |
| -# Extract every page of text |
27 |
| -forpage_numinrange(0,input_file.getNumPages()): |
28 |
| -text=input_file.getPage(page_num).extractText() |
29 |
| -output_file.write(text) |
30 |
| - |
31 |
| -# NOTE: On some machines, you may get a UnicodeDecodeError when |
32 |
| -# writing the file. To fix this, replace line 25 with the following: |
33 |
| -# with open(output_file_path, "w", encoding="utf-8") |
| 32 | +# |
| 33 | +# Using the `PdfFileReader` instance from Exercise 1, print the total |
| 34 | +# number of pages in the PDF. |
| 35 | +# *********** |
| 36 | + |
| 37 | +# Use .getNumPages() to get the number of pages, then print the result |
| 38 | +# using the print() built-in |
| 39 | +num_pages=pdf_reader.getNumPages() |
| 40 | +print(num_pages) |
| 41 | + |
34 | 42 |
|
| 43 | +# *********** |
35 | 44 | # Exercise 3
|
36 |
| -# Save file without cover page |
37 |
| -output_PDF=PdfFileWriter() |
38 |
| -forpage_numinrange(1,input_file.getNumPages()): |
39 |
| -output_PDF.addPage(input_file.getPage(page_num)) |
40 |
| - |
41 |
| -output_file_name=os.path.join( |
42 |
| -path,"Output/The Whistling Gypsy un-covered.pdf" |
43 |
| -) |
44 |
| -withopen(output_file_name,"wb")asoutput_file: |
45 |
| -output_PDF.write(output_file) |
| 45 | +# |
| 46 | +# Print the text from the first page of the PDF in Exercise 1. |
| 47 | +# *********** |
| 48 | + |
| 49 | +# Use .getPage() to get the first page. Remember pages are indexed |
| 50 | +# starting with 0! |
| 51 | +first_page=pdf_reader.getPage(0) |
| 52 | + |
| 53 | +# Then use .extractText() to extract the text |
| 54 | +text=first_page.extractText() |
| 55 | + |
| 56 | +# Finally, print the text |
| 57 | +print(text) |
| 58 | + |
| 59 | + |
| 60 | +# **NOTE**: The text in zen.pdf is from "The Zen of Python" written by |
| 61 | +# Tim Peters in 2004. The Zen is a collection of 19 guiding principles |
| 62 | +# for developing with Python. The story goes that there are actually 20 |
| 63 | +# such principles, but only 19 were written down! |
| 64 | +# |
| 65 | +# You can see the original submission for The Zen of Python in PEP 20: |
| 66 | +# https://www.python.org/dev/peps/pep-0020/ |
| 67 | +# |
| 68 | +# For some historical context surrounding The Zen, see: |
| 69 | +# https://mail.python.org/pipermail/python-list/1999-June/001951.html |
| 70 | +# |
| 71 | +# Author Al Sweigart has an interpretation of The Zen on his blog: |
| 72 | +# https://inventwithpython.com/blog/2018/08/17/the-zen-of-python-explained/ |