|
| 1 | +fromPyPDF4importPdfFileReader,PdfFileWriter |
| 2 | +fromPyPDF4.pdfimportContentStream |
| 3 | +fromPyPDF4.genericimportTextStringObject,NameObject |
| 4 | +fromPyPDF4.utilsimportb_ |
| 5 | +importos |
| 6 | +importargparse |
| 7 | +fromioimportBytesIO |
| 8 | +fromtypingimportTuple |
| 9 | +# Import the reportlab library |
| 10 | +fromreportlab.pdfgenimportcanvas |
| 11 | +# The size of the page supposedly A4 |
| 12 | +fromreportlab.lib.pagesizesimportA4 |
| 13 | +# The color of the watermark |
| 14 | +fromreportlab.libimportcolors |
| 15 | + |
# ---------------------------------------------------------------------------
# Watermark configuration
# ---------------------------------------------------------------------------
# Page size of the generated watermark template.
PAGESIZE = A4

# Font face and point size used to draw the watermark text.
FONTNAME = 'Helvetica-Bold'
FONTSIZE = 40

# Fill colour of the watermark text. Two forms are accepted by
# create_watermark():
#   * a reportlab colour object, e.g. colors.lightgrey
#   * a plain tuple of 0-255 channel values, e.g. (190, 190, 190)
COLOR = colors.red

# Rotation (in degrees) applied to the canvas before the text is drawn, so
# the watermark can be displayed diagonally.
ROTATION_ANGLE = 45

# Coordinates handed to drawString() for the watermark text.
X = 250
Y = 10
| 29 | + |
| 30 | + |
def get_info(input_file: str):
    """
    Extract and print the metadata of a PDF file.

    Args:
        input_file: Path of the PDF file to inspect.

    Returns:
        A ``(True, output)`` tuple. ``output`` is a dict that always holds
        the keys ``"File"`` and ``"Encrypted"``; for non-encrypted files it
        also holds ``"Author"``, ``"Creator"``, ``"Producer"``,
        ``"Subject"``, ``"Title"`` and ``"Number of pages"``. Metadata
        fields are ``None`` when the document carries no information
        dictionary.
    """
    # If the PDF is encrypted the file metadata cannot be extracted.
    with open(input_file, 'rb') as pdf_file:
        pdf_reader = PdfFileReader(pdf_file, strict=False)
        encrypted = pdf_reader.isEncrypted
        output = {
            "File": input_file,
            "Encrypted": "True" if encrypted else "False",
        }
        if not encrypted:
            # getDocumentInfo() may return None (no /Info dictionary);
            # getattr() with a default keeps that case from raising.
            info = pdf_reader.getDocumentInfo()
            metadata_fields = (
                ("Author", "author"),
                ("Creator", "creator"),
                ("Producer", "producer"),
                ("Subject", "subject"),
                ("Title", "title"),
            )
            for label, attribute in metadata_fields:
                output[label] = getattr(info, attribute, None)
            output["Number of pages"] = pdf_reader.getNumPages()
    # Display the collected metadata.
    print("## File Information ##################################################")
    print("\n".join("{}:{}".format(i, j) for i, j in output.items()))
    print("######################################################################")
    return True, output
| 55 | + |
| 56 | + |
def get_output_file(input_file: str, output_file: str):
    """
    Decide whether a temporary output file is needed.

    A temporary file is needed when no output file was given, or when the
    output file designates the same file as the input. The comparison is
    done on normalised real paths so that different spellings of the same
    location (``dir/./a.pdf`` vs ``dir/a.pdf``) are recognised as identical.

    Args:
        input_file: Path of the file being processed.
        output_file: Requested output path; may be empty or ``None``.

    Returns:
        ``(True, tmp_path)`` when a temporary file is required, where
        ``tmp_path`` is ``tmp_<input filename>`` next to the input file;
        otherwise ``(False, output_file)``.
    """
    same_target = bool(output_file) and (
        os.path.normcase(os.path.realpath(output_file))
        == os.path.normcase(os.path.realpath(input_file))
    )
    if not output_file or same_target:
        input_path, input_filename = os.path.split(input_file)
        return True, os.path.join(input_path, 'tmp_' + input_filename)
    return False, output_file
| 69 | + |
| 70 | + |
def create_watermark(wm_text: str):
    """
    Build a one-page PDF watermark template in memory.

    Args:
        wm_text: Text to draw as the watermark.

    Returns:
        ``(True, buffer)`` with a ``BytesIO`` holding the generated PDF, or
        ``(False, None)`` when ``wm_text`` is empty.
    """
    # Nothing to draw without a text.
    if not wm_text:
        return False, None

    # The template is rendered into a memory buffer rather than a file.
    buffer = BytesIO()
    pdf_canvas = canvas.Canvas(buffer, pagesize=PAGESIZE)
    # An image could be drawn here instead of text, for example with
    # pdf_canvas.drawImage("logo.png", X, Y, 160, 160).
    pdf_canvas.setFont(FONTNAME, FONTSIZE)

    # COLOR is either a tuple of 0-255 channel values or a colour object.
    if isinstance(COLOR, tuple):
        channels = [channel / 255 for channel in COLOR]
        pdf_canvas.setFillColorRGB(*channels)
    else:
        pdf_canvas.setFillColor(COLOR)

    # Rotate first, then draw the text at the configured position.
    pdf_canvas.rotate(ROTATION_ANGLE)
    pdf_canvas.drawString(X, Y, wm_text)
    pdf_canvas.save()
    return True, buffer
| 97 | + |
| 98 | + |
def save_watermark(wm_buffer, output_file):
    """
    Save the generated watermark template to disk.

    Args:
        wm_buffer: In-memory buffer exposing ``getbuffer()`` (such as the
            ``BytesIO`` produced by ``create_watermark``).
        output_file: Path of the file to write; it is overwritten.

    Returns:
        ``True`` once the buffer content has been written.
    """
    # The context manager closes the file; no explicit close() is needed.
    with open(output_file, mode='wb') as f:
        f.write(wm_buffer.getbuffer())
    return True
| 107 | + |
| 108 | + |
def watermark_pdf(input_file: str, wm_text: str, pages: Tuple = None):
    """
    Add a text watermark to the pages of a PDF file.

    Args:
        input_file: Path of the PDF file to watermark.
        wm_text: Watermark text; must not be empty.
        pages: Optional collection of page indexes to process. Entries are
            compared as strings against the zero-based page index. When
            empty or ``None`` every page is processed.

    Returns:
        ``(True, pdf_reader, pdf_writer)`` on success. The reader's
        underlying file stays open because the writer still reads from it;
        the caller must close ``pdf_reader.stream`` after writing.
        ``(False, None, None)`` when the watermark could not be created or
        a page could not be processed (the input file is closed in that
        case).

    NOTE(review): pages that are not selected are left out of the writer
    altogether, so the output only contains the watermarked pages. This
    mirrors the existing behaviour - confirm it is intended.
    """
    created, wm_buffer = create_watermark(wm_text)
    if not created:
        # Fail explicitly instead of tripping over an undefined watermark
        # reader later on.
        print("Exception = ", "watermark text is empty")
        return False, None, None

    wm_reader = PdfFileReader(wm_buffer)
    selected = {str(entry) for entry in pages} if pages else None

    stream = open(input_file, 'rb')
    try:
        pdf_reader = PdfFileReader(stream, strict=False)
    except Exception:
        # Do not leak the file handle when the input cannot be parsed.
        stream.close()
        raise

    pdf_writer = PdfFileWriter()
    try:
        # The watermark page is the same for every page: fetch it once.
        wm_page = wm_reader.getPage(0)
        for index in range(pdf_reader.getNumPages()):
            # Only the requested pages are handled when a selection exists.
            if selected is not None and str(index) not in selected:
                continue
            page = pdf_reader.getPage(index)
            page.mergePage(wm_page)
            pdf_writer.addPage(page)
    except Exception as e:
        print("Exception = ", e)
        stream.close()
        return False, None, None

    return True, pdf_reader, pdf_writer
| 132 | + |
| 133 | + |
def unwatermark_pdf(input_file: str, wm_text: str, pages: Tuple = None):
    """
    Remove a text watermark from the pages of a PDF file.

    Every ``Tj`` (show text) operation whose string operand starts with
    ``wm_text`` has that operand replaced by an empty string.

    Args:
        input_file: Path of the PDF file to process.
        wm_text: Watermark text to look for. When empty nothing is removed
            (an empty prefix would otherwise match every text string).
        pages: Optional collection of page indexes to process. Entries are
            compared as strings against the zero-based page index. When
            empty or ``None`` every page is processed.

    Returns:
        ``(True, pdf_reader, pdf_writer)``. The reader's underlying file
        stays open because the writer still reads from it; the caller must
        close ``pdf_reader.stream`` after writing.

    Raises:
        Any exception raised while reading the PDF is propagated after the
        input file has been closed.

    NOTE(review): pages that are not selected are left out of the writer
    altogether. This mirrors the existing behaviour - confirm it is
    intended.
    """
    selected = {str(entry) for entry in pages} if pages else None
    show_text_operator = b_("Tj")

    stream = open(input_file, 'rb')
    try:
        pdf_reader = PdfFileReader(stream, strict=False)
        pdf_writer = PdfFileWriter()
        for index in range(pdf_reader.getNumPages()):
            # Only the requested pages are handled when a selection exists.
            if selected is not None and str(index) not in selected:
                continue
            page = pdf_reader.getPage(index)
            # A page without a content stream has nothing to clean.
            if "/Contents" in page:
                content_object = page["/Contents"].getObject()
                content = ContentStream(content_object, pdf_reader)
                for operands, operator in content.operations:
                    if operator != show_text_operator or not operands:
                        continue
                    text = operands[0]
                    if wm_text and isinstance(text, str) and text.startswith(wm_text):
                        # Blank the watermark text, keep the operation.
                        operands[0] = TextStringObject('')
                page[NameObject('/Contents')] = content
            pdf_writer.addPage(page)
    except Exception:
        # Do not leak the file handle when processing fails.
        stream.close()
        raise
    return True, pdf_reader, pdf_writer
| 159 | + |
| 160 | + |
def watermark_unwatermark_file(**kwargs):
    """
    Watermark or unwatermark a single PDF file.

    Keyword Args:
        input_file: Path of the PDF file to process.
        output_file: Destination path. When missing or identical to the
            input, the input file is overwritten.
        wm_text: Watermark text to add or remove.
        action: ``"watermark"`` or ``"unwatermark"``.
        mode: ``"RAM"`` builds the result in memory before writing it;
            ``"HDD"`` writes it straight to the output (or temporary) file.
            Any other value writes nothing.
        pages: Optional page selection forwarded to the worker function.

    Raises:
        ValueError: If ``action`` is not one of the supported values.
    """
    input_file = kwargs.get('input_file')
    wm_text = kwargs.get('wm_text')
    action = kwargs.get('action')
    mode = kwargs.get('mode')
    pages = kwargs.get('pages')

    # Reject an unknown action up front instead of failing later on an
    # unbound local variable.
    if action not in ("watermark", "unwatermark"):
        raise ValueError(f"Unsupported action: {action!r}")

    temporary, output_file = get_output_file(
        input_file, kwargs.get('output_file'))

    if action == "watermark":
        result, pdf_reader, pdf_writer = watermark_pdf(
            input_file=input_file, wm_text=wm_text, pages=pages)
    else:
        result, pdf_reader, pdf_writer = unwatermark_pdf(
            input_file=input_file, wm_text=wm_text, pages=pages)

    # Nothing to write when the worker reported a failure.
    if not result:
        return

    try:
        if mode == "RAM":
            # Render everything in memory first so the input can be closed
            # before it is possibly overwritten.
            output_buffer = BytesIO()
            pdf_writer.write(output_buffer)
            pdf_reader.stream.close()
            # No temporary file is needed in RAM mode: write to the input.
            if temporary:
                output_file = input_file
            with open(output_file, mode='wb') as f:
                f.write(output_buffer.getbuffer())
        elif mode == "HDD":
            # Generate to a new file on the hard disk.
            try:
                with open(output_file, 'wb') as pdf_output_file:
                    pdf_writer.write(pdf_output_file)
            except Exception:
                # Do not leave a partial temporary file behind.
                if temporary and os.path.isfile(output_file):
                    os.remove(output_file)
                raise
            # The input must be closed before it is replaced.
            pdf_reader.stream.close()
            if temporary and os.path.isfile(input_file):
                os.replace(output_file, input_file)
    finally:
        # Closing twice is harmless; this covers every error path.
        pdf_reader.stream.close()
| 203 | + |
| 204 | + |
def watermark_unwatermark_folder(**kwargs):
    """
    Watermark or unwatermark all PDF files found in a folder.

    Each PDF file is processed in place (no separate output file is
    passed on).

    Keyword Args:
        input_folder: Folder to scan.
        wm_text: Watermark text to add or remove.
        recursive: When truthy, sub-folders are processed as well;
            otherwise only the top-level folder is scanned.
        action: ``"watermark"`` or ``"unwatermark"``.
        mode: ``"RAM"`` or ``"HDD"``, forwarded to the per-file function.
        pages: Optional page selection, forwarded to the per-file function.
    """
    input_folder = kwargs.get('input_folder')
    wm_text = kwargs.get('wm_text')
    recursive = kwargs.get('recursive')
    action = kwargs.get('action')
    mode = kwargs.get('mode')
    pages = kwargs.get('pages')

    # Loop through the files within the input folder.
    for foldername, _dirs, filenames in os.walk(input_folder):
        for filename in filenames:
            # Match the extension case-insensitively so "X.PDF" is not
            # skipped.
            if not filename.lower().endswith('.pdf'):
                continue
            inp_pdf_file = os.path.join(foldername, filename)
            print("Processing file:", inp_pdf_file)
            watermark_unwatermark_file(
                input_file=inp_pdf_file, output_file=None,
                wm_text=wm_text, action=action, mode=mode, pages=pages)
        # os.walk yields the top folder first: stop there unless recursing.
        if not recursive:
            break
| 234 | + |
| 235 | + |
def is_valid_path(path):
    """
    Validate that ``path`` designates an existing file or folder.

    Args:
        path: Path entered by the user.

    Returns:
        ``path`` unchanged when it is an existing file or directory.

    Raises:
        ValueError: If ``path`` is empty or is neither a file nor a
            directory.
    """
    if not path:
        raise ValueError("Invalid Path")
    if os.path.isfile(path) or os.path.isdir(path):
        return path
    raise ValueError(f"Invalid Path {path}")
| 248 | + |
| 249 | + |
def _parse_pages(value: str) -> Tuple[str, ...]:
    """Convert a page list such as ``[2,4]`` or ``2,12`` to ``('2', '12')``."""
    pages = []
    for token in value.strip().strip('[]()').split(','):
        token = token.strip()
        if not token:
            continue
        try:
            number = int(token)
        except ValueError:
            raise argparse.ArgumentTypeError(
                f"Invalid page number: {token!r}") from None
        if number < 0:
            raise argparse.ArgumentTypeError(
                f"Invalid page number: {token!r}")
        # Entries stay strings: the page filters compare str(index) to them.
        pages.append(str(number))
    return tuple(pages)


def parse_args():
    """
    Get the user command line parameters.

    ``--output_file`` is only accepted when the input path is a file and
    ``--recursive`` only when it is a folder, so the matching key is only
    present in the result in that case.

    Returns:
        A dict of the parsed arguments. ``pages`` is ``None`` or a tuple of
        page indexes as strings.
    """
    parser = argparse.ArgumentParser(description="Available Options")
    parser.add_argument('-i', '--input_path', dest='input_path', type=is_valid_path,
                        required=True, help="Enter the path of the file or the folder to process")
    parser.add_argument('-a', '--action', dest='action',
                        choices=['watermark', 'unwatermark'], type=str, default='watermark',
                        help="Choose whether to watermark or to unwatermark")
    parser.add_argument('-m', '--mode', dest='mode', choices=['RAM', 'HDD'], type=str,
                        default='RAM', help="Choose whether to process on the hard disk drive or in memory")
    parser.add_argument('-w', '--watermark_text', dest='watermark_text',
                        type=str, required=True, help="Enter a valid watermark text")
    # type=tuple would split the argument into single characters, so a
    # dedicated parser is used instead.
    parser.add_argument('-p', '--pages', dest='pages', type=_parse_pages,
                        help="Enter the zero-based pages to consider e.g.: [2,4]")
    # A first pass is needed to know which extra option applies.
    path = parser.parse_known_args()[0].input_path
    if os.path.isfile(path):
        parser.add_argument('-o', '--output_file', dest='output_file',
                            type=str, help="Enter a valid output file")
    if os.path.isdir(path):
        parser.add_argument('-r', '--recursive', dest='recursive', default=False,
                            type=lambda x: (str(x).lower() in ['true', '1', 'yes']),
                            help="Process Recursively or Non-Recursively")
    # Parse the command line arguments.
    args = vars(parser.parse_args())
    # Display the command line arguments.
    print("## Command Arguments #################################################")
    print("\n".join("{}:{}".format(i, j) for i, j in args.items()))
    print("######################################################################")
    return args
| 280 | + |
| 281 | + |
if __name__ == '__main__':
    # Read the options supplied on the command line.
    args = parse_args()
    target = args['input_path']
    if os.path.isfile(target):
        # Single file: print its metadata, then watermark or unwatermark it.
        get_info(input_file=target)
        watermark_unwatermark_file(
            input_file=target,
            wm_text=args['watermark_text'],
            action=args['action'],
            mode=args['mode'],
            output_file=args['output_file'],
            pages=args['pages'],
        )
    elif os.path.isdir(target):
        # Folder: watermark or unwatermark the PDF files it contains.
        watermark_unwatermark_folder(
            input_folder=target,
            wm_text=args['watermark_text'],
            action=args['action'],
            mode=args['mode'],
            recursive=args['recursive'],
            pages=args['pages'],
        )