bugfixes for bad ssl handshakes and encrypted files
pdfgrab.py | 40 +++++++++++++++++++++++++++++-----------
1 file changed, 29 insertions(+), 11 deletions(-)

--- a/pdfgrab.py
+++ b/pdfgrab.py
@@ -10,6 +10,10 @@
 # TODO
 # * json output
 # * txt output
+# * catch ssl exceptions
+# * catch conn refused connections
+# * set option for certificate verification, default is false
+# * add decryption routine
 
 import os
 import sys
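The new TODO entries outline where the hardening is headed. For the planned certificate-verification switch, a minimal sketch of how a hypothetical `verify` parameter (not yet in this commit) could be threaded through to requests:

```python
import requests

def download_pdf(url, header_data, verify=False):
    # Hypothetical signature mirroring the TODO entry: verify=False skips
    # certificate checks, True (or a CA bundle path) enforces them.
    req = requests.get(url, headers=header_data, verify=verify)
    return req.content
```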
@@ -40,7 +44,7 @@ def find_name(pdf):
     name = pdf.split("/")
     a = len(name)
     name = name[a-1]
-    print(name)
+    #print(name)
 
     return name
 
@@ -49,14 +53,23 @@ def make_directory(outdir):
     try:
         os.mkdir(outdir)
     except:
-        print("[W] mkdir, some error, directory probably exists")
+        #print("[W] mkdir, some error, directory probably exists")
+        pass
 
 def download_pdf(url, header_data):
     ''' downloading the pdfile for later analysis '''
-    req = requests.get(url,headers=header_data)
-    data = req.content
-    #data = req.text
-    print(len(data))
+    try:
+        req = requests.get(url,headers=header_data,verify=True)
+        #req = requests.get(url,headers=header_data,verify=False)
+        data = req.content
+    except requests.exceptions.SSLError as e:
+        print('Error: %s' % e)
+        return -1
+    except:
+        print('Error: Probably something wrong with remote server')
+        return -1
+
+    #print(len(data))
     return data
 
 def store_pdf(url,data,outdir):
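To exercise the new SSLError branch without a misconfigured server at hand, the badssl.com test hosts are convenient (the host name is an assumption about your test setup, not part of the commit):

```python
import requests

try:
    # expired.badssl.com intentionally serves an expired certificate,
    # which trips the same exception path as a bad handshake.
    requests.get('https://expired.badssl.com/', verify=True)
except requests.exceptions.SSLError as e:
    print('Error: %s' % e)
```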
@@ -83,7 +96,11 @@ def _parse_pdf(filename):
         print('[-] Error: %s' % (e))
         return
 
-    extract = h.documentInfo
+    try:
+        extract = h.documentInfo
+    except pdf.utils.PdfReadError as e:
+        print('Error: %s' % e)
+        return -1
 
     print('-'*80)
     print('File: %s' % filename)
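The caught PdfReadError is typically what encrypted documents raise when their metadata is read. The TODO's decryption routine might start like this sketch (assuming `pdf` is PyPDF2 1.x, as the `pdf.utils.PdfReadError` reference suggests; `read_info` is an illustrative name, not from the commit):

```python
import PyPDF2 as pdf

def read_info(filename):
    # Sketch only: try the empty owner password, which opens many PDFs
    # that are merely copy-protected rather than password-protected.
    with open(filename, 'rb') as f:
        reader = pdf.PdfFileReader(f)
        if reader.isEncrypted and reader.decrypt('') == 0:
            print('Error: %s needs a real password' % filename)
            return -1
        return reader.documentInfo
```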
@@ -102,8 +119,9 @@ def grab_url(url, outdir):
     just one pdf and analysing it
     '''
    data = download_pdf(url,None)
-    savepath = store_pdf(url, data, outdir)
-    _parse_pdf(savepath)
+    if data != -1:
+        savepath = store_pdf(url, data, outdir)
+        _parse_pdf(savepath)
 
     return
 
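The `-1` sentinel keeps the call site simple, though it mixes an int with the bytes that download_pdf otherwise returns. An alternative sketch returning None instead (the `_alt` name is illustrative, not from the commit):

```python
import requests

def download_pdf_alt(url, header_data):
    # Variant: None as the failure sentinel, so callers can test
    # 'is not None' instead of comparing bytes content against an int.
    try:
        req = requests.get(url, headers=header_data, verify=True)
        return req.content
    except requests.exceptions.RequestException as e:
        print('Error: %s' % e)
        return None
```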
@@ -123,7 +141,7 @@ def search_pdf(search, sargs):
     query='%s filetype:pdf' % search
     #print(query)
     urls = []
-    for url in gs.search(query,stop=10):
+    for url in gs.search(query,stop=sargs):
         print(url)
         urls.append(url)
 
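Passing `sargs` through makes the result cap configurable instead of hard-coded at 10. Assuming `gs` is the `googlesearch` module from the `google` package, `stop` bounds the total number of results the iterator yields:

```python
import googlesearch as gs

# stop=5 ends iteration after five results; stop=None would keep paging.
for url in gs.search('pdfgrab filetype:pdf', stop=5):
    print(url)
```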
@@ -150,7 +168,7 @@ def run(args):
 
     elif args.search:
         search = args.search
-        print(args)
+        #print(args)
         print('[+] Seek and de...erm...analysing %s' % (search))
         sargs=10
         seek_and_analyse(search,sargs,outdir)