release version 0.4.9
@@ -5,19 +5,44 @@ from libs.libhelper import *

def get_random_agent():
    return gs.get_random_user_agent()


-def search_pdf(search, args):
+def hits_google(search, args):
    ''' the function where googlesearch from mario vilas
    is called
    '''
    s = search.split(',')
    query = 'filetype:pdf'

    try:
        hits = gs.hits(query, domains=s, user_agent=gs.get_random_user_agent())

    except urllib.error.HTTPError as e:
        return False, e

    except urllib.error.URLError as e:
        return False, e

    except IndexError as e:
        return False, e

    return True, hits


def search_google(search, args):
    ''' the function where googlesearch from mario vilas
    is called
    '''

    s = search.split(',')
    search_stop = args.search_stop

-    query = '%s filetype:pdf' % search
+    query = 'filetype:pdf'
    #query = 'site:%s filetype:pdf' % search
    # print(query)
    urls = []

    try:
-        for url in gs.search(query, num=20, stop=search_stop, user_agent=gs.get_random_user_agent()):
+        for url in gs.search(query, num=20, domains=s, stop=search_stop, user_agent=gs.get_random_user_agent()):
            #print(url)
            urls.append(url)
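For context, a minimal sketch of how these helpers might be driven; the module path libs.libgoogle follows from the import in libs/librequest.py further down in this commit, while the CLI flag behind args.search_stop is an assumption:

    # hypothetical driver for the functions in this hunk
    import argparse
    from libs import libgoogle

    parser = argparse.ArgumentParser()
    # flag name is an assumption; only the search_stop attribute matters here
    parser.add_argument('--search-stop', dest='search_stop', type=int, default=20)
    args = parser.parse_args([])

    # the search argument is a comma-separated domain list, split inside the function
    ok, hits = libgoogle.hits_google('example.com', args)
    if ok:
        print('estimated google hits for filetype:pdf:', hits)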
@@ -8,10 +8,12 @@ file_handler = logging.FileHandler('pdfgrab.log')

console_handler = logging.StreamHandler()
console_handler.setLevel(logging.WARNING)

-formatter = logging.Formatter('%(asctime)s:%(name)s:%(levelname)s:%(message)s')
+file_formatter = logging.Formatter('%(asctime)s:%(name)s:%(levelname)s:%(message)s')
+console_formatter = logging.Formatter('%(levelname)s:%(message)s')

-file_handler.setFormatter(formatter)
-console_handler.setFormatter(formatter)
+file_handler.setFormatter(file_formatter)
+console_handler.setFormatter(console_formatter)

logger.addHandler(file_handler)
logger.addHandler(console_handler)
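The hunk above splits the single shared formatter into a verbose file format and a short console format. A self-contained sketch of the resulting behaviour; the logger name is an assumption, since the logging.getLogger(...) line sits outside this hunk:

    import logging

    logger = logging.getLogger('pdfgrab')   # name is an assumption
    logger.setLevel(logging.DEBUG)

    file_handler = logging.FileHandler('pdfgrab.log')
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.WARNING)

    file_formatter = logging.Formatter('%(asctime)s:%(name)s:%(levelname)s:%(message)s')
    console_formatter = logging.Formatter('%(levelname)s:%(message)s')

    file_handler.setFormatter(file_formatter)
    console_handler.setFormatter(console_formatter)

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    logger.info('goes to pdfgrab.log only')           # below the console WARNING threshold
    logger.warning('goes to both, short format on the console')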
libs/libreport.py (new file, 173 lines)
@@ -0,0 +1,173 @@
import os
import sys
import json
from json2html import *
from libs.pdf_png import get_png_base64


def prepare_analysis_dict(ana_queue):
    '''params: ana_queue - queue with collected information
    '''
    # initialise analysis dictionary
    analysis_dict = {}

    # move the analysis entries queued by the workers back into one dictionary
    while not ana_queue.empty():
        item = ana_queue.get()
        # print('item ', item)
        analysis_dict.update(item)

    # ana_queue is empty now, return the newly created dictionary
    return analysis_dict


def create_txt_report(analysis_dict, outdir, out_filename):
    ''' create a txt report in the output directory
    '''

    # separator line drawn between entries
    sep = '-' * 80 + '\n'

    # create output filepath
    txtout = "%s/%s.txt" % (outdir, out_filename)

    # open the file and keep the file descriptor
    fwtxt = open(txtout, 'w')

    # walk the keys of the dict
    for k in analysis_dict.keys():
        # write separator
        fwtxt.write(sep)

        # build the filename entry of the pdf
        fname = 'File: %s\n' % (analysis_dict[k]['filename'])

        # build data entry
        ddata = analysis_dict[k]['data']

        # write the filename
        fwtxt.write(fname)

        # write the metadata
        for kdata in ddata.keys():
            metatxt = '%s:%s\n' % (kdata, ddata[kdata])
            fwtxt.write(metatxt)

        # write separator
        fwtxt.write(sep)

    # close the file
    fwtxt.close()

    return True


def create_json_report(analysis_dict, outdir, out_filename):
    ''' create a jsonfile report in the output directory
    '''

    # build json output name
    jsonout = "%s/%s.json" % (outdir, out_filename)

    # open up json output file
    fwjson = open(jsonout, 'w')

    # convert dictionary to json data
    jdata = json.dumps(analysis_dict)

    # write json data to file
    fwjson.write(jdata)

    # close file
    fwjson.close()

    return True


def create_html_report(analysis_dict, outdir, out_filename):
    ''' create a html report from the json data using json2html in the output directory
    '''

    # build up path for html output file
    htmlout = "%s/%s.html" % (outdir, out_filename)

    # open htmlout file descriptor
    fwhtml = open(htmlout, 'w')

    # static html scaffolding
    pdfpng = get_png_base64('supply/pdf_base64.png')
    html_style = '<style>.center { display: block; margin-left: auto;margin-right: auto;} table {border-collapse: collapse;} th, td { border: 1px solid black;text-align: left; }</style>\n'
    html_head = '<html><head><title>pdfgrab - {0} item/s</title>{1}</head>\n'.format(len(analysis_dict), html_style)
    html_pdf_png = '<p class="center"><img class="center" src="data:image/jpeg;base64,{0}"><br><center>pdfgrab - grab and analyse pdf files</center><br></p>'.format(pdfpng)
    html_body = '<body>{0}\n'.format(html_pdf_png)
    html_end = '\n<br><br><p align="center"><a href="https://github.com/c0decave/pdfgrab">pdfgrab</a> by <a href="https://twitter.com/User_to_Root">dash</a></p></body></html>\n'

    # some table attributes
    attr = 'id="meta-data" class="table table-bordered table-hover", border=1, cellpadding=3 summary="Metadata"'

    # convert dictionary to json data
    # in this mode each finding gets its own table; there are other possibilities,
    # but for now I go with this
    html_out = ''
    for k in analysis_dict.keys():
        trans = analysis_dict[k]
        jdata = json.dumps(trans)
        html = json2html.convert(json=jdata, table_attributes=attr)
        html_out = html_out + html + "\n"
        #html_out = html_out + "<p>" + html + "</p>\n"
    #jdata = json.dumps(analysis_dict)

    # create html
    #html = json2html.convert(json = jdata, table_attributes=attr)

    # write html
    fwhtml.write(html_head)
    fwhtml.write(html_body)
    fwhtml.write(html_out)
    fwhtml.write(html_end)

    # close html file
    fwhtml.close()


def create_url_json(url_d, outdir, out_filename):
    ''' create a json url file in the output directory
    '''

    # create url savefile
    jsonurlout = "%s/%s_url.json" % (outdir, out_filename)

    # open up file for writing urls down
    fwjson = open(jsonurlout, 'w')

    # convert url dictionary to json
    jdata = json.dumps(url_d)

    # write json data to file
    fwjson.write(jdata)

    # close file descriptor
    fwjson.close()

    return True


def create_url_txt(url_d, outdir, out_filename):
    ''' create a txt url file in the output directory
    '''
    # build up txt out path
    txtout = "%s/%s_url.txt" % (outdir, out_filename)

    # open up our url txtfile
    fwtxt = open(txtout, 'w')

    # iterate through the keys of the url dictionary
    for k in url_d.keys():

        # get the entry
        ddata = url_d[k]

        # build the line to save: url and local filename
        metatxt = '%s:%s\n' % (ddata['url'], ddata['filename'])

        # write it to file
        fwtxt.write(metatxt)

    # close fd
    fwtxt.close()

    return True
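A hedged usage sketch for the new report module; the payload shape (each key mapping to a dict with 'filename' and a 'data' dict of metadata) is inferred from what create_txt_report reads, and the output directory is created here as an assumption:

    import os
    import queue

    from libs import libreport

    # one analysis entry, shaped the way the report writers expect
    ana_q = queue.Queue()
    ana_q.put({'grab_0': {'filename': 'example.pdf', 'data': {'Author': 'alice'}}})

    os.makedirs('output', exist_ok=True)
    analysis_dict = libreport.prepare_analysis_dict(ana_q)
    libreport.create_txt_report(analysis_dict, 'output', 'pdfgrab')
    libreport.create_json_report(analysis_dict, 'output', 'pdfgrab')
    # create_html_report additionally needs supply/pdf_base64.png on disk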
libs/librequest.py (new file, 162 lines)
@@ -0,0 +1,162 @@
import os
import sys
import json
import socket
import requests

from libs.liblog import logger
from libs.libhelper import *
from libs.libgoogle import get_random_agent


def store_file(url, data, outdir):
    ''' storing the downloaded data to a file
    params: url - is used to create the filename
            data - the data of the file
            outdir - the directory to store in
    returns: dict { "code":<code>, "data":<savepath>, "error":<error> } - the status code, the savepath, the error
    '''

    logger.info('Store file {0}'.format(url))
    name = find_name(url)

    # only allow the stored file a name with 50 chars
    if len(name) > 50:
        name = name[:49]

    # build up the save path
    save = "%s/%s" % (outdir, name)

    try:
        f = open(save, "wb")

    except OSError as e:
        logger.warning('store_file {0}'.format(e))
        # return ret_dict
        return {"code": False, "data": save, "error": e}

    # write the data and keep the number of written bytes
    ret = f.write(data)

    # check if zero bytes were written
    if ret == 0:
        logger.warning('Written {0} bytes for file: {1}'.format(ret, save))

    else:
        # log to info that the bytes and the file have been written
        logger.info('Written {0} bytes for file: {1}'.format(ret, save))

    # close file descriptor
    f.close()

    # return ret_dict
    return {"code": True, "data": save, "error": False}


def download_file(url, args, header_data):
    ''' downloading the file for later analysis
    params: url - the url
            args - argparse args namespace
            header_data - pre-defined header data
    returns: ret_dict
    '''

    # check the remote tls certificate or not?
    cert_check = args.cert_check

    # run our try/except routine
    try:
        # request the url and save the response in req
        # pass header data and set verify as delivered by args.cert_check
        req = requests.get(url, headers=header_data, verify=cert_check)

    except requests.exceptions.SSLError as e:
        logger.warning('download file {0} {1}'.format(url, e))
        # return ret_dict; no response object exists at this point
        return {"code": False, "data": False, "error": e}

    except requests.exceptions.InvalidSchema as e:
        logger.warning('download file {0} {1}'.format(url, e))
        # return ret_dict
        return {"code": False, "data": False, "error": e}

    except socket.gaierror as e:
        logger.warning('download file, host not known {0} {1}'.format(url, e))
        return {"code": False, "data": False, "error": e}

    except Exception:
        logger.warning('download file, something wrong with remote server? {0}'.format(url))
        # return ret_dict; the request never succeeded, so there is no response to pass on
        return {"code": False, "data": False, "error": True}

    #finally:
    #    # lets close the socket
    #    req.close()

    # return ret_dict
    return {"code": True, "data": req, "error": False}


def grab_run(url, args, outdir):
    ''' function keeping all the steps for the user call of grabbing
    just one file and analysing it
    '''
    header_data = {'User-Agent': get_random_agent()}
    rd_download = download_file(url, args, header_data)
    code_down = rd_download['code']

    # if code is True, the download of the file was successful
    if code_down:
        rd_evaluate = evaluate_response(rd_download)
        code_eval = rd_evaluate['code']
        # if code is True, evaluation was also successful
        if code_eval:
            # get the content from the evaluated request
            content = rd_evaluate['data'].content

            # call store file
            rd_store = store_file(url, content, outdir)

            # get the code
            code_store = rd_store['code']

            # get the savepath
            savepath = rd_store['data']

            # if code is True, storing the file was also successful
            if code_store:
                return {"code": True, "data": savepath, "error": False}

    return {"code": False, "data": False, "error": True}


def evalute_content(ret_dict):
    pass


def evaluate_response(ret_dict):
    ''' this method usually comes after download_file;
    it evaluates what has happened and whether we even have some data to process
    or not
    params: ret_dict - the dict carrying the req object from the conducted request
    returns: dict { "code":<code>, "data":<req>, "error":<error> } - the status code, the response, the error
    '''
    # extract the response from ret_dict
    req = ret_dict['data']

    # get url, status code and reason
    url = req.url
    status = req.status_code
    reason = req.reason

    # ahh, everything is fine
    if status == 200:
        logger.info('download file, {0} {1} {2}'.format(url, reason, status))
        return {"code": True, "data": req, "error": False}

    # nah, something is not like it should be
    else:
        logger.warning('download file, {0} {1} {2}'.format(url, reason, status))
        return {"code": False, "data": req, "error": True}
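A sketch of a single grab with the new request module; download_file only reads args.cert_check, so a bare namespace stands in for the real argparse result (whose flags are not part of this diff):

    from types import SimpleNamespace

    from libs import librequest

    args = SimpleNamespace(cert_check=True)   # placeholder for the argparse namespace
    ret = librequest.grab_run('https://example.com/paper.pdf', args, 'output')
    if ret['code']:
        print('stored at', ret['data'])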
libs/pdf_png.py (new file, 5 lines)
@@ -0,0 +1,5 @@

def get_png_base64(filename):
    # the file is expected to already hold base64 text, so a plain text read is enough
    fr = open(filename, 'r')
    buf = fr.read()
    return buf
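Sketch of how this helper feeds the HTML report: the returned blob is already base64 text, so it can go straight into a data URI, mirroring what create_html_report in libs/libreport.py does:

    from libs.pdf_png import get_png_base64

    pdfpng = get_png_base64('supply/pdf_base64.png')
    img_tag = '<img src="data:image/png;base64,{0}">'.format(pdfpng)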