163 lines
4.9 KiB
Python
163 lines
4.9 KiB
Python
import os
|
|
import sys
|
|
import json
|
|
import socket
|
|
import requests
|
|
|
|
from libs.liblog import logger
|
|
from libs.libhelper import *
|
|
from libs.libgoogle import get_random_agent
|
|
|
|
def store_file(url, data, outdir):
    ''' store the downloaded data in a file below outdir

    params: url - passed to find_name() to derive the file name
            data - the content to write (the file is opened in binary mode)
            outdir - directory in which the file is created
    returns: dict { "code":<bool>, "data":<savepath>, "error":<error> }
             code is True when the file could be opened and written,
             error is False on success, otherwise the caught OSError
    '''
    logger.info('Store file {0}'.format(url))
    name = find_name(url)

    # only allow stored file a name with 50 chars; slicing is a no-op for
    # shorter names (the old name[:49] kept one character less than allowed)
    name = name[:50]

    # build up the save path
    save = "%s/%s" % (outdir, name)

    try:
        # the with block closes the descriptor even when write() fails
        with open(save, "wb") as f:
            written = f.write(data)
    except OSError as e:
        # covers failures while opening as well as while writing
        logger.warning('store_file {0}'.format(e))
        return {"code": False, "data": save, "error": e}

    # zero written bytes is only worth a warning, it is not reported as error
    if written == 0:
        logger.warning('Written {0} bytes for file: {1}'.format(written, save))
    else:
        logger.info('Written {0} bytes for file: {1}'.format(written, save))

    return {"code": True, "data": save, "error": False}
|
|
|
|
|
|
def download_file(url, args, header_data):
    ''' download the file for later analysis

    params: url - the url to request
            args - argparse args namespace, only args.cert_check is read
            header_data - pre-defined header data sent with the request
    returns: dict { "code":<bool>, "data":<response or False>, "error":<error> }
             on success code is True, data is the response and error is False
             on failure code is False, data is False and error is the caught
             exception (True for the catch-all branch)
    '''
    # check the remote tls certificate or not?
    cert_check = args.cert_check

    try:
        # NOTE(review): no timeout is passed, so this call may block for as
        # long as the remote side keeps the connection open
        req = requests.get(url, headers=header_data, verify=cert_check)

    except requests.exceptions.SSLError as e:
        logger.warning('download file {0}{1}'.format(url, e))
        # req is never bound when requests.get() raised, so there is no
        # response to hand back
        return {"code": False, "data": False, "error": e}

    except requests.exceptions.InvalidSchema as e:
        logger.warning('download file {0}{1}'.format(url, e))
        return {"code": False, "data": False, "error": e}

    except socket.gaierror as e:
        logger.warning('download file, host not known {0} {1}'.format(url, e))
        return {"code": False, "data": False, "error": e}

    except Exception:
        # catch-all for every other request failure; KeyboardInterrupt and
        # SystemExit are deliberately not swallowed here
        logger.warning('download file, something wrong with remote server? {0}'.format(url))
        return {"code": False, "data": False, "error": True}

    return {"code": True, "data": req, "error": False}
|
|
|
|
def grab_run(url, args, outdir):
    ''' run all steps of a single grab for the user call:
        download the url, evaluate the response and store the content

    params: url - the url to grab
            args - forwarded to download_file
            outdir - forwarded to store_file
    returns: dict { "code":True, "data":<savepath>, "error":False } if every step succeeded
             dict { "code":False, "data":False, "error":True } otherwise
    '''
    failed = {"code": False, "data": False, "error": True}

    # step 1: download with a random user agent
    headers = {'User-Agent': get_random_agent()}
    downloaded = download_file(url, args, headers)
    if not downloaded['code']:
        return failed

    # step 2: evaluate the response
    evaluated = evaluate_response(downloaded)
    if not evaluated['code']:
        return failed

    # step 3: store the content of the evaluated response
    body = evaluated['data'].content
    stored = store_file(url, body, outdir)
    stored_ok = stored['code']
    target = stored['data']
    if not stored_ok:
        return failed

    return {"code": True, "data": target, "error": False}
|
|
|
|
def evalute_content(ret_dict):
    ''' placeholder without implementation: ret_dict is ignored

    returns: None
    '''
    return None
|
|
|
|
def evaluate_response(ret_dict):
    ''' this method comes usually after download_file,
        it will evaluate what has happened and if we even have some data
        to process or not

    params: ret_dict - dict whose "data" entry holds the response object of
                       the conducted request; its url, status_code and
                       reason attributes are read
    returns: dict { "code":<bool>, "data":<response or False>, "error":<bool> }
             code is True only when the status code is 200
    '''
    # extract data from ret_dict
    req = ret_dict['data']

    # a failed download carries False instead of a response object.
    # Identity checks are used on purpose: a response object may define its
    # own truth value (requests documents Response as falsy for 4xx/5xx),
    # so "not req" would wrongly skip real responses
    if req is None or req is False:
        logger.warning('download file, no response to evaluate')
        return {"code": False, "data": False, "error": True}

    url = req.url
    status = req.status_code
    reason = req.reason

    # ahh everything is fine
    if status == 200:
        logger.info('download file, {0} {1} {2}'.format(url, reason, status))
        return {"code": True, "data": req, "error": False}

    # nah something is not like it should be
    logger.warning('download file, {0} {1} {2}'.format(url, reason, status))
    return {"code": False, "data": req, "error": True}
|