For a project, I needed to create snapshots of a list of websites. I decided to automate the job, and I have since reused the following code in many other projects.
This script interactively asks the user for a URL and an image file name, and saves a full-page screenshot of that page under `/tmp/`.
#!/usr/bin/env python3
import datetime
import math
import os
import sys
import tempfile
# third-party imports
from PIL import Image
from selenium import webdriver
from time import sleep
def get_chrome_drive(driver_path=None):
    """
    Creates a headless Chrome selenium driver.

    :param driver_path: path to the chromedriver executable; when ``None``
        the default ``/home/bmzi/bin/chromedriver`` is used.
    :return: the started ``webdriver.Chrome`` instance.
    """
    if driver_path is None:
        # NOTE(review): machine-specific default kept for backward
        # compatibility; pass driver_path explicitly on other hosts.
        driver_path = '/home/bmzi/bin/chromedriver'

    options = webdriver.ChromeOptions()
    options.headless = True
    options.add_argument('--hide-scrollbars')
    options.add_argument('--no-sandbox')

    # `options=` replaces the deprecated `chrome_options=` keyword, which
    # only triggered a DeprecationWarning before forwarding the same object.
    return webdriver.Chrome(
        executable_path=driver_path,
        options=options,
    )
def get_firefox_drive(driver_path=None):
    """
    Creates a headless Firefox selenium driver.

    :param driver_path: path to the geckodriver executable; when ``None``
        the relative default ``./geckodriver`` is used.
    :return: the started ``webdriver.Firefox`` instance.
    """
    if driver_path is None:
        driver_path = './geckodriver'

    options = webdriver.FirefoxOptions()
    options.add_argument('-headless')

    # `options=` replaces the deprecated `firefox_options=` keyword, which
    # only triggered a DeprecationWarning before forwarding the same object.
    return webdriver.Firefox(
        executable_path=driver_path,
        options=options,
    )
def save_fullpage_screenshot(driver, url, output_path, tmp_prefix='selenium_screenshot', tmp_suffix='.png'):
    """
    Creates a full page screenshot using a selenium driver by scrolling and taking multiple screenshots,
    and stitching them into a single image.

    :param driver: selenium driver used to load ``url`` and take the screenshots
    :param url: address of the page to capture
    :param output_path: file path the stitched image is saved to
    :param tmp_prefix: prefix for the temporary per-window screenshot files
    :param tmp_suffix: suffix for the temporary per-window screenshot files
    :return: ``output_path``
    """
    # get the page
    driver.get(url)

    # get dimensions
    window_height = driver.execute_script('return window.innerHeight')
    scroll_height = driver.execute_script('return document.body.parentNode.scrollHeight')
    num = int(math.ceil(float(scroll_height) / float(window_height)))

    tempfiles = []
    try:
        # get temp files (inside the try so a failure part-way through
        # still removes the files that were already created)
        for i in range(num):
            fd, path = tempfile.mkstemp(
                prefix='{0}-{1:02}-'.format(tmp_prefix, i + 1),
                suffix=tmp_suffix,
            )
            os.close(fd)
            tempfiles.append(path)

        # take screenshots, scrolling down one window between captures
        for i, path in enumerate(tempfiles):
            if i > 0:
                driver.execute_script('window.scrollBy(%d,%d)' % (0, window_height))
            driver.save_screenshot(path)

        # stitch images together
        stitched = None
        last_index = len(tempfiles) - 1
        for i, path in enumerate(tempfiles):
            # close the file handle before the cleanup step removes the file
            with Image.open(path) as img:
                w, h = img.size
                # NOTE(review): assumes the screenshot pixel height equals
                # window.innerHeight (device pixel ratio of 1) -- confirm.
                y = i * window_height

                tile = img
                if i == last_index and num > 1:
                    # The last capture is cropped to its bottom part, i.e.
                    # the rows not already covered by the previous tiles.
                    # When the page height is an exact multiple of the tile
                    # height the remainder is 0, which means the whole tile
                    # is new -- not that nothing is.
                    remainder = scroll_height % h or h
                    tile = img.crop((0, h - remainder, w, h))

                if stitched is None:
                    stitched = Image.new('RGB', (w, scroll_height))

                stitched.paste(tile, (0, y))

        stitched.save(output_path)
    finally:
        # cleanup
        for path in tempfiles:
            if os.path.isfile(path):
                os.remove(path)

    return output_path
def main():
    """
    Asks for a URL and an image file name, then saves a full page
    screenshot of that URL as ``/tmp/<file name>`` using headless Chrome.
    """
    url = input('Enter url: ')
    filename = input('Enter image file name: ')
    filename = '/tmp/' + filename

    driver = get_chrome_drive()
    try:
        driver.set_window_size(1280, 768)
        save_fullpage_screenshot(driver, url, filename)
    finally:
        # always shut the browser down, even when the capture fails,
        # so no browser process is left running
        driver.quit()


if __name__ == '__main__':
    main()