...

Capture Full Screenshot from URL

In one project I needed to create snapshots of a list of websites. I decided to automate the job, and I have since reused the following code in many other projects.

This script interactively asks the user for a URL and an image file name, then saves a full-page screenshot of that URL under /tmp/ using the given file name.

 

#!/usr/bin/env python3

import datetime
import math
import os
import sys
import tempfile

# third-party imports
from PIL import Image
from selenium import webdriver
from time import sleep

def get_chrome_drive(driver_path=None):
    """
    Create a headless Chrome WebDriver.

    Chrome is started with ``--headless``, ``--hide-scrollbars`` (so that
    scrollbars do not show up in screenshots) and ``--no-sandbox``.

    :param driver_path: path to the ``chromedriver`` executable. When omitted,
        ``/home/bmzi/bin/chromedriver`` is used if that file exists; otherwise
        the bare name ``chromedriver`` is passed on so it can be looked up on
        ``PATH``.
    :return: the ``webdriver.Chrome`` instance; the caller is responsible for
        calling ``quit()`` on it.
    """
    if driver_path is None:
        driver_path = '/home/bmzi/bin/chromedriver'
        # The historical default is machine-specific; do not fail outright on
        # other machines when the driver is installed somewhere on PATH.
        if not os.path.isfile(driver_path):
            driver_path = 'chromedriver'

    options = webdriver.ChromeOptions()
    # Passing the flag explicitly is equivalent to ``options.headless = True``
    # without depending on that (since deprecated) property.
    options.add_argument('--headless')
    options.add_argument('--hide-scrollbars')
    options.add_argument('--no-sandbox')

    # ``options`` replaces the deprecated ``chrome_options`` keyword.
    # NOTE(review): ``executable_path`` is itself gone in recent Selenium 4
    # releases (a ``Service`` object is expected there) -- confirm the
    # Selenium version this script is pinned to.
    return webdriver.Chrome(
        executable_path=driver_path,
        options=options,
    )
 
def get_firefox_drive(driver_path=None):
    """
    Create a headless Firefox WebDriver.

    :param driver_path: path to the ``geckodriver`` executable. Defaults to
        ``./geckodriver``, i.e. a path relative to the current working
        directory.
    :return: the ``webdriver.Firefox`` instance; the caller is responsible for
        calling ``quit()`` on it.
    """
    if driver_path is None:
        driver_path = './geckodriver'

    options = webdriver.FirefoxOptions()
    options.add_argument('-headless')

    # ``options`` replaces the deprecated ``firefox_options`` keyword.
    # NOTE(review): ``executable_path`` is itself gone in recent Selenium 4
    # releases (a ``Service`` object is expected there) -- confirm the
    # Selenium version this script is pinned to.
    return webdriver.Firefox(
        executable_path=driver_path,
        options=options,
    )
 
def save_fullpage_screenshot(driver, url, output_path, tmp_prefix='selenium_screenshot', tmp_suffix='.png'):
    """
    Creates a full page screenshot using a selenium driver by scrolling and taking multiple screenshots,
    and stitching them into a single image.

    The page is captured one viewport at a time into temporary files, which
    are then pasted top-to-bottom onto one canvas and removed again.

    :param driver: selenium WebDriver used to load ``url``, run the scrolling
        JavaScript and take the viewport screenshots.
    :param url: address of the page to capture.
    :param output_path: file the stitched image is written to; the image
        format is derived from its extension by Pillow.
    :param tmp_prefix: file name prefix for the temporary per-viewport images.
    :param tmp_suffix: file name suffix for the temporary per-viewport images.
    :return: ``output_path``.
    :raises ValueError: if the browser reports a missing or non-positive
        viewport or page height.
    :raises OSError: if the driver reports that a viewport screenshot could
        not be written.
    """

    # get the page
    driver.get(url)

    # get dimensions (both in CSS pixels)
    window_height = driver.execute_script('return window.innerHeight')
    scroll_height = driver.execute_script('return document.body.parentNode.scrollHeight')

    if not window_height or not scroll_height or window_height < 0 or scroll_height < 0:
        raise ValueError(
            'cannot capture {0!r}: viewport height {1!r}, page height {2!r}'.format(
                url, window_height, scroll_height))

    num = int(math.ceil(float(scroll_height) / float(window_height)))

    # Part of the page (CSS pixels) that only the last viewport can show.
    # Zero means the page is an exact multiple of the viewport height, in
    # which case the last screenshot must be used uncropped.
    remainder = scroll_height % window_height

    tempfiles = []
    try:
        # take screenshots; temp files are created inside the try block so
        # that a failure part-way through still removes the earlier ones
        for i in range(num):
            fd, path = tempfile.mkstemp(prefix='{0}-{1:02}-'.format(tmp_prefix, i + 1), suffix=tmp_suffix)
            os.close(fd)
            tempfiles.append(path)

            if i > 0:
                driver.execute_script('window.scrollBy(%d,%d)' % (0, window_height))
            # save_screenshot signals an I/O failure by returning False
            if driver.save_screenshot(path) is False:
                raise OSError('could not write screenshot to {0}'.format(path))

        # stitch images together
        stitched = None
        scale = 1.0
        tile_height = 0
        for i, path in enumerate(tempfiles):
            with Image.open(path) as img:
                w, h = img.size

                if stitched is None:
                    # Screenshot pixels per CSS pixel (1.0 unless the browser
                    # runs with a device pixel ratio other than 1). All
                    # positions on the canvas are in screenshot pixels.
                    scale = float(h) / float(window_height)
                    tile_height = h
                    stitched = Image.new('RGB', (w, int(round(scroll_height * scale))))

                tile = img
                if i == num - 1 and num > 1 and remainder:
                    # The browser cannot scroll past the end of the page, so
                    # the last screenshot overlaps the previous one; keep only
                    # its bottom part, which has not been captured yet.
                    tile = img.crop((0, h - int(round(remainder * scale)), w, h))

                stitched.paste(tile, (0, i * tile_height))
        stitched.save(output_path)
    finally:
        # cleanup
        for path in tempfiles:
            if os.path.isfile(path):
                os.remove(path)

    return output_path
 
 
def main():
    """
    Interactively capture one page.

    Prompts for a URL and an image file name, opens the URL with the driver
    returned by ``get_chrome_drive()`` in a 1280x768 window and writes the
    full-page screenshot to ``/tmp/<file name>``. A file name entered without
    an extension gets ``.png`` appended.
    """
    url = input('Enter url: ')
    filename = input('Enter image file name: ')

    # The image is saved through Pillow, which picks the format from the file
    # extension and fails when there is none; default to PNG in that case.
    if not os.path.splitext(filename)[1]:
        filename += '.png'
    filename = '/tmp/' + filename

    driver = get_chrome_drive()
    try:
        driver.set_window_size(1280, 768)
        save_fullpage_screenshot(driver, url, filename)
    finally:
        # Always shut the browser down, even if the capture failed, so no
        # browser/driver process is left running.
        driver.quit()

# Run the interactive capture only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()