#!/bin/env python3
# a tool for collecting mastodon /about/ & ToS and CoC pages.
# (c) roel roscam abbing 2020
# gplv3
import os, time, json

# Load the scrape results that the rest of the script iterates over.
# The `with` block closes the file handle deterministically, and the explicit
# encoding avoids depending on the platform's locale when decoding the JSON.
with open('instance_scrape.json', encoding='utf-8') as f:
    scrape_data = json.load(f)

# Base directory for captured pages, and the sub-directory used for instances
# where a terms-of-service text could be extracted.
output_dir = 'about_pages'
tos_dir = os.path.join(output_dir, 'with_tos')

# makedirs creates output_dir on the way to tos_dir. exist_ok keeps re-runs
# working and also covers the case where output_dir already exists but
# tos_dir does not (the previous nested check skipped tos_dir in that case).
os.makedirs(tos_dir, exist_ok=True)


def find_mastodon_instances(scrape_data):
    """Return the keys of *scrape_data* whose nodeinfo reports Mastodon.

    An entry is selected when ``entry['nodeinfo']['software']['name']`` is a
    string equal to ``'mastodon'``, compared case-insensitively.  Entries
    where any level of that path is missing, or is not of the expected type
    (for example a null ``nodeinfo``), are skipped instead of raising, so one
    malformed record cannot abort the whole run.

    Args:
        scrape_data: mapping of instance identifier to its scraped record.

    Returns:
        A list of matching keys, in the iteration order of *scrape_data*.
    """
    mastodon_instances = []

    for instance, record in scrape_data.items():
        # Walk the nested path one level at a time, falling back to None as
        # soon as a level is absent or not a dict.
        nodeinfo = record.get('nodeinfo') if isinstance(record, dict) else None
        software = nodeinfo.get('software') if isinstance(nodeinfo, dict) else None
        name = software.get('name') if isinstance(software, dict) else None

        if isinstance(name, str) and name.lower() == 'mastodon':
            mastodon_instances.append(instance)

    return mastodon_instances


def clear_cache(driver, timeout=60):
    """Wipe cookies and cache of a ChromeDriver session via Chrome's settings page.

    Opens ``chrome://settings/clearBrowserData``, waits up to *timeout*
    seconds for the clear button, clicks it, then waits for the button to
    disappear again before returning.

    NOTE(review): ``WebDriverWait`` and ``get_clear_browsing_button`` are not
    imported or defined in the visible part of this file -- confirm they are
    provided before enabling any call to this function.
    """
    driver.get('chrome://settings/clearBrowserData')

    waiter = WebDriverWait(driver, timeout)

    # block until the clear button can be located
    waiter.until(get_clear_browsing_button)

    # trigger the clearing
    clear_button = get_clear_browsing_button(driver)
    clear_button.click()

    # the button going away is the signal that we can return
    waiter.until_not(get_clear_browsing_button)


from selenium import webdriver # you need to set up a driver
from selenium.webdriver.firefox.options import Options

from selenium.webdriver.common.by import By
from selenium.common.exceptions import WebDriverException

# Run Firefox without a visible window.
options = Options()
options.add_argument('-headless')

browser = webdriver.Firefox(options=options)
#browser.set_window_size(1024, 768) # set the window size that you need

# For every Mastodon instance: load /about/more/, try to extract the
# terms-of-service text, then store a screenshot and the page source.
# Instances with extractable text go to tos_dir, all others to output_dir.
try:
    for mi in find_mastodon_instances(scrape_data):
        #clear_cache(browser)

        try:
            browser.get('https://{}/about/more/'.format(mi))
            # Grow the window to the page height plus a margin, presumably so
            # that the screenshot taken below covers the whole page.
            page = browser.find_element(By.TAG_NAME, 'html')
            height = page.size['height']
            browser.set_window_size(1400, height + 100)
        except WebDriverException as e:
            # An unreachable or broken instance should not abort the run.
            print('could not load', mi, e)
            continue

        # Best effort: pages without the expected elements simply have no
        # extractable text.
        try:
            bw = browser.find_element(By.CSS_SELECTOR, 'div .box-widget')
            about_text = bw.find_element(By.CSS_SELECTOR, 'div .rich-formatting')
            html = browser.execute_script('return arguments[0].innerHTML;', about_text)
        except WebDriverException as e:
            html = None
            print(e)

        # Choose the destination per instance instead of mutating the shared
        # output_dir, so one instance's result cannot leak into the next.
        target_dir = tos_dir if html else output_dir
        os.makedirs(target_dir, exist_ok=True)

        if html:
            with open(os.path.join(target_dir, mi + '_tos.txt'), 'w', encoding='utf-8') as f:
                f.write(html)

        time.sleep(0.1)
        print('taking screenshot of', mi)
        browser.save_screenshot(os.path.join(target_dir, mi + '.png'))
        # Explicit UTF-8 so page content outside the locale's charset
        # cannot raise UnicodeEncodeError.
        with open(os.path.join(target_dir, mi + '_about.html'), 'w', encoding='utf-8') as f:
            f.write(browser.page_source)
finally:
    # Always shut the headless browser down, including on errors or Ctrl-C.
    browser.quit()