From 1f9ee30dbda3ae2aba26040975cfe0362c5900c8 Mon Sep 17 00:00:00 2001 From: Russell Jarvis <rjjarvis@asu.edu> Date: Wed, 24 Jun 2020 09:59:38 +1000 Subject: [PATCH] changes --- Review note: options.binary_location now reads the FIREFOX_BIN environment variable and falls back to /app/vendor/firefox/firefox; the path was previously passed to os.environ.get() as the variable name, which returns None. scrape.py | 23 +++++++++++++---------- setup.sh | 2 +- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/scrape.py b/scrape.py index 8dc21be..fe7a614 100644 --- a/scrape.py +++ b/scrape.py @@ -49,25 +49,28 @@ import urllib.request from io import StringIO import io from selenium import webdriver -#from selenium.webdriver.firefox.options import Options +from selenium.webdriver.firefox.options import Options from selenium.common.exceptions import NoSuchElementException from selenium import webdriver import os - +''' chrome_options = webdriver.ChromeOptions() chrome_options.binary_location = os.environ.get("GOOGLE_CHROME_BIN") chrome_options.add_argument("--headless") chrome_options.add_argument("--disable-dev-shm-usage") chrome_options.add_argument("--no-sandbox") -driver = webdriver.Chrome(executable_path=os.environ.get("CHROMEDRIVER_PATH"), chrome_options=chrome_options) +driver = webdriver.Chrome(executable_path=os.environ.get("CHROMEDRIVER_PATH"), chrome_options=chrome_options) ''' -os.system("which firefox") +#os.system("which firefox") options = Options() #options.headless = True +options.binary_location = os.environ.get("FIREFOX_BIN", "/app/vendor/firefox/firefox") options.add_argument("--headless") +options.add_argument("--disable-dev-shm-usage") +options.add_argument("--no-sandbox") import os @@ -77,19 +80,19 @@ try: except: try: #driver = webdriver.Firefox(options=options) - GECKODRIVER_PATH=str(os.getcwd())+str("/geckodriver") + #GECKODRIVER_PATH=str(os.getcwd())+str("/geckodriver") driver = webdriver.Firefox(options=options,executable_path=GECKODRIVER_PATH) except: #GECKODRIVER_PATH="/app/vendor/geckodriver/geckodriver" #driver = webdriver.Firefox(options=options,executable_path=GECKODRIVER_PATH) #os.system("wget wget 
https://ftp.mozilla.org/pub/firefox/releases/45.0.2/linux-x86_64/en-GB/firefox-45.0.2.tar.bz2") - os.system("wget https://github.com/mozilla/geckodriver/releases/download/v0.26.0/geckodriver-v0.26.0-linux64.tar.gz") - os.system("tar -xf geckodriver-v0.26.0-linux64.tar.gz") + #os.system("wget https://github.com/mozilla/geckodriver/releases/download/v0.26.0/geckodriver-v0.26.0-linux64.tar.gz") + #os.system("tar -xf geckodriver-v0.26.0-linux64.tar.gz") #os.system("tar xvf firefox-45.0.2.tar.bz2") - GECKODRIVER_PATH=str(os.getcwd())+str("/geckodriver") - driver = webdriver.Firefox(options=options,executable_path=GECKODRIVER_PATH) -''' + #GECKODRIVER_PATH=str(os.getcwd())+str("/geckodriver") + driver = webdriver.Firefox(options=options,executable_path="/app/vendor/geckodriver/geckodriver") + rsrcmgr = PDFResourceManager() diff --git a/setup.sh b/setup.sh index d8a0aa9..a5ecdeb 100644 --- a/setup.sh +++ b/setup.sh @@ -4,7 +4,7 @@ # download and install latest geckodriver for linux or mac. # required for selenium to drive a firefox browser. sudo apt-get update -sudo apt-get install jq wget #chromium-chromedriver +sudo apt-get install jq wget chromium-chromedriver firefox sudo python3 -m pip install -r requirements.txt -- GitLab