From 1f9ee30dbda3ae2aba26040975cfe0362c5900c8 Mon Sep 17 00:00:00 2001
From: Russell Jarvis <rjjarvis@asu.edu>
Date: Wed, 24 Jun 2020 09:59:38 +1000
Subject: [PATCH] Use headless Firefox with vendored geckodriver instead of Chrome

---
 scrape.py | 23 +++++++++++++----------
 setup.sh  |  2 +-
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/scrape.py b/scrape.py
index 8dc21be..fe7a614 100644
--- a/scrape.py
+++ b/scrape.py
@@ -49,25 +49,28 @@ import urllib.request
 from io import StringIO
 import io
 from selenium import webdriver
-#from selenium.webdriver.firefox.options import Options
+from selenium.webdriver.firefox.options import Options
 from selenium.common.exceptions import NoSuchElementException
 
 
 from selenium import webdriver
 import os
-
+'''
 chrome_options = webdriver.ChromeOptions()
 chrome_options.binary_location = os.environ.get("GOOGLE_CHROME_BIN")
 chrome_options.add_argument("--headless")
 chrome_options.add_argument("--disable-dev-shm-usage")
 chrome_options.add_argument("--no-sandbox")
-driver = webdriver.Chrome(executable_path=os.environ.get("CHROMEDRIVER_PATH"), chrome_options=chrome_options)
 
+driver = webdriver.Chrome(executable_path=os.environ.get("CHROMEDRIVER_PATH"), chrome_options=chrome_options)
 '''
-os.system("which firefox")
+#os.system("which firefox")
 options = Options()
 #options.headless = True
+options.binary_location = "/app/vendor/firefox/firefox"  # literal path: os.environ.get() treated it as a variable name and returned None
 options.add_argument("--headless")
+options.add_argument("--disable-dev-shm-usage")
+options.add_argument("--no-sandbox")
 
 import os
 
@@ -77,19 +80,19 @@ try:
 except:
     try:
         #driver = webdriver.Firefox(options=options)
-        GECKODRIVER_PATH=str(os.getcwd())+str("/geckodriver")
+        #GECKODRIVER_PATH=str(os.getcwd())+str("/geckodriver")
         driver = webdriver.Firefox(options=options,executable_path=GECKODRIVER_PATH)
 
     except:
         #GECKODRIVER_PATH="/app/vendor/geckodriver/geckodriver"
         #driver = webdriver.Firefox(options=options,executable_path=GECKODRIVER_PATH)
         #os.system("wget wget https://ftp.mozilla.org/pub/firefox/releases/45.0.2/linux-x86_64/en-GB/firefox-45.0.2.tar.bz2")
-        os.system("wget https://github.com/mozilla/geckodriver/releases/download/v0.26.0/geckodriver-v0.26.0-linux64.tar.gz")
-        os.system("tar -xf geckodriver-v0.26.0-linux64.tar.gz")
+        #os.system("wget https://github.com/mozilla/geckodriver/releases/download/v0.26.0/geckodriver-v0.26.0-linux64.tar.gz")
+        #os.system("tar -xf geckodriver-v0.26.0-linux64.tar.gz")
         #os.system("tar xvf firefox-45.0.2.tar.bz2")
-        GECKODRIVER_PATH=str(os.getcwd())+str("/geckodriver")
-        driver = webdriver.Firefox(options=options,executable_path=GECKODRIVER_PATH)
-'''
+        #GECKODRIVER_PATH=str(os.getcwd())+str("/geckodriver")
+        driver = webdriver.Firefox(options=options,executable_path="/app/vendor/geckodriver/geckodriver")
+
 
 
 rsrcmgr = PDFResourceManager()
diff --git a/setup.sh b/setup.sh
index d8a0aa9..a5ecdeb 100644
--- a/setup.sh
+++ b/setup.sh
@@ -4,7 +4,7 @@
 # download and install latest geckodriver for linux or mac.
 # required for selenium to drive a firefox browser.
 sudo apt-get update
-sudo apt-get install jq wget #chromium-chromedriver
+sudo apt-get install jq wget chromium-chromedriver firefox
 
 
 sudo python3 -m pip install -r requirements.txt
-- 
GitLab