diff --git a/science_access/enter_author_name.py b/science_access/enter_author_name.py
index 7cddce82d993f3c60158b0d785555445205441eb..ee997a6e311b7f363c27c5cf6a0baec56262f4d9 100644
--- a/science_access/enter_author_name.py
+++ b/science_access/enter_author_name.py
@@ -31,6 +31,9 @@ from science_access.word_cloud_by_word_len import generate_from_lengths
 from science_access.utils import check_passive
 import plotly.graph_objects as go
 
+from typing import List, Any
+import pandas as pd
+
 theme = px.colors.diverging.Portland
 colors = [theme[-1], theme[-2]]
 
@@ -211,12 +214,6 @@ def create_giant_strings(ar, not_want_list):
     return sci_corpus
 
 
-def make_clickable(link):
-    # target _blank to open new window
-    # extract clickable text to display for your link
-    text = link  # .split('=')[1]
-    return f'<a target="_blank" href="{link}">{text}</a>'
-
 
 
 def extra_options(ar, trainingDats, df1):
@@ -353,12 +350,16 @@ def grand_distribution_plot(ar, scraped_labels, standard_sci, df0, author_name="
     return df1, fig
 
 
 
-from typing import List, Any
-import pandas as pd
-# import streamlit as st
-# List
-def push_frame_to_screen(contents: Any, readability_vector: List) -> pd.DataFrame():
+
+def make_clickable(link):
+    # target _blank to open new window
+    # extract clickable text to display for your link
+    text = link  # .split('=')[1]
+    return f'<a target="_blank" href="{link}">{text}</a>'
+
+
+def push_frame_to_screen(contents, readability_vector):
     if type(contents) is type(list()):
         df_links = pd.DataFrame()
         df_links["Web_Link"] = pd.Series(contents)
@@ -367,19 +368,15 @@ def push_frame_to_screen(contents: Any, readability_vector: List) -> pd.DataFram
         df_links["Web_Link"] = df_links["Web_Link"].apply(make_clickable)
         df_links = df_links.to_html(escape=False)
         st.write(df_links, unsafe_allow_html=True)
-
-    if type(contents) is type(pd.DataFrame()):
+    else:
         df_links = pd.DataFrame()
-        try:
-            df_links["Web_Link"] = contents["Web_Link"]
-            df_links["Reading_Level"] = contents["Reading_Level"]
-            df_links.drop_duplicates(subset="Web_Link", inplace=True)
-            df_links["Web_Link"] = df_links["Web_Link"].apply(make_clickable)
-            df_links = df_links.to_html(escape=False)
-            st.write(df_links, unsafe_allow_html=True)
-
-        except:
-            pass
+        df_links["Web_Link"] = contents["Web_Link"]
+        df_links["Reading_Level"] = contents["Reading_Level"]
+        df_links.drop_duplicates(subset="Web_Link", inplace=True)
+        df_links["Web_Link"] = df_links["Web_Link"].apply(make_clickable)
+        df_links = df_links.to_html(escape=False)
+        st.write(df_links, unsafe_allow_html=True)
+
     return df_links
 
 
diff --git a/science_access/online_app_backend.py b/science_access/online_app_backend.py
index 2628a16f780deb4da8808958aeac274ab7b32bea..01d06cc37649df8fd54a731dcb1f2553a250748f 100644
--- a/science_access/online_app_backend.py
+++ b/science_access/online_app_backend.py
@@ -17,6 +17,8 @@ from tqdm.auto import tqdm
 import streamlit as st
 from .t_analysis import text_proc
 
+from dask import compute
+
 
 class tqdm:
     """
@@ -145,7 +147,6 @@ def semantic_scholar_alias(NAME):
     return aliases
 
 
-import streamlit as st
 
 
 def visit_semantic_scholar_abstracts(NAME, tns, more_links):
@@ -159,28 +160,33 @@ def visit_semantic_scholar_abstracts(NAME, tns, more_links):
     dois, coauthors, titles, visit_urls = author_to_urls(NAME)
     for d in tqdm(dois, title="visiting abstracts"):
-        paper = sch.paper(d, timeout=8)
-
-        urlDat = {}
-        if "citationVelocity" in paper.keys():
-            urlDat["citationVelocity"] = paper["citationVelocity"]
-        if "fieldsOfStudy" in paper.keys():
-            urlDat["fieldsOfStudy"] = str(paper["fieldsOfStudy"])
-        if "numCitedBy" in paper.keys():
-            urlDat["numCitedBy"] = paper["numCitedBy"]
-        # urlDat["influentialCitationCount"] = paper["influentialCitationCount"]
-        urlDat["semantic"] = True
-
-        if "url" in paper.keys():
-            urlDat["link"] = paper["title"]
-        if aliases is None:
-            if "aliases" in paper.keys():
-                urlDat["aliases"] = paper["aliases"]
-            else:
-                pass
-        if "abstract" in paper.keys():
-            urlDat = text_proc(str(paper["abstract"]), urlDat)
-        author_results.append(urlDat)
+        # a failed or malformed Semantic Scholar lookup should only skip
+        # this DOI, not abort the whole author scrape
+        try:
+            paper = sch.paper(d, timeout=16)
+
+            urlDat = {}
+            if "citationVelocity" in paper.keys():
+                urlDat["citationVelocity"] = paper["citationVelocity"]
+            if "fieldsOfStudy" in paper.keys():
+                urlDat["fieldsOfStudy"] = str(paper["fieldsOfStudy"])
+            if "numCitedBy" in paper.keys():
+                urlDat["numCitedBy"] = paper["numCitedBy"]
+            # urlDat["influentialCitationCount"] = paper["influentialCitationCount"]
+            urlDat["semantic"] = True
+
+            if "url" in paper.keys():
+                urlDat["link"] = paper["title"]
+            if aliases is None:
+                if "aliases" in paper.keys():
+                    urlDat["aliases"] = paper["aliases"]
+                else:
+                    pass
+            if "abstract" in paper.keys():
+                urlDat = text_proc(str(paper["abstract"]), urlDat)
+            author_results.append(urlDat)
+        except Exception:
+            pass
 
     author_results = [
         urlDat for urlDat in author_results if not isinstance(urlDat, type(None))
     ]
@@ -188,7 +194,6 @@ def visit_semantic_scholar_abstracts(NAME, tns, more_links):
     return author_results, visit_urls
 
 
-from dask import compute
 
 
 def visit_link_unpaywall(NAME):  # ), tns, visit_urls):
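
For reviewers, a minimal usage sketch (not part of the patch) of how the reworked push_frame_to_screen handles both input shapes. The URLs and reading scores below are made-up placeholder data, and the import path assumes the package layout shown above; outside a running Streamlit session the st.write calls are no-ops.

    import pandas as pd

    from science_access.enter_author_name import push_frame_to_screen

    # list input: a list of URLs plus a parallel vector of reading scores
    links = ["https://example.org/a", "https://example.org/b"]
    scores = [14.2, 11.7]
    push_frame_to_screen(links, scores)

    # DataFrame input: both columns already present (exercises the new else branch)
    frame = pd.DataFrame({"Web_Link": links, "Reading_Level": scores})
    push_frame_to_screen(frame, scores)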