Russell Jarvis / ScienceAccess / Commits / 87c6894a

Commit 87c6894a authored Dec 25, 2021 by Russell Jarvis

update all

parent d6e44a55
Changes: 2 files
science_access/enter_author_name.py (view file @ 87c6894a)
...
...
@@ -31,6 +31,9 @@ from science_access.word_cloud_by_word_len import generate_from_lengths
from science_access.utils import check_passive
import plotly.graph_objects as go
from typing import List, Any
import pandas as pd

theme = px.colors.diverging.Portland
colors = [theme[-1], theme[-2]]
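The two palette entries above are simply the last two colors of Plotly's Portland diverging scale. A minimal sketch, not part of this commit, of how such a pair of end colors could be applied to two overlaid histogram traces; the data (sample_a, sample_b) and the figure are illustrative placeholders:

import plotly.express as px
import plotly.graph_objects as go

theme = px.colors.diverging.Portland
colors = [theme[-1], theme[-2]]

# Hypothetical data purely for illustration.
sample_a = [10.2, 11.5, 9.8, 12.1]
sample_b = [14.0, 13.2, 15.5, 12.9]

fig = go.Figure()
fig.add_trace(go.Histogram(x=sample_a, marker_color=colors[0], name="sample_a"))
fig.add_trace(go.Histogram(x=sample_b, marker_color=colors[1], name="sample_b"))
fig.update_layout(barmode="overlay")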
...
...
@@ -211,12 +214,6 @@ def create_giant_strings(ar, not_want_list):
    return sci_corpus


def make_clickable(link):
    # target _blank to open new window
    # extract clickable text to display for your link
    text = link  # .split('=')[1]
    return f'<a target="_blank" href="{link}">{text}</a>'


def extra_options(ar, trainingDats, df1):
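For reference, make_clickable only wraps a URL string in an HTML anchor so that the pandas to_html output rendered via Streamlit shows a clickable link. A short usage sketch with a placeholder URL:

# Hypothetical call purely for illustration.
url = "https://example.org/paper"
html = make_clickable(url)
# html == '<a target="_blank" href="https://example.org/paper">https://example.org/paper</a>'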
...
...
@@ -353,12 +350,22 @@ def grand_distribution_plot(ar, scraped_labels, standard_sci, df0, author_name="
    return df1, fig


from typing import List, Any
import pandas as pd

# import streamlit as st
# List
def push_frame_to_screen(contents: Any, readability_vector: List) -> pd.DataFrame():
    def make_clickable(link):
        # target _blank to open new window
        # extract clickable text to display for your link
        text = link  # .split('=')[1]
        return f'<a target="_blank" href="{link}">{text}</a>'

    df_links["Web_Link"] = contents["Web_Link"]
    df_links["Reading_Level"] = contents["Reading_Level"]
    df_links.drop_duplicates(subset="Web_Link", inplace=True)
    df_links["Web_Link"] = df_links["Web_Link"].apply(make_clickable)
    df_links = df_links.to_html(escape=False)
    st.write(df_links, unsafe_allow_html=True)


def push_frame_to_screen(contents, readability_vector):  # -> pd.DataFrame():
    if type(contents) is type(list()):
        df_links = pd.DataFrame()
        df_links["Web_Link"] = pd.Series(contents)
...
...
@@ -367,19 +374,16 @@ def push_frame_to_screen(contents: Any, readability_vector: List) -> pd.DataFram
        df_links["Web_Link"] = df_links["Web_Link"].apply(make_clickable)
        df_links = df_links.to_html(escape=False)
        st.write(df_links, unsafe_allow_html=True)
    if type(contents) is type(pd.DataFrame()):
    else:
        df_links = pd.DataFrame()
        try:
            df_links["Web_Link"] = contents["Web_Link"]
            df_links["Reading_Level"] = contents["Reading_Level"]
            df_links.drop_duplicates(subset="Web_Link", inplace=True)
            df_links["Web_Link"] = df_links["Web_Link"].apply(make_clickable)
            df_links = df_links.to_html(escape=False)
            st.write(df_links, unsafe_allow_html=True)
        except:
            pass
        #try:
        df_links["Web_Link"] = contents["Web_Link"]
        df_links["Reading_Level"] = contents["Reading_Level"]
        df_links.drop_duplicates(subset="Web_Link", inplace=True)
        df_links["Web_Link"] = df_links["Web_Link"].apply(make_clickable)
        df_links = df_links.to_html(escape=False)
        st.write(df_links, unsafe_allow_html=True)
    return df_links
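Taken together, the reworked push_frame_to_screen accepts either a plain list of URLs or a frame that already has Web_Link and Reading_Level columns, turns the links into HTML anchors, and writes the resulting table into the Streamlit page. A minimal usage sketch with placeholder data, assuming it runs inside a Streamlit script:

import pandas as pd

# Hypothetical inputs purely for illustration.
contents = pd.DataFrame(
    {
        "Web_Link": ["https://example.org/a", "https://example.org/b"],
        "Reading_Level": [14.2, 11.7],
    }
)
readability_vector = [14.2, 11.7]

# Renders an HTML table of clickable links in the running Streamlit app.
push_frame_to_screen(contents, readability_vector)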
...
...
science_access/online_app_backend.py (view file @ 87c6894a)
...
...
@@ -17,6 +17,9 @@ from tqdm.auto import tqdm
import streamlit as st
from .t_analysis import text_proc
import streamlit as st
from dask import compute


class tqdm:
    """
...
...
@@ -145,7 +148,6 @@ def semantic_scholar_alias(NAME):
    return aliases


import streamlit as st


def visit_semantic_scholar_abstracts(NAME, tns, more_links):
...
...
@@ -159,28 +161,31 @@ def visit_semantic_scholar_abstracts(NAME, tns, more_links):
    dois, coauthors, titles, visit_urls = author_to_urls(NAME)
    for d in tqdm(dois, title="visiting abstracts"):
        paper = sch.paper(d, timeout=8)
        urlDat = {}
        if "citationVelocity" in paper.keys():
            urlDat["citationVelocity"] = paper["citationVelocity"]
        if "fieldsOfStudy" in paper.keys():
            urlDat["fieldsOfStudy"] = str(paper["fieldsOfStudy"])
        if "numCitedBy" in paper.keys():
            urlDat["numCitedBy"] = paper["numCitedBy"]
        # urlDat["influentialCitationCount"] = paper["influentialCitationCount"]
        urlDat["semantic"] = True
        if "url" in paper.keys():
            urlDat["link"] = paper["title"]
        if aliases is None:
            if "aliases" in paper.keys():
                urlDat["aliases"] = paper["aliases"]
        else:
            pass
        if "abstract" in paper.keys():
            urlDat = text_proc(str(paper["abstract"]), urlDat)
        author_results.append(urlDat)
        try:
            paper = sch.paper(d, timeout=16)
            urlDat = {}
            if "citationVelocity" in paper.keys():
                urlDat["citationVelocity"] = paper["citationVelocity"]
            if "fieldsOfStudy" in paper.keys():
                urlDat["fieldsOfStudy"] = str(paper["fieldsOfStudy"])
            if "numCitedBy" in paper.keys():
                urlDat["numCitedBy"] = paper["numCitedBy"]
            # urlDat["influentialCitationCount"] = paper["influentialCitationCount"]
            urlDat["semantic"] = True
            if "url" in paper.keys():
                urlDat["link"] = paper["title"]
            if aliases is None:
                if "aliases" in paper.keys():
                    urlDat["aliases"] = paper["aliases"]
            else:
                pass
            if "abstract" in paper.keys():
                urlDat = text_proc(str(paper["abstract"]), urlDat)
            author_results.append(urlDat)
        except:
            pass
    author_results = [
        urlDat for urlDat in author_results if not isinstance(urlDat, type(None))
    ]
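The hunk above wraps the per-DOI Semantic Scholar lookup in a try/except and raises the request timeout from 8 to 16 seconds, so one failed or slow paper no longer aborts the whole author scrape. A minimal sketch of that skip-on-failure pattern, assuming sch is the semanticscholar client imported elsewhere in this module and using a placeholder identifier:

# Placeholder DOI purely for illustration.
example_doi = "10.0000/example-doi"

record = None
try:
    paper = sch.paper(example_doi, timeout=16)
    record = {"semantic": True}
    if "abstract" in paper.keys():
        # text_proc (imported above) would normally score the abstract here.
        record["abstract"] = str(paper["abstract"])
except Exception:
    # Mirrors the commit: lookups that time out or error are skipped.
    pass

results = [r for r in [record] if r is not None]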
...
...
@@ -188,7 +193,6 @@ def visit_semantic_scholar_abstracts(NAME, tns, more_links):
    return author_results, visit_urls


from dask import compute


def visit_link_unpaywall(NAME):  # ), tns, visit_urls):
...
...