90 lines
2.2 KiB
R
90 lines
2.2 KiB
R
library("SPARQL")
|
|
library("stringi")
|
|
|
|
### Load configuration
|
|
source("config/config.R")
|
|
|
|
### Gets the bibliographicCitations which are currently in the db ##################
|
|
|
|
#' Export the bibliographic citations currently stored in the GeoERA thesaurus.
#'
#' Runs a SPARQL query against the GeoERA endpoint, marks all text columns of
#' the result as UTF-8 and writes the table to the dated Excel file
#' `<PFAD_DB_OUT>inDB-YYYY_MM_DD.xlsx`.
#'
#' @return The value of `write_xlsx()` for the written file.
read_current_geoera_lit_db <- function() {
  # Use http - https does not work for this endpoint.
  endpoint <- "http://resource.geolba.ac.at/PoolParty/sparql/geoera"

  # Concepts whose URI contains "/ref/", with their English label, their
  # citation and (optionally) a download link, ordered by label.
  query <- paste(
    c(
      "PREFIX skos:<http://www.w3.org/2004/02/skos/core#>",
      "PREFIX dcterms:<http://purl.org/dc/terms/>",
      "select *",
      "where {",
      "?uri a skos:Concept; skos:prefLabel ?L; dcterms:bibliographicCitation ?citation .",
      "filter(regex(str(?uri),\"/ref/\")) filter(lang(?L)=\"en\")",
      "optional {?uri dcterms:source ?downloadLink}",
      "}",
      "order by ?L",
      ""
    ),
    collapse = "\n"
  )

  qd <- SPARQL(endpoint, query)
  inDB <- qd$results

  # Declare the encoding column-wise instead of cell by cell. Restricting this
  # to character columns keeps an empty result set (or a non-text column) from
  # failing inside `Encoding<-`, which only accepts character vectors.
  text_columns <- which(vapply(inDB, is.character, logical(1)))
  for (column in text_columns) {
    Encoding(inDB[[column]]) <- "UTF-8"
  }

  pfad <- paste0(PFAD_DB_OUT, "inDB-", format(Sys.Date(), "%Y_%m_%d"), ".xlsx")

  # NOTE(review): write_xlsx() is not attached in the visible part of this
  # file; presumably it is loaded via config/config.R - confirm.
  write_xlsx(inDB, pfad)
}
|
|
|
|
|
|
#' Load today's export of the GeoERA literature database.
#'
#' Reads the dated Excel file `<PFAD_DB_OUT>inDB-YYYY_MM_DD.xlsx`. If the file
#' does not exist yet, `read_current_geoera_lit_db()` is called once to create
#' it. An error is raised if the file is still missing afterwards, instead of
#' recursing without bound.
#'
#' @return The table returned by `read_excel()` for today's export.
get_current_geoera_lit_db <- function() {
  # Same naming scheme that read_current_geoera_lit_db() writes to.
  export_path <- function() {
    paste0(PFAD_DB_OUT, "inDB-", format(Sys.Date(), "%Y_%m_%d"), ".xlsx")
  }

  pfad <- export_path()

  if (!file.exists(pfad)) {
    read_current_geoera_lit_db()

    # Recompute the path: the date may have changed while the export ran.
    pfad <- export_path()
    if (!file.exists(pfad)) {
      stop(
        "Export of the literature database did not create '", pfad, "'.",
        call. = FALSE
      )
    }
  }

  # NOTE(review): read_excel() is not attached in the visible part of this
  # file; presumably it is loaded via config/config.R - confirm.
  read_excel(pfad)
}
|
|
|
|
## Not my own function - taken from https://rdrr.io/cran/retractcheck/src/R/utils.R
|
|
#' Extract all DOIs contained in a vector of strings.
#'
#' @param strings Character vector (or anything `as.character()` can coerce)
#'   to search. `NA` elements and empty input are allowed and contribute no
#'   matches.
#' @return An unnamed character vector with every DOI found, in order of
#'   appearance across `strings`, or `NULL` when nothing matches.
find_doi <- function(strings) {
  strings <- as.character(strings)

  # "10.", a 4-9 digit registrant code, "/", then the suffix characters;
  # matched case-insensitively.
  doi_pattern <- "10\\.\\d{4,9}/[-._;()/:A-Z0-9]+"
  doi_loc <- gregexpr(
    pattern = doi_pattern,
    text = strings,
    perl = TRUE,
    ignore.case = TRUE
  )

  # regmatches() yields character(0) for elements without a match (including
  # NA), so no manual index arithmetic or result growing is needed.
  res <- unlist(regmatches(strings, doi_loc), use.names = FALSE)

  # Keep the historical contract: NULL (not character(0)) when nothing matched.
  if (length(res) == 0L) {
    return(NULL)
  }
  res
}
|
|
|
|
|
|
## Not my own function - taken from https://stackoverflow.com/questions/52911812/check-if-url-exists-in-r
|
|
#' Check whether a URL can be opened for reading.
#'
#' @param url_in A single URL string.
#' @param t Timeout in seconds. It is applied through `options(timeout = )`
#'   for the duration of the call, because `open()` on a connection has no
#'   timeout argument and silently ignores one passed via `...`.
#' @return `TRUE` if the connection could be opened, `FALSE` otherwise
#'   (including when `url_in` is not a usable URL at all).
valid_url <- function(url_in, t = 2) {
  # Set the timeout before the connection is created and restore it on exit.
  old_options <- options(timeout = t)
  on.exit(options(old_options), add = TRUE)

  # url() itself can fail (e.g. NA or a non-scalar argument); treat that as
  # "not valid" instead of letting the error escape.
  con <- tryCatch(url(url_in), error = function(e) NULL)
  if (is.null(con)) {
    return(FALSE)
  }
  # Always release the connection, whether or not it could be opened.
  on.exit(suppressWarnings(try(close(con), silent = TRUE)), add = TRUE)

  tryCatch(
    {
      suppressWarnings(open(con, open = "rt"))
      TRUE
    },
    error = function(e) FALSE
  )
}
|