Added a script that finds URLs and puts them in a new column
This commit is contained in:
parent
4f6f247986
commit
fba552577e
1 changed files with 28 additions and 0 deletions
28
find_URLs.R
Normal file
28
find_URLs.R
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
library("stringr")
|
||||||
|
library("readxl")
|
||||||
|
library("writexl")
|
||||||
|
|
||||||
|
# Load the configuration
|
||||||
|
source("config/config.R")
|
||||||
|
|
||||||
|
# Regular expression for http/https URLs: the scheme "http://" or "https://"
# followed by one or more letters, digits, listed punctuation characters or
# %XX percent-escapes (two hex digits).
# NOTE(review): inside the class [$-_@.&+] the sequence "$-_" is a character
# range (ASCII "$" through "_"), not three literal characters. That range is
# what lets "/", ":", "?" and "=" match; it also admits characters such as
# "<", ">", "[" and "]". "#" and "~" fall outside every class, so a match
# stops in front of them. Trailing sentence punctuation such as "." or ","
# is matched as part of the URL.
url_pattern <- "http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
|
||||||
|
|
||||||
|
# Read the Excel file with the citations that are to be added
|
||||||
|
# Read distincts_kontrolliert.xlsx from PFAD_EXCEL (defined outside this
# file) and convert the result to a base data.frame.
df <- data.frame(
  read_excel(paste0(PFAD_EXCEL, "distincts_kontrolliert.xlsx"))
)
# Convert every column to UTF-8 encoded text. stri_enc_toutf8() is exported
# by stringi, which library("stringr") does not attach, so it is called
# through its namespace to avoid a "could not find function" error.
df <- data.frame(lapply(df, stringi::stri_enc_toutf8))
|
||||||
|
|
||||||
|
# Create a new column for the download links
|
||||||
|
# Search every citation for a link and store the first match of url_pattern
# per row in the downloadLink column.
# str_extract() is vectorised and returns NA where nothing matches, so no
# NA pre-fill or explicit loop is needed. This also avoids the 1:length(x)
# pitfall, which iterates over c(1, 0) and fails when the table is empty.
if (!"bibliographicCitation" %in% names(df)) {
  stop("Column 'bibliographicCitation' not found in input data", call. = FALSE)
}
df$downloadLink <- str_extract(df[["bibliographicCitation"]], url_pattern)
|
||||||
|
|
||||||
|
# Write the result to PFAD_OUT; the file name carries the current date
# formatted as YYYY_MM_DD.
write_xlsx(
  df,
  paste0(
    PFAD_OUT,
    "distincts_automatisch_mit_URL",
    format(Sys.time(), "%Y_%m_%d"),
    ".xlsx"
  )
)
|
||||||
Loading…
Add table
Add a link
Reference in a new issue