Style-Anpassungen und Pfade geändert

This commit is contained in:
Linsberger Christian 2021-09-02 11:08:10 +02:00
commit 3e372f3cb6
2 changed files with 45 additions and 44 deletions

View file

@ -18,25 +18,26 @@ setwd("C:/Users/linchr/ownCloud/GIT/URI_Replacement")
source("sparql.R") source("sparql.R")
#Datei in der ersetzt werden soll #Datei in der ersetzt werden soll
PFAD_EXCEL = "GC3D_Limits_ref.xlsx" FILENAME_EXCEL <- "GC3D_Limits_ref.xlsx"
PFAD_EXCEL <- "data/"
#Spaltennummer in denen ersetzt werden soll im Excel #Spaltennummer in denen ersetzt werden soll im Excel
SPALTEN = c(2,3,4,5,6) SPALTEN <- c(2, 3, 4, 5, 6)
#Spaltennummer mit der verglichen wird aus Datenbank (citations) #Spaltennummer mit der verglichen wird aus Datenbank (citations)
INHALTE_DB = 3 INHALTE_DB <- 3
#Spaltennummer der Inhalte die eingesetzt werden soll (uris) #Spaltennummer der Inhalte die eingesetzt werden soll (uris)
ID = 1 ID <- 1
############################## ##############################
### Einlesen ### ### Einlesen ###
inDB = get_current_geoera_lit_db() inDB <- get_current_geoera_lit_db()
to_replace_original = read_excel(PFAD_EXCEL) to_replace_original <- read_excel(paste(PFAD_EXCEL, FILENAME_EXCEL, sep=""))
to_replace_done = to_replace_original to_replace_done <- to_replace_original
### ###
@ -50,51 +51,51 @@ for(spalte_excel in SPALTEN[1]:SPALTEN[length(SPALTEN)]){
for (zeile_excel in 1:nrow(to_replace_original[, spalte_excel])) { for (zeile_excel in 1:nrow(to_replace_original[, spalte_excel])) {
current_excel = tolower(to_replace_original[zeile_excel,spalte_excel]) current_excel <- tolower(to_replace_original[zeile_excel, spalte_excel])
if (is.na(current_excel)) { if (is.na(current_excel)) {
next next
} }
if ((nchar(current_excel) < 20)) { if ((nchar(current_excel) < 20)) {
to_replace_done[zeile_excel, spalte_excel] = "" to_replace_done[zeile_excel, spalte_excel] <- ""
next next
} }
excel_search_all = unlist(strsplit(gsub("[^[:alnum:] ]", "", current_excel), " +")) excel_search_all <- unlist(strsplit(gsub("[^[:alnum:] ]", "", current_excel), " +"))
excel_search_numbers = str_extract_all(current_excel,"\\(?[0-9]+\\)?") excel_search_numbers <- str_extract_all(current_excel, "\\(?[0-9]+\\)?")
for (zeile_db in 1:nrow(inDB[, INHALTE_DB])) { for (zeile_db in 1:nrow(inDB[, INHALTE_DB])) {
current_db = tolower(inDB[zeile_db,INHALTE_DB]) current_db <- tolower(inDB[zeile_db,INHALTE_DB])
db_search_all = unlist(strsplit(gsub("[^[:alnum:] ]", "", current_db), " +")) db_search_all <- unlist(strsplit(gsub("[^[:alnum:] ]", "", current_db), " +"))
db_search_numbers = str_extract_all(current_db,"\\(?[0-9]+\\)?") db_search_numbers <- str_extract_all(current_db, "\\(?[0-9]+\\)?")
count_all = 0 count_all <- 0
count_numbers = 0 count_numbers <- 0
for (k in 1:length(excel_search_all)) { for (k in 1:length(excel_search_all)) {
if (excel_search_all[k] %in% db_search_all) { if (excel_search_all[k] %in% db_search_all) {
count_all = count_all + 1 count_all <- count_all + 1
} }
} }
for (l in 1:length(excel_search_numbers)) { for (l in 1:length(excel_search_numbers)) {
if (excel_search_numbers[l] %in% db_search_numbers) { if (excel_search_numbers[l] %in% db_search_numbers) {
count_numbers =+ count_numbers + 1 count_numbers <- count_numbers + 1
} }
} }
percent_match_all = count_all/ length(excel_search_all) percent_match_all <- count_all / length(excel_search_all)
percent_match_numbers = count_numbers/ length(excel_search_numbers) percent_match_numbers <- count_numbers / length(excel_search_numbers)
if ((percent_match_all > 0.80) && (percent_match_numbers > 0.99)) { if ((percent_match_all > 0.80) && (percent_match_numbers > 0.99)) {
to_replace_done[zeile_excel, spalte_excel] = str_remove_all(inDB[zeile_db,ID],"[<>]") to_replace_done[zeile_excel, spalte_excel] <- str_remove_all(inDB[zeile_db,ID], "[<>]")
} }
@ -105,7 +106,7 @@ for(spalte_excel in SPALTEN[1]:SPALTEN[length(SPALTEN)]){
toc() toc()
pfad_output = paste("replaced_",PFAD_EXCEL, sep="") pfad_output <- paste(PFAD_EXCEL, "replaced_" , FILENAME_EXCEL, sep="")
write_xlsx(to_replace_done, pfad_output) write_xlsx(to_replace_done, pfad_output)

View file

@ -8,7 +8,7 @@ read_current_geoera_lit_db <- function() {
## http verwenden - https funktioniert nicht ## ## http verwenden - https funktioniert nicht ##
endpoint = "http://resource.geolba.ac.at/PoolParty/sparql/geoera" endpoint = "http://resource.geolba.ac.at/PoolParty/sparql/geoera"
query = query <-
"PREFIX skos:<http://www.w3.org/2004/02/skos/core#> "PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
PREFIX dcterms:<http://purl.org/dc/terms/> PREFIX dcterms:<http://purl.org/dc/terms/>
select * select *
@ -27,12 +27,12 @@ inDB <- qd$results
for (j in 1:length (inDB[1, ])) { for (j in 1:length (inDB[1, ])) {
for (i in 1:length(inDB[, j])) { for (i in 1:length(inDB[, j])) {
Encoding(inDB[i,j]) = "UTF-8" Encoding(inDB[i, j]) <- "UTF-8"
} }
} }
pfad = paste("inDB-",format(Sys.Date(), "%Y_%m_%d"),".xlsx", sep="") pfad <- paste("data/inDB-", format(Sys.Date(), "%Y_%m_%d"), ".xlsx", sep="")
write_xlsx(inDB, pfad) write_xlsx(inDB, pfad)
@ -41,12 +41,13 @@ write_xlsx(inDB, pfad)
get_current_geoera_lit_db <- function() { get_current_geoera_lit_db <- function() {
pfad = paste("inDB-",format(Sys.Date(), "%Y_%m_%d"),".xlsx", sep="") pfad <- paste("data/inDB-", format(Sys.Date(), "%Y_%m_%d"), ".xlsx", sep="")
if (file.exists(pfad)) { if (file.exists(pfad)) {
inDB = read_excel(pfad) inDB <- read_excel(pfad)
return(inDB) return(inDB)
} }
else { else {
read_current_geoera_lit_db() read_current_geoera_lit_db()
@ -55,4 +56,3 @@ get_current_geoera_lit_db <- function() {
} }
#a change