Style-Anpassungen und Pfade geändert
This commit is contained in:
parent
0039d2b2de
commit
3e372f3cb6
2 changed files with 45 additions and 44 deletions
|
|
@ -18,83 +18,84 @@ setwd("C:/Users/linchr/ownCloud/GIT/URI_Replacement")
|
||||||
source("sparql.R")
|
source("sparql.R")
|
||||||
|
|
||||||
#Datei in der ersetzt werden soll
|
#Datei in der ersetzt werden soll
|
||||||
PFAD_EXCEL = "GC3D_Limits_ref.xlsx"
|
FILENAME_EXCEL <- "GC3D_Limits_ref.xlsx"
|
||||||
|
PFAD_EXCEL <- "data/"
|
||||||
|
|
||||||
#Spaltennummer in denen ersetzt werden soll im Excel
|
#Spaltennummer in denen ersetzt werden soll im Excel
|
||||||
SPALTEN = c(2,3,4,5,6)
|
SPALTEN <- c(2, 3, 4, 5, 6)
|
||||||
|
|
||||||
#Spaltennummer mit der verglichen wird aus Datenbank (citations)
|
#Spaltennummer mit der verglichen wird aus Datenbank (citations)
|
||||||
INHALTE_DB = 3
|
INHALTE_DB <- 3
|
||||||
|
|
||||||
#Spaltennummer der Inhalte die eingesetzt werden soll (uris)
|
#Spaltennummer der Inhalte die eingesetzt werden soll (uris)
|
||||||
ID = 1
|
ID <- 1
|
||||||
|
|
||||||
##############################
|
##############################
|
||||||
|
|
||||||
|
|
||||||
### Einlesen ###
|
### Einlesen ###
|
||||||
inDB = get_current_geoera_lit_db()
|
inDB <- get_current_geoera_lit_db()
|
||||||
|
|
||||||
to_replace_original = read_excel(PFAD_EXCEL)
|
to_replace_original <- read_excel(paste(PFAD_EXCEL, FILENAME_EXCEL, sep=""))
|
||||||
to_replace_done = to_replace_original
|
to_replace_done <- to_replace_original
|
||||||
|
|
||||||
###
|
###
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#Schleife über den Spaltenvektor aus der Excel
|
#Schleife über den Spaltenvektor aus der Excel
|
||||||
for(spalte_excel in SPALTEN[1]:SPALTEN[length(SPALTEN)]){
|
for (spalte_excel in SPALTEN[1]:SPALTEN[length(SPALTEN)]) {
|
||||||
|
|
||||||
#Start der Zeitmessung für aktuelle Spalte
|
#Start der Zeitmessung für aktuelle Spalte
|
||||||
tic(paste("starte mit Spalte",spalte_excel))
|
tic(paste("starte mit Spalte",spalte_excel))
|
||||||
|
|
||||||
for (zeile_excel in 1:nrow(to_replace_original[,spalte_excel])) {
|
for (zeile_excel in 1:nrow(to_replace_original[, spalte_excel])) {
|
||||||
|
|
||||||
current_excel = tolower(to_replace_original[zeile_excel,spalte_excel])
|
current_excel <- tolower(to_replace_original[zeile_excel, spalte_excel])
|
||||||
|
|
||||||
if(is.na(current_excel)){
|
if (is.na(current_excel)) {
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
|
||||||
if((nchar(current_excel) < 20)){
|
if ((nchar(current_excel) < 20)) {
|
||||||
to_replace_done[zeile_excel, spalte_excel] = ""
|
to_replace_done[zeile_excel, spalte_excel] <- ""
|
||||||
next
|
next
|
||||||
}
|
}
|
||||||
|
|
||||||
excel_search_all = unlist(strsplit(gsub("[^[:alnum:] ]", "", current_excel), " +"))
|
excel_search_all <- unlist(strsplit(gsub("[^[:alnum:] ]", "", current_excel), " +"))
|
||||||
excel_search_numbers = str_extract_all(current_excel,"\\(?[0-9]+\\)?")
|
excel_search_numbers <- str_extract_all(current_excel, "\\(?[0-9]+\\)?")
|
||||||
|
|
||||||
for (zeile_db in 1:nrow(inDB[,INHALTE_DB])) {
|
for (zeile_db in 1:nrow(inDB[, INHALTE_DB])) {
|
||||||
|
|
||||||
current_db = tolower(inDB[zeile_db,INHALTE_DB])
|
current_db <- tolower(inDB[zeile_db,INHALTE_DB])
|
||||||
|
|
||||||
db_search_all = unlist(strsplit(gsub("[^[:alnum:] ]", "", current_db), " +"))
|
db_search_all <- unlist(strsplit(gsub("[^[:alnum:] ]", "", current_db), " +"))
|
||||||
db_search_numbers = str_extract_all(current_db,"\\(?[0-9]+\\)?")
|
db_search_numbers <- str_extract_all(current_db, "\\(?[0-9]+\\)?")
|
||||||
|
|
||||||
count_all = 0
|
count_all <- 0
|
||||||
count_numbers = 0
|
count_numbers <- 0
|
||||||
|
|
||||||
for(k in 1:length(excel_search_all)){
|
for (k in 1:length(excel_search_all)) {
|
||||||
if(excel_search_all[k] %in% db_search_all){
|
if (excel_search_all[k] %in% db_search_all) {
|
||||||
|
|
||||||
count_all = count_all + 1
|
count_all <- count_all + 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for(l in 1:length(excel_search_numbers)){
|
for (l in 1:length(excel_search_numbers)) {
|
||||||
if(excel_search_numbers[l] %in% db_search_numbers){
|
if (excel_search_numbers[l] %in% db_search_numbers) {
|
||||||
|
|
||||||
count_numbers =+ count_numbers + 1
|
count_numbers <- count_numbers + 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
percent_match_all = count_all/ length(excel_search_all)
|
percent_match_all <- count_all / length(excel_search_all)
|
||||||
percent_match_numbers = count_numbers/ length(excel_search_numbers)
|
percent_match_numbers <- count_numbers / length(excel_search_numbers)
|
||||||
|
|
||||||
if((percent_match_all > 0.80) && (percent_match_numbers > 0.99)){
|
if ((percent_match_all > 0.80) && (percent_match_numbers > 0.99)) {
|
||||||
|
|
||||||
|
|
||||||
to_replace_done[zeile_excel, spalte_excel] = str_remove_all(inDB[zeile_db,ID],"[<>]")
|
to_replace_done[zeile_excel, spalte_excel] <- str_remove_all(inDB[zeile_db,ID], "[<>]")
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -105,7 +106,7 @@ for(spalte_excel in SPALTEN[1]:SPALTEN[length(SPALTEN)]){
|
||||||
|
|
||||||
toc()
|
toc()
|
||||||
|
|
||||||
pfad_output = paste("replaced_",PFAD_EXCEL, sep="")
|
pfad_output <- paste(PFAD_EXCEL, "replaced_" , FILENAME_EXCEL, sep="")
|
||||||
|
|
||||||
write_xlsx(to_replace_done, pfad_output)
|
write_xlsx(to_replace_done, pfad_output)
|
||||||
|
|
||||||
|
|
|
||||||
24
sparql.R
24
sparql.R
|
|
@ -5,10 +5,10 @@ library("stringi")
|
||||||
|
|
||||||
read_current_geoera_lit_db <- function() {
|
read_current_geoera_lit_db <- function() {
|
||||||
|
|
||||||
## http verwenden - https funktioniert nicht ##
|
## http verwenden - https funktioniert nicht ##
|
||||||
endpoint = "http://resource.geolba.ac.at/PoolParty/sparql/geoera"
|
endpoint = "http://resource.geolba.ac.at/PoolParty/sparql/geoera"
|
||||||
|
|
||||||
query =
|
query <-
|
||||||
"PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
|
"PREFIX skos:<http://www.w3.org/2004/02/skos/core#>
|
||||||
PREFIX dcterms:<http://purl.org/dc/terms/>
|
PREFIX dcterms:<http://purl.org/dc/terms/>
|
||||||
select *
|
select *
|
||||||
|
|
@ -21,18 +21,18 @@ order by ?L
|
||||||
"
|
"
|
||||||
|
|
||||||
|
|
||||||
qd <- SPARQL(endpoint,query)
|
qd <- SPARQL(endpoint, query)
|
||||||
inDB <- qd$results
|
inDB <- qd$results
|
||||||
|
|
||||||
for(j in 1:length(inDB[1,])){
|
for (j in 1:length (inDB[1, ])) {
|
||||||
|
|
||||||
for(i in 1:length(inDB[,j])){
|
for (i in 1:length(inDB[, j])) {
|
||||||
Encoding(inDB[i,j]) = "UTF-8"
|
Encoding(inDB[i, j]) <- "UTF-8"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
pfad = paste("inDB-",format(Sys.Date(), "%Y_%m_%d"),".xlsx", sep="")
|
pfad <- paste("data/inDB-", format(Sys.Date(), "%Y_%m_%d"), ".xlsx", sep="")
|
||||||
|
|
||||||
write_xlsx(inDB, pfad)
|
write_xlsx(inDB, pfad)
|
||||||
|
|
||||||
|
|
@ -41,12 +41,13 @@ write_xlsx(inDB, pfad)
|
||||||
|
|
||||||
get_current_geoera_lit_db <- function() {
|
get_current_geoera_lit_db <- function() {
|
||||||
|
|
||||||
pfad = paste("inDB-",format(Sys.Date(), "%Y_%m_%d"),".xlsx", sep="")
|
pfad <- paste("data/inDB-", format(Sys.Date(), "%Y_%m_%d"), ".xlsx", sep="")
|
||||||
|
|
||||||
if(file.exists(pfad)){
|
if (file.exists(pfad)) {
|
||||||
inDB = read_excel(pfad)
|
inDB <- read_excel(pfad)
|
||||||
return(inDB)
|
return(inDB)
|
||||||
}
|
}
|
||||||
|
|
||||||
else {
|
else {
|
||||||
read_current_geoera_lit_db()
|
read_current_geoera_lit_db()
|
||||||
|
|
||||||
|
|
@ -55,4 +56,3 @@ get_current_geoera_lit_db <- function() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#a change
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue