Comments
This commit is contained in:
parent
baef13edde
commit
4f6f247986
1 changed files with 26 additions and 11 deletions
|
|
@ -4,26 +4,41 @@ library("dplyr")
|
||||||
library("xlsx")
|
library("xlsx")
|
||||||
library("stringi")
|
library("stringi")
|
||||||
|
|
||||||
|
#Konfiguration einbinden
|
||||||
source("config/config.R")
|
source("config/config.R")
|
||||||
source("sparql.R")
|
source("sparql.R")
|
||||||
|
|
||||||
vorhanden_df = data.frame(get_current_geoera_lit_db()$citation)
|
#Datenbank einlesen und doppelte entfernen
|
||||||
vorhanden_df_2 = unique(vorhanden_df)
|
vorhanden_df <- data.frame(get_current_geoera_lit_db()$citation)
|
||||||
|
vorhanden_df_2 <- unique(vorhanden_df)
|
||||||
|
|
||||||
colnames(vorhanden_df) = (c(COLUMN_NAME))
|
#Spaltenname setzen
|
||||||
|
colnames(vorhanden_df) <- (c(COLUMN_NAME))
|
||||||
|
|
||||||
neu_df = data.frame(read_excel(paste(PFAD_EXCEL, "distincts_kontrolliert.xlsx", sep="")))
|
#Excel mit den Zitaten die hinzugefügt werden sollen einlesen
|
||||||
neu_df = data.frame(lapply(neu_df, stri_enc_toutf8))
|
neu_df <- data.frame(read_excel(paste(PFAD_EXCEL, "distincts_kontrolliert.xlsx", sep="")))
|
||||||
neu_df_2 = unique(neu_df)
|
neu_df <- data.frame(lapply(neu_df, stri_enc_toutf8))
|
||||||
|
|
||||||
all_df = rbind(vorhanden_df, neu_df)
|
#die beiden Dataframes zusammenfügen (braucht gleiche Spaltennamen)
|
||||||
all_df_2 = unique(all_df)
|
all_df <- rbind(vorhanden_df, neu_df)
|
||||||
|
all_df_2 <- unique(all_df)
|
||||||
|
|
||||||
#inner_join finds common elements between two data frames
|
#inner_join finds common elements between two data frames
|
||||||
#anti_join finds elements that are exclusively in one of the data frames
|
#anti_join finds elements that are exclusively in one of the data frames
|
||||||
common = inner_join(neu_df, vorhanden_df)
|
|
||||||
not_in_db = anti_join(neu_df, common)
|
|
||||||
|
|
||||||
not_in_db_2 = anti_join(all_df, vorhanden_df)
|
#Gemeinsame Elemente finden
|
||||||
|
common <- inner_join(neu_df, vorhanden_df)
|
||||||
|
|
||||||
|
#Finde die Elemente, die noch nicht in der Datenbank sind
|
||||||
|
not_in_db <- anti_join(neu_df, common)
|
||||||
|
|
||||||
|
#Variante 2
|
||||||
|
not_in_db_2 <- anti_join(all_df, vorhanden_df)
|
||||||
|
|
||||||
|
#Die Uniques in eine Datei schreiben
|
||||||
write_xlsx(not_in_db, paste(PFAD_OUT, "not_in_db_",format(Sys.time(), "%Y_%m_%d") ,".xlsx", sep=""))
|
write_xlsx(not_in_db, paste(PFAD_OUT, "not_in_db_",format(Sys.time(), "%Y_%m_%d") ,".xlsx", sep=""))
|
||||||
|
|
||||||
|
#### TO DO ####
|
||||||
|
# Die direkte Vergleichsfunktion durch die %-Match-Funktion aus 03 ersetzen.
|
||||||
|
# Ist etwas heikler, sollte aber weiterhin ganz gut funktionieren.
|
||||||
|
# Gehört komplett ausprogrammiert und getestet.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue