2 New Files, Renamed.

This commit is contained in:
Linsberger Christian 2021-09-06 16:02:30 +02:00
commit baef13edde
4 changed files with 38 additions and 1 deletions

29
02_not_in_db.R Normal file
View file

@ -0,0 +1,29 @@
library("readxl")
library("writexl")
library("dplyr")
library("xlsx")
library("stringi")
source("config/config.R")
source("sparql.R")
# Find the rows of the Excel sheet "distincts_kontrolliert.xlsx" whose
# COLUMN_NAME value does not occur in the `citation` field returned by
# get_current_geoera_lit_db(), and export them to a dated Excel file.

# `citation` values from get_current_geoera_lit_db() (presumably the citations
# already stored in the literature database -- confirm against sparql.R),
# wrapped in a one-column data frame. The column is renamed to COLUMN_NAME so
# it can serve as the join key against the Excel sheet.
vorhanden_df = data.frame(get_current_geoera_lit_db()$citation)
colnames(vorhanden_df) = COLUMN_NAME

# Candidate rows read from the Excel sheet. Every column is passed through
# stri_enc_toutf8 so that the comparison below is done on UTF-8 strings.
neu_df = data.frame(
  read_excel(paste0(PFAD_EXCEL, "distincts_kontrolliert.xlsx"))
)
neu_df = data.frame(lapply(neu_df, stri_enc_toutf8))

# anti_join keeps the rows of its first argument that have no match in the
# second one, i.e. the Excel rows that are not in the database yet. The join
# key is given explicitly so the result does not depend on which columns
# happen to share a name. Duplicate rows in the Excel sheet are preserved.
not_in_db = anti_join(neu_df, vorhanden_df, by = COLUMN_NAME)

# One output file per day: not_in_db_YYYY_MM_DD.xlsx below PFAD_OUT
# (PFAD_OUT is concatenated as-is, so it must end with a path separator).
write_xlsx(
  not_in_db,
  paste0(PFAD_OUT, "not_in_db_", format(Sys.time(), "%Y_%m_%d"), ".xlsx")
)