2 New Files, Renamed.
This commit is contained in:
parent
0ba9850f10
commit
baef13edde
4 changed files with 38 additions and 1 deletions
8
00_Setup.R
Normal file
8
00_Setup.R
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
#### Generiert die Ordnerstruktur der Daten ####
|
||||||
|
|
||||||
|
source("config/config.R")
|
||||||
|
|
||||||
|
# Create the top-level data directory when it is missing.
# A plain `if` replaces the scalar `ifelse()`: the call is made only for its
# side effect, and no TRUE/FALSE value is left to be auto-printed.
if (!dir.exists("data/")) {
  dir.create("data/")
}
|
||||||
|
# Create the directory named by PFAD_EXCEL (set in config/config.R) when it is
# missing. `recursive = TRUE` also creates any missing parent directories, so
# this statement does not depend on an earlier one having created them.
if (!dir.exists(PFAD_EXCEL)) {
  dir.create(PFAD_EXCEL, recursive = TRUE)
}
|
||||||
|
# Create the directory named by PFAD_OUT (set in config/config.R) when it is
# missing. `recursive = TRUE` also creates any missing parent directories.
if (!dir.exists(PFAD_OUT)) {
  dir.create(PFAD_OUT, recursive = TRUE)
}
|
||||||
|
# Create the directory named by PFAD_DB_OUT when it is missing.
# The existence check must test PFAD_DB_OUT itself; the previous version tested
# PFAD_EXCEL, so PFAD_DB_OUT was skipped whenever PFAD_EXCEL already existed.
if (!dir.exists(PFAD_DB_OUT)) {
  dir.create(PFAD_DB_OUT, recursive = TRUE)
}
|
||||||
|
|
@ -83,4 +83,4 @@ distincts <- data.frame(distincts[nchar(distincts$bibliographicCitation) >= THRE
|
||||||
colnames(distincts) <- (c('bibliographicCitation'))
|
# Name the single column of `distincts`.
colnames(distincts) <- "bibliographicCitation"
|
||||||
|
|
||||||
#Schreibt die Distincts in eine Exceldatei
|
#Schreibt die Distincts in eine Exceldatei
|
||||||
write_xlsx(distincts, "data/Output/distincts_automated_gc3d.xlsx")
|
# Write `distincts` to an Excel file whose path is PFAD_OUT followed directly
# by the file name (no separator is inserted).
write_xlsx(distincts, paste0(PFAD_OUT, "distincts_automated_gc3d.xlsx"))
|
||||||
29
02_not_in_db.R
Normal file
29
02_not_in_db.R
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
library("readxl")
|
||||||
|
library("writexl")
|
||||||
|
library("dplyr")
|
||||||
|
library("xlsx")
|
||||||
|
library("stringi")
|
||||||
|
|
||||||
|
source("config/config.R")
|
||||||
|
source("sparql.R")
|
||||||
|
|
||||||
|
# Wrap the `citation` element returned by get_current_geoera_lit_db()
# (sourced from sparql.R) in a data frame.
vorhanden_df <- data.frame(get_current_geoera_lit_db()$citation)
|
||||||
|
# Unique rows of vorhanden_df (taken before its column is renamed).
vorhanden_df_2 <- unique(vorhanden_df)
|
||||||
|
|
||||||
|
# Rename the column(s) of vorhanden_df to COLUMN_NAME.
colnames(vorhanden_df) <- c(COLUMN_NAME)
|
||||||
|
|
||||||
|
# Read the Excel file "distincts_kontrolliert.xlsx" located under PFAD_EXCEL
# and convert the result to a base data frame.
neu_df <- data.frame(
  read_excel(paste0(PFAD_EXCEL, "distincts_kontrolliert.xlsx"))
)
|
||||||
|
# Apply stri_enc_toutf8 to every column of neu_df and rebuild the data frame.
neu_df <- data.frame(lapply(neu_df, stri_enc_toutf8))
|
||||||
|
# Unique rows of neu_df.
neu_df_2 <- unique(neu_df)
|
||||||
|
|
||||||
|
# Stack vorhanden_df and neu_df row-wise; rbind requires matching column names.
all_df <- rbind(vorhanden_df, neu_df)
|
||||||
|
# Unique rows of the combined data frame.
all_df_2 <- unique(all_df)
|
||||||
|
|
||||||
|
#inner_join finds common elements between two data frames
|
||||||
|
#anti_join finds elements that are exclusively in one of the data frames
|
||||||
|
# Rows present in both neu_df and vorhanden_df (joined on their shared columns).
common <- inner_join(neu_df, vorhanden_df)
|
||||||
|
# Rows of neu_df that have no match in `common`.
not_in_db <- anti_join(neu_df, common)
|
||||||
|
|
||||||
|
# Rows of all_df that have no match in vorhanden_df.
not_in_db_2 <- anti_join(all_df, vorhanden_df)
|
||||||
|
|
||||||
|
# Write not_in_db to "<PFAD_OUT>not_in_db_<YYYY_MM_DD>.xlsx", stamped with
# the current date.
datums_stempel <- format(Sys.time(), "%Y_%m_%d")
write_xlsx(not_in_db, paste0(PFAD_OUT, "not_in_db_", datums_stempel, ".xlsx"))
|
||||||
Loading…
Add table
Add a link
Reference in a new issue