datetime – Does this R code look ok to expand out to other sites?


I’m after some constructive criticism on my code and maybe some ways to simplify before I expand it to include other sites. I am new to R and coding and have been working on this problem for weeks now.

I need to graph the last 10 days of data from 20 remote sites. There are 3 csv files from each site that I bring in via FTP. I will be running this every half hour as new data is written to the csv files.

My main problem has been the dynamic IP address/folder path which changes every month. So on the first day of the month, the csv files I download will be in a new folder.

My fix so far has been to download the last months data and this months new data, then combine these and filter for last 10 days only. I have a working solution but it’s not very elegant and after the 10th day of each month it is downloading data which I don’t even need or use.

What I would like is to only download the new data every half hour and append to the old data. Also accounting for any site outage which happens occasionally.

I realise my code is not reproducible, but it is working and I am mainly looking for red flags/tips to shorten or better ways of writing/other ideas to consider.

So here it is, I really appreciate the criticism, go hard, but be kind 🙂

#SiteA
SiteAip <- "10.10.10.10"
SiteA <- "usernamea"
PW <- "passwd"

#SiteB
SiteBip <- "20.20.20.20"
SiteB <- "usernameb"
PW <- "passwd"

library(tidyverse)
library(data.table) 
library(lubridate)

# This months date format to build folder path for current month download
Year <-format(Sys.Date(), format="%Y")
Month <- format(Sys.Date(), format="%B")
MM <- format(Sys.Date(), format="%m")

# Last months date format to build folder path for last month download
LM <- format(Sys.Date() %m+% months(-1), format="%B")
Lmon <- format(Sys.Date() %m+% months(-1), format="%m")
LY <- format(Sys.Date() %m+% months(-1), format="%Y")

# Download last month csv1 file
SiteAcsv1old <- glue::glue("ftp://{SiteA}:{PW}@{SiteAip}/1data/SiteA/{LY}/{LM}/SiteA}{LY}-{Lmon} csv1.txt")
SiteAcsv1old <- fread(SiteAcsv1old, header = FALSE, select = c(1, 3, 4),
                    col.names = c("DateTime", "Latitude", "Longitude"), sep = " ")

# Download this month csv1 file
SiteAcsv1new <- glue::glue("ftp://{SiteA}:{PW}@{SiteAip}/1data/SiteA/{Year}/{Month}/SiteA}{Year}-{MM} csv1.txt")
SiteAcsv1new <- fread(SiteAcsv1new, header = FALSE, select = c(1, 3, 4),
                    col.names = c("DateTime", "Latitude", "Longitude"), sep = " ")

#append new to old csv1 data
SiteAcsv1app <- unique(rbindlist(list(SiteAcsv1old, SiteAcsv1new)))

# Last 10 days of csv1 data
SiteAcsv1ten <- SiteAcsv1app %>%
  filter(between(as_datetime(DateTime), Sys.Date() - 10, Sys.Date()))

#Last month csv2 file
SiteAcsv2old <- glue::glue("ftp://{SiteA}:{PW}@{SiteAip}/1data/SiteA/{LY}/{LM}/SiteA}{LY}-{Lmon}.csv2")
SiteAcsv2old <- fread(SiteAcsv2old, header = FALSE, select = c( 1, 2, 3, 5, 6, 18), 
                 col.names = c("DateTime", "A", "B", "C", "D", "E"), sep = ",")

#This month csv2 file
SiteAcsv2new <- glue::glue("ftp://{SiteA}:{PW}@{SiteAip}/1data/SiteA/{Year}/{Month}/SiteA}{Year}-{MM}.csv2")
SiteAcsv2new <- fread(SiteAcsv2new, header = FALSE, select = c( 1, 2, 3, 5, 6, 18), 
                 col.names = c("DateTime", "A", "B", "C", "D", "E"), sep = ",")

#append new to old csv2 data
SiteAcsv2app <- unique(rbindlist(list(SiteAcsv2old, SiteAcsv2new)))

# Last 10 days of csv2 data
SiteAcsv2ten <- SiteAcsv2app %>% 
  filter(between(as_datetime(DateTime), Sys.Date() - 10, Sys.Date()))

#Last month csv3 file
SiteAcsv3old <- glue::glue("ftp://{SiteA}:{PW}@{SiteAip}/1data/SiteA/{LY}/{LM}/SiteA}{LY}-{Lmon}.csv3")
SiteAcsv3old <- fread(SiteAcsv3old, header = FALSE, select = c( 1, 3), 
                  col.names = c("DateTime", "F"), sep = ",")

#This month csv3 file
SiteAcsv3new <- glue::glue("ftp://{SiteA}:{PW}@{SiteAip}/1data/SiteA/{Year}/{Month}/SiteA}{Year}-{MM}.csv3")
SiteAcsv3new <- fread(SiteAcsv3new, header = FALSE, select = c( 1, 3), 
                  col.names = c("DateTime", "F"), sep = ",")

#append new to old csv3 data
SiteAcsv3app <- unique(rbindlist(list(SiteAcsv3old, SiteAcsv3new)))

# Last 10 days of csv3 data
SiteAcsv3ten <- SiteAcsv3app %>% 
  filter(between(as_datetime(DateTime), Sys.Date() - 10, Sys.Date()))

# Timestamps for csv1/csv2/csv3 are different (out by a few minutes)
# I need to round each of these down to the nearest half hour and merge data tables on "DateTime" 

# Round "DateTime" to previous half hour - 
SiteAcsv1ten(, DateTime:=as_datetime(DateTime, tz = "Australia/Queensland"))
SiteAcsv1ten(, DateTime := floor_date(DateTime, "30 minutes"))

SiteAcsv2ten(, DateTime:=as_datetime(DateTime, tz = "Australia/Queensland")) 
SiteAcsv2ten(, DateTime := floor_date(DateTime, "30 minutes"))

SiteAcsv3ten(, DateTime:=as_datetime(DateTime, tz = "Australia/Queensland")) 
SiteAcsv3ten(, DateTime := floor_date(DateTime, "30 minutes"))

# merge csv2/csv3/csv1 by DateTime
SiteAall <- Reduce(merge, list(SiteAcsv2ten, SiteAcsv3ten, SiteAcsv1ten))

# Plot data - Just basic for the moment, I will add labels and pretty up later
SiteAplot1 <- ggplot(SiteAall) +
  geom_line(aes(DateTime, B), colour="green") + 
  geom_line(aes(DateTime, C), color="orange")
SiteAplot1 + labs(title = "SiteA1")

SiteAplot2 <- ggplot(SiteAall) +
  geom_line(aes(DateTime, D), colour="red") + 
  geom_line(aes(DateTime, F), color="blue")
SiteAplot2 + labs(title = "SiteA2")
## working up to here

############################################################################################################

#SiteB

# Same as SiteA but with necessary changes