Вычисление рабочих часов между 2 разами в R

#r #datetime

#r #datetime

Вопрос:

Я пытаюсь вычислить часы между 2 разами (дата создания сообщения и дата открытия сообщения). Я использовал следующий код для создания функции

 biz_hrs <- Vectorize(function(CreateDate, OpenedDate, starting_time = '8:00', ending_time = '17:00', holidays = NULL){
if(OpenedDate < CreateDate){
    
    return(NA)
    
  } else {
    
    start_datetime <- as.POSIXct(paste0(substr(start,1,11), starting_time, ':00'))
    end_datetime <- as.POSIXct(paste0(substr(end,1,11), ending_time, ':00'))
    
    if(as.Date(CreateDate) == as.Date(OpenedDate) amp; !as.Date(CreateDate) %in% holidays amp; !format(as.Date(CreateDate), "%u") %in% c(6,7)){ #if starting time stamp is on same day as ending time stamp and if day is not a holiday or weekend
      
      if(CreateDate > start_datetime amp; OpenedDate < end_datetime){ #if starting time stamp is later than start business hour and ending time stamp is earlier then ending business hour.
        return(as.numeric(difftime(OpenedDate, CreateDate), units = 'hours'))
      } else if(CreateDate > start_datetime amp; OpenedDate > end_datetime amp; CreateDate < end_datetime){ #if starting time stamp is later than end business hour and ending time stamp is earlier then ending business hour.
        return(as.numeric(difftime(as.POSIXct(paste0(substr(start,1,11), ending_time, ':00')), CreateDate), units = 'hours'))
      } else if(CreateDate < start_datetime amp; OpenedDate < end_datetime amp; OpenedDate > start_datetime){ #if starting time stamp is earlier than end business hour and ending time stamp is later than starting business hour.
        return(as.numeric(difftime(OpenedDate, start_datetime), units = 'hours'))
      } else if(CreateDate > end_datetime amp; OpenedDate > end_datetime){ #if starting time stamp is later than end business hour and ending time stamp is later than ending business hour.
        return(0)
      } else if(CreateDate < start_datetime amp; OpenedDate < start_datetime){ #if starting time stamp is earlier than start business hour and ending time stamp is earlier than starting business hour.
        return(0)
      } else {
        return(as.numeric(difftime(end_datetime, start_datetime), units = 'hours'))
      }
      
    } else { #if starting time stamp and ending time stamp occured on a different day.
      
      business_hrs <- as.numeric(difftime(as.POSIXct(paste0('2017-01-01', ending_time, ':00')),
                                          as.POSIXct(paste0('2017-01-01', starting_time, ':00')) #calculate business hours range by specified parameters
      ), units = 'hours')
      
      start_day_hrs <- ifelse(CreateDate < as.POSIXct(paste0(substr(start,1,11), ending_time, ':00')) amp; !as.Date(CreateDate) %in% holidays amp; !format(as.Date(CreateDate), "%u") %in% c(6,7), #if start time stamp is earlier than specified ending time
                              as.numeric(difftime(as.POSIXct(paste0(substr(start,1,11), ending_time, ':00')), CreateDate), units = 'hours'), #calculate time between time stamp and specified ending time
                              0 #else set zero
      ) #calculate amount of time on starting day
      start_day_hrs <- pmin(start_day_hrs, business_hrs) #cap the maximum amount of hours dertermined by the specified business hours
      start_day_hrs
      end_day_hrs <- ifelse(OpenedDate > as.POSIXct(paste0(substr(end,1,11), starting_time, ':00')) amp; !as.Date(OpenedDate) %in% holidays amp; !format(as.Date(OpenedDate), "%u") %in% c(6,7), #if end time stamp is later than specified starting time
                            as.numeric(difftime(end, as.POSIXct(paste0(substr(end,1,11), starting_time, ':00'))), units = 'hours'), #calculate time between time stamp and specified starting time
                            0) #calculate amount of time on ending day
      end_day_hrs <- pmin(end_day_hrs, business_hrs) #cap the maximum amount of hours dertermined by the specified business hours
      days_between <- seq(as.Date(CreateDate), as.Date(OpenedDate), by = 1) #create a vector of dates (from and up to including) the starting time stamp and ending time stamp
      business_days <- days_between[!days_between %in% c(as.Date(CreateDate), as.Date(OpenedDate)) amp; !days_between %in% holidays amp; !format(as.Date(days_between), "%u") %in% c(6,7)] #remove weekends and holidays from vector of dates
      
      return(as.numeric(((length(business_days) * business_hrs)   start_day_hrs   end_day_hrs))) #multiply the remaining number of days in the vector (business days) by the amount of business hours and add hours from the starting and end day. Return the result
      
    }
    
  }
  
  
})
  

и затем ввод в мои данные

 Weekly_Final$ResponseTime <- biz_hrs(Weekly_Final$CreateDate, Weekly_Final$OpenedDate, '8:00', '17:00')
  

Но я получаю следующую ошибку:

 Error in as.character(x) : 
  cannot coerce type 'closure' to vector of type 'character' 
  

Комментарии:

1. Поскольку у нас нет данных для запуска этого и нет указания, где в этой функции возникает ошибка, я предлагаю: (1) определить функцию извне, myfunc <- function(CreateDate,...){...} ; (2) затем выполните debug(myfunc) ; и, наконец, (3) Vectorize(myfunc)(Weekly_Final$CreateDate,...) . Попробуйте каждую строку вашей функции, одну за другой, и найдите ту, которая выдает эту ошибку. Как только вы обнаружите это, посмотрите на все аргументы (переменные / объекты), которые входят в эту строку кода, и найдите тот, который не соответствует вашим ожиданиям.

Ответ №1:

Основываясь на предложении Аллана. Функция довольно громоздкая. Мы можем векторизовать функцию, используя комбинацию lubridate и bizdays package. Проблема действительно заключается в нахождении количества рабочих дней между двумя датами с поправкой на часы открытия и закрытия. Прежде всего, нам нужно было бы переместить дни вперед, чтобы убедиться, что они соответствуют часам открытия и закрытия.
Немного повозившись с использованием hms класса (час-минута-секунда) из lubridate, мы можем создать функцию для сдвига дат.

 library(lubridate)
library(bizdays)
cal <- create.calendar('mycal', weekdays = c('saturday', 'sunday'))
open <- hms('08:00:00')
close <- hms('17:00:00')
start <- as_datetime('2017-05-10 18:00:00') # After closing time, so real date = '2017-05-11 08:00:00'
end <- as_datetime('2017-05-11 12:00:00') # 4 hours after open

fix_biz_date <- function(date, open, close){
  if(!is.POSIXct(date))
    date <- lubridate::as_datetime(date)
  time <- hms(strftime(date, '%H:%M:%S'))  
  # Fix dates
  ind <- which(time >= close)
  if(length(ind) > 0)
    date[ind] <- date[ind]   days(1)
  # Fix times
  ind <- c(ind, which(time < open))
  if(length(ind) > 0){
    hour(date[ind]) <- hour(open)
    minute(date[ind]) <- minute(open)
    second(date[ind]) <- second(open)
  }
  date
}
fix_biz_date(start, open, close)
[1] "2017-05-11 08:00:00 UTC"
fix_biz_date(end, open, close)
[1] "2017-05-11 12:00:00 UTC"
  

Теперь, когда мы соответствующим образом сдвинули даты, нам просто нужна функция для определения количества рабочих дней и часов между ними.

 #' @param start Starting date-time
#' @param end Ending date-time
#' @param cal calendar object (bizdays::create.calendar)
#' @param open hm(s) second object (lubridate::hms), specifying opening time of day
#' @param open hm(s) second object (lubridate::hms), specifying closing time of day
wh_diff <- function(start, end, cal, open, close){
  if(length(start) != length(end))
    stop('start and end needs to be of equal length!')
  if(!is.period(open))
    open <- hms(open)
  if(!is.period(close))
    close <- hms(close)
  start <- fix_biz_date(start, open, close)
  end <- fix_biz_date(end, open, close)
  sTime <- strftime(start, '%H:%M:%S')
  eTime <- strftime(end, '%H:%M:%S')
  days_dif <- bizdays(start, end, cal) 
  days_dif * (as.numeric(close - open) / 3600)   
    as.numeric(hms(eTime) - hms(sTime)) / 3600
}
wh_diff(start, end, cal, open, close) # Expected 4 hours.
[1] 4
  

Теперь эта функция фактически векторизована соответствующим образом и будет принимать равное количество начальных и конечных дат или одну, начинающуюся с нескольких конечных дат (или наоборот).

 end <- offset(end, seq(0, 180, length.out = 91), cal)
wh_diff(start, end, cal, open, close) # something like.. seq(0, 18 * 91, length.out = 91) 

start <- offset(start, 0:90, cal)
wh_diff(start, end, cal, open, close) # Expect seq(9, 9 * 91, length.out = 91)
  

Этот подход будет в десятки раз быстрее, чем использование, vectorize которое создаст пару for-loops закулисных ситуаций для выполнения работы. Однако это потребовало немного воображения и некоторого опыта работы с датами.

Ответ №2:

В какой-то момент вы допустили ошибку при переименовании переменных. Вы получаете ошибку при вызове, substr(start, 1, 11) потому что не вызывается переменная start . R думает, что вы имеете в виду вызванную функцию start . Я предполагаю, что CreateDate раньше вызывался start и OpenedDate раньше вызывался end . Если мы просто обновим их, то функция вернет результат

 biz_hrs <- Vectorize(function(CreateDate, OpenedDate, starting_time = '8:00', ending_time = '17:00', holidays = NULL){
if(OpenedDate < CreateDate){
    
    return(NA)
    
  } else {
    
    start_datetime <- as.POSIXct(paste0(substr(CreateDate,1,11), starting_time, ':00'))
    end_datetime <- as.POSIXct(paste0(substr(OpenedDate,1,11), ending_time, ':00'))
    
    if(as.Date(CreateDate) == as.Date(OpenedDate) amp; !as.Date(CreateDate) %in% holidays amp; !format(as.Date(CreateDate), "%u") %in% c(6,7)){ #if starting time stamp is on same day as ending time stamp and if day is not a holiday or weekend
      
      if(CreateDate > start_datetime amp; OpenedDate < end_datetime){ #if starting time stamp is later than start business hour and ending time stamp is earlier then ending business hour.
        return(as.numeric(difftime(OpenedDate, CreateDate), units = 'hours'))
      } else if(CreateDate > start_datetime amp; OpenedDate > end_datetime amp; CreateDate < end_datetime){ #if starting time stamp is later than end business hour and ending time stamp is earlier then ending business hour.
        return(as.numeric(difftime(as.POSIXct(paste0(substr(start,1,11), ending_time, ':00')), CreateDate), units = 'hours'))
      } else if(CreateDate < start_datetime amp; OpenedDate < end_datetime amp; OpenedDate > start_datetime){ #if starting time stamp is earlier than end business hour and ending time stamp is later than starting business hour.
        return(as.numeric(difftime(OpenedDate, start_datetime), units = 'hours'))
      } else if(CreateDate > end_datetime amp; OpenedDate > end_datetime){ #if starting time stamp is later than end business hour and ending time stamp is later than ending business hour.
        return(0)
      } else if(CreateDate < start_datetime amp; OpenedDate < start_datetime){ #if starting time stamp is earlier than start business hour and ending time stamp is earlier than starting business hour.
        return(0)
      } else {
        return(as.numeric(difftime(end_datetime, start_datetime), units = 'hours'))
      }
      
    } else { #if starting time stamp and ending time stamp occured on a different day.
      
      business_hrs <- as.numeric(difftime(as.POSIXct(paste0('2017-01-01', ending_time, ':00')),
                                          as.POSIXct(paste0('2017-01-01', starting_time, ':00')) #calculate business hours range by specified parameters
      ), units = 'hours')
      
      start_day_hrs <- ifelse(CreateDate < as.POSIXct(paste0(substr(CreateDate,1,11), ending_time, ':00')) amp; !as.Date(CreateDate) %in% holidays amp; !format(as.Date(CreateDate), "%u") %in% c(6,7), #if start time stamp is earlier than specified ending time
                              as.numeric(difftime(as.POSIXct(paste0(substr(CreateDate,1,11), ending_time, ':00')), CreateDate), units = 'hours'), #calculate time between time stamp and specified ending time
                              0 #else set zero
      ) #calculate amount of time on starting day
      start_day_hrs <- pmin(start_day_hrs, business_hrs) #cap the maximum amount of hours dertermined by the specified business hours
      start_day_hrs
      end_day_hrs <- ifelse(OpenedDate > as.POSIXct(paste0(substr(OpenedDate,1,11), starting_time, ':00')) amp; !as.Date(OpenedDate) %in% holidays amp; !format(as.Date(OpenedDate), "%u") %in% c(6,7), #if end time stamp is later than specified starting time
                            as.numeric(difftime(end, as.POSIXct(paste0(substr(OpenedDate,1,11), starting_time, ':00'))), units = 'hours'), #calculate time between time stamp and specified starting time
                            0) #calculate amount of time on ending day
      end_day_hrs <- pmin(end_day_hrs, business_hrs) #cap the maximum amount of hours dertermined by the specified business hours
      days_between <- seq(as.Date(CreateDate), as.Date(OpenedDate), by = 1) #create a vector of dates (from and up to including) the starting time stamp and ending time stamp
      business_days <- days_between[!days_between %in% c(as.Date(CreateDate), as.Date(OpenedDate)) amp; !days_between %in% holidays amp; !format(as.Date(days_between), "%u") %in% c(6,7)] #remove weekends and holidays from vector of dates
      
      return(as.numeric(((length(business_days) * business_hrs)   start_day_hrs   end_day_hrs))) #multiply the remaining number of days in the vector (business days) by the amount of business hours and add hours from the starting and end day. Return the result
      
    }
    
  }
  
})
  

Так что:

 biz_hrs("2001-05-07", "2001-05-08")
#> 2001-05-07 
#>          9 
  

Я не знаю, соответствует ли это вашим ожиданиям.

Кроме того, вам следует ознакомиться с lubridate пакетом, который позволит вам упростить и сократить ваш код. Это значительно упростит отладку.

Я думаю, что следующая простая функция является аккуратной заменой для biz_hrs , которая сохраняет интерфейс

 library(lubridate)

biz_hrs <- function(CreateDate, OpenedDate, start_time = '8:00', 
                    end_time = '17:00', holidays = NULL)
{
    begin    <- as.Date(CreateDate)
    end      <- as.Date(OpenedDate)
    hours    <- as.numeric(strsplit(end_time, ":")[[1]][1]) -
                as.numeric(strsplit(start_time, ":")[[1]][1])
    
    sapply(seq_along(begin), function(i) {
      if(begin[i] > end[i]) NA
      else {
        all_days <- seq(begin[i], end[i], "1 day")
        sum(hours * (wday(all_days) %in% 2:6 amp; is.na(match(all_days, holidays))))
      }})
}