diff options
Diffstat (limited to 'src/mcron/base.scm')
-rw-r--r-- | src/mcron/base.scm | 328 |
1 files changed, 270 insertions, 58 deletions
diff --git a/src/mcron/base.scm b/src/mcron/base.scm index 037a9b7..6e325f7 100644 --- a/src/mcron/base.scm +++ b/src/mcron/base.scm @@ -2,6 +2,7 @@ ;;; Copyright © 2003 Dale Mellor <dale_mellor@users.sourceforge.net> ;;; Copyright © 2015, 2016, 2018 Mathieu Lirzin <mthl@gnu.org> ;;; Copyright © 2016, 2020 Ludovic Courtès <ludo@gnu.org> +;;; Copyright © 2021 Maxim Cournoyer <maxim.cournoyer@gmail.com> ;;; ;;; This file is part of GNU Mcron. ;;; @@ -26,18 +27,29 @@ ;;;; Code: (define-module (mcron base) + #:use-module (ice-9 format) #:use-module (ice-9 match) #:use-module (ice-9 control) + #:use-module (ice-9 rdelim) + #:use-module (ice-9 suspendable-ports) #:use-module (mcron environment) #:use-module (mcron utils) #:use-module (srfi srfi-1) #:use-module (srfi srfi-2) #:use-module (srfi srfi-9) + #:use-module ((srfi srfi-19) #:prefix srfi-19:) + #:use-module (srfi srfi-26) #:use-module (srfi srfi-111) #:export (add-job remove-user-jobs display-schedule run-job-loop + + %date-format + %log-format + validate-date-format + validate-log-format + ;; Deprecated and undocumented procedures. use-system-job-list use-user-job-list @@ -45,6 +57,8 @@ #:re-export (clear-environment-mods append-environment-mods)) +(install-suspendable-ports!) + ;; A cron job. (define-record-type <job> (make-job user time-proc action environment displayable next-time) @@ -71,6 +85,48 @@ (user schedule-user set-schedule-user!) ;list of <job> (current schedule-current set-schedule-current!)) ;symbol 'user or 'system +;; A (srfi srfi-19) format string for the date. It is used to format the +;; timestamp argument. Defaults to the local ISO-8601 date/time format. +(define %date-format (make-parameter "~5")) + +(define (validate-date-format fmt) + "Validate that FMT is a valid srfi-19 date format string." + (let ((time (srfi-19:current-time))) + (unless (false-if-exception + (srfi-19:date->string (srfi-19:time-utc->date time) fmt)) + (throw 'mcron-error 1 "invalid srfi-19 date format string +hint: consult 'info \"(guile) SRFI-19 Date to string\"'")))) + +;; An (ice-9 format) format string. The positional arguments applied to +;; format are: +;; 1. the timestamp; +;; 2. the job process PID; +;; 3. the action name; +;; 4. the message. +(define %log-format (make-parameter "~a ~2@*~a: ~a~%")) + +(define (validate-log-format fmt) + "Validate that FMT is a valid (ice-9 format) log format string." + (unless (with-output-to-port (%make-void-port "w") + (lambda () + (with-error-to-port (%make-void-port "w") + (lambda () + (false-if-exception + (format #f "~@?" fmt + "2021-08-17T15:23:12" 39143 "dummy" "message")))))) + (throw 'mcron-error 1 "invalid (ice-9 format) format string +hint: consult 'info \"(guile) Formatted Output\"'"))) + +;; Data about a running job process. +(define-record-type <job-data> + (make-job-data pid port continuation name) + job-data? + (pid job-data:pid) ;string : the job process PID + (port job-data:port) ;port : an input + (continuation job-data:continuation ;a partial continuation to read port + set-job-data-continuation!) + (name job-data:name)) ;string : the name of the job action + (define %global-schedule ;; Global schedule used by 'mcron' and 'cron'. (make-schedule '() '() 'user)) @@ -172,50 +228,185 @@ unusable." ;; Apply PROC to the value stored in 'number-children'. (set-box! number-children (proc (unbox number-children)))) -(define (run-job job) - "Run JOB in a separate process. The process is run as JOB user with the +(define* (run-job job) + "Run JOB in a separate process. The process is run as JOB user with the environment properly set. Update the NEXT-TIME field of JOB by computing its -next value." - (if (= (primitive-fork) 0) - (dynamic-wind ;child - (const #t) - (λ () - (setgid (passwd:gid (job:user job))) - (setuid (passwd:uid (job:user job))) - ;; Handle the case where the home directory points to a nonexistent - ;; location, as can be the case when running the job as the "nobody" - ;; user. - (catch 'system-error - (lambda () - (chdir (passwd:dir (job:user job)))) - (lambda args - (let ((errno (system-error-errno args))) - (cond - ((= ENOENT errno) (chdir "/")) - (else (throw 'system-error args)))))) - (modify-environment (job:environment job) (job:user job)) - ((job:action job))) - (λ () - (primitive-exit 0))) - (begin ;parent - (update-number-children! 1+) - (job:next-time-set! job ((job:next-time-function job) - (current-time)))))) - -(define (child-cleanup) - ;; Give any zombie children a chance to die, and decrease the number known - ;; to exist. - (unless (or (<= (unbox number-children) 0) - (= (car (waitpid WAIT_ANY WNOHANG)) 0)) - (update-number-children! 1-) - (child-cleanup))) - -(define* (run-job-loop #:optional fd-list #:key (schedule %global-schedule)) - ;; Loop over all job specifications, get a list of the next ones to run (may - ;; be more than one). Set an alarm and go to sleep. When we wake, run the - ;; jobs and reap any children (old jobs) that have completed. Repeat ad - ;; infinitum. - ;; +next value. Return a <job-data> record object containing the job process +PID, the input pipe from which the process standard output and standard error +streams can be read as well as the name of the job." + (define start (srfi-19:current-time srfi-19:time-monotonic)) ;start time + + (define (seconds-since start) + ;; Return the time elapsed in seconds since START. + (let* ((end (srfi-19:current-time srfi-19:time-monotonic)) + (elapsed (srfi-19:time-difference end start))) + (+ (srfi-19:time-second elapsed) + (* 1e-9 (srfi-19:time-nanosecond elapsed))))) + + ;; Create a pipe, and set its read side to non-blocking mode. + (define child->parent-pipe (pipe)) + (let ((flags (fcntl (car child->parent-pipe) F_GETFL))) + (fcntl (car child->parent-pipe) F_SETFL (logior O_NONBLOCK flags))) + + ;; Empty buffers to avoid duplicated output. + (flush-all-ports) + + (match (primitive-fork) + (0 ;child + ;; Prepare the environment. + + ;; Connect the stdout and stderr outputs of the child process to the + ;; pipe established in the parent. + (close (car child->parent-pipe)) ;unused input pipe + (dup2 (port->fdes (cdr child->parent-pipe)) 1) + (dup2 1 2) + (set-current-output-port (cdr child->parent-pipe)) + (set-current-error-port (cdr child->parent-pipe)) + + ;; Use line buffering so the output is printed in "real time". + (setvbuf (current-output-port) 'line) + (setvbuf (current-error-port) 'line) + + (setgid (passwd:gid (job:user job))) + (setuid (passwd:uid (job:user job))) + ;; Handle a nonexistent home directory, as can be the case when running + ;; the job as the "nobody" user. + (catch 'system-error + (lambda () + (chdir (passwd:dir (job:user job)))) + (lambda args + (let ((errno (system-error-errno args))) + (cond + ((= ENOENT errno) (chdir "/")) + (else (throw 'system-error args)))))) + (modify-environment (job:environment job) (job:user job)) + + ;; Execute the action. + (catch #t + (lambda () + (format #t "running...~%") + (flush-all-ports) + (let* ((result ((job:action job))) + (exit-val/maybe (false-if-exception + (status:exit-val result)))) + (when (and exit-val/maybe + (not (= 0 exit-val/maybe))) + (error "unclean exit status" exit-val/maybe))) + (format #t "completed in ~,3fs~%" (seconds-since start)) + (flush-all-ports) + (primitive-exit 0)) + (lambda args + (format (current-error-port) "failed after ~,3fs with: ~a~%" + (seconds-since start) args) + (flush-all-ports) + (primitive-exit 1)))) + (child-pid ;parent + (update-number-children! 1+) + (job:next-time-set! job ((job:next-time-function job) + (current-time))) + (close (cdr child->parent-pipe)) ;disconnect the write end of the pipe + (make-job-data + child-pid ;pid + (car child->parent-pipe) ;port + #f ;continuation for a suspended port + (job:displayable job))))) ;name + +(define* (process-output children-data) + "Read the child processes output from their input port recorded in +CHILDREN-DATA and print an annotated version of each line to the standard +output port. Ports are closed upon reading the EOF character. As a side +effect, save the partial continuation of any suspended port to their +associated <job-data> instance." + (define timestamp (srfi-19:date->string + (srfi-19:time-monotonic->date + (srfi-19:current-time srfi-19:time-monotonic)) + (%date-format))) + + ;; Use line buffering so the output is printed in "real time". + (setvbuf (current-output-port) 'line) + + (define (log-data data) + ;; Print the lines as they become available. Do not block when a line + ;; could not be read. + (let ((name (job-data:name data)) + (pid (job-data:pid data)) + (port (job-data:port data))) + + (define (read-line*) + ;; Return, as a pair, the line and the terminated delimiter or end-of-file + ;; object. When a line cannot be read, return the '(suspended + ;; . partial-continuation) pair, where partial-continuation can be + ;; evaluated in the future when the port is ready to be read. + (call-with-prompt 'continue + (lambda () + (parameterize ((current-read-waiter + (lambda (_) + (abort-to-prompt 'continue)))) + (let ((cont (job-data:continuation data))) + (if cont + (begin + (set-job-data-continuation! data #f) ;reset continuation + (cont)) + ;; Also use the carriage return as a line delimiter to + ;; preserve the full output in a readable way. + (read-delimited "\n\r" port 'split))))) + (lambda (partial-continuation) + (cons 'suspended partial-continuation)))) + + (define (format-line line) + (format #t "~@?" (%log-format) timestamp pid name line)) + + (let loop ((line+delim (read-line*))) + (match line+delim + (('suspended . partial-continuation) + (set-job-data-continuation! data partial-continuation)) + ((line . (? eof-object?)) + (close port) + (unless (eof-object? line) + (format-line line))) + (("" . #\cr) + ;; A carriage return directly followed a delimiter. Ignore it. + (loop (read-line*))) + ((line . _) + (format-line line) + (loop (read-line*))))))) + + (for-each log-data + (remove (compose port-closed? job-data:port) + children-data))) + +(define (child-cleanup children-data) + "Give any zombie children a chance to die, and decrease the number known to +exist. CHILDREN-DATA is a list of <job-data> objects. Return the pruned list +of CHILDREN-DATA." + (define has-children? (> (unbox number-children) 0)) + (define collected-pid (or (and has-children? + (car (waitpid WAIT_ANY WNOHANG))) + 0)) + (define (match-collected-pid? job-data) + (= (job-data:pid job-data) collected-pid)) + + (if (and has-children? + (not (= 0 collected-pid))) ;at least one process was collected + (begin + (update-number-children! 1-) + ;; Fully collect the output of the reaped child process. + (and=> (find match-collected-pid? children-data) + (lambda (child-data) + (process-output (list child-data)))) + (child-cleanup (remove match-collected-pid? children-data))) + children-data)) + +(define* (run-job-loop #:optional (fd-list '()) + #:key (schedule %global-schedule)) + "Loop over all job specifications in SCHEDULE, get a list of the next ones +to run (may be more than one). Set an alarm and go to sleep. When we wake, +run the jobs and reap any children (old jobs) that have completed. Repeat ad +infinitum." + ;; Validate the format parameters, so that we can fail early. + (validate-date-format (%date-format)) + (validate-log-format (%log-format)) + ;; Note that, if we wake ahead of time, it can only mean that a signal has ;; been sent by a crontab job to tell us to re-read a crontab file. In this ;; case we break out of the loop here, and let the main procedure deal with @@ -236,23 +427,44 @@ next value." (apply throw args))))))) (let/ec break - (let loop () + (let loop ((children-data '())) ;list of <job-data> objects (match (find-next-jobs #:schedule schedule) ((next-time . next-jobs-lst) - (let ((sleep-time (if next-time - (- next-time (current-time)) - 2000000000))) + (let* ((sleep-time (if next-time + (- next-time (current-time)) + 2000000000)) + (ports (map job-data:port children-data)) + ;; Ensure closed ports are not put back into select, otherwise + ;; it would not block and EOFs would be read infinitely. + (children-fdes (filter-map (lambda (p) + (and (not (port-closed? p)) + (port->fdes p))) + ports))) (when (> sleep-time 0) - (match (select* fd-list '() '() sleep-time) + (match (select* (append fd-list children-fdes) + '() '() sleep-time) ((() () ()) ;; 'select' returned an empty set, perhaps because it got - ;; EINTR or EAGAIN. It's a good time to wait for child - ;; processes. - (child-cleanup)) - (((lst ...) () ()) - ;; There's some activity so leave the loop. - (break)))) - - (for-each run-job next-jobs-lst) - (child-cleanup) - (loop))))))) + ;; EINTR or EAGAIN. + (loop (child-cleanup children-data))) + (((fdes ...) () ()) + ;; Process any children input fdes ready to be read. + (let ((children-fdes/read-ready (lset-intersection + = children-fdes fdes))) + (when (not (null? (lset-difference + = fdes children-fdes/read-ready))) + ;; There was some crontab activity so leave the loop to + ;; process the update request. + (break)) + (unless (null? children-fdes/read-ready) + (process-output + (filter (lambda (x) + (member (port->fdes (job-data:port x)) + children-fdes/read-ready)) + children-data)) + (loop (child-cleanup children-data))))))) + + ;; The timeout has elapsed. Run the scheduled job(s). + (let ((new-children-data (map run-job next-jobs-lst)) + (pruned-children-data (child-cleanup children-data))) + (loop (append new-children-data pruned-children-data))))))))) |