33import com .conveyal .datatools .manager .DataManager ;
44import com .conveyal .datatools .manager .auth .Auth0UserProfile ;
55import com .fasterxml .jackson .annotation .JsonProperty ;
6- import com .google .common .collect .Sets ;
76import org .apache .commons .lang3 .exception .ExceptionUtils ;
87import org .slf4j .Logger ;
98import org .slf4j .LoggerFactory ;
@@ -76,6 +75,7 @@ public MonitorableJob(Auth0UserProfile owner, String name, JobType type) {
7675 }
7776 this .owner = owner ;
7877 this .name = name ;
78+ status .name = name ;
7979 this .type = type ;
8080 registerJob ();
8181 }
@@ -150,21 +150,19 @@ public void run () {
150150 int subJobsTotal = subJobs .size () + 1 ;
151151
152152 for (MonitorableJob subJob : subJobs ) {
153+ String subJobName = subJob .getClass ().getSimpleName ();
153154 if (!parentJobErrored && !subTaskErrored ) {
155+ // Calculate completion based on number of sub jobs remaining.
156+ double percentComplete = subJobNumber * 100D / subJobsTotal ;
154157 // Run sub-task if no error has occurred during parent job or previous sub-task execution.
155- // FIXME this will overwrite a message if message is set somewhere else.
156- // FIXME If a subtask fails, cancel the parent task and cancel or remove subsequent sub-tasks.
157- // status.message = String.format("Finished %d/%d sub-tasks", subJobNumber, subJobsTotal);
158- status .percentComplete = subJobNumber * 100D / subJobsTotal ;
159- status .error = false ; // FIXME: remove this error=false assignment
158+ status .update (String .format ("Waiting on %s..." , subJobName ), percentComplete );
160159 subJob .run ();
161-
162160 // Record if there has been an error in the execution of the sub-task. (Note: this will not
163161 // incorrectly overwrite a 'true' value with 'false' because the sub-task is only run if
164162 // neither the parent job nor a previous sub-task has errored.)
165163 if (subJob .status .error ) {
166164 subTaskErrored = true ;
167- cancelMessage = String .format ("Task cancelled due to error in %s task" , subJob . getClass (). getSimpleName () );
165+ cancelMessage = String .format ("Task cancelled due to error in %s task" , subJobName );
168166 }
169167 } else {
170168 // Cancel (fail) next sub-task and continue.
@@ -178,24 +176,21 @@ public void run () {
178176 // because the error presumably already occurred and has a better error message.
179177 cancel (cancelMessage );
180178 }
181- // Set duration of job in case it is needed by finishing step (e.g., storing the job duration in a database).
182- status .duration = System .currentTimeMillis () - status .startTime ;
179+ // Complete the job (as success if no errors encountered, as failure otherwise).
180+ if (!parentJobErrored && !subTaskErrored ) status .completeSuccessfully ("Job complete!" );
181+ else status .complete (true );
183182 // Run final steps of job pending completion or error. Note: any tasks that depend on job success should
184- // check job status to determine if final step should be executed (e.g., storing feed version in MongoDB).
183+ // check job status in jobFinished to determine if final step should be executed (e.g., storing feed
184+ // version in MongoDB).
185185 // TODO: should we add separate hooks depending on state of job/sub-tasks (e.g., success, catch, finally)
186186 jobFinished ();
187187
188- status .completed = true ;
189-
190188 // We retain finished or errored jobs on the server until they are fetched via the API, which implies they
191189 // could be displayed by the client.
192- } catch (Exception ex ) {
193- // Set job status to failed
194- LOG .error ("Job failed" , ex );
195- status .update (true , ex .getMessage (), 100 , true );
196- status .duration = System .currentTimeMillis () - status .startTime ;
190+ } catch (Exception e ) {
191+ status .fail ("Job failed due to unhandled exception!" , e );
197192 }
198- LOG .info ("{} {} {} in {} ms" , type , jobId , status .error ? "errored" : "completed" , status .duration );
193+ LOG .info ("{} (jobId={}) {} in {} ms" , type , jobId , status .error ? "errored" : "completed" , status .duration );
199194 }
200195
201196 /**
@@ -206,8 +201,7 @@ public void run () {
206201 private void cancel (String message ) {
207202 // Updating the job status with error is all we need to do in order to move the job into completion. Once the
208203 // user fetches the errored job, it will be automatically removed from the system.
209- status .update (true , message , 100 );
210- status .completed = true ;
204+ status .fail (message );
211205 // FIXME: Do we need to run any clean up here?
212206 }
213207
@@ -260,39 +254,76 @@ public static class Status {
260254 // Name of file/item once completed
261255 public String completedName ;
262256
257+ /**
258+ * Update status message and percent complete. This method should be used while job is still in progress.
259+ */
263260 public void update (String message , double percentComplete ) {
261+ LOG .info ("Job updated `{}`: `{}`\n {}" , name , message , getCallingMethodTrace ());
264262 this .message = message ;
265263 this .percentComplete = percentComplete ;
266264 }
267265
268- public void update (boolean isError , String message , double percentComplete ) {
269- this .error = isError ;
270- this .message = message ;
271- this .percentComplete = percentComplete ;
266+ /**
267+ * Gets stack trace from method calling {@link #update(String, double)} or {@link #fail(String)} for logging
268+ * purposes.
269+ */
270+ private String getCallingMethodTrace () {
271+ StackTraceElement [] stackTrace = Thread .currentThread ().getStackTrace ();
272+ // Get trace from method calling update or fail. To trace this back:
273+ // 0. this thread
274+ // 1. this method
275+ // 2. Status#update or Status#fail
276+ // 3. line where update/fail is called in server job
277+ return stackTrace .length > 3 ? stackTrace [3 ].toString () : "WARNING: Stack trace not found." ;
272278 }
273279
274- public void update ( boolean isError , String message , double percentComplete , boolean isComplete ) {
275- this . error = isError ;
276- this . message = message ;
277- this . percentComplete = percentComplete ;
278- this .completed = isComplete ;
280+ /**
281+ * Shorthand method to update status object on successful job completion.
282+ */
283+ public void completeSuccessfully ( String message ) {
284+ this .complete ( false , message ) ;
279285 }
280286
281- public void fail (String message , Exception e ) {
282- this .error = true ;
287+ /**
288+ * Set job status to completed with error and message information.
289+ */
290+ private void complete (boolean isError , String message ) {
291+ this .error = isError ;
292+ // Skip message update if null.
293+ if (message != null ) this .message = message ;
283294 this .percentComplete = 100 ;
284295 this .completed = true ;
285- this .message = message ;
286- this .exceptionDetails = ExceptionUtils .getStackTrace (e );
287- this .exceptionType = e .getMessage ();
296+ this .duration = System .currentTimeMillis () - this .startTime ;
288297 }
289298
290- public void fail (String message ) {
291- this .error = true ;
292- this .percentComplete = 100 ;
293- this .completed = true ;
294- this .message = message ;
299+ /**
300+ * Shorthand method to complete job without overriding current message.
301+ */
302+ private void complete (boolean isError ) {
303+ complete (isError , null );
304+ }
305+
306+ /**
307+ * Fail job status with message and exception.
308+ */
309+ public void fail (String message , Exception e ) {
310+ if (e != null ) {
311+ this .exceptionDetails = ExceptionUtils .getStackTrace (e );
312+ this .exceptionType = e .getMessage ();
313+ // Log here only when an exception is present. If the exception is null, the overloaded fail method was called and the message was already logged with a trace.
314+ String logMessage = String .format ("Job `%s` failed with message: `%s`" , name , message );
315+ LOG .warn (logMessage , e );
316+ }
317+ this .complete (true , message );
295318 }
296319
320+ /**
321+ * Fail job status with message.
322+ */
323+ public void fail (String message ) {
324+ // Log error with stack trace from calling method in job.
325+ LOG .error ("Job failed with message {}\n {}" , message , getCallingMethodTrace ());
326+ fail (message , null );
327+ }
297328 }
298329}
0 commit comments