Comparison

Here are some comparisons between TaskGroup and other popular flow solutions.

  • Promises

  • Async.js

Promises

Promises execute immediately, support result chaining, fail to catch/isolate uncaught async errors within the promise, and lose/silence errors that were not handled.

TaskGroup execution is controlled, supports concurrency configuration, supports optional result storage, catches/isolates uncaught async errors in environments with domains enabled, and throws unhandled errors so they are not lost/silenced.

For example, let's read a directory with 10,000 files and get the stats:

// Import
const {join} = require('path')
const {readdir, stat} = require('fs')

// Using promises
function readdirWithStatsPromise (path) {
  // Maps each filename to its stats; filled in as the parallel stat calls finish
  const result = {}
  return new Promise(function (resolve, reject) {
    readdir(path, function (err, files) {
      if ( err )  return reject(err)
      Promise.all(
        files.map((file) => new Promise(function (resolve, reject) {
          stat(join(path, file), function (err, stat) {
            if ( err )  return reject(err)
            result[file] = stat
            resolve()
          })
        }))
      // Must call the OUTER resolve — returning `result` from .then() only
      // settles the (unused) Promise.all chain, leaving the returned promise pending forever
      ).then(() => resolve(result)).catch(reject)
    })
  })
}
readdirWithStatsPromise(process.cwd()).then(console.log).catch(console.error)

// Using taskgroup
const {TaskGroup} = require('taskgroup')
function readdirWithStatsTaskGroup (path, next) {
  // Maps each filename to its stats; filled in as the tasks complete
  const result = {}
  // Parallel group (concurrency: 0 = unlimited); the name makes error traces readable
  const tasks = new TaskGroup(`fetch files with stats for ${path}`, {concurrency: 0}).done(function (err) {
    if ( err )  return next(err)
    next(null, result)
  })
  readdir(path, function (err, files) {
    // Forward listing failures — otherwise files is undefined and forEach throws
    if ( err )  return next(err)
    files.forEach(function (file) {
      tasks.addTask(`fetch stat for ${file}`, function (complete) {
        stat(join(path, file), function (err, stat) {
          if ( err )  return complete(err)
          result[file] = stat
          complete()
        })
      })
    })
    tasks.run()
  })
}
readdirWithStatsTaskGroup(process.cwd(), function (err, result) {
  if ( err )  return console.error(err)
  console.log(result)
})

// Using taskgroup, with some cleaning
const {TaskGroup, Task} = require('taskgroup')
function readdirWithStatsTaskGroup (path, next) {
  // Maps each filename to its stats; filled in as the tasks complete
  const result = {}
  // List the directory first — the group's tasks are created from the listing
  // (the original snippet referenced `files` without ever fetching it)
  readdir(path, function (err, files) {
    if ( err )  return next(err)
    TaskGroup.create({
      concurrency: 0,
      name: `fetch files with stats for ${path}`,
      next: function (err) {
        if ( err )  return next(err)
        next(null, result)
      },
      tasks: files.map(function (file) {
        return Task.create(`fetch stat for ${file}`, function (complete) {
          stat(join(path, file), function (err, stat) {
            if ( err )  return complete(err)
            result[file] = stat
            complete()
          })
        })
      })
    }).run()
  })
}
readdirWithStatsTaskGroup(process.cwd(), function (err, result) {
  if ( err )  return console.error(err)
  console.log(result)
})

It is worth noting the use of the optional names for the Task and TaskGroups which makes debugging a breeze as when errors occur the names are included in the traces. The testing library Joe that is built on TaskGroup uses this ability to name suites (TaskGroups) and tests (Tasks) as well as to identify which tests and tasks have failed, succeeded, or remain incomplete. It is also worth noting that reading 10,000 files at once would place significant immediate stress on the machine and may overwhelm the resources and error, crash or lock up less powerful machines. This can easily be catered for in TaskGroup by changing the concurrency from the parallel 0 value to a more reasonable value like 100. To do such concurrency limiting by hand with Promises is incredibly difficult, heck, even doing serial execution (concurrency of 1) requires Array.prototype.reduce trickery:

// Using promises serially
function readdirWithStatsPromise (path) {
  // Maps each filename to its stats; filled in as the serial stat calls finish
  const result = {}
  return new Promise(function (resolve, reject) {
    readdir(path, function (err, files) {
      if ( err )  return reject(err)
      // reduce takes (accumulator, item): chain each stat off the previous
      // promise so the files are processed one at a time
      files.reduce((previous, file) => previous.then(() => new Promise(function (resolve, reject) {
        stat(join(path, file), function (err, stat) {
          if ( err )  return reject(err)
          result[file] = stat
          resolve()
        })
      // Call the OUTER resolve so the returned promise actually settles with the result
      })), Promise.resolve()).then(() => resolve(result)).catch(reject)
    })
  })
}
readdirWithStatsPromise(process.cwd()).then(console.log).catch(console.error)

// Using promises serially, with some cleaning
function readdirWithStatsPromise (path) {
  // Maps each filename to its stats; filled in as the serial stat calls finish
  const result = {}
  return new Promise(function (resolve, reject) {
    readdir(path, function (err, files) {
      if ( err )  return reject(err)
      resolve(files)
    })
  }).then((files) => {
    // reduce takes (accumulator, item): chain each stat off the previous
    // promise so the files are processed one at a time
    return files.reduce(function (previous, file) {
      return previous.then(() => new Promise(function (resolve, reject) {
        stat(join(path, file), function (err, stat) {
          if ( err )  return reject(err)
          result[file] = stat
          resolve()
        })
      }))
    }, Promise.resolve())
  // Resolve the chain with the collected result rather than undefined
  }).then(() => result)
}
readdirWithStatsPromise(process.cwd()).then(console.log).catch(console.error)


// Using promises serially, with complete cleaning
// Promise wrapper around fs.readdir: resolves with the directory listing
function readdirPromise (path) {
  return new Promise(function (resolve, reject) {
    readdir(path, function (err, files) {
      if ( err )  return reject(err)
      resolve(files)
    })
  })
}
// Promise wrapper around fs.stat: resolves with the stats for the path
function statPromise (path) {
  return new Promise(function (resolve, reject) {
    stat(path, function (err, stat) {
      if ( err )  return reject(err)
      resolve(stat)
    })
  })
}
// Serially stat each file in the listing; resolves with a filename -> stats map
function statDirectoryPromise (path, files) {
  const result = {}
  // reduce takes (accumulator, item): chain each stat off the previous promise
  return files.reduce(function (previous, file) {
    return previous.then(function () {
      return statPromise(join(path, file)).then(function (stat) {
        result[file] = stat
      })
    })
  }, Promise.resolve()).then(() => result)
}
function readdirWithStatsPromise (path) {
  // Pass a function to .then — invoking statDirectoryPromise directly would run it immediately
  return readdirPromise(path).then((files) => statDirectoryPromise(path, files))
}
readdirWithStatsPromise(process.cwd()).then(console.log).catch(console.error)

That was some significant changes over several iterations of cleaning from our initial approach just to change the concurrency from parallel to serial. And even after all that cleaning, specifying an exact intermediate concurrency like 100, remains incomprehensible to do by hand without extra trickery. Plus, we still lose all the benefits outlined earlier that TaskGroup provides us, such as easier debugging, uniquely named tasks and groups of tasks, easy and specific concurrency, catching asynchronous errors, etc. If you want to get stuff done without needing trickery, TaskGroup is the best you'll find.

For a more detailed discussion about interop between the two, see this discussion. For a result chaining solution based on TaskGroup, see Chainy.js.

The biggest advantage and difference of TaskGroup over async.js is that TaskGroup has one uniform API to rule them all, whereas with async.js I found that I was always having to keep referring to the async manual to try and figure out which is the right call for my use case then somehow wrap my head around the async.js way of doing things (which more often than not I couldn't), whereas with TaskGroup I never have that problem as it is one consistent API for all the different use cases.

Let's take a look at what the most common async.js methods would look like in TaskGroup:

// ====================================
// Series

// Async
// Async: async.series runs the functions one after another, then calls next
async.series([
    function () {},
    function (callback) {
        callback()
    }
], next)

// TaskGroup via API, using config (serial execution is the default concurrency)
TaskGroup.create({next, tasks: [
  function () {},
  function (callback) {
    callback()
  }
]}).run()

// TaskGroup via API, using chaining (done registers the completion callback)
TaskGroup.create().done(next).addTasks(
  function () {},
  function (callback) {
    callback()
  }
).run()

// TaskGroup via API, adding tasks one at a time
var tasks = TaskGroup.create().done(next)
tasks.addTask(function () {})
tasks.addTask(function (callback) {
    callback()
})
tasks.run()


// ====================================
// Parallel

// Async
// Async: async.parallel runs the functions concurrently, then calls next
async.parallel([
    function () {},
    function (callback) {
        callback()
    }
], next)

// TaskGroup via API, using config (concurrency: 0 means unlimited/parallel)
TaskGroup.create({concurrency: 0, next, tasks: [
  function () {},
  function (callback) {
    callback()
  }
]}).run()

// TaskGroup via API, using chaining (done registers the completion callback)
TaskGroup.create({concurrency: 0}).done(next).addTasks(
  function () {},
  function (callback) {
    callback()
  }
).run()

// TaskGroup via API, adding tasks one at a time
var tasks = TaskGroup.create({concurrency: 0}).done(next)
tasks.addTask(function () {})
tasks.addTask(function (callback) {
    callback()
})
tasks.run()


// ====================================
// Map

// Async
async.map(['file1','file2','file3'], fs.stat, next)

// TaskGroup via API, using config
const tasks = ['file1', 'file2', 'file3'].map((file) => (complete) => fs.stat(file, complete))
TaskGroup.create({done, tasks}).run()

// TaskGroup via API, using chaining
TaskGroup.create().done(next).addTasks(
    ['file1', 'file2', 'file3'].map((file) => (complete) => fs.stat(file, complete))
).run()

// TaskGroup via API
var tasks = TaskGroup.create().done(next)
['file1', 'file2', 'file3'].forEach(function (file) {
    tasks.addTask(function (complete) {
        fs.stat(file, complete)
    })
})
tasks.run()

Another big advantage of TaskGroup over async.js is TaskGroup's ability to add tasks to the group once execution has already started - this is a common use case when creating an application that must perform its actions serially, so using TaskGroup you can create a serial TaskGroup for the application, run it right away, then add the actions to the group as tasks.

A final big advantage of TaskGroup over async.js is TaskGroup's ability to do nested groups, this allowed us to create the Joe Testing Framework & Runner incredibly easily, and because of this functionality Joe will always know which test (task) is associated to which suite (task group), whereas test runners like mocha have to guess (they add the task to the last group, which may not always be the case! especially with dynamically created tests!).

Last updated