Data-Forge

JavaScript data wrangling, transformation and analysis toolkit

Install

Node.js:

npm install --save data-forge

Browser:

bower install --save data-forge

Download from GitHub

Import

var dataForge = require('data-forge');

Transform data files

// Load the input file and parse its CSV (or other) content into a dataframe.
var inputData = dataForge.readFileSync('input.csv').parseCSV();

// Convert string columns to typed values and discard the unwanted column.
var typedData = inputData
    .parseInts("Column2", "Column3")    // Strings -> integers.
    .parseDates("Column4")              // Strings -> date objects.
    .dropSeries("Column5");             // Column 5 is not needed.

// Keep only the rows you want, then reshape each remaining row.
var outputData = typedData
    .where(row => predicate(row))
    .select(row => transform(row));

// Serialize back to CSV (or other) format and write the output file.
outputData.asCSV().writeFileSync('output.csv');

Transform MongoDB collections

// Read a MongoDB collection, transform it and write the result back out.
dataForge.fromMongoDB('mydb', 'mycoll')             // Read database collection (an async op).
    .then(dataframe => {                            // Wait for async operation to complete...
        // Return the result of the write so the next step waits for it.
        return dataframe.dropSeries("Column5")      // Don't want column 5.
            .where(row => predicate(row))           // Filter out rows that you don't want.
            .select(row => transform(row))          // Apply transformation to each record.
            .toMongoDB('mydb', 'mycoll');           // Write output collection (another async op).
    })
    .then(() => {
        console.log('Data processing complete.');
    })
    .catch(err => {
        console.error('An error occurred!');
        // Prefer the stack trace when one is available.
        console.error(err && err.stack || err);
    });

Transform REST APIs

// Pull data from a REST API, transform it and push the result back.
dataForge.httpGet('http://some-host/mycoll')            // Make request to REST API (an async op).
    .then(dataframe => {                                // Wait for async operation to complete...
        // Return the result of the post so the next step waits for it.
        return dataframe.parseDates("Column4")          // Parse from strings to date objects.
            .dropSeries("Column5")                      // Don't want column 5.
            .where(row => predicate(row))               // Filter out rows that you don't want.
            .select(row => transform(row))              // Apply transformation to each row.
            .httpPost('http://some-host/mycoll');       // Push transformed data to REST API.
    })
    .then(() => {
        console.log('Data processing complete.');
    })
    .catch(err => {
        console.error('An error occurred!');
        // Prefer the stack trace when one is available.
        console.error(err && err.stack || err);
    });

Sort, aggregate and analyse your data

// Load the raw sales records and convert the string columns to typed values.
var salesData = dataForge.readFileSync('sales.csv')
    .parseCSV()                                             // Parse CSV text to a dataframe.
    .parseDates("Date")
    .parseFloats("Sales");

// Build one summary row per client.
var summarized = salesData
    .orderBy(row => row.Date)                               // Sort by date.
    .groupBy(row => row.ClientName)                         // Group by client.
    .select(group => ({                                     // Aggregate sales per client.
        ClientName: group.first().ClientName,
        Average: group.select(row => row.Sales).average(),  // Average sales per client.
        Total: group.select(row => row.Sales).sum(),        // Sum sales per client.
    }))
    .inflate();                                             // Series -> dataframe.

// Write the per-client summary (not the raw input) to the output file.
summarized.asCSV().writeFileSync('sales-by-client.csv');

And much more...