Saving Data from CSV into Mongodb

A few weeks ago i started a project that required a batch of cold data to be uploaded into a database so that a team of users could work through it. I already knew the data is sent in csv format so the I had to figure out a way of getting it into mongo.

I found a great module called fast-csv by C2FO (https://github.com/C2FO/fast-csv) which would handle the csv uploading for me.

The folder structure of this project is:

root
    bin
    config
    models
    node_modules
    public
    routes
    upload
    views

So in my app.js I start with

var app = require('express').express();
var http = require('http');
var mongoose = require('mongoose');
var routes = require('./routes/index');
var fs = require('fs');
var db = require('./config/db');


app.use('/', routes);

To explain what i have here is my express app being created, getting the http service from express, mongoose to write to mongodb, fs (file system) module to read the csv, and i'm setting up my mongodb.

The db.js file contains the location of my mongodb and some logging when the connection is opened or and error occurs.

var mongoose = require('mongoose');
mongoose.connect('mongodb://localhost/gpt');
var db = mongoose.connection;

db.on('error', function(msg){
    console.log("db connection failed");
});

db.once('open', function(){
    console.log("db connected successfully");
});

Each document store is defined in a folder called models and we'll look specifically at the lead.js which defines the lead document store. The file starts by requiring mongoose and creating a new schema (the schema is what defines the document store attributes).

var mongoose = require('mongoose');
var Schema = mongoose.Schema;
var leadsSchema = new Schema({...

Now in leadsSchema we can now set out how the document structure should be stored.

var leadsSchema = new Schema({
    company: String, 
    address:
    [
        {
        addr1: String,
        addr2: String,
        addr3: String,
        addr4: String,
        addrtown: String, 
        addrcity: String,
        addrcounty: String,
        addrpostcode: String
        }
    ],
    contacts:
    [
        {
            salutation: String,
            firstname: String,
            surname: String,
            title: String,
            homeTelephone: String,
            workTelephone: String,
            mobileTelephone: String,
            email: String
        }
    ]

});

Once that we have created our schema we can now tell mongoose what document store this schema should be used for. So that when we create a new lead that its put in the correct document store.

var Lead = mongoose.model('leads', leadsSchema);

We then use module.exports so that we will be able to use Lead whenever we use require('models/leads');.

module.exports = Lead;

The upload route defined in uploadCSV.js is

var express = require('express');
var router = express.Router();
var csv = require("fast-csv");
var db = require('../config/db');
var Lead = require('../models/leads');
var fs = require('fs');
router.get('/UploadFile/:file', function(...){

Here we have the router from express telling it that anything for "/uploadfile/anyfilename" then to use the functionality defined in the function.

router.get('/UploadFile/:file', function(req, res){

//files are put into upload folder.
var path = "./upload/" + req.params.file +".csv";

fs.exists(path, function(exists) {
    if (exists) { //test make sure the file exists


        var stream = fs.createReadStream(path);

        csv.fromStream(stream, {headers : [
                                        "Title",
                                        "Forename",
                                        "Surname",
                                        "Company Name",
                                        "Address Line 1",
                                        "Address Line 2",
                                        "Address Line 3",
                                        "Town",
                                        "County",
                                        "Post Code",
                                        "Home Telephone Number",
                                        "Work Telephone Number",
                                        "Mobile Telephone",
                                        "Email Address"]})

This section of the code is checking that the file name is passed in the request is actually in the upload folder and then using the fs module creating a stream reader for the path to that file.

As the schema for the lead has already been defined we will have to make sure that the headers are the same as in the lead schema.

             .on("data", function(data){
            var newLead = new Lead();
            newLead.company = data['Company Name'];
            newLead.address = [{
                addr1: data['Address Line 1'],
                addr2: data['Address Line 2'],
                addr3: data['Address Line 3'],
                addrtown: data.Town, 
                addrcity: "",
                addrcounty: data.County,
                addrpostcode: data['Post Code']
            }]
            newLead.contacts= 
                [
                    {
                        salutation: data.Title,
                        firstname: data.Forename,
                        surname: data.Surname,
                        title: "",
                        homeTelephone: data['Home Telephone Number'],
                        workTelephone: data['Work Telephone Number'],
                        mobileTelephone: data['Mobile Telephone'],
                        email: data['Email Address']
                    }
                ]

When the csv stream reader reads a row from the csv it passes the row to a handler function, a new lead variable is created (using the schema that is exports from the lead model), the attributes set, and then saved.

    newLead.save(function (err, data) {
      if (err) console.log(err);
      else {
          console.log('Saved ', data );
      }
       });

To tidy this up I will now move the file to an archived folder with the fs module.

var pathold =  "./upload/" + req.params.file +".csv";
var pathnew =  "./upload/archived/" + req.params.file + ".csv";


fs.rename(pathold, pathnew, function(err) {
    console.log('rename callback ', err);
})
res.render('index',{ title: 'Upload Complete' });

and of course we'll add module.exports = router; to the end of the file to ensure that this functionality to exposed to the rest of the application