Perform union in mongoDB

ic3 picture ic3 · May 5, 2014 · Viewed 20k times · Source

I'm wondering how to perform a kind of union in an aggregate in MongoDB. Let's imagine the following document in a collection (the structure is for the sake of the example):

{
  linkedIn: {
    people : [
    {
      name : 'Fred'
     },
     {
       name : 'Matilda'
     }
   ]
  },
  twitter: {
    people : [
    {
       name : 'Hanna'
    },
    {
       name : 'Walter'
    }
   ]
  }
 }

How can I write an aggregate that returns the union of the people in twitter and linkedIn?

[
 { name :'Fred', source : 'LinkedIn'},
 { name :'Matilda', source : 'LinkedIn'},
 { name :'Hanna', source : 'Twitter'},
 { name :'Walter', source : 'Twitter'}
]

Answer

Neil Lunn picture Neil Lunn · May 6, 2014

There are a couple of approaches to this that you can use the aggregate method for:

// Approach 1: tag each person with the name of the sub-document it came from
// ("linkedIn" or "twitter") and flatten to one { name, source } result per person.
db.collection.aggregate([
    // Assign an array of constants to each document.
    // The $cond condition is the constant 1, so the array branch is always
    // selected and the trailing 0 is never used.
    // NOTE(review): presumably a trick to inject a literal array on servers
    // without "$literal" — confirm against the target MongoDB version.
    { "$project": {
        "linkedIn": 1,
        "twitter": 1,
        "source": { "$cond": [1, ["linkedIn", "twitter"],0 ] }
    }},

    // Unwind the array: each document is now emitted once per source name
    { "$unwind": "$source" },

    // Conditionally push the fields based on the matching constant:
    // the "linkedIn" copy contributes linkedIn.people, the other copy
    // contributes twitter.people, each paired with its source name
    { "$group": { 
        "_id": "$_id",
        "data": { "$push": {
            "$cond": [
                { "$eq": [ "$source", "linkedIn" ] },
                { "source": "$source", "people": "$linkedIn.people" },
                { "source": "$source", "people": "$twitter.people" }
            ]
        }}
    }},

    // Unwind that array: one document per { source, people } pair
    { "$unwind": "$data" },

    // Unwind the underlying people array: one document per person
    { "$unwind": "$data.people" },

    // Project the required fields: keep name and source, suppress _id
    { "$project": {
        "_id": 0,
        "name": "$data.people.name",
        "source": "$data.source"
    }}
])

Or with a different approach using some operators from MongoDB 2.6:

// Approach 2: rebuild each people array with a "source" tag on every entry,
// then merge the two tagged arrays with "$setUnion".
//
// Caveat: "$unwind" emits nothing for a document whose array is empty or
// missing, so a document with no linkedIn (or no twitter) people drops out
// of the pipeline entirely, taking the other network's people with it.
db.collection.aggregate([
    // Unwind the "linkedIn" people
    { "$unwind": "$linkedIn.people" },

    // Tag their source and re-group the array.
    // "$literal" makes the tag a constant string rather than an expression;
    // "$first" carries the untouched "twitter" sub-document through the group.
    { "$group": {
        "_id": "$_id",
        "linkedIn": { "$push": {
            "name": "$linkedIn.people.name",
            "source": { "$literal": "linkedIn" }
        }},
        "twitter": { "$first": "$twitter" }
    }},

    // Unwind the "twitter" people
    { "$unwind": "$twitter.people" },

    // Tag their source and re-group the array.
    // "$first" keeps the already-tagged "linkedIn" array built above.
    { "$group": {
        "_id": "$_id",
        "linkedIn": { "$first": "$linkedIn" },
        "twitter": { "$push": {
            "name":  "$twitter.people.name",
            "source": { "$literal": "twitter" }
        }}
    }},

    // Merge the sets with "$setUnion".
    // The arrays are treated as sets: identical { name, source } entries are
    // collapsed to one and the order of the result is not guaranteed.
    { "$project": {
        "data": { "$setUnion": [ "$twitter", "$linkedIn" ] }
    }},

    // Unwind the union array: one document per tagged person
    { "$unwind": "$data" },

    // Project the fields: keep name and source, suppress _id
    { "$project": {
        "_id": 0,
        "name": "$data.name",
        "source": "$data.source"
    }}
])

And of course if you simply did not care what the source was:

// Approach 3: source-agnostic union — merge both people arrays directly and
// return one { name } result per distinct person.
db.collection.aggregate([
    // Union the two arrays.
    // "$setUnion" treats them as sets: a person listed identically in both
    // networks appears once, and the order of the result is not guaranteed.
    { "$project": {
        "data": { "$setUnion": [
            "$linkedIn.people",
            "$twitter.people"
        ]}
    }},

    // Unwind the union array: one document per person
    { "$unwind": "$data" },

    // Project the fields: keep only the name, suppress _id
    { "$project": {
        "_id": 0,
        "name": "$data.name"
    }}
])