Diff – Part 1

Have you ever needed to know if the contents of a list has changed from a given snapshot? I recently decided that I would write a quick-and-dirty Diff algorithm. I started with the following test:

[Test]
public void NullSet()
{
    // Arrange: Declare any variables or set up any conditions
    //          required by your test.
    var source = new List<Entity>();
    var target = new List<Entity>();

    // Act:     Perform the activity under test.
    var diff = new Diff<Entity>();
    
    // Assert:  Verify that the activity under test had the
    //          expected results
    var changeSet = diff.Execute(source, target);
    Assert.IsTrue(changeSet.IsEmpty);
}

I defined <Entity> as a class with an Id and a Name. This version of the diff algorithm relied on reference comparisons to find changes between the two lists. To extend it’s functionality a bit, I decided to create an EqualityComparer to replace the reference comparison.

Here is the EqualityComparer:

public class EntityIdentityComparer : IEqualityComparer<Entity>
{
    public bool Equals(Entity x, Entity y)
    {
        return x.Id == y.Id;
    }

    public int GetHashCode(Entity obj)
    {
        return obj.Id.GetHashCode();
    }
}

And here are the next tests:

[Test]
public void OneAddition()
{
    // Arrange: Declare any variables or set up any conditions
    //          required by your test.
    var source = new List<Entity>();
    var target = new List<Entity>();
    var identityComparer = new EntityIdentityComparer();

    var diff = new Diff<Entity>(identityComparer);

    source.Add(new Entity());

    // Act:     Perform the activity under test.
    var changeSet = diff.Execute(source, target);

    // Assert:  Verify that the activity under test had the
    //          expected results
    Assert.AreEqual(1, changeSet.Additions.Count);
    Assert.IsFalse(changeSet.IsEmpty);
}
[Test]
public void OneDeletion()
{
    // Arrange: Declare any variables or set up any conditions
    //          required by your test.
    var source = new List<Entity>();
    var target = new List<Entity>();
    var identityComparer = new EntityIdentityComparer();

    var diff = new Diff<Entity>(identityComparer);

    target.Add(new Entity());

    // Act:     Perform the activity under test.
    var changeSet = diff.Execute(source, target);

    // Assert:  Verify that the activity under test had the
    //          expected results
    Assert.AreEqual(1, changeSet.Deletions.Count);
    Assert.IsFalse(changeSet.IsEmpty);
}

[Test]
public void OneAdditionAndOneDeletion()
{
    // Arrange: Declare any variables or set up any conditions
    //          required by your test.
    var source = new List<Entity>();
    var target = new List<Entity>();
    var identityComparer = new EntityIdentityComparer();

    var diff = new Diff<Entity>(identityComparer);

    source.Add(new Entity() { Id = 1 });
    target.Add(new Entity() { Id = 2 });

    // Act:     Perform the activity under test.
    var changeSet = diff.Execute(source, target);

    // Assert:  Verify that the activity under test had the
    //          expected results
    Assert.AreEqual(1, changeSet.Additions.Count);
    Assert.AreEqual(1, changeSet.Deletions.Count);
    Assert.IsFalse(changeSet.IsEmpty);
}

Here is the test I wrote to detect modifications:

[Test]
public void OneModification()
{
    // Arrange: Declare any variables or set up any conditions
    //          required by your test.
    var source = new List<Entity> { new Entity() { Id = 1, Name = "Test" } };
    var target = new List<Entity> { new Entity() { Id = 1, Name = "Test 1" } };

    var identityComparer = new EntityIdentityComparer();

    var diff = new Diff<Entity>(identityComparer);

    // Act:     Perform the activity under test.
    var changeSet = diff.Execute(source, target);

    // Assert:  Verify that the activity under test had the
    //          expected results
    Assert.AreEqual(0, changeSet.Additions.Count);
    Assert.AreEqual(0, changeSet.Deletions.Count);
    Assert.AreEqual(1, changeSet.Modifications.Count);
    Assert.IsFalse(changeSet.IsEmpty);
}

Notice that my source and target lists have an entity that, according to the IdentityComparer, are the same entity. I need a new concept to be able to tell them apart. I need to know if two entities with the same Identity have the same values. In short, I need another EqualityComparer. This time, instead of testing for equality on the Id property, it should test for equality on all other properties. Here is the EqualityComparer:

public class EntityEqualityComparer : IEqualityComparer<Entity>
{
    public bool Equals(Entity x, Entity y)
    {
        return (x.Name == y.Name);
    }

    public int GetHashCode(Entity obj)
    {
        return obj.GetHashCode();
    }
}

And here is the refactored modification test:

[Test]
public void OneModification()
{
    // Arrange: Declare any variables or set up any conditions
    //          required by your test.
    var source = new List<Entity> {new Entity() {Id = 1, Name = "Test"}};
    var target = new List<Entity> {new Entity() {Id = 1, Name = "Test 1"}};

    var identityComparer = new EntityIdentityComparer();
    var modificationComparer = new EntityEqualityComparer();

    var diff = new Diff<Entity>(identityComparer
        , modificationComparer);

    // Act:     Perform the activity under test.
    var changeSet = diff.Execute(source, target);

    // Assert:  Verify that the activity under test had the
    //          expected results
    Assert.AreEqual(0, changeSet.Additions.Count);
    Assert.AreEqual(0, changeSet.Deletions.Count);
    Assert.AreEqual(1, changeSet.Modifications.Count);
    Assert.IsFalse(changeSet.IsEmpty);
}

Here is the definition of DiffResult:

public class DiffResult<T>
{
    public bool IsEmpty
    {
        get { return this.Additions.Count == 0 
                     && this.Deletions.Count == 0
                     && this.Modifications.Count == 0; }
    }

    private readonly List<T> _additions = new List<T>();
    public List<T> Additions
    {
        get { return this._additions; }
    }

    private readonly List<T> _deletions = new List<T>();
    public List<T> Deletions
    {
        get { return this._deletions; }
    }

    private readonly List<T> _modifications = new List<T>();
    public List<T> Modifications
    {
        get { return this._modifications; }
    }
}

And the implementation of Diff:

 

/// <summary>
/// Used to find changes between two snapshots of a list.
/// </summary>
/// <typeparam name="T"></typeparam>
public class Diff<T>
{
    /// <summary>
    /// Determines if two objects are supposed to be the same object.
    /// </summary>
    private readonly IEqualityComparer<T> _identityComparer;

    /// <summary>
    /// Determines if two objects have equivalent values.
    /// </summary>
    private readonly IEqualityComparer<T> _modificationComparer;

    /// <summary>
    /// Constructor
    /// </summary>
    /// <param name="identityComparer"></param>
    /// <param name="modificationComparer"></param>
    public Diff(IEqualityComparer<T> identityComparer
        , IEqualityComparer<T> modificationComparer)
    {
        Require.That
            .IsNotNull(identityComparer, "Identity comparer is required.")
            .IsNotNull(modificationComparer, "Modification comparer is required.");

        this._identityComparer = identityComparer;
        this._modificationComparer = modificationComparer;
    }

    /// <summary>
    /// Returns a changeset of the differences between the new and original versions of the list.
    /// </summary>
    /// <param name="newList">The new version of the list.</param>
    /// <param name="originalList">The original version of the list.</param>
    /// <returns></returns>
    public DiffResult<T> Execute(IEnumerable<T> newList
        , IEnumerable<T> originalList) 
    {
        var result = new DiffResult<T>();

        var addedItems = newList.Except(originalList, this._identityComparer)
            .ToList();

        var deletedItems = originalList.Except(newList, this._identityComparer)
            .ToList();

        var modifiedItems = from newElement in newList
                            from originalElement in originalList
                            where this._identityComparer.Equals(newElement, originalElement) 
                            && !this._modificationComparer.Equals(newElement, originalElement)
                            select newElement;

        result.Modifications.AddRange(modifiedItems);
        result.Additions.AddRange(addedItems);
        result.Deletions.AddRange(deletedItems);

        return result;
    }
}

You can download the solution files here.

2 thoughts on “Diff – Part 1

Leave a Reply

%d bloggers like this: