JURVANOERLE.nl

An application written in C# that calculates the difference between two strings of characters by examining the minimum amount of operations required to get from the first string to the second. The three operations that can be performed are:
Insert:
insert a character in string 1.

Delete:
delete a character from string 1.

Substitute:
substitute a character from string 1 for a character in string 2.

How it calculates this can be found on wikipedia .

click to download exe


Here is the source:
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
//Program.cs


namespace Levenshtein_distance
{
    class Program
    {
        static void Main( string[] args )
        {
            while ( true )
            {
                //request the user to input two strings.
                Console.WriteLine(  "Please enter the first string:" );
                string s1   = Console.ReadLine( );
                Console.WriteLine( "Please enter the second string:" );
                string s2   = Console.ReadLine( );

                //create a new LevenshteinsMatrix object that will calculate the path between the two input strings.
                LevenshteinMatrix lm    = new LevenshteinMatrix( );
                int[,] m                = lm.CalculateMatrix( s1, s2 );
                
                //when we have the matrix, let's trace it back to find which steps are required to get from the first string
                //to the second string and store it in a Queue object.
                Queue<Path> lo          = lm.TraceBackMatrix( m, s2, s1 );
               
                //don't mind all the Console.Writes as they are mostly used to markup the output.
                Console.Write( "\n      " );
                Console.ForegroundColor = ConsoleColor.Cyan;
                for ( int k = 1; k < s2.Length + 1; ++k )   //write the second string (where we want to work to) on top of the matrix in light blue.
                    Console.Write( string.Format( "  {0}", s2[k - 1] ) );
                Console.WriteLine( '\n' );
                
                for ( int i = 0; i < s1.Length + 1; ++i ) //for every char in the first string...
                {
                    Console.Write( ' ' );
                    if ( i > 0 
                    {
                        //write the first string (from which we are working) on the left side of the matrix in green.
                        Console.ForegroundColor = ConsoleColor.Green;
                        Console.Write( string.Format( " {0} ", s1[i - 1] ) );
                    }
                    //the first stream of ints should be indented.
                    else Console.Write( "   " );
                    
                    //now loop through all chars of the second string...
                    for ( int j = 0; j < s2.Length + 1; ++j )
                    {
                        //write all elements of the matrix in yellow.
                        Console.ForegroundColor = ConsoleColor.Yellow;

                        //this can be done much neater, I know. It's just for the sake of clarity...
                        foreach ( Path p in lo )
                        {
                            //and the background of the path in red.
                            if ( ( p.j == i && p.i == j ) || ( i == s1.Length && j == s2.Length ) )
                            {
                                Console.BackgroundColor     = ConsoleColor.Red;
                                break;
                            }
                        }
                        Console.Write( string.Format( "{0, 2:D1} ", m[j, i] ) );
                        //make sure that the background is black again for the next element in the matrix.
                        Console.BackgroundColor  = ConsoleColor.Black;
                    }
                    //markup.
                    Console.Write( "\n\n" );
                }

                //make the foreground dull once more.
                Console.ForegroundColor = ConsoleColor.Gray;
                int steps = 0;
                   
                //keep deQueueing the Queue we got by tracing back the matrix until it is empty.
                while ( lo.Count > 0 )
                {
                    Path p                      = lo.DeQueue( );
                    LevenshteinOperation nextOp = p.operation;

                    //if we need to do something to alter the first string, it will take an additional step.
                    if ( nextOp != LevenshteinOperation.Nothing )
                        Console.Write( string.Format( "step {0}; ", ++steps ) );

                    //check to see which operation needs to be performed on the string next and output the result:
                    switch( nextOp )
                    {
                        case LevenshteinOperation.Delete:
                            Console.WriteLine( string.Format( "delete {0}:", s1[p.j] ) );
                            s1  = s1.Remove( p.j, 1 );
                            break;
                        case LevenshteinOperation.Insert:
                            Console.WriteLine( string.Format( "insert {0}:", s2[p.i] ) );
                            s1    = s1.Insert( p.j, s2[p.i].ToString( ) );
                            break;
                        case LevenshteinOperation.Substitute:
                            Console.WriteLine( "substitute {0} for {1}:", s1[p.j], s2[p.i] );
                            s1  = s1.Remove( p.j, 1 );
                            s1  = s1.Insert( p.j, s2[p.i].ToString( ) );
                            break;
                        case LevenshteinOperation.Nothing:
                            continue;
                    }
                    Console.WriteLine( string.Format( "{0}\n", s1 ) );
                }
            }
        }       
    } 
}


  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
//LevenshteinMatrix.cs 


public enum LevenshteinOperation
{
    Nothing,
    Insert,
    Delete,
    Substitute
}

namespace Levenshtein_distance
{
	//store the operations performed and the char in both the first string(i) and the second string(j) to get from string1 to string2.
    public struct Path
    {
        public LevenshteinOperation operation   { get; set; }
        public int i                            { get; set; }
        public int j                            { get; set; }
    };

    class LevenshteinMatrix.cs
    {
        /// <summary>
        /// calculates the matrix to get from string1 to string2 and returns it.
        /// </summary>        
        public int[,] CalculateMatrix( string from, string to )
        {
            //the rows represent the letters in from, the columns represent the letters in to.
            int[,] m    = new int[to.Length + 1, from.Length + 1];
            m[0, 0]     = 0;

            //the indices of the rows (i) and columns (j).
            int i, j; 

            /*
             Make sure that the first row and column are filled (they go from 0, 1, ...n)             
             */
            for ( i = 0; i < to.Length + 1; ++i )
            {
                m[i, 0] = i;
            }
            for ( j = 0; j < from.Length + 1; ++j )
            {
                m[0, j] = j;
            }

            /*
             The values of the matrix are then filled in (this can be done horizontally or vertically, I chose for horizontal).
             */
            for ( j = 1; j < from.Length + 1; ++j )
            {
                for ( i = 1; i < to.Length + 1; ++i )
                {
                    //get the value left of the current element + 1 (insert)
                    int ins = m[i - 1, j] + 1;

                    //get the value above the current element + 1 (delete)
                    int del = m[i, j - 1] + 1;

                    //substitution will be chosen either when it is the cheapest operation or when no operation needs to be performed.
                    //Consequently, the value of this element is either incremented (when substitution is required) and left unchanged otherwise.
                    int sub = m[i - 1, j - 1] + ( to[i - 1] == from[j - 1] ? 0 : 1 );

                    //get the cheapest operation.
                    int min = Math.Min( Math.Min( ins, del ), sub );
                    m[i, j] = min;                
                }
            }
            return m;
        }

        /// <summary>
        /// traces the matrix back to calculate the path taken to get from string1 to string2.
        /// </summary>
        public Queue<Path> TraceBackMatrix( int[,] m, string from, string to )
        {
            //we always go from the last element to the first, so a Queue is most convenient.
            Queue<Path> path = new Queue<Path>( );
            
            //start at the last element in the matrix.
            int i   = from.Length;
            int j   = to.Length;

            
            //keep looping until we reached the first index of the matrix.
            while ( i != 0 || j != 0 )
            {
                int left  = m[Math.Max( i - 1, 0 ), Math.Max( j, 0 )];     //the element left to the current element
                int up    = m[Math.Max( i, 0 ), Math.Max( j - 1, 0 )];     //the element above the current element.
                int diag  = m[Math.Max( i - 1, 0 ), Math.Max( j - 1, 0 )];  //the element diaganol of the current element.

                //get the lowest operation cost.
                int min   = Math.Min( Math.Min( left, up ), diag );
                LevenshteinOperation op;

                Path p          = new Path( );
                if ( from[Math.Max( i - 1, 0 )] == to[Math.Max( j - 1, 0 )] ) //the current letter of the first string is the same as the letter from the second string
                {
                    //always pick the diagonal element.
                    --i;
                    --j;
                    op  = LevenshteinOperation.Nothing;
                }
                //first check if the char should be deleted. Has priority over insert and substitute.
                else if ( min == up )
                {
                    --j;
                    op  = LevenshteinOperation.Delete;
                }
                //then check if the current char from the second string should be inserted. Has priority over substitute.
                else if ( min == left )
                {
                    --i;
                    op  = LevenshteinOperation.Insert;
                }
                //if none of the above is true, the current char from the first string is substituted with the current char from the second string.
                else
                {
                    --i;
                    --j;
                    op  = LevenshteinOperation.Substitute;
                }

                //set the horizontal and vertical position of the current element (we need it later).
                p.i             = i;
                p.j             = j;
                p.operation     = op;

                path.EnQueue( p );
            }
            return path;
        }
    }
}