5
\$\begingroup\$

I wanted to implementDijkstra's Algorithm in an Excel VBA Add-In and built it to be used as follows:

  1. Define a list of paths with distances between points. This list needs to contain 3 headings that are used as flags to pick up where the list is. The 3 headings are!dijk:dat:from,!dijk:dat:to and!dijk:dat:dist
  2. Specify from which point to which point you want to go. This is indicated with flags to the left of the cell. The flags are!dijk:get:from and!dijk:get:to
  3. If the list of paths is on a different sheet, specify which sheet it is on by putting the name of the sheet in a cell next to a cell with the text!dijk:dat
  4. Specify where the output should go. This is defined with a flag at the top left of where it should go. The flag is!dijk:steps
  5. Push a button in the Ribbon that triggersSub sCalcDijkstra() in a module in my Add-In

An example of a dummy sheet I used for testing:

Example Sheet

This is the procedure that does all the work:

Sub sCalcDijkstra()    'Calculate the shortest path between 2 points    Dim vError As String    Dim vRange As Range    Dim vRangeDat As Range    Dim vRow As Long    Dim vRowDatHead As Long    Dim vRowSteps As Long    Dim vRowFirst As Long    Dim vRowCount As Long    Dim vRowCountDat As Long    Dim vCol As Long    Dim vColDatFrom As Long    Dim vColDatTo As Long    Dim vColDatDist As Long    Dim vColSteps As Long    Dim vColFirst As Long    Dim vColCount As Long    Dim vColCountDat As Long    Dim vCell As String    Dim vCellFrom As String    Dim vCellTo As String    Dim vValDist As Double    Dim vParFrom As String    Dim vParTo As String    Dim vParDat As String    Dim vDist As Scripting.Dictionary    Dim vKey As Variant    Dim vCurNode As String    Dim vCurDist As Double    Dim vCurDistTo As Double    Dim vSteps() As String    On Error GoTo 0    vError = ""    'Check that there is a workbook open    If ActiveSheet Is Nothing Then vError = "You need to open a workbook in order to do this"    If vError <> "" Then GoTo ErrorHandler    'Get the settings from the current sheet    Set vRange = ActiveSheet.UsedRange    vRowCount = vRange.Rows.Count    vColCount = vRange.Columns.Count    vRowFirst = vRange.Row    vColFirst = vRange.Column    vRowSteps = 0    vColSteps = 0    vParFrom = ""    vParTo = ""    vParDat = ""    For vRow = 1 To vRowCount        For vCol = 1 To vColCount            vCell = ""            On Error Resume Next            vCell = Trim(UCase(vRange.Cells(vRow, vCol).Value))            On Error GoTo 0            If vCell = "!DIJK:GET:FROM" Then                vParFrom = Trim(UCase(vRange.Cells(vRow, vCol + 1).Value))            ElseIf vCell = "!DIJK:GET:TO" Then                vParTo = Trim(UCase(vRange.Cells(vRow, vCol + 1).Value))            ElseIf vCell = "!DIJK:DAT" Then                vParDat = Trim(UCase(vRange.Cells(vRow, vCol + 1).Value))            ElseIf vCell = "!DIJK:STEPS" Then                vRowSteps = vRow                vColSteps = vCol            End If        Next    Next    If vParFrom = "" Then vError = vError & "Need to specify a Source with the parameter !dijk:get:from" & vbCrLf & vbCrLf    If vParTo = "" Then vError = vError & "Need to specify a Destination with the parameter !dijk:get:to" & vbCrLf & vbCrLf    If vRowSteps = 0 Then vError = vError & "Need to designate an area to print the results with the parameter !dijk:steps" & vbCrLf & vbCrLf    If vError <> "" Then GoTo ErrorHandler    'Clean up the output area    vRange.Range(vRange.Cells(vRowSteps + 2 - vRowFirst, vColSteps + 1 - vColFirst).Address, vRange.Cells(vRowCount + vRowFirst - 1, vColSteps + 3 - vColFirst).Address).ClearContents    'Get the paths from the data sheet    If vParDat = "" Then        Set vRangeDat = vRange    Else        Set vRangeDat = ActiveWorkbook.Worksheets(vParDat).UsedRange    End If    vRowCountDat = vRangeDat.Rows.Count    vColCountDat = vRangeDat.Columns.Count    vRowDatHead = 0    vColDatFrom = 0    vColDatTo = 0    vColDatDist = 0    For vRow = 1 To vRowCountDat        For vCol = 1 To vColCountDat            vCell = ""            On Error Resume Next            vCell = Trim(UCase(vRangeDat.Cells(vRow, vCol).Value))            On Error GoTo 0            If vCell = "!DIJK:DAT:FROM" Then                vRowDatHead = vRow                vColDatFrom = vCol            ElseIf vCell = "!DIJK:DAT:TO" Then                vRowDatHead = vRow                vColDatTo = vCol            ElseIf vCell = "!DIJK:DAT:DIST" Then                vRowDatHead = vRow                vColDatDist = vCol            End If        Next        If vRowDatHead > 0 Then Exit For    Next    If vColDatFrom = 0 Then vError = vError & "Data sheet is missing !dijk:dat:from column" & vbCrLf & vbCrLf    If vColDatTo = 0 Then vError = vError & "Data sheet is missing !dijk:dat:to column" & vbCrLf & vbCrLf    If vColDatDist = 0 Then vError = vError & "Data sheet is missing !dijk:dat:dist column" & vbCrLf & vbCrLf    If vError <> "" Then GoTo ErrorHandler    Set vDist = New Scripting.Dictionary    For vRow = vRowDatHead + 1 To vRowCountDat        vCellFrom = ""        vCellTo = ""        vValDist = -1        On Error Resume Next        vCellFrom = Trim(UCase(vRangeDat.Cells(vRow, vColDatFrom).Value))        vCellTo = Trim(UCase(vRangeDat.Cells(vRow, vColDatTo).Value))        vValDist = Val(Trim(UCase(vRangeDat.Cells(vRow, vColDatDist).Value)))        On Error GoTo 0        If vCellFrom <> "" And vCellTo <> "" And vValDist >= 0 Then            If Not vDist.Exists(vCellFrom) Then Set vDist.Item(vCellFrom) = New Scripting.Dictionary            If Not vDist.Exists(vCellTo) Then Set vDist.Item(vCellTo) = New Scripting.Dictionary            vDist(vCellFrom).Item(vCellTo) = vValDist            If Not vDist(vCellTo).Exists(vCellFrom) Then vDist(vCellTo).Item(vCellFrom) = vValDist        End If    Next    If Not vDist.Exists(vParFrom) Then vError = vError & "Source " & vParFrom & " not listed in data" & vbCrLf & vbCrLf    If Not vDist.Exists(vParTo) Then vError = vError & "Destination " & vParTo & " not listed in data" & vbCrLf & vbCrLf    If vError <> "" Then GoTo ErrorHandler    'Calculate the shortest path    For Each vKey In vDist.Keys()        vDist(vKey).Item("!dist") = -1        vDist(vKey).Item("!scan") = False        vDist(vKey).Item("!steps") = ""    Next    vDist(vParFrom).Item("!dist") = 0    vDist(vParFrom).Item("!steps") = vParFrom    Do While True        vCurNode = ""        vCurDist = 0        For Each vKey In vDist.Keys()            If vDist(vKey)("!scan") = False Then                If vDist(vKey)("!dist") >= 0 Then                    If vCurNode = "" Or vCurDist > vDist(vKey)("!dist") Then                        vCurNode = vKey                        vCurDist = vDist(vKey)("!dist")                    End If                End If            End If        Next        If vCurNode = "" Then Exit Do        If vCurNode = vParTo Then Exit Do        vDist(vCurNode).Item("!scan") = True        For Each vKey In vDist(vCurNode).Keys()            If Left(vKey, 1) <> "!" And vKey <> vCurNode Then                vCurDistTo = vCurDist + vDist(vCurNode)(vKey)                If vDist(vKey)("!dist") < 0 Or vCurDistTo < vDist(vKey)("!dist") Then                    vDist(vKey).Item("!dist") = vCurDistTo                    vDist(vKey).Item("!steps") = vDist(vCurNode)("!steps") & "!" & vKey                End If            End If        Next    Loop    'Print the result    If vDist(vParTo)("!dist") < 0 Then        vRange.Cells(vRowSteps + 1, vColSteps).Value = "No path found from source to destination"    Else        vSteps = Split(vDist(vParTo)("!steps"), "!")        For vRow = 1 To UBound(vSteps)            vRange.Cells(vRowSteps + vRow, vColSteps).Value = vSteps(vRow - 1)            vRange.Cells(vRowSteps + vRow, vColSteps + 1).Value = vSteps(vRow)            vRange.Cells(vRowSteps + vRow, vColSteps + 2).Value = vDist(vSteps(vRow - 1))(vSteps(vRow))        Next        vRange.Cells(vRowSteps + vRow, vColSteps).Value = "Total:"        vRange.Cells(vRowSteps + vRow, vColSteps + 2).Value = vDist(vParTo)("!dist")    End If    'Done    MsgBox "Done", vbOKOnly + vbInformation, "Path and Distance"    GoTo FinalizeErrorHandler:    Err.Clear    MsgBox vError, vbOKOnly + vbCritical, "Error"Finalize:    Set vDist = NothingEnd Sub

The code works, but I would like some feedback on the following aspects:

  • How can I make this easier and more intuitive for a user? I know I can use named ranges instead of flags, but I would prefer to keep it more visibly obvious what the code is using
  • How can I apply the DRY principle more here. I'm repeating the same patterns all the time, but it seems like the details vary too much for me to just stick something like the nested for loops into a function
  • I useScripting.Dictionary for almost everything due to it's flexibility and simply due to the fact that I am comfortable with how it works, but I suspect there may be better data structures that I can use which work better for this use case
  • At the heart of this is theDo While True loop which is probably a horribly inefficient way to implement Dijkstra's Algorithm. How can I make it more efficient?
  • Any help/critique is greatly appreciated. I taught myself VBA with the help of Google and I may be doing some bad things I never even realised
askedJul 19, 2018 at 9:32
neelsg's user avatar
\$\endgroup\$

2 Answers2

4
\$\begingroup\$

That is a humungous procedure you got there. It's all in one piece and it's following a lot of conventions from older VBA code that you should not let become a habit.

The first of these "conventions" is to declare every variable at the top of the block they're scoped to. This is a relic of "ye olden days" where it was important to know up front what things were in a procedure and how you could refer to them. See what that does to the readability of your code on a screen that is not in portrait orientation:

declarations section from the question screenshotted

This is... not really useful, because I can't even remotely tell what variable is needed where and evenwhether it's needed.

Declare variables as close as possible to their usage.
This has the added benefit of reducing the mental strain when reading the code. You don't need to remember every variable declaration, only those in close proximity to understand the code.

While we're at that section: I noticed that you prefixed every single one of these variables with av, most likely for "Variable". Don't do that. This adds no useful information to the name of the variable and should frankly speaking be unnecessary.

'Check that there is a workbook openIf ActiveSheet Is Nothing Then vError = "You need to open a workbook in order to do this"If vError <> "" Then GoTo ErrorHandler

Let's rewrite this a bit. For one the comment is a lie, this doesn't check that a workbook is open, it checks that the ActiveSheet property of the global Application object is not Nothing. In addition you're making this somewhat harder to read by forcing yourself into single-line if-statements:

If ActiveSheet Is Nothing Then    vError = "You need to open a workbook and select a sheet in order to do this"    GoTo ErrorHandlerEnd If

Doing all the work is long and tedious. This code can benefit a lot from extracting subroutines into actual subroutines or Functions. Consider encapsulating blocks of code that have an explanatory comment into their own function:

Sub SolveDijkstra()    If Not CheckWorkbook() Then Exit Sub    Dim settings As DijkstraSettings    Set settings = GetSettingsFromCurrentSheet()    If Not CheckSettings(settings) Then Exit Sub    CleanOutputArea()    Dim graph As Graph    Set graph = GetPathsFromDataSheet()    If Not CheckGraph(graph) Then Exit Sub    Dim path As DijkstraSolution    Set path = Dijkstra(graph, settings)    If path.HasSolution Then        WritePathToOutput(path)    Else        WriteErrorToOutput("No path found from source to destination")    End If    MsgBox "Done", vbOKOnly + vbInformation, "Path and Distance"End Sub

This "reimagination" has the clear benefit of allowing us to abstract the tedious separate steps into methods and objects. We don't need to understand the10 different loops to grasp what this sub does.

Note that this also sidesteps the usage of GoTo which is ... problematic in some contexts.


In closing I want to explicitly call out stuff I noticed as outstanding:

  • You always access a cell's value explicitly throughValue :+1:
  • You use a dictionary to keep track of the cost for a given node
  • You reinstate error handling afterOn Error Resume Next statements and seem to have tried to keep OERN areas as small as possible
  • You validate your input and have a pretty clean way of getting it from your sheet

What's missing to take your VBA coding to the next step is the use of objects and user-defined types as well as noting that you don't need to reuse a block of code to extract it.

answeredJul 19, 2018 at 15:20
Vogel612's user avatar
\$\endgroup\$
1
  • \$\begingroup\$Thank you. This is exactly the kind of feedback I was looking for. I agree with all your points. Time to learn about user-defined objects in VBA. I'm not very familiar with objects in VBA, but have used them in other languages\$\endgroup\$CommentedJul 20, 2018 at 7:36
3
\$\begingroup\$

Question 1

How can I make this easier and more intuitive for a user? I know I can use named ranges instead of flags, but I would prefer to keep it more visibly obvious what the code is using

The first thing that I would do is rename all data headers. I understand that you wanted an unique identifier that "!dijk:dat:from" to me is not at all intuitive to me. After studying Dijkstra's algorithm I came back to determined that it is representsDijkstra's - data - From nodes (vertices). Although they make perfect sense, I also don't likeFrom andTo. I'm not an authority but I think that there is no inherent direction to the connection of two nodes in a graph.

Alternate Column Headers:

  • !dijk:dat:from Node, Node1, Vertex, Vertex1
  • !dijk:dat:to Neighbor, Node2, Vertex2
  • !dijk:dat:dist Distance
  • !dijk:get:from Origin
  • !dijk:get:to Destination
  • !dijk:steps Path

enter image description here

Question 2

How can I apply the DRY principle more here. I'm repeating the same patterns all the time, but it seems like the details vary too much for me to just stick something like the nested for loops into a function

There isn't really any unnecessary repeated code.

↓This loop↓ is repeated twice but the second loop relies on information gathered from the first loop.

For vRow = 1 To vRowCount    For vCol = 1 To vColCount

Question 3

I use Scripting.Dictionary for almost everything due to it's flexibility and simply due to the fact that I am comfortable with how it works, but I suspect there may be better data structures that I can use which work better for this use case

A custom Node class would have made it easier work out the logic but it is not necessary.

Pseudo Class: GraphNode

Option ExplicitConst Infinity As Long = 2147483647Private Type Members    NodeKey As String    NeighborKey As String    Distance As LongEnd TypePrivate m As MembersPublic Sub Init(NodeKey As String, Optional NeighborKey As String, Optional Distance As Long = Infinity)    m.NodeKey = NodeKey    m.NeighborKey = NeighborKey    m.Distance = DistanceEnd SubPublic Function getKey() As StringAttribute Value.VB_UserMemId = 0    getKey = m.NodeKey = NodeKeyEnd FunctionPublic Function getDistance(Node As GraphNode) As Long        getDistance = m.DistanceEnd FunctionPublic Function UpdateDistance(Node As GraphNode) As Boolean    If Node.getDistance < m.Distance Then        UpdateDistance = True        m.Distance = Node.getDistance    End IfEnd Function

I would make some changes in the way that you use the dictionaries.

Using these keys "!dist", "!steps", "!scan" as properties works very well.

vDist(vKey)("!dist")vDist(vKey).Item("!dist")vDist(vKey).Item("!steps")vDist(vKey).Item("!scan")

I would use constants instead. This will not only make the code easier to read but enable you to use intellisense with them.

enter image description here

Item can be omitted because the default member of a Scripting Dictionary isItems().

 vDist(vKey).Item("!dist")vDist(vKey)("!dist")

SinceItems() takes a string as an argument we can use the Bang operator to pass a string key toItems() and return the it's value.

vDist(vKey)![!dist]

The bracket are necessary because!dist is not a valid variable name. Removing! from the variable names will allow you to do this:

vDist(vKey)!dist

Question 4

At the heart of this is the Do While True loop which is probably a horribly inefficient way to implement Dijkstra's Algorithm. How can I make it more efficient?

Dijkstra's Algorithm requires aDo or aWhile loop this is unavoidable. Implementing themin-priority queue meationed in the article you linked would make it more efficient but that is out of the scope of a review.

Wikipedia - Dijkstra's algorithm

A min-priority queue is an abstract data type that provides 3 basic operations : add_with_priority(), decrease_priority() and extract_min(). As mentioned earlier, using such a data structure can lead to faster computing times than using a basic queue.

Add Template

Creating a subroutine to add a template worksheet would make the setup easier for both you and the users. Using constants for the Column Headers and working with tables would also simplify things.

Const NodeHeader As String = "Node", NeighborHeader As String = "Neighbor", DistanceHeader As String = "Distance"Const OriginHeader As String = "Origin", DestinationHeader As String = "Destination"Const Distance As String = "Distance", Path As String = "Path", Visited As String = "Visited"Sub AddTemplate()    Dim TableRange As Range    With Worksheets.Add        ' Add Graph NodeList Table        .Range("B1").Value = "Graph - Node List"        Set TableRange = .Range("B2").Resize(1, 3)        TableRange.Value = Array(NodeHeader, NeighborHeader, DistanceHeader)        .ListObjects.Add xlSrcRange, TableRange, , xlYes        ' Add Settings Table        .Range("F1").Value = "Settings"        Set TableRange = .Range("F2").Resize(1, 2)        TableRange.Value = Array(OriginHeader, DestinationHeader)        .ListObjects.Add xlSrcRange, TableRange, , xlYes        ' Add Results Table        .Range("F5").Value = "Results"        Set TableRange = .Range("F6").Resize(1, 3)        TableRange.Value = Array(OriginHeader, DestinationHeader, DistanceHeader)        .ListObjects.Add(xlSrcRange, TableRange, , xlYes).ShowTotals = True        TableRange.Cells(3, 3).Formula = "=SUBTOTAL(109,[Distance])"    End WithEnd Sub

enter image description here

answeredJul 24, 2018 at 5:25
TinMan's user avatar
\$\endgroup\$
1
  • \$\begingroup\$For the record: I dislike the suggestion of the bang operator. Other than that, this looks pretty good :)\$\endgroup\$CommentedSep 4, 2018 at 0:53

You mustlog in to answer this question.

Protected question. To answer this question, you need to have at least 10 reputation on this site (not counting theassociation bonus). The reputation requirement helps protect this question from spam and non-answer activity.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.