K Edit Distance

K Edit Distance

Given a list of word and a target word, output all the words for each the edit distance with the target no greater than k.

e.g. [abc, abd, abcd, adc], target “ac”, k = 1,

output = [abc, adc]

  • Naive Solution:
    A naive solution would be, for each word in the list, calculate the edit distance with the target word. If it is equal or less than k, output to the result list. If we assume that the average length of the words is m, and the total number of words in the list is n, the total time complexity is O(n * m^2).

  • Trie tree:
    The problem with the previous solution is if the given list of the words is like ab, abc, abcd, each time we need to repeatedly calculate the edit distance with the target word. If we can combine the prefix of all words together, we can save lots of time.

Solution

Explanation will be added.

reference
reference

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#include <vector>
#include <iostream>
using namespace std;
typedef struct TrieNode{
bool isWord {false};
TrieNode *children[26]{NULL};
} TrieNode;
TrieNode *buildTree(const vector<string>& words){
TrieNode *root = new TrieNode(), *cur = root;
for(const auto& w: words){
cur = root;
for(const auto& c: w){
if(!cur->children[c-'a']) cur->children[c-'a'] = new TrieNode();
cur = cur->children[c-'a'];
}
cur->isWord = true;
}
return root;
}
void search(const string& s, TrieNode* root, const int& k, vector<int>& prev_dist,
string path, vector<string>& v){
if(root->isWord && prev_dist.back()<=k) v.push_back(path);
for(int i = 0; i < 26; ++i){
if(!root->children[i]) continue;
vector<int> cur_dist = prev_dist;
cur_dist[0]++;
char c = 'a'+i;
bool hasValid = cur_dist[0]<=k;
for(int j = 1; j <= s.length(); ++j){
if(s[j-1] == c)
cur_dist[j] = prev_dist[j-1];
else
cur_dist[j] = min(min(cur_dist[j-1], prev_dist[j]), prev_dist[j-1])+1;
if(cur_dist[j] <= k) hasValid = true; //to fill up the cur_dist, cannot break here;
}
if(hasValid) search(s, root->children[i], k, cur_dist, path+c, v);
}
}
int main(int argc, char *argv[])
{
vector<string> words{"abc", "abd", "abcd", "adc"};
TrieNode *root = buildTree(words);
vector<string> v;
string s = "ac";
int k = 2;
vector<int> dist(s.length()+1, 0);
for(int i = 0; i <= s.length(); ++i) dist[i] = i;
search(s, root, k, dist, "", v);
for(int i = 0; i < v.size(); ++i) cout<<v[i]<<endl;
return 0;
}

Always welcome new ideas and practical tricks, just leave them in the comments!