Difficulty: Easy, Asked-in: Google, Amazon, Microsoft, Goldman Sachs
Key takeaway: An excellent problem to learn problem-solving and step-by-step optimization using direct address table. We can use similar approach to solve several other coding questions.
Given two strings str1 and str2 of size m and n, write a program to check whether two given strings are anagram of each other or not.
Input: str1= “dusty” , str2= “study”, Output: true
Input: str1= “state” , str2= “taste”, Output: true
Input: str1= “enjoyalgorithms”, str2= “enjoymathematics”, Output: false
One basic solution would be to pick each character from str1 and search for that character in str2. If any character is missing, both strings are not an anagram of each other.
On another side, if characters are repeated, then frequency of repeated characters must be same in anagrams strings. To ensure this, we can replace the found characters in str2 with an empty flag ‘\0’. Think!
bool validAnagram(String str1, int m, String str2, int n)
{
if (m != n)
return false
for (int i = 0; i < m; i = i + 1)
{
int pos = linearSearch(str2, n, str1[i])
if (pos == -1)
return false
else
str2[pos] = '\0'
}
return true
}
// function to perform linear search
int linearSearch(string str, int n, char ch)
{
for (int i = 0; i < n; i = i + 1)
if (str[i] == ch)
return i;
return -1;
}
// function to check if str1 is an anagram of str2
bool validAnagram(string str1, int m, string str2, int n)
{
if (m != n)
return false;
for (int i = 0; i < m; i = i + 1)
{
int pos = linearSearch(str2, n, str1[i]);
if (pos == -1)
return false;
else
str2[pos] = '\0';
}
return true;
}
We are searching each character of str1 in str2 using the linear search. So time complexity = m * O(n) = O(mn). We are using constant extra space, so space complexity = O(1).
Can we think of improving time complexity further? One idea would be to sort both strings and compare each character. This will arrange all repeated characters together and help us to compare their frequency in both strings.
Step 1: If (m != n), strings are not an anagram of each other.
Step 2: Now we sort both strings in increasing order. What sorting algorithm will be efficient in this scenario? Only lowercase characters are present in the input, and the string will contain 26 unique characters. Instead of using the O(nlogn) sorting algorithm, one can use a direct address table of size 26 to count the frequency of each character and sort the string in O(26*n) time and O(1) space. Note: This idea is similar to counting sort algorithm.
void sortString(string & str, int n)
{
int count[26]
for (int i = 0; i < 26; i = i + 1)
count[i] = 0
for (int i = 0; i < n; i = i + 1)
count[str[i] - 'a'] = count[str[i] - 'a'] + 1
int k = 0
for (int i = 0; i < 26; i + 1)
{
for (int j = 0; j < count[i]; j = j + 1)
{
str[k] = 'a' + i
k = k + 1
}
}
}
Step 3: Now run a loop from i = 0 to m - 1 or i = 0 to n - 1 and compare characters at index i on both strings. If we find some unequal character in sequence, return false. Otherwise, by the end of loop, return true.
bool validAnagram(string str1, int m, string str2, int n)
{
if (m != n)
return false
sortString(str1, m)
sortString(str2, n)
for (int i = 0; i < m; i = i + 1)
{
if (str1[i] != str2[i])
return false
}
return true
}
void sortString(string & str, int n)
{
int count[26];
for (int i = 0; i < 26; i = i + 1)
count[i] = 0;
for (int i = 0; i < n; i = i + 1)
count[str[i] - 'a'] = count[str[i] - 'a'] + 1;
int k = 0;
for (int i = 0; i < 26; i = i + 1)
{
for (int j = 0; j < count[i]; j = j + 1)
{
str[k] = 'a' + i;
k = k + 1;
}
}
}
bool validAnagram(string str1, int m, string str2, int n)
{
if (m != n)
return false;
sortString(str1, m);
sortString(str2, n);
for (int i = 0; i < m; i = i + 1)
{
if (str1[i] != str2[i])
return false;
}
return true;
}
We are sorting and comparing both strings. So the time complexity = Time complexity of sorting str1 + Time complexity of sorting str2 + Time complexity of comparing sorted strings = O(n) + O(n) + O(n) = O(n). We use constant extra space, so space complexity = O(1).
Searching is one of the critical operations to solve this problem. So can we think of using hash table or direct address table to compare frequency of characters? Hash table is an efficient data structure for searching, insertion and deletion in O(1) time average.
All are lowercase characters, which have 26 possible values. So we can create two separate direct address tables of size 26 to store frequency of each character in both strings. We can directly store values in each table using the ASCII value of characters.
After storing values in direct address tables, we traverse them linearly and compare the frequency count of each character. If values in both tables are equal, then strings are anagrams of each other.
bool validAnagram(string str1, int m, string str2, int n)
{
if (m != n)
return false;
int freqCount1[26];
int freqCount2[26];
for (int i = 0; i < 26; i = i + 1)
{
freqCount1[i] = 0;
freqCount2[i] = 0;
}
for (int i = 0; i < n; i = i + 1)
{
freqCount1[str1[i] - 'a'] = freqCount1[str1[i] - 'a'] + 1;
freqCount2[str2[i] - 'a'] = freqCount2[str2[i] - 'a'] + 1;
}
for (int i = 0; i < 26; i = i + 1)
{
if (freqCount1[i] != freqCount2[i])
return false;
}
return true;
}
def valid_anagram(str1, str2):
if len(str1) != len(str2):
return False
freq_count1 = [0] * 26
freq_count2 = [0] * 26
for i in range(26):
freq_count1[i] = 0
freq_count2[i] = 0
for i in range(len(str2)):
freq_count1[ord(str1[i]) - ord('a')] += 1
freq_count2[ord(str2[i]) - ord('a')] += 1
for i in range(26):
if freq_count1[i] != freq_count2[i]:
return False
return True
public class Solution
{
public boolean validAnagram(String str1, String str2)
{
if (str1.length() != str2.length())
return false;
int[] freqCount1 = new int[26];
int[] freqCount2 = new int[26];
for (int i = 0; i < 26; i = i + 1)
{
freqCount1[i] = 0;
freqCount2[i] = 0;
}
for (int i = 0; i < str2.length(); i = i + 1)
{
freqCount1[str1.charAt(i) - 'a']++;
freqCount2[str2.charAt(i) - 'a']++;
}
for (int i = 0; i < 26; i = i + 1)
{
if (freqCount1[i] != freqCount2[i])
return false;
}
return true;
}
}
We are running three separate loops, where 1st and 3rd loops run constant times and the 2nd loop runs n times. So time complexity = Time complexity of initializing direct address tables + Time complexity of storing direct address tables + Time complexity of comparing direct address tables = = O(1) + O(n) + O(1) = O(n).
The size of direct address tables are constant, so space complexity = O(1) + O(1) = O(1).
The critical question is: Can we optimize space complexity of the above approach and solve problem using one direct address table?
Suppose we traverse the first string and store frequency count of each character in a direct address table of size 26. Now we need to do slight modifications to the above approach.
Instead of storing frequency count of second string in a separate table, we traverse second string and decrement frequency of each character in direct address table of first string. Now at this stage, if both strings are anagrams, then all values stored in direct address table must be 0.
So we traverse the table to check this. If we find any value not equal to 0, return false. Otherwise, by the end of loop, return true.
bool validAnagram(string str1, int m, string str2, int n)
{
if (m != n)
return false;
int freqCount[26];
for (int i = 0; i < 26; i = i + 1)
freqCount[i] = 0;
for (int i = 0; i < n; i = i + 1)
freqCount[str1[i] - 'a'] = freqCount[str1[i] - 'a'] + 1;
for (int i = 0; i < n; i = i + 1)
freqCount[str2[i] - 'a'] = freqCount[str2[i] - 'a'] - 1;
for (int i = 0; i < 26; i = i + 1)
{
if (freqCount[i] != 0)
return false;
}
return true;
}
def valid_anagram(str1, str2):
if len(str1) != len(str2):
return False
freq_count = [0] * 26
for i in range(26):
freq_count[i] = 0
for i in range(len(str2)):
freq_count[ord(str1[i]) - ord('a')] += 1
freq_count[ord(s2[i]) - ord('a')] -= 1
for i in range(26):
if freq_count[i] != 0:
return False
return True
public class Solution
{
public boolean validAnagram(String str1, int m, String str2, int n)
{
if (m != n)
return false;
int[] freqCount = new int[26];
for (int i = 0; i < 26; i = i + 1)
freqCount[i] = 0;
for (int i = 0; i < n; i = i + 1)
freqCount[str1.charAt(i) - 'a'] += 1;
for (int i = 0; i < n; i = i + 1)
freqCount[str2.charAt(i) - 'a'] -= 1;
for (int i = 0; i < 26; i = i + 1)
{
if (freqCount[i] != 0)
return false;
}
return true;
}
}
Time complexity = Time complexity of traversing first string and storing values in direct address table + Time complexity of traversing second string and decrementing values in direct address table + Time complexity of checking 0’s in direct address table = O(n) + O(n) + O(n) = O(n).
We only use one frequency table of constant size, so space complexity = O(1).
Please write in the message below if you have some additional insights or if you find an error.
Enjoy learning, Enjoy algorithms!