Author : MD TAREQ HASSAN | Updated : 2021/03/22
HashSet in CSharp
- The HashSet<T> class provides high-performance set operations. A set is a collection that contains no duplicate elements, and whose elements are in no particular order.
- The HashSet<T> class is based on the model of mathematical sets and provides high-performance set operations similar to accessing the keys of a Dictionary<TKey,TValue>.
- In simple terms, the HashSet<T> class can be thought of as a Dictionary<TKey,TValue> collection without values.
- A HashSet<T>'s behavior can be compared to using a Dictionary<TKey,TValue> by only adding/removing keys and ignoring the dictionary values themselves. You would expect keys in a dictionary not to have duplicates, and that's the point of the "Set" part.
- The primary difference between LINQ set operations and HashSet<T> operations is that LINQ set operations always return a new IEnumerable<T> collection, whereas the equivalent HashSet<T> methods modify the current collection.
HashSet<T> provides (Mathematical set operations):
- UnionWith
- IntersectWith
- ExceptWith
- SymmetricExceptWith
- Overlaps
- IsSubsetOf
- IsProperSubsetOf
- IsSupersetOf
- IsProperSupersetOf
- SetEquals
Links
Creating hashset
// Creating hashset: explicit type, var, and with a custom equality comparer
HashSet<int> evenNumbers = new HashSet<int>();
var oddNumbers = new HashSet<int>();
var foo = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

// Initialization with a collection initializer
var initializedSet = new HashSet<int>() { 1, 2, 3 };

// From array: HashSet<T>(IEnumerable<T> collection)
var fooArray = new int[] { 1, 2, 3 };
var setFromArray = new HashSet<int>(fooArray);

// From list: HashSet<T>(IEnumerable<T> collection)
var fooList = new List<int> { 1, 2, 3 };
var setFromList = new HashSet<int>(fooList);

// The operations below work on strings, so they need a HashSet<string>
var fooSet = new HashSet<string> { "foo", "bar" };

// Add: returns false and leaves the set unchanged when the item is already present
fooSet.Add("new item");

// Contains: O(1)
if (fooSet.Contains("bar")) {
    fooSet.Remove("bar"); // Remove on its own also reports (via its bool result) whether the item was present
}

// Get item from hashset: found is false here because "bar" was removed above
var found = fooSet.TryGetValue("bar", out var bar);

// Properties
var uniqueItemCount = fooSet.Count;

// Looping (the loop variable must not reuse the name of an existing local such as foo)
foreach (var item in fooSet) {
}
Char Array to HashSet
// From a char array: repeated characters (e.g. several spaces) end up in the set only once
var testChars = "axbycz".ToCharArray();
var charSetFromArray = new HashSet<char>(testChars);

// string implements IEnumerable<char>, so it can be passed to the constructor directly
var testStr = "axbycz";
var charSet = new HashSet<char>(testStr); // repeated characters collapse here as well
HashSet to Char Array
// Build the set from the string's characters, then copy its elements into a new array
HashSet<char> charSet = new HashSet<char>("axbycz");
char[] charArray = Enumerable.ToArray(charSet);
String to HashSet
// Split the text on single spaces and collect the distinct words
string str = "ax by cz";
string[] strArray = str.Split(" ");
HashSet<string> strSet = new HashSet<string>(strArray);
HashSet to String
// Join the set's elements back into one space-separated string
string[] words = "ax by cz".Split(" ");
HashSet<string> strSet = new HashSet<string>(words);
string str = string.Join(" ", strSet);
Array to HashSet
// Uses the HashSet<T>(IEnumerable<T> collection) constructor to copy the array's elements
int[] fooArray = { 1, 2, 3 };
HashSet<int> fooSet = new HashSet<int>(fooArray);
HashSet to Array
var fooSet = new HashSet<int>() { 1, 2, 3 };

// CopyTo: the destination array is sized from the set's Count so every element fits
var fooArray = new int[fooSet.Count];
fooSet.CopyTo(fooArray);

// LINQ: ToArray() allocates and fills the array in a single call
var fooArrayViaLinq = fooSet.ToArray();
List to HashSet
// HashSet<T>(IEnumerable<T> collection) accepts a List<T> just like an array
var fooList = new List<int> { 1, 2, 3 };
var fooSet = new HashSet<int>(fooList);
HashSet to List
var fooSet = new HashSet<int>() { 1, 2, 3 };

// Constructor overload: List<T>(IEnumerable<T> collection)
var fooList = new List<int>(fooSet);

// LINQ: ToList() extension method
var fooListViaLinq = fooSet.ToList();
Check duplicates using hashset
/// <summary>
/// Determines whether <paramref name="inArray"/> contains at least one value more than once.
/// </summary>
/// <param name="inArray">The values to inspect.</param>
/// <returns><c>true</c> if any value occurs more than once; otherwise <c>false</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="inArray"/> is <c>null</c>.</exception>
public static bool HasDuplicates(int[] inArray)
{
    if (inArray is null) {
        throw new ArgumentNullException(nameof(inArray));
    }

    var seen = new HashSet<int>();
    foreach (var value in inArray) {
        // Add returns false when the value is already in the set, so stop at the first repeat
        if (!seen.Add(value)) {
            return true;
        }
    }

    return false;
}
// Demo entry point: prints the HasDuplicates result for an array with repeats and one without
public static void Main()
{
    int[] items = { 1, 2, 3, 4, 5, 2, 3 };
    int[] items2 = { 1, 2, 3, 4, 5 };

    bool itemsResult = HasDuplicates(inArray: items);
    Console.WriteLine($"has duplicate in items: {itemsResult}");

    bool items2Result = HasDuplicates(inArray: items2);
    Console.WriteLine($"has duplicate in items2: {items2Result}");
}
Remove duplicates from array using hashset
// The HashSet constructor keeps one copy of each entry; ToArray (LINQ) copies what remains
var animals = new string[] { "cat", "dog", "cat", "leopard", "tiger", "cat" };
HashSet<string> hash = new HashSet<string>(animals);
var uniqueAnimals = Enumerable.ToArray(hash);
Remove duplicates from list using hashset
// The input is a List<string> here (the array variant is a separate example)
var animals = new List<string> { "cat", "dog", "cat", "leopard", "tiger", "cat" };

// The HashSet constructor keeps a single copy of each distinct entry
var hash = new HashSet<string>(animals);

// hash.ToList() is a LINQ extension; new List<string>(hash) is the equivalent constructor overload
List<string> uniqueAnimals = hash.ToList();

foreach (var animal in uniqueAnimals) {
    Console.WriteLine(animal);
}
Find duplicates using hashset
var items = new int[] { 1, 2, 3, 1, 4, 5, 1, 2, 3, 2, 1, 1 };
var uniqueItems = new HashSet<int>();

// Maps each repeated value to the total number of times it appears in items
var duplicateHolder = new Dictionary<int, int>();

foreach (var item in items) {
    // Add returns true the first time a value is seen, so no separate Contains lookup is needed
    if (uniqueItems.Add(item)) {
        continue;
    }

    // Duplicate found: the first repeat means two occurrences so far; each later repeat adds one
    duplicateHolder[item] = duplicateHolder.TryGetValue(item, out var seenCount) ? seenCount + 1 : 2;
}

foreach (var keyValuePair in duplicateHolder) {
    Console.WriteLine($"item {keyValuePair.Key} appeared {keyValuePair.Value} times");
}
Union
// Fill two disjoint sets: 0, 2, 4, 6, 8 and 1, 3, 5, 7, 9
var evenNumbers = new HashSet<int>();
var oddNumbers = new HashSet<int>();
for (var even = 0; even < 10; even += 2) {
    evenNumbers.Add(even);
    oddNumbers.Add(even + 1);
}

// Start from a copy of evenNumbers, then merge oddNumbers into that copy
var numbers = new HashSet<int>(evenNumbers);
numbers.UnionWith(oddNumbers);