| 1 | namespace Cobra.Core |
| 2 | use System.Collections |
class CountedSet<of T> implements ICollection<of T>
    """
    A counted set (also known as a multiset or bag). Unlike a set, a multiset allows the same
    item to be inserted multiple times; each distinct item is stored once together with its count.
    For a computer science background, see
    http://en.wikipedia.org/wiki/Set_(computer_science)#Multiset.
    For a mathematical background, see
    http://en.wikipedia.org/wiki/Multiset

    To get the number of times that a particular item is in the CountedSet,
    use count = countedSet[item].
    Adding an item to the CountedSet increases its count by one.
    As an invariant, the count of an item must be > 0 for all stored items.
    Removing an item decreases its count by one.
    Use removeAll to delete all occurrences of a particular item.
    """

    # Maps each distinct item to the number of times it occurs.
    var _data as IDictionary<of T, int>

    cue init
        """Creates an empty CountedSet."""
        base.init
        _data = Dictionary<of T, int>()

    cue init(items as T*)
        """Creates a CountedSet holding every element of items; repeats are counted."""
        .init
        .addRange(items)

    cue init(other as CountedSet<of T>)
        """Creates a copy of other with the same items and counts."""
        .init
        .addRange(other)

    cue init(other as IDictionary<of T, int>)
        """Creates a CountedSet from item -> count pairs."""
        .init
        .addRange(other)

    pro [item as T] as int
        """
        The count stored for item.
        If the item is not in the CountedSet, the getter returns 0.
        The setter overwrites the stored count and requires the new value to be > 0.
        """
        get
            return _data.get(item, 0)
        set
            require value > 0
            _data[item] = value

    get count as int
        """
        Total number of items in the CountedSet, repeats included.
        Equivalent to the sum of all counts; computed by walking every entry.
        """
        total = 0
        for n in _data.values
            total += n
        return total

    get uniqueCount
        """
        Returns the number of distinct items in the CountedSet.
        """
        return _data.count

    get isReadOnly as bool
        """Always false."""
        return false

    get counts as KeyValuePair<of T, int>*
        """
        In the returned stream of KeyValuePairs,
        the key is the item and the value is the item's count.
        """
        for item, count in _data
            yield KeyValuePair<of T, int>(item, count)

    def contains(item as T) as bool
        """Returns true if item has an entry in the CountedSet."""
        return _data.containsKey(item)

    def items as IEnumerator<of T>
        """Enumerates every item, repeating each one as many times as its count."""
        for item, count in _data
            for c in count
                yield item

    def getEnumerator as System.Collections.IEnumerator
        implements System.Collections.IEnumerable
        return .getEnumerator

    def getEnumerator as IEnumerator<of T>
        """
        Enumerates the distinct items (the keys of the underlying dictionary).
        NOTE(review): each item is yielded once here, whereas .count, .enumerate and
        .copyTo account for repeats -- confirm this difference is intended.
        """
        return _data.keys.getEnumerator

    cue enumerate as T*
        """Yields every item, repeating each one as many times as its count."""
        for item, count in _data
            for c in count
                yield item

    def toList as IList<of T>
        """Returns a new list of all items with repeats, in .enumerate order."""
        return List<of T>(.enumerate)

    def uniqueSet as Cobra.Core.ISet<of T>
        """Returns a new Set holding each distinct item once."""
        return Set<of T>(_data.keys)

    def add(item as T)
        """
        Adding an instance of an item is
        equivalent to increasing the item's count by one.
        """
        this[item] += 1

    def add(item as T, num as int)
        """
        Adds num instances of item to the CountedSet.
        This is equivalent to increasing the item's count by num.
        num must be > 0.
        """
        require num > 0
        this[item] += num

    def addRange(other as CountedSet<of T>)
        """Adds every item of other, with its count, to this CountedSet."""
        for item, count in other.counts
            .add(item, count)

    def addRange(other as IDictionary<of T, int>)
        """Adds each key of other with its value as the count; every value must be > 0."""
        for item, count in other
            .add(item, count)

    def addRange(items as T*)
        """Adds each element of items once; repeated elements are counted."""
        for item in items
            .add(item)

    def remove(item as T) as bool
        """
        Removes one instance of item from the CountedSet.
        This is equivalent to decreasing the item's count by one.
        Returns true if an instance was removed, false if item was not present.
        """
        return .remove(item, 1) > 0

    def remove(item as T, num as int) as int
        """
        Removes up to num instances of item from the CountedSet.
        This is equivalent to decreasing the item's count by num; an item whose
        count would drop to zero or below is deleted entirely.
        Returns the actual number of instances removed (0 if item was not present).
        """
        require num > 0
        ensure result >= 0
        if not _data.containsKey(item), return 0
        current = _data[item]
        if num >= current
            # Asked for at least everything that is stored: drop the entry.
            _data.remove(item)
            return current
        _data[item] = current - num
        return num

    def removeAll(item as T) as bool
        """
        Removes all occurrences of item from the CountedSet.
        Returns true if item was present.
        """
        return _data.remove(item)

    def clear
        """Removes all items from the CountedSet."""
        _data.clear

    def copyTo(array as T[], arrayIndex as int)
        """
        Copies every item (repeats included, in .enumerate order) into array,
        starting at arrayIndex.
        Throws ArgumentOutOfRangeException if arrayIndex is negative.
        Throws ArgumentException if the space from arrayIndex to the end of array
        is smaller than .count.
        """
        if arrayIndex < 0
            throw ArgumentOutOfRangeException('arrayIndex', "arrayIndex = [arrayIndex] must be >= 0.")
        total = .count  # computed once; .count walks every entry
        # Available space is what remains after arrayIndex, so subtract (not add) the offset.
        if array.length - arrayIndex < total
            msg = "The count of this CountedSet = [total] is greater than"
            msg += " the available space from index = [arrayIndex] to the end of the destination array of length [array.length]"
            throw ArgumentException(msg)
        pos = arrayIndex
        for el in .enumerate
            array[pos] = el
            pos += 1

    def equals(other as Object?) as bool is override
        """
        Returns false unless other is a CountedSet<of T>; otherwise returns the
        result of comparing the two underlying dictionaries with ==.
        """
        if not other inherits CountedSet<of T>, return false
        return _data == (other to CountedSet<of T>)._data

    def getHashCode as int is override
        """
        As a completely mutable object, CountedSet does not
        support getHashCode; calling it always throws NotSupportedException.
        """
        throw NotSupportedException()

    def toString as String is override
        """
        Returns all items with repeats, in .enumerate order, separated by ', '
        and wrapped in braces, e.g. {a, a, b}.
        """
        sb = StringBuilder().append(c'{')
        sep = ""
        for item in .enumerate
            sb.append(sep)
            sb.append(item)
            sep = ", "
        sb.append(c'}')
        return sb.toString

    #Multiset math operations

    def sum(other as CountedSet<of T>) as CountedSet<of T>
        """
        Returns a new CountedSet in which each item's count is the sum of its
        counts in this CountedSet and in other.
        """
        answer = CountedSet<of T>(this)
        answer.addRange(other)
        return answer

    def difference(other as CountedSet<of T>) as CountedSet<of T>
        """
        Returns a new CountedSet holding this CountedSet's counts minus the counts of other.
        Only keeps items whose resulting count is > 0.
        """
        answer = CountedSet<of T>(this)
        for item, count in other.counts
            remaining = answer[item] - count
            if remaining > 0
                answer[item] = remaining
            else
                # Also reached for items absent from this set, where removeAll is a no-op.
                answer.removeAll(item)
        return answer

    def union(other as CountedSet<of T>) as CountedSet<of T>
        """
        Returns a new CountedSet that takes, for each item, the larger of its
        counts in this CountedSet and in other.
        """
        answer = CountedSet<of T>(this)
        for item, count in other.counts
            if answer[item] < count, answer[item] = count
        return answer

    def intersection(other as CountedSet<of T>) as CountedSet<of T>
        """
        Returns a new CountedSet that takes, for each item present in both,
        the smaller of its counts in this CountedSet and in other.
        """
        answer = CountedSet<of T>()
        for item, count in .counts
            if other.contains(item)
                answer[item] = Math.min(count, other[item])
        return answer

    /#def mostCommon(num as int) as ICollection<of T>
    This should be implemented with a heap queue#/

    /#
    These probably don't have much practical use but could be implemented
    def isSubsetOf(s as CountedSet<of T>) as bool
    def isSupersetOf(s as CountedSet<of T>) as bool
    def product(other as CountedSet<of T>) as bool
    def multiply(n as int)
    def symmetricDifference(s as CountedSet<of T>) as CountedSet<of T>
    #/
| 243 | |
class TestCountedSet
    """
    Unit tests for CountedSet: construction, the multiset math operations,
    toString, remove/removeAll and uniqueSet.
    """

    test
        # Building from a string counts each character.
        letters = CountedSet<of char>('mississippi')
        expected = [c'i', c'i', c'i', c'i', c'm', c'p', c'p', c's', c's', c's', c's']
        assert letters.toList.sorted == expected
        assert letters[c'm'] == 1
        assert letters[c'i'] == 4
        assert letters[c'p'] == 2
        assert letters[c's'] == 4

    test sum
        left = CountedSet<of int>([1, 2, 2])
        right = CountedSet<of int>([0, 2, 2])
        total = left.sum(right)
        assert total[1] == 1
        assert total[2] == 4
        assert total[0] == 1

    test union
        left = CountedSet<of int>([1, 2, 2])
        right = CountedSet<of int>([1, 1, 2])
        merged = left.union(right)
        assert merged[1] == 2
        assert merged[2] == 2

    test intersection
        left = CountedSet<of int>([1, 2, 2, 3])
        right = CountedSet<of int>([1, 1, 2])
        common = left.intersection(right)
        assert common[1] == 1
        assert common[2] == 1
        assert common[3] == 0

    test difference
        left = CountedSet<of int>([1, 2, 2, 3, 3, 3])
        right = CountedSet<of int>([1, 1, 2, 3])
        rest = left.difference(right)
        assert rest[1] == 0
        assert rest[2] == 1
        assert rest[3] == 2
        assert not rest.contains(1)

    test toString
        letters = CountedSet<of char>('mississippi')
        assert letters.toString == '{m, i, i, i, i, s, s, s, s, p, p}'

    test remove
        bag = CountedSet<of int>()
        # case: item is not in the CountedSet
        taken = bag.remove(4, 4)
        assert taken == 0
        # case: remove only some of the instances
        bag[4] = 4
        taken = bag.remove(4, 2)
        assert taken == 2
        assert bag[4] == 2
        # case: remove exactly all instances
        bag[5] = 3
        taken = bag.remove(5, 3)
        assert taken == 3
        assert not bag.contains(5)
        # case: ask for more than the stored count
        bag[7] = 4
        taken = bag.remove(7, 5)
        assert taken == 4
        assert not bag.contains(7)

    test removeAll
        letters = CountedSet<of char>('mississippi')
        letters.removeAll(c'i')
        assert not letters.contains(c'i')

    /#test set
    CountedSet= CountedSet<of int>()
    expect Cobra.Core.RequireException, CountedSet[3] = -2 #/

    test uniqueSet
        bag = CountedSet<of int>([1, 1, 1, 1, 2, 2, 2, 3])
        assert bag.uniqueSet == Set<of int>([1, 2, 3])
| 315 | |
| 316 | |
| 317 | |