# ordered_set.py
"""
An OrderedSet is a custom MutableSet that remembers its order, so that every
entry has an index that can be looked up.

Based on a recipe originally posted to ActiveState Recipes by Raymond Hettinger,
and released under the MIT license.

Rob Speer's changes are as follows:

    - changed the content from a doubly-linked list to a regular Python list.
      Seriously, who wants O(1) deletes but O(N) lookups by index?
    - add() returns the index of the added item
    - index() just returns the index of an item
    - added a __getstate__ and __setstate__ so it can be pickled
    - added __getitem__
"""
import collections
try:
    from collections.abc import MutableSet  # Python 3.3+
except ImportError:  # Python 2
    from collections import MutableSet
# The slice object produced by `[:]`; OrderedSet.__getitem__ compares the
# requested index against it.
SLICE_ALL = slice(None)
# Version string of this module.
__version__ = '2.0.1'
  17. def is_iterable(obj):
  18. """
  19. Are we being asked to look up a list of things, instead of a single thing?
  20. We check for the `__iter__` attribute so that this can cover types that
  21. don't have to be known by this module, such as NumPy arrays.
  22. Strings, however, should be considered as atomic values to look up, not
  23. iterables. The same goes for tuples, since they are immutable and therefore
  24. valid entries.
  25. We don't need to check for the Python 2 `unicode` type, because it doesn't
  26. have an `__iter__` attribute anyway.
  27. """
  28. return hasattr(obj, '__iter__') and not isinstance(obj, str) and not isinstance(obj, tuple)
  29. class OrderedSet(collections.MutableSet):
  30. """
  31. An OrderedSet is a custom MutableSet that remembers its order, so that
  32. every entry has an index that can be looked up.
  33. """
  34. def __init__(self, iterable=None):
  35. self.items = []
  36. self.map = {}
  37. if iterable is not None:
  38. self |= iterable
  39. def __len__(self):
  40. return len(self.items)
  41. def __getitem__(self, index):
  42. """
  43. Get the item at a given index.
  44. If `index` is a slice, you will get back that slice of items. If it's
  45. the slice [:], exactly the same object is returned. (If you want an
  46. independent copy of an OrderedSet, use `OrderedSet.copy()`.)
  47. If `index` is an iterable, you'll get the OrderedSet of items
  48. corresponding to those indices. This is similar to NumPy's
  49. "fancy indexing".
  50. """
  51. if index == SLICE_ALL:
  52. return self
  53. elif hasattr(index, '__index__') or isinstance(index, slice):
  54. result = self.items[index]
  55. if isinstance(result, list):
  56. return OrderedSet(result)
  57. else:
  58. return result
  59. elif is_iterable(index):
  60. return OrderedSet([self.items[i] for i in index])
  61. else:
  62. raise TypeError("Don't know how to index an OrderedSet by %r" %
  63. index)
  64. def copy(self):
  65. return OrderedSet(self)
  66. def __getstate__(self):
  67. if len(self) == 0:
  68. # The state can't be an empty list.
  69. # We need to return a truthy value, or else __setstate__ won't be run.
  70. #
  71. # This could have been done more gracefully by always putting the state
  72. # in a tuple, but this way is backwards- and forwards- compatible with
  73. # previous versions of OrderedSet.
  74. return (None,)
  75. else:
  76. return list(self)
  77. def __setstate__(self, state):
  78. if state == (None,):
  79. self.__init__([])
  80. else:
  81. self.__init__(state)
  82. def __contains__(self, key):
  83. return key in self.map
  84. def add(self, key):
  85. """
  86. Add `key` as an item to this OrderedSet, then return its index.
  87. If `key` is already in the OrderedSet, return the index it already
  88. had.
  89. """
  90. if key not in self.map:
  91. self.map[key] = len(self.items)
  92. self.items.append(key)
  93. return self.map[key]
  94. append = add
  95. def update(self, sequence):
  96. """
  97. Update the set with the given iterable sequence, then return the index
  98. of the last element inserted.
  99. """
  100. item_index = None
  101. try:
  102. for item in sequence:
  103. item_index = self.add(item)
  104. except TypeError:
  105. raise ValueError('Argument needs to be an iterable, got %s' % type(sequence))
  106. return item_index
  107. def index(self, key):
  108. """
  109. Get the index of a given entry, raising an IndexError if it's not
  110. present.
  111. `key` can be an iterable of entries that is not a string, in which case
  112. this returns a list of indices.
  113. """
  114. if is_iterable(key):
  115. return [self.index(subkey) for subkey in key]
  116. return self.map[key]
  117. def pop(self):
  118. """
  119. Remove and return the last element from the set.
  120. Raises KeyError if the set is empty.
  121. """
  122. if not self.items:
  123. raise KeyError('Set is empty')
  124. elem = self.items[-1]
  125. del self.items[-1]
  126. del self.map[elem]
  127. return elem
  128. def discard(self, key):
  129. """
  130. Remove an element. Do not raise an exception if absent.
  131. The MutableSet mixin uses this to implement the .remove() method, which
  132. *does* raise an error when asked to remove a non-existent item.
  133. """
  134. if key in self:
  135. i = self.items.index(key)
  136. del self.items[i]
  137. del self.map[key]
  138. for k, v in self.map.items():
  139. if v >= i:
  140. self.map[k] = v - 1
  141. def clear(self):
  142. """
  143. Remove all items from this OrderedSet.
  144. """
  145. del self.items[:]
  146. self.map.clear()
  147. def __iter__(self):
  148. return iter(self.items)
  149. def __reversed__(self):
  150. return reversed(self.items)
  151. def __repr__(self):
  152. if not self:
  153. return '%s()' % (self.__class__.__name__,)
  154. return '%s(%r)' % (self.__class__.__name__, list(self))
  155. def __eq__(self, other):
  156. if isinstance(other, OrderedSet):
  157. return len(self) == len(other) and self.items == other.items
  158. try:
  159. other_as_set = set(other)
  160. except TypeError:
  161. # If `other` can't be converted into a set, it's not equal.
  162. return False
  163. else:
  164. return set(self) == other_as_set