diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index ee65245574f9e..8fd09d123c359 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -757,10 +757,11 @@ cdef class {{name}}HashTable(HashTable): append_data_uint8(rmd, 1) continue - k = kh_put_{{dtype}}(self.table, val, &ret) + k = kh_get_{{dtype}}(self.table, val) - if ret != 0: + if k == self.table.n_buckets: # k hasn't been seen yet + k = kh_put_{{dtype}}(self.table, val, &ret) if needs_resize(ud.size, ud.capacity): with gil: @@ -898,8 +899,12 @@ cdef class {{name}}HashTable(HashTable): labels[i] = -1 continue - k = kh_put_{{dtype}}(self.table, val, &ret) - if ret != 0: + k = kh_get_{{dtype}}(self.table, val) + if k != self.table.n_buckets: + idx = self.table.vals[k] + labels[i] = idx + else: + k = kh_put_{{dtype}}(self.table, val, &ret) self.table.vals[k] = count if needs_resize(ud.size, ud.capacity): @@ -908,9 +913,6 @@ cdef class {{name}}HashTable(HashTable): append_data_{{dtype}}(ud, val) labels[i] = count count += 1 - else: - idx = self.table.vals[k] - labels[i] = idx arr_uniques = uniques.to_array() @@ -1219,9 +1221,10 @@ cdef class StringHashTable(HashTable): continue v = vecs[i] - k = kh_put_str(self.table, v, &ret) - if ret != 0: + k = kh_get_str(self.table, v) + if k == self.table.n_buckets: # k hasn't been seen yet + k = kh_put_str(self.table, v, &ret) uindexer[count] = i if return_inverse: self.table.vals[k] = count @@ -1491,9 +1494,10 @@ cdef class PyObjectHashTable(HashTable): labels[i] = na_sentinel continue - k = kh_put_pymap(self.table, val, &ret) - if ret != 0: + k = kh_get_pymap(self.table, val) + if k == self.table.n_buckets: # k hasn't been seen yet + k = kh_put_pymap(self.table, val, &ret) uniques.append(val) if return_inverse: self.table.vals[k] = count diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index cefa5d0ad7f1b..3487f5ebd050d 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -66,12 +66,13 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8 for i in range(n): val = values[i] if not dropna or not checknull(val): - k = kh_put_{{ttype}}(table, {{to_c_type}}val, &ret) - if ret != 0: + k = kh_get_{{ttype}}(table, {{to_c_type}}val) + if k != table.n_buckets: + table.vals[k] += 1 + else: + k = kh_put_{{ttype}}(table, {{to_c_type}}val, &ret) table.vals[k] = 1 result_keys.append(val) - else: - table.vals[k] += 1 {{else}} kh_resize_{{ttype}}(table, n) @@ -89,12 +90,13 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8 if uses_mask and isna_entry: na_counter += 1 else: - k = kh_put_{{ttype}}(table, val, &ret) - if ret != 0: + k = kh_get_{{ttype}}(table, val) + if k != table.n_buckets: + table.vals[k] += 1 + else: + k = kh_put_{{ttype}}(table, val, &ret) table.vals[k] = 1 result_keys.append(val) - else: - table.vals[k] += 1 {{endif}} # collect counts in the order corresponding to result_keys: @@ -191,13 +193,14 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons else: value = {{to_c_type}}(values[i]) - k = kh_put_{{ttype}}(table, value, &ret) - if ret != 0: - table.vals[k] = i - out[i] = 0 - else: + k = kh_get_{{ttype}}(table, value) + if k != table.n_buckets: out[table.vals[k]] = 1 out[i] = 1 + else: + k = kh_put_{{ttype}}(table, value, &ret) + table.vals[k] = i + out[i] = 0 kh_destroy_{{ttype}}(table) return out