2 changes: 1 addition & 1 deletion pandas/_testing/__init__.py
@@ -290,7 +290,7 @@ def box_expected(expected, box_cls, transpose: bool = True):
         else:
             expected = pd.array(expected, copy=False)
     elif box_cls is Index:
-        expected = Index(expected)
+        expected = Index(expected, copy=False)
     elif box_cls is Series:
         expected = Series(expected)
     elif box_cls is DataFrame:
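Every hunk in this PR applies the same pattern: pass `copy=False` to an `Index` constructor whose input array is created internally and never handed back to the user, so the defensive copy can be skipped. A standalone sketch of what the flag controls (illustrative code, not part of the diff; exact constructor defaults depend on the pandas version):

```python
import numpy as np
import pandas as pd

arr = np.array([1, 2, 3], dtype=np.int64)

# copy=False lets the Index wrap the existing buffer directly, skipping an
# allocation and a memcpy. This is only safe when the caller owns `arr` and
# never mutates it afterwards, which is the invariant these call sites rely on.
idx = pd.Index(arr, copy=False)
print(np.shares_memory(arr, idx.values))  # True: the buffer is shared

# copy=True always gives the Index its own defensive copy of the data.
idx_copied = pd.Index(arr, copy=True)
print(np.shares_memory(arr, idx_copied.values))  # False
```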
2 changes: 1 addition & 1 deletion pandas/conftest.py
@@ -769,7 +769,7 @@ def index_with_missing(request):
     vals = ind.values.copy()
     vals[0] = None
     vals[-1] = None
-    return type(ind)(vals)
+    return type(ind)(vals, copy=False)


 # ----------------------------------------------------------------
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
@@ -926,7 +926,7 @@ def value_counts_internal(

         # Starting in 3.0, we no longer perform dtype inference on the
         # Index object we construct here, xref GH#56161
-        idx = Index(keys, dtype=keys.dtype, name=index_name)
+        idx = Index(keys, dtype=keys.dtype, name=index_name, copy=False)

         if (
             not sort
2 changes: 1 addition & 1 deletion pandas/core/apply.py
@@ -1972,7 +1972,7 @@ def relabel_result(
             fun = [
                 com.get_callable_name(f) if not isinstance(f, str) else f for f in fun
             ]
-            col_idx_order = Index(s.index).get_indexer(fun)
+            col_idx_order = Index(s.index, copy=False).get_indexer(fun)
             valid_idx = col_idx_order != -1
             if valid_idx.any():
                 s = s.iloc[col_idx_order[valid_idx]]
2 changes: 1 addition & 1 deletion pandas/core/arrays/_mixins.py
@@ -496,7 +496,7 @@ def value_counts(self, dropna: bool = True) -> Series:
         result = value_counts(values, sort=False, dropna=dropna)

         index_arr = self._from_backing_data(np.asarray(result.index._data))
-        index = Index(index_arr, name=result.index.name)
+        index = Index(index_arr, name=result.index.name, copy=False)
         return Series(result._values, index=index, name=result.name, copy=False)

     def _quantile(
2 changes: 1 addition & 1 deletion pandas/core/arrays/arrow/array.py
@@ -1804,7 +1804,7 @@ def value_counts(self, dropna: bool = True) -> Series:

         counts = ArrowExtensionArray(counts)

-        index = Index(self._from_pyarrow_array(values))
+        index = Index(self._from_pyarrow_array(values), copy=False)

         return Series(counts, index=index, name="count", copy=False)
4 changes: 2 additions & 2 deletions pandas/core/arrays/categorical.py
@@ -670,7 +670,7 @@ def _from_inferred_categories(
             to_timedelta,
         )

-        cats = Index(inferred_categories)
+        cats = Index(inferred_categories, copy=False)
         known_categories = (
             isinstance(dtype, CategoricalDtype) and dtype.categories is not None
         )

@@ -2397,7 +2397,7 @@ def _validate_listlike(self, value):
         from pandas import Index

         # tupleize_cols=False for e.g. test_fillna_iterable_category GH#41914
-        to_add = Index._with_infer(value, tupleize_cols=False).difference(
+        to_add = Index._with_infer(value, tupleize_cols=False, copy=False).difference(
             self.categories
         )

3 changes: 2 additions & 1 deletion pandas/core/arrays/masked.py
@@ -1422,7 +1422,8 @@ def value_counts(self, dropna: bool = True) -> Series:
             self.dtype.construct_array_type()(
                 keys,  # type: ignore[arg-type]
                 mask_index,
-            )
+            ),
+            copy=False,
         )
         return Series(arr, index=index, name="count", copy=False)

2 changes: 1 addition & 1 deletion pandas/core/arrays/sparse/array.py
@@ -961,7 +961,7 @@ def value_counts(self, dropna: bool = True) -> Series:
             counts = np.insert(counts, 0, fcounts)

         if not isinstance(keys, ABCIndex):
-            index = Index(keys)
+            index = Index(keys, copy=False)
         else:
             index = keys
         return Series(counts, index=index, copy=False)
4 changes: 2 additions & 2 deletions pandas/core/base.py
@@ -1312,11 +1312,11 @@ def factorize(
             from pandas import Index

             try:
-                uniques = Index(uniques, dtype=self.dtype)
+                uniques = Index(uniques, dtype=self.dtype, copy=False)
             except NotImplementedError:
                 # not all dtypes are supported in Index that are allowed for Series
                 # e.g. float16 or bytes
-                uniques = Index(uniques)
+                uniques = Index(uniques, copy=False)
         return codes, uniques

     _shared_docs["searchsorted"] = """
8 changes: 4 additions & 4 deletions pandas/core/groupby/groupby.py
@@ -1250,7 +1250,7 @@ def _set_result_index_ordered(
             return result

         # row order is scrambled => sort the rows by position in original index
-        original_positions = Index(self._grouper.result_ilocs)
+        original_positions = Index(self._grouper.result_ilocs, copy=False)
         result = result.set_axis(original_positions, axis=0)
         result = result.sort_index(axis=0)
         if self._grouper.has_dropped_na:

@@ -1298,7 +1298,7 @@ def _insert_inaxis_grouper(
                 if qs is None:
                     result.insert(0, name, lev)
                 else:
-                    result.insert(0, name, Index(np.repeat(lev, len(qs))))
+                    result.insert(0, name, Index(np.repeat(lev, len(qs)), copy=False))

         return result

@@ -4392,7 +4392,7 @@ def _nth(
         # error: No overload variant of "where" matches argument types
         # "Any", "NAType", "Any"
         values = np.where(nulls, NA, grouper)  # type: ignore[call-overload]
-        grouper = Index(values, dtype="Int64")
+        grouper = Index(values, dtype="Int64", copy=False)

         grb = dropped.groupby(grouper, as_index=self.as_index, sort=self.sort)
         return grb.nth(n)

@@ -5806,7 +5806,7 @@ def _insert_quantile_level(idx: Index, qs: npt.NDArray[np.float64]) -> MultiIndex
     MultiIndex
     """
     nqs = len(qs)
-    lev_codes, lev = Index(qs).factorize()
+    lev_codes, lev = Index(qs, copy=False).factorize()
     lev_codes = coerce_indexer_dtype(lev_codes, lev)

     if idx._is_multi:
6 changes: 4 additions & 2 deletions pandas/core/groupby/grouper.py
@@ -515,7 +515,9 @@ def __init__(
                 # error: Cannot determine type of "grouping_vector"  [has-type]
                 ng = newgrouper.groupings[0].grouping_vector  # type: ignore[has-type]
                 # use Index instead of ndarray so we can recover the name
-                grouping_vector = Index(ng, name=newgrouper.result_index.name)
+                grouping_vector = Index(
+                    ng, name=newgrouper.result_index.name, copy=False
+                )

             elif not isinstance(
                 grouping_vector, (Series, Index, ExtensionArray, np.ndarray)

@@ -684,7 +686,7 @@ def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
     @cache_readonly
     def groups(self) -> dict[Hashable, Index]:
         codes, uniques = self._codes_and_uniques
-        uniques = Index._with_infer(uniques, name=self.name)
+        uniques = Index._with_infer(uniques, name=self.name, copy=False)

         r, counts = libalgos.groupsort_indexer(ensure_platform_int(codes), len(uniques))
         counts = ensure_int64(counts).cumsum()
6 changes: 4 additions & 2 deletions pandas/core/groupby/ops.py
@@ -730,7 +730,7 @@ def groups(self) -> dict[Hashable, Index]:
     @cache_readonly
     def is_monotonic(self) -> bool:
         # return if my group orderings are monotonic
-        return Index(self.ids).is_monotonic_increasing
+        return Index(self.ids, copy=False).is_monotonic_increasing

     @final
     @cache_readonly

@@ -760,7 +760,9 @@ def ids(self) -> npt.NDArray[np.intp]:

     @cache_readonly
     def result_index_and_ids(self) -> tuple[Index, npt.NDArray[np.intp]]:
-        levels = [Index._with_infer(ping.uniques) for ping in self.groupings]
+        levels = [
+            Index._with_infer(ping.uniques, copy=False) for ping in self.groupings
+        ]
         obs = [
             ping._observed or not ping._passed_categorical for ping in self.groupings
         ]
31 changes: 17 additions & 14 deletions pandas/core/indexes/base.py
@@ -300,6 +300,7 @@ def _new_Index(cls, d):
     """
     # required for backward compat, because PI can't be instantiated with
     # ordinals through __new__ GH #13277
+    d["copy"] = False
     if issubclass(cls, ABCPeriodIndex):
         from pandas.core.indexes.period import _new_PeriodIndex

@@ -692,7 +693,7 @@ def _with_infer(cls, *args, **kwargs):
             #  "ndarray[Any, Any]"
             values = lib.maybe_convert_objects(result._values)  # type: ignore[arg-type]
             if values.dtype.kind in "iufb":
-                return Index(values, name=result.name)
+                return Index(values, name=result.name, copy=False)

         return result

@@ -2775,7 +2776,7 @@ def fillna(self, value):
             # no need to care metadata other than name
             # because it can't have freq if it has NaTs
             # _with_infer needed for test_fillna_categorical
-            return Index._with_infer(result, name=self.name)
+            return Index._with_infer(result, name=self.name, copy=False)
[Review comment, Member] Strictly speaking, this one is not needed I think, because the result returned from putmask above is an Index, and so we already do a shallow copy by default.

But no harm in keeping it to avoid confusion ;)
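A minimal sketch of the shallow-copy default described above (standalone illustration, not part of the diff):

```python
import numpy as np
import pandas as pd

base = pd.Index(np.arange(3.0))
rewrapped = pd.Index(base)  # input is already an Index; no explicit copy kwarg

# Only the Index wrapper is new; the underlying buffer is shared, which is
# the "shallow copy by default" the comment refers to.
print(np.shares_memory(base.values, rewrapped.values))  # True
```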

         return self._view()

     def dropna(self, how: AnyAll = "any") -> Self:
@@ -3910,8 +3911,8 @@ def _get_fill_indexer(
         if not (self.is_monotonic_increasing or self.is_monotonic_decreasing):
             raise ValueError("index must be monotonic increasing or decreasing")
         encoded = self.append(target)._engine.values  # type: ignore[union-attr]
-        self_encoded = Index(encoded[: len(self)])
-        target_encoded = Index(encoded[len(self) :])
+        self_encoded = Index(encoded[: len(self)], copy=False)
+        target_encoded = Index(encoded[len(self) :], copy=False)
         return self_encoded._get_fill_indexer(
             target_encoded, method, limit, tolerance
         )

@@ -4338,7 +4339,7 @@ def _reindex_non_unique(
         new_indexer[~check] = -1

         if not isinstance(self, ABCMultiIndex):
-            new_index = Index(new_labels, name=self.name)
+            new_index = Index(new_labels, name=self.name, copy=False)
         else:
             new_index = type(self).from_tuples(new_labels, names=self.names)
         return new_index, indexer, new_indexer

@@ -4487,7 +4488,7 @@ def join(
             and not self.categories.equals(other.categories)
         ):
             # dtypes are "equal" but categories are in different order
-            other = Index(other._values.reorder_categories(self.categories))
+            other = Index(other._values.reorder_categories(self.categories), copy=False)

         _validate_join_method(how)

@@ -4930,7 +4931,9 @@ def _wrap_join_result(
         elif ridx is None:
             join_index = other
         else:
-            join_index = self._constructor._with_infer(joined, dtype=self.dtype)
+            join_index = self._constructor._with_infer(
+                joined, dtype=self.dtype, copy=False
+            )

         names = other.names if how == "right" else self.names
         if join_index.names != names:

@@ -6368,7 +6371,7 @@ def _maybe_downcast_for_indexing(self, other: Index) -> tuple[Index, Index]:
                 other = type(self).from_tuples(other)  # type: ignore[attr-defined]
             except (TypeError, ValueError):
                 # let's instead try with a straight Index
-                self = Index(self._values)
+                self = Index(self._values, copy=False)
[Review comment, Member] This ._values of a MultiIndex is essentially always already a copy?

[Reply, Member (Author)] Yes, but also any place we're passing in ._values it is always safe to not make a copy. If ._values is user-owned data, that is a problem in and of itself.
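The distinction can be made concrete with a standalone sketch (illustrative, not part of the diff): sharing is only dangerous when the wrapped array is owned by a caller who may mutate it later, which should never be true of internally built arrays like `._values`:

```python
import numpy as np
import pandas as pd

user_arr = np.array([1, 2, 3], dtype=np.int64)
idx = pd.Index(user_arr, copy=False)  # wrap user data without a defensive copy

# Mutating the caller-owned buffer silently changes the supposedly immutable
# Index. This aliasing is the bug defensive copies prevent, and it cannot
# happen for arrays that pandas itself just created.
user_arr[0] = 99
print(idx[0])  # 99
```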


         if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
             # Reverse op so we dont need to re-implement on the subclasses
@@ -7124,7 +7127,7 @@ def insert(self, loc: int, item) -> Index:
         new_values[loc] = item

         # GH#51363 stopped doing dtype inference here
-        out = Index(new_values, dtype=new_values.dtype, name=self.name)
+        out = Index(new_values, dtype=new_values.dtype, name=self.name, copy=False)
         return out

     def drop(

@@ -7220,7 +7223,7 @@ def infer_objects(self, copy: bool = True) -> Index:
         )
         if copy and res_values is values:
             return self.copy()
-        result = Index(res_values, name=self.name)
+        result = Index(res_values, name=self.name, copy=False)
         if not copy and res_values is values and self._references is not None:
             result._references = self._references
             result._references.add_index_reference(result)

@@ -7329,10 +7332,10 @@ def _logical_method(self, other, op):
     def _construct_result(self, result, name, other):
         if isinstance(result, tuple):
             return (
-                Index(result[0], name=name, dtype=result[0].dtype),
-                Index(result[1], name=name, dtype=result[1].dtype),
+                Index(result[0], name=name, dtype=result[0].dtype, copy=False),
+                Index(result[1], name=name, dtype=result[1].dtype, copy=False),
             )
-        return Index(result, name=name, dtype=result.dtype)
+        return Index(result, name=name, dtype=result.dtype, copy=False)

     def _arith_method(self, other, op):
         if (

@@ -7350,7 +7353,7 @@ def _arith_method(self, other, op):
     @final
     def _unary_method(self, op):
         result = op(self._values)
-        return Index(result, name=self.name)
+        return Index(result, name=self.name, copy=False)

     def __abs__(self) -> Index:
         return self._unary_method(operator.abs)
2 changes: 1 addition & 1 deletion pandas/core/indexes/category.py
@@ -517,4 +517,4 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None):
         Index(['first', 'second', nan], dtype='object')
         """
         mapped = self._values.map(mapper, na_action=na_action)
-        return Index(mapped, name=self.name)
+        return Index(mapped, name=self.name, copy=False)
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimes.py
@@ -323,7 +323,7 @@ def strftime(self, date_format) -> Index:
               dtype='str')
         """
         arr = self._data.strftime(date_format)
-        return Index(arr, name=self.name, dtype=arr.dtype)
+        return Index(arr, name=self.name, dtype=arr.dtype, copy=False)

     def tz_convert(self, tz) -> Self:
         """
4 changes: 2 additions & 2 deletions pandas/core/indexes/extension.py
@@ -74,7 +74,7 @@ def fget(self):
                 return type(self)._simple_new(result, name=self.name)
             elif isinstance(result, ABCDataFrame):
                 return result.set_index(self)
-            return Index(result, name=self.name, dtype=result.dtype)
+            return Index(result, name=self.name, dtype=result.dtype, copy=False)
         return result

     def fset(self, value) -> None:

@@ -101,7 +101,7 @@ def method(self, *args, **kwargs):  # type: ignore[misc]
                 return type(self)._simple_new(result, name=self.name)
             elif isinstance(result, ABCDataFrame):
                 return result.set_index(self)
-            return Index(result, name=self.name, dtype=result.dtype)
+            return Index(result, name=self.name, dtype=result.dtype, copy=False)
         return result

     # error: "property" has no attribute "__name__"
2 changes: 1 addition & 1 deletion pandas/core/indexes/interval.py
@@ -691,7 +691,7 @@ def _maybe_convert_i8(self, key):
             key_i8 = key_i8.view("i8")
         else:
             # DatetimeIndex/TimedeltaIndex
-            key_dtype, key_i8 = key.dtype, Index(key.asi8)
+            key_dtype, key_i8 = key.dtype, Index(key.asi8, copy=False)
             if key.hasnans:
                 # convert NaT from its i8 value to np.nan so it's not viewed
                 # as a valid value, maybe causing errors (e.g. is_overlapping)
14 changes: 8 additions & 6 deletions pandas/core/indexes/multi.py
@@ -1505,7 +1505,9 @@ def _get_values_for_csv(

         if len(new_levels) == 1:
             # a single-level multi-index
-            return Index(new_levels[0].take(new_codes[0]))._get_values_for_csv()
+            return Index(
+                new_levels[0].take(new_codes[0]), copy=False
+            )._get_values_for_csv()
         else:
             # reconstruct the multi-index
             mi = MultiIndex(

@@ -1732,10 +1734,10 @@ def is_monotonic_increasing(self) -> bool:
             #  int, float, complex, str, bytes, _NestedSequence[Union
             #  [bool, int, float, complex, str, bytes]]]"
             sort_order = np.lexsort(values)  # type: ignore[arg-type]
-            return Index(sort_order).is_monotonic_increasing
+            return Index(sort_order, copy=False).is_monotonic_increasing
         except TypeError:
             # we have mixed types and np.lexsort is not happy
-            return Index(self._values).is_monotonic_increasing
+            return Index(self._values, copy=False).is_monotonic_increasing

     @cache_readonly
     def is_monotonic_decreasing(self) -> bool:

@@ -1996,7 +1998,7 @@ def to_flat_index(self) -> Index:  # type: ignore[override]
                 ('bar', 'baz'), ('bar', 'qux')],
                dtype='object')
         """
-        return Index(self._values, tupleize_cols=False)
+        return Index(self._values, tupleize_cols=False, copy=False)

     def _is_lexsorted(self) -> bool:
         """

@@ -2448,7 +2450,7 @@ def append(self, other):
             # setting names to None automatically
             return MultiIndex.from_tuples(new_tuples)
         except (TypeError, IndexError):
-            return Index(new_tuples)
+            return Index(new_tuples, copy=False)

     def argsort(
         self, *args, na_position: NaPosition = "last", **kwargs

@@ -3077,7 +3079,7 @@ def _get_indexer_level_0(self, target) -> npt.NDArray[np.intp]:
         lev = self.levels[0]
         codes = self._codes[0]
         cat = Categorical.from_codes(codes=codes, categories=lev, validate=False)
-        ci = Index(cat)
+        ci = Index(cat, copy=False)
         return ci.get_indexer_for(target)

     def get_slice_bound(