pandas-dev
diff --git a/‎asv_bench/asv.conf.json
Lines changed: 0 additions & 1 deletion b/‎asv_bench/asv.conf.json
Lines changed: 0 additions & 1 deletion
diff --git a/‎asv_bench/benchmarks/io/excel.py
Lines changed: 4 additions & 10 deletions b/‎asv_bench/benchmarks/io/excel.py
Lines changed: 4 additions & 10 deletions
diff --git a/‎ci/code_checks.sh
Lines changed: 1 addition & 1 deletion b/‎ci/code_checks.sh
Lines changed: 1 addition & 1 deletion
diff --git a/‎ci/deps/actions-310.yaml
Lines changed: 0 additions & 1 deletion b/‎ci/deps/actions-310.yaml
Lines changed: 0 additions & 1 deletion
diff --git a/‎ci/deps/actions-38-downstream_compat.yaml
Lines changed: 0 additions & 1 deletion b/‎ci/deps/actions-38-downstream_compat.yaml
Lines changed: 0 additions & 1 deletion
diff --git a/‎ci/deps/actions-38-minimum_versions.yaml
Lines changed: 0 additions & 1 deletion b/‎ci/deps/actions-38-minimum_versions.yaml
Lines changed: 0 additions & 1 deletion
diff --git a/‎ci/deps/actions-38.yaml
Lines changed: 0 additions & 1 deletion b/‎ci/deps/actions-38.yaml
Lines changed: 0 additions & 1 deletion
diff --git a/‎ci/deps/actions-39.yaml
Lines changed: 0 additions & 1 deletion b/‎ci/deps/actions-39.yaml
Lines changed: 0 additions & 1 deletion
diff --git a/‎ci/deps/circle-38-arm64.yaml
Lines changed: 0 additions & 1 deletion b/‎ci/deps/circle-38-arm64.yaml
Lines changed: 0 additions & 1 deletion
diff --git a/‎doc/scripts/eval_performance.py
Lines changed: 108 additions & 0 deletions b/‎doc/scripts/eval_performance.py
Lines changed: 108 additions & 0 deletions
diff --git a/‎doc/source/_static/eval-perf-small.png
-24.7 KB b/‎doc/source/_static/eval-perf-small.png
-24.7 KB
diff --git a/‎doc/source/_static/eval-perf.png
10.8 KB b/‎doc/source/_static/eval-perf.png
10.8 KB
diff --git a/‎doc/source/_static/query-perf-small.png
-21.2 KB b/‎doc/source/_static/query-perf-small.png
-21.2 KB
diff --git a/‎doc/source/_static/query-perf.png
8.79 KB b/‎doc/source/_static/query-perf.png
8.79 KB
diff --git a/‎doc/source/conf.py
Lines changed: 1 addition & 1 deletion b/‎doc/source/conf.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/development/contributing_environment.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/development/contributing_environment.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/getting_started/install.rst
Lines changed: 0 additions & 1 deletion b/‎doc/source/getting_started/install.rst
Lines changed: 0 additions & 1 deletion
diff --git a/‎doc/source/user_guide/enhancingperf.rst
Lines changed: 5 additions & 19 deletions b/‎doc/source/user_guide/enhancingperf.rst
Lines changed: 5 additions & 19 deletions
diff --git a/‎doc/source/user_guide/indexing.rst
Lines changed: 19 additions & 5 deletions b/‎doc/source/user_guide/indexing.rst
Lines changed: 19 additions & 5 deletions
diff --git a/‎doc/source/user_guide/io.rst
Lines changed: 2 additions & 19 deletions b/‎doc/source/user_guide/io.rst
Lines changed: 2 additions & 19 deletions
@@ -54,7 +54,6 @@
         "openpyxl": [],
         "xlsxwriter": [],
         "xlrd": [],
-        "xlwt": [],
         "odfpy": [],
         "jinja2": [],
     },
 
@@ -33,7 +33,7 @@ def _generate_dataframe():
 
 class WriteExcel:
 
-    params = ["openpyxl", "xlsxwriter", "xlwt"]
+    params = ["openpyxl", "xlsxwriter"]
     param_names = ["engine"]
 
     def setup(self, engine):
@@ -68,10 +68,9 @@ def time_write_excel_style(self, engine):
 
 class ReadExcel:
 
-    params = ["xlrd", "openpyxl", "odf"]
+    params = ["openpyxl", "odf"]
     param_names = ["engine"]
     fname_excel = "spreadsheet.xlsx"
-    fname_excel_xls = "spreadsheet.xls"
     fname_odf = "spreadsheet.ods"
 
     def _create_odf(self):
@@ -92,13 +91,10 @@ def setup_cache(self):
         self.df = _generate_dataframe()
 
         self.df.to_excel(self.fname_excel, sheet_name="Sheet1")
-        self.df.to_excel(self.fname_excel_xls, sheet_name="Sheet1")
         self._create_odf()
 
     def time_read_excel(self, engine):
-        if engine == "xlrd":
-            fname = self.fname_excel_xls
-        elif engine == "odf":
+        if engine == "odf":
             fname = self.fname_odf
         else:
             fname = self.fname_excel
@@ -107,9 +103,7 @@ def time_read_excel(self, engine):
 
 class ReadExcelNRows(ReadExcel):
     def time_read_excel(self, engine):
-        if engine == "xlrd":
-            fname = self.fname_excel_xls
-        elif engine == "odf":
+        if engine == "odf":
             fname = self.fname_odf
         else:
             fname = self.fname_excel
 
@@ -47,7 +47,7 @@ import pandas
 
 blocklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis',
              'lxml', 'matplotlib', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy',
-             'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'}
+             'tables', 'urllib.request', 'xlrd', 'xlsxwriter'}
 
 # GH#28227 for some of these check for top-level modules, while others are
 #  more specific (e.g. urllib.request)
 
@@ -51,5 +51,4 @@ dependencies:
   - xarray
   - xlrd
   - xlsxwriter
-  - xlwt
   - zstandard
@@ -51,7 +51,6 @@ dependencies:
   - xarray
   - xlrd
   - xlsxwriter
-  - xlwt
   - zstandard
 
   # downstream packages
 
@@ -53,5 +53,4 @@ dependencies:
   - xarray=0.19.0
   - xlrd=2.0.1
   - xlsxwriter=1.4.3
-  - xlwt=1.3.0
   - zstandard=0.15.2
@@ -50,5 +50,4 @@ dependencies:
   - xarray
   - xlrd
   - xlsxwriter
-  - xlwt
   - zstandard
@@ -51,5 +51,4 @@ dependencies:
   - xarray
   - xlrd
   - xlsxwriter
-  - xlwt
   - zstandard
@@ -51,5 +51,4 @@ dependencies:
   - xarray
   - xlrd
   - xlsxwriter
-  - xlwt
   - zstandard
@@ -0,0 +1,108 @@
+from timeit import repeat as timeit
+
+import numpy as np
+import seaborn as sns
+
+from pandas import DataFrame
+
+# Setup code shared by every timed statement: it builds a DataFrame ``df`` of
+# random normal values with three columns (a, b, c). The ``%d`` placeholder is
+# the number of rows; the ``%s`` placeholder receives one extra setup statement
+# that defines the expression string ``s``.
+setup_common = """from pandas import DataFrame
+from numpy.random import randn
+df = DataFrame(randn(%d, 3), columns=list('abc'))
+%s"""
+
+# Arithmetic expression that ``bench_with`` evaluates through ``df.eval``.
+setup_with = "s = 'a + b * (c ** 2 + b ** 2 - a) / (a * c) ** 3'"
+
+
+def bench_with(n, times=10, repeat=3, engine="numexpr"):
+    return (
+        np.array(
+            timeit(
+                "df.eval(s, engine=%r)" % engine,
+                setup=setup_common % (n, setup_with),
+                repeat=repeat,
+                number=times,
+            )
+        )
+        / times
+    )
+
+
+# Boolean expression that ``bench_subset`` uses to filter rows through ``df.query``.
+setup_subset = "s = 'a <= b <= c ** 2 + b ** 2 - a and b > c'"
+
+
+def bench_subset(n, times=20, repeat=3, engine="numexpr"):
+    return (
+        np.array(
+            timeit(
+                "df.query(s, engine=%r)" % engine,
+                setup=setup_common % (n, setup_subset),
+                repeat=repeat,
+                number=times,
+            )
+        )
+        / times
+    )
+
+
+def bench(mn=3, mx=7, num=100, engines=("python", "numexpr"), verbose=False):
+    r = np.logspace(mn, mx, num=num).round().astype(int)
+
+    ev = DataFrame(np.empty((num, len(engines))), columns=engines)
+    qu = ev.copy(deep=True)
+
+    ev["size"] = qu["size"] = r
+
+    for engine in engines:
+        for i, n in enumerate(r):
+            if verbose & (i % 10 == 0):
+                print("engine: %r, i == %d" % (engine, i))
+            ev_times = bench_with(n, times=1, repeat=1, engine=engine)
+            ev.loc[i, engine] = np.mean(ev_times)
+            qu_times = bench_subset(n, times=1, repeat=1, engine=engine)
+            qu.loc[i, engine] = np.mean(qu_times)
+
+    return ev, qu
+
+
+def plot_perf(df, engines, title, filename=None):
+    from matplotlib.pyplot import figure
+
+    sns.set()
+    sns.set_palette("Set2")
+
+    fig = figure(figsize=(4, 3), dpi=120)
+    ax = fig.add_subplot(111)
+
+    for engine in engines:
+        ax.loglog(df["size"], df[engine], label=engine, lw=2)
+
+    ax.set_xlabel("Number of Rows")
+    ax.set_ylabel("Time (s)")
+    ax.set_title(title)
+    ax.legend(loc="best")
+    ax.tick_params(top=False, right=False)
+
+    fig.tight_layout()
+
+    if filename is not None:
+        fig.savefig(filename)
+
+
+if __name__ == "__main__":
+    import os
+
+    pandas_dir = os.path.dirname(
+        os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
+    )
+    static_path = os.path.join(pandas_dir, "doc", "source", "_static")
+
+    join = lambda p: os.path.join(static_path, p)
+
+    fn = join("eval-query-perf-data.h5")
+
+    engines = "python", "numexpr"
+
+    ev, qu = bench(verbose=True)  # only this one
+
+    plot_perf(ev, engines, "DataFrame.eval()", filename=join("eval-perf.png"))
+    plot_perf(qu, engines, "DataFrame.query()", filename=join("query-perf.png"))
@@ -236,7 +236,7 @@
 if ".dev" in version:
     switcher_version = "dev"
 elif "rc" in version:
-    switcher_version = version.split("rc")[0] + " (rc)"
+    switcher_version = version.split("rc", maxsplit=1)[0] + " (rc)"
 
 html_theme_options = {
     "external_links": [],
 
@@ -10,7 +10,7 @@ To test out code changes, you'll need to build pandas from source, which
 requires a C/C++ compiler and Python environment. If you're making documentation
 changes, you can skip to :ref:`contributing to the documentation <contributing_documentation>` but if you skip
 creating the development environment you won't be able to build the documentation
-locally before pushing your changes.
+locally before pushing your changes. It's recommended to also install the :ref:`pre-commit hooks <contributing.pre-commit>`.
 
 .. contents:: Table of contents:
    :local:
 
@@ -336,7 +336,6 @@ Can be managed as optional_extra with ``pandas[excel]``.
 Dependency                Minimum Version    optional_extra  Notes
 ========================= ================== =============== =============================================================
 xlrd                      2.0.1              excel           Reading Excel
-xlwt                      1.3.0              excel           Writing Excel
 xlsxwriter                1.4.3              excel           Writing Excel
 openpyxl                  3.0.7              excel           Reading / writing for xlsx files
 pyxlsb                    1.0.8              excel           Reading for xlsb files
 
@@ -690,21 +690,12 @@ The equivalent in standard Python would be
    df["a"] = 1
    df
 
-The :class:`DataFrame.query` method has a ``inplace`` keyword which determines
-whether the query modifies the original frame.
-
-.. ipython:: python
-
-   df = pd.DataFrame(dict(a=range(5), b=range(5, 10)))
-   df.query("a > 2")
-   df.query("a > 2", inplace=True)
-   df
-
 Local variables
 ~~~~~~~~~~~~~~~
 
 You must *explicitly reference* any local variable that you want to use in an
-expression by placing the ``@`` character in front of the name. For example,
+expression by placing the ``@`` character in front of the name. This mechanism is
+the same for both :meth:`DataFrame.query` and :meth:`DataFrame.eval`. For example,
 
 .. ipython:: python
 
@@ -820,17 +811,12 @@ significant performance benefit.  Here is a plot showing the running time of
 :func:`pandas.eval` as function of the size of the frame involved in the
 computation. The two lines are two different engines.
 
+..
+    The eval-perf.png figure below was generated with /doc/scripts/eval_performance.py
 
 .. image:: ../_static/eval-perf.png
 
-
-.. note::
-
-   Operations with smallish objects (around 15k-20k rows) are faster using
-   plain Python:
-
-       .. image:: ../_static/eval-perf-small.png
-
+You will only see the performance benefits of using the ``numexpr`` engine with :func:`pandas.eval` if your frame has more than approximately 100,000 rows.
 
 This plot was created using a :class:`DataFrame` with 3 columns each containing
 floating point values generated using ``numpy.random.randn()``.
 
@@ -1240,6 +1240,17 @@ If instead you don't want to or cannot name your index, you can use the name
    renaming your columns to something less ambiguous.
 
 
+The :meth:`DataFrame.query` method has an ``inplace`` keyword which determines
+whether the query modifies the original frame.
+
+.. ipython:: python
+
+   df = pd.DataFrame(dict(a=range(5), b=range(5, 10)))
+   df.query("a > 2")
+   df.query("a > 2", inplace=True)
+   df
+
+
 :class:`~pandas.MultiIndex` :meth:`~pandas.DataFrame.query` Syntax
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -1438,15 +1449,18 @@ Performance of :meth:`~pandas.DataFrame.query`
 ``DataFrame.query()`` using ``numexpr`` is slightly faster than Python for
 large frames.
 
+..
+    The query-perf.png figure below was generated with /doc/scripts/eval_performance.py
+
 .. image:: ../_static/query-perf.png
 
-.. note::
 
-   You will only see the performance benefits of using the ``numexpr`` engine
-   with ``DataFrame.query()`` if your frame has more than approximately 200,000
-   rows.
 
-      .. image:: ../_static/query-perf-small.png
+You will only see the performance benefits of using the ``numexpr`` engine
+with ``DataFrame.query()`` if your frame has more than approximately 100,000
+rows.
+
+
 
 This plot was created using a ``DataFrame`` with 3 columns each containing
 floating point values generated using ``numpy.random.randn()``.
 
@@ -3466,8 +3466,6 @@ See the :ref:`cookbook<cookbook.excel>` for some advanced strategies.
 
 .. warning::
 
-   The `xlwt <https://xlwt.readthedocs.io/en/latest/>`__ package for writing old-style ``.xls``
-   excel files is no longer maintained.
    The `xlrd <https://xlrd.readthedocs.io/en/latest/>`__ package is now only for reading
    old-style ``.xls`` files.
 
@@ -3481,12 +3479,6 @@ See the :ref:`cookbook<cookbook.excel>` for some advanced strategies.
    **Please do not report issues when using ``xlrd`` to read ``.xlsx`` files.**
    This is no longer supported, switch to using ``openpyxl`` instead.
 
-   Attempting to use the ``xlwt`` engine will raise a ``FutureWarning``
-   unless the option :attr:`io.excel.xls.writer` is set to ``"xlwt"``.
-   While this option is now deprecated and will also raise a ``FutureWarning``,
-   it can be globally set and the warning suppressed. Users are recommended to
-   write ``.xlsx`` files using the ``openpyxl`` engine instead.
-
 .. _io.excel_reader:
 
 Reading Excel files
@@ -3788,7 +3780,7 @@ written. For example:
 
    df.to_excel("path_to_file.xlsx", sheet_name="Sheet1")
 
-Files with a ``.xls`` extension will be written using ``xlwt`` and those with a
+Files with a
 ``.xlsx`` extension will be written using ``xlsxwriter`` (if available) or
 ``openpyxl``.
 
@@ -3849,35 +3841,26 @@ pandas supports writing Excel files to buffer-like objects such as ``StringIO``
 Excel writer engines
 ''''''''''''''''''''
 
-.. deprecated:: 1.2.0
-
-   As the `xlwt <https://pypi.org/project/xlwt/>`__ package is no longer
-   maintained, the ``xlwt`` engine will be removed from a future version
-   of pandas. This is the only engine in pandas that supports writing to
-   ``.xls`` files.
-
 pandas chooses an Excel writer via two methods:
 
 1. the ``engine`` keyword argument
 2. the filename extension (via the default specified in config options)
 
 By default, pandas uses the `XlsxWriter`_  for ``.xlsx``, `openpyxl`_
-for ``.xlsm``, and `xlwt`_ for ``.xls`` files. If you have multiple
+for ``.xlsm`` files. If you have multiple
 engines installed, you can set the default engine through :ref:`setting the
 config options <options>` ``io.excel.xlsx.writer`` and
 ``io.excel.xls.writer``. pandas will fall back on `openpyxl`_ for ``.xlsx``
 files if `Xlsxwriter`_ is not available.
 
 .. _XlsxWriter: https://xlsxwriter.readthedocs.io
 .. _openpyxl: https://openpyxl.readthedocs.io/
-.. _xlwt: http://www.python-excel.org
 
 To specify which writer you want to use, you can pass an engine keyword
 argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are:
 
 * ``openpyxl``: version 2.4 or higher is required
 * ``xlsxwriter``
-* ``xlwt``
 
 .. code-block:: python