From 61471038f568095b7bc95bf925e2bc08381a1c99 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?= <me@danieldk.eu>
Date: Tue, 5 Dec 2023 13:08:57 +0100
Subject: [PATCH 01/14] Add TextCatReduce.v1

This is a textcat classifier that pools the vectors generated by a
tok2vec implementation and then applies a classifier to the pooled
representation. Three reductions are supported for pooling: first, max,
and mean. When multiple reductions are enabled, the reductions are
concatenated before providing them to the classification layer.

This model is a generalization of the TextCatCNN model, which only
supports mean reductions and is a bit of a misnomer, because it can also
be used with transformers. This change also reimplements TextCatCNN.v2
using the new TextCatReduce.v1 layer.
---
 spacy/errors.py                      |   2 +
 spacy/ml/models/textcat.py           | 115 +++++++++++++++++++--------
 spacy/tests/pipeline/test_textcat.py |  16 ++--
 spacy/tests/test_models.py           |  15 ++++
 website/docs/api/architectures.mdx   |  41 ++++++++++
 5 files changed, 148 insertions(+), 41 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 093c65f3d1a..28f34e26605 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -984,6 +984,8 @@ class Errors(metaclass=ErrorsWithCodes):
     E1055 = ("The 'replace_listener' callback expects {num_params} parameters, "
              "but only callbacks with one or three parameters are supported")
     E1056 = ("The `TextCatBOW` architecture expects a length of at least 1, was {length}.")
+    E1057 = ("The `TextCatReduce` architecture must be used with at least one reduction. "
+             "Please enable one of `use_reduce_first`, `use_reduce_max` or `use_reduce_mean`.")
 
 
 # Deprecated model shortcuts, only used in errors and warnings
diff --git a/spacy/ml/models/textcat.py b/spacy/ml/models/textcat.py
index e6d1f030fef..a579a0e4d4d 100644
--- a/spacy/ml/models/textcat.py
+++ b/spacy/ml/models/textcat.py
@@ -17,6 +17,8 @@
     clone,
     concatenate,
     list2ragged,
+    reduce_first,
+    reduce_max,
     reduce_mean,
     reduce_sum,
     residual,
@@ -49,39 +51,14 @@ def build_simple_cnn_text_classifier(
     outputs sum to 1. If exclusive_classes=False, a logistic non-linearity
     is applied instead, so that outputs are in the range [0, 1].
     """
-    fill_defaults = {"b": 0, "W": 0}
-    with Model.define_operators({">>": chain}):
-        cnn = tok2vec >> list2ragged() >> reduce_mean()
-        nI = tok2vec.maybe_get_dim("nO")
-        if exclusive_classes:
-            output_layer = Softmax(nO=nO, nI=nI)
-            fill_defaults["b"] = NEG_VALUE
-            resizable_layer: Model = resizable(
-                output_layer,
-                resize_layer=partial(
-                    resize_linear_weighted, fill_defaults=fill_defaults
-                ),
-            )
-            model = cnn >> resizable_layer
-        else:
-            output_layer = Linear(nO=nO, nI=nI)
-            resizable_layer = resizable(
-                output_layer,
-                resize_layer=partial(
-                    resize_linear_weighted, fill_defaults=fill_defaults
-                ),
-            )
-            model = cnn >> resizable_layer >> Logistic()
-        model.set_ref("output_layer", output_layer)
-        model.attrs["resize_output"] = partial(
-            resize_and_set_ref,
-            resizable_layer=resizable_layer,
-        )
-    model.set_ref("tok2vec", tok2vec)
-    if nO is not None:
-        model.set_dim("nO", cast(int, nO))
-    model.attrs["multi_label"] = not exclusive_classes
-    return model
+    return build_reduce_text_classifier(
+        tok2vec=tok2vec,
+        exclusive_classes=exclusive_classes,
+        use_reduce_first=False,
+        use_reduce_max=False,
+        use_reduce_mean=True,
+        nO=nO,
+    )
 
 
 def resize_and_set_ref(model, new_nO, resizable_layer):
@@ -230,3 +207,75 @@ def build_text_classifier_lowdata(
             model = model >> Dropout(dropout)
         model = model >> Logistic()
     return model
+
+
+@registry.architectures("spacy.TextCatReduce.v1")
+def build_reduce_text_classifier(
+    tok2vec: Model,
+    exclusive_classes: bool,
+    use_reduce_first: bool,
+    use_reduce_max: bool,
+    use_reduce_mean: bool,
+    nO: Optional[int] = None,
+) -> Model[List[Doc], Floats2d]:
+    """Build a model that classifies pooled `Doc` representations.
+
+    Pooling is performed using reductions. Reductions are concatenated when
+    multible reductions are used.
+
+    tok2vec (Model): the tok2vec layer to pool over.
+    exclusive_classes (bool): Whether or not classes are mutually exclusive.
+    use_reduce_first (bool): Pool by using the hidden representation of the
+        first token of a `Doc`
+    use_reduce_max (bool): Pool by taking the maximum values of the hidden
+        representations of a `Doc`.
+    use_reduce_mean (bool): Pool by taking the mean of all hidden
+        representations of a `Doc`.
+    nO (Optional[int]): Number of classes.
+    """
+
+    fill_defaults = {"b": 0, "W": 0}
+    reductions = []
+    if use_reduce_first:
+        reductions.append(reduce_first())
+    if use_reduce_max:
+        reductions.append(reduce_max())
+    if use_reduce_mean:
+        reductions.append(reduce_mean())
+
+    if not len(reductions):
+        raise ValueError(Errors.E1057)
+
+    with Model.define_operators({">>": chain}):
+        cnn = tok2vec >> list2ragged() >> concatenate(*reductions)
+        nO_tok2vec = tok2vec.maybe_get_dim("nO")
+        nI = nO_tok2vec * len(reductions) if nO_tok2vec is not None else None
+        if exclusive_classes:
+            output_layer = Softmax(nO=nO, nI=nI)
+            fill_defaults["b"] = NEG_VALUE
+            resizable_layer: Model = resizable(
+                output_layer,
+                resize_layer=partial(
+                    resize_linear_weighted, fill_defaults=fill_defaults
+                ),
+            )
+            model = cnn >> resizable_layer
+        else:
+            output_layer = Linear(nO=nO, nI=nI)
+            resizable_layer = resizable(
+                output_layer,
+                resize_layer=partial(
+                    resize_linear_weighted, fill_defaults=fill_defaults
+                ),
+            )
+            model = cnn >> resizable_layer >> Logistic()
+        model.set_ref("output_layer", output_layer)
+        model.attrs["resize_output"] = partial(
+            resize_and_set_ref,
+            resizable_layer=resizable_layer,
+        )
+    model.set_ref("tok2vec", tok2vec)
+    if nO is not None:
+        model.set_dim("nO", cast(int, nO))
+    model.attrs["multi_label"] = not exclusive_classes
+    return model
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 147ea49005c..78573cb9b4b 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -457,8 +457,8 @@ def test_no_resize(name, textcat_config):
         ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
         ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
         # CNN
-        ("textcat", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
-        ("textcat_multilabel", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
+        ("textcat", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True, "use_reduce_first": True, "use_reduce_max": True, "use_reduce_mean": True}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False, "use_reduce_first": True, "use_reduce_max": True, "use_reduce_mean": True}),
     ],
 )
 # fmt: on
@@ -485,9 +485,9 @@ def test_resize(name, textcat_config):
         ("textcat", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": True, "no_output_layer": True, "ngram_size": 3}),
         ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
         ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
-        # CNN
-        ("textcat", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
-        ("textcat_multilabel", {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
+        # REDUCE
+        ("textcat", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True, "use_reduce_first": True, "use_reduce_max": True, "use_reduce_mean": True}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False, "use_reduce_first": True, "use_reduce_max": True, "use_reduce_mean": True}),
     ],
 )
 # fmt: on
@@ -701,9 +701,9 @@ def test_overfitting_IO_multi():
         # ENSEMBLE V2
         ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}}),
         ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": True, "ngram_size": 5, "no_output_layer": False}}),
-        # CNN V2
-        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
-        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
+        # REDUCE V1
+        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True, "use_reduce_first": True, "use_reduce_max": True, "use_reduce_mean": True}),
+        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False, "use_reduce_first": True, "use_reduce_max": True, "use_reduce_mean": True}),
     ],
 )
 # fmt: on
diff --git a/spacy/tests/test_models.py b/spacy/tests/test_models.py
index e6692ad92c0..ce033a27465 100644
--- a/spacy/tests/test_models.py
+++ b/spacy/tests/test_models.py
@@ -26,6 +26,8 @@
     build_Tok2Vec_model,
 )
 from spacy.ml.staticvectors import StaticVectors
+from spacy.pipeline import tok2vec
+from spacy.util import registry
 
 
 def get_textcat_bow_kwargs():
@@ -284,3 +286,16 @@ def test_spancat_model_forward_backward(nO=5):
     Y, backprop = model((docs, spans), is_train=True)
     assert Y.shape == (spans.dataXd.shape[0], nO)
     backprop(Y)
+
+
+def test_textcat_reduce_invalid_args():
+    textcat_reduce = registry.architectures.get("spacy.TextCatReduce.v1")
+    tok2vec = make_test_tok2vec()
+    with pytest.raises(ValueError, match=r"must be used with at least one reduction"):
+        textcat_reduce(
+            tok2vec=tok2vec,
+            exclusive_classes=False,
+            use_reduce_first=False,
+            use_reduce_max=False,
+            use_reduce_mean=False,
+        )
diff --git a/website/docs/api/architectures.mdx b/website/docs/api/architectures.mdx
index 9d8b3ddfa5a..4af60ec6540 100644
--- a/website/docs/api/architectures.mdx
+++ b/website/docs/api/architectures.mdx
@@ -1043,6 +1043,9 @@ A neural network model where token vectors are calculated using a CNN. The
 vectors are mean pooled and used as features in a feed-forward network. This
 architecture is usually less accurate than the ensemble, but runs faster.
 
+This model is identical to [TexCatReduce.v1](#TextCatReduce) with
+`use_reduce_mean=true`.
+
 | Name                | Description                                                                                                                                                                                    |
 | ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `exclusive_classes` | Whether or not categories are mutually exclusive. ~~bool~~                                                                                                                                     |
@@ -1096,6 +1099,44 @@ the others, but may not be as accurate, especially if texts are short.
 
 </Accordion>
 
+### spacy.TextCatReduce.v1 {id="TextCatReduce"}
+
+> #### Example Config
+>
+> ```ini
+> [model]
+> @architectures = "spacy.TextCatReduce.v1"
+> exclusive_classes = false
+> use_reduce_first = false
+> use_reduce_max = false
+> use_reduce_mean = true
+> nO = null
+>
+> [model.tok2vec]
+> @architectures = "spacy.HashEmbedCNN.v2"
+> pretrained_vectors = null
+> width = 96
+> depth = 4
+> embed_size = 2000
+> window_size = 1
+> maxout_pieces = 3
+> subword_features = true
+> ```
+
+A classifier that pools token hidden representations of each `Doc` using first,
+max or mean reduction and then applies a classification layer. Reductions are
+concatenated when multiple reductions are used.
+
+| Name                | Description                                                                                                                                                                                    |
+| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `exclusive_classes` | Whether or not categories are mutually exclusive. ~~bool~~                                                                                                                                     |
+| `tok2vec`           | The [`tok2vec`](#tok2vec) layer of the model. ~~Model~~                                                                                                                                        |
+| `use_reduce_first`  | Pool by using the hidden representation of the first token of a `Doc`. ~~bool~~                                                                                                                |
+| `use_reduce_max`    | Pool by taking the maximum values of the hidden representations of a `Doc`. ~~bool~~                                                                                                           |
+| `use_reduce_mean`   | Pool by taking the mean of all hidden representations of a `Doc`. ~~bool~~                                                                                                                     |
+| `nO`                | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
+| **CREATES**         | The model using the architecture. ~~Model[List[Doc], Floats2d]~~                                                                                                                               |
+
 ## Span classification architectures {id="spancat",source="spacy/ml/models/spancat.py"}
 
 ### spacy.SpanCategorizer.v1 {id="SpanCategorizer"}

From ab97add6e60eea2679cfd70bde02695e45fded54 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= <me@github.danieldk.eu>
Date: Thu, 7 Dec 2023 15:42:22 +0100
Subject: [PATCH 02/14] Doc fixes

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
---
 spacy/ml/models/textcat.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/ml/models/textcat.py b/spacy/ml/models/textcat.py
index a579a0e4d4d..95362d45bcb 100644
--- a/spacy/ml/models/textcat.py
+++ b/spacy/ml/models/textcat.py
@@ -221,12 +221,12 @@ def build_reduce_text_classifier(
     """Build a model that classifies pooled `Doc` representations.
 
     Pooling is performed using reductions. Reductions are concatenated when
-    multible reductions are used.
+    multiple reductions are used.
 
     tok2vec (Model): the tok2vec layer to pool over.
     exclusive_classes (bool): Whether or not classes are mutually exclusive.
     use_reduce_first (bool): Pool by using the hidden representation of the
-        first token of a `Doc`
+        first token of a `Doc`.
     use_reduce_max (bool): Pool by taking the maximum values of the hidden
         representations of a `Doc`.
     use_reduce_mean (bool): Pool by taking the mean of all hidden

From 71aa6f46288e0f28d85a98e99529fa82305166d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?= <me@danieldk.eu>
Date: Thu, 7 Dec 2023 16:20:36 +0100
Subject: [PATCH 03/14] Fully specify `TextCatCNN` <-> `TextCatReduce`
 equivalence

---
 website/docs/api/architectures.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/docs/api/architectures.mdx b/website/docs/api/architectures.mdx
index 4af60ec6540..9447ca1166d 100644
--- a/website/docs/api/architectures.mdx
+++ b/website/docs/api/architectures.mdx
@@ -1044,7 +1044,7 @@ vectors are mean pooled and used as features in a feed-forward network. This
 architecture is usually less accurate than the ensemble, but runs faster.
 
 This model is identical to [TexCatReduce.v1](#TextCatReduce) with
-`use_reduce_mean=true`.
+`use_reduce_mean=true`, `use_reduce_first=false` and `use_reduce_max=false`.
 
 | Name                | Description                                                                                                                                                                                    |
 | ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |

From 9fb573e2556dd4b0dafe9c9bc8a922006156583a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?= <me@danieldk.eu>
Date: Fri, 8 Dec 2023 10:38:29 +0100
Subject: [PATCH 04/14] Move TextCatCNN docs to legacy, in prep for moving to
 spacy-legacy

---
 netlify.toml                       |  1 +
 website/docs/api/architectures.mdx | 43 -------------------------
 website/docs/api/legacy.mdx        | 50 +++++++++++++++++++++++++++++-
 3 files changed, 50 insertions(+), 44 deletions(-)

diff --git a/netlify.toml b/netlify.toml
index ddcd0ca6c51..27a15fa2522 100644
--- a/netlify.toml
+++ b/netlify.toml
@@ -55,6 +55,7 @@ redirects = [
     {from = "/models/comparison", to = "/models", force = true},
     {from = "/api/#section-cython", to = "/api/cython", force = true},
     {from = "/api/#cython", to = "/api/cython", force = true},
+    {from = "/api/architectures#TextCatCNN", to = "/api/legacy#TextCatCNN_v2", force = true},
     {from = "/api/sentencesegmenter", to="/api/sentencizer"},
     {from = "/universe", to = "/universe/project/:id", query = {id = ":id"}, force = true},
     {from = "/universe", to = "/universe/category/:category", query = {category = ":category"}, force = true},
diff --git a/website/docs/api/architectures.mdx b/website/docs/api/architectures.mdx
index 9447ca1166d..643d6614046 100644
--- a/website/docs/api/architectures.mdx
+++ b/website/docs/api/architectures.mdx
@@ -1018,49 +1018,6 @@ but used an internal `tok2vec` instead of taking it as argument:
 
 </Accordion>
 
-### spacy.TextCatCNN.v2 {id="TextCatCNN"}
-
-> #### Example Config
->
-> ```ini
-> [model]
-> @architectures = "spacy.TextCatCNN.v2"
-> exclusive_classes = false
-> nO = null
->
-> [model.tok2vec]
-> @architectures = "spacy.HashEmbedCNN.v2"
-> pretrained_vectors = null
-> width = 96
-> depth = 4
-> embed_size = 2000
-> window_size = 1
-> maxout_pieces = 3
-> subword_features = true
-> ```
-
-A neural network model where token vectors are calculated using a CNN. The
-vectors are mean pooled and used as features in a feed-forward network. This
-architecture is usually less accurate than the ensemble, but runs faster.
-
-This model is identical to [TexCatReduce.v1](#TextCatReduce) with
-`use_reduce_mean=true`, `use_reduce_first=false` and `use_reduce_max=false`.
-
-| Name                | Description                                                                                                                                                                                    |
-| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `exclusive_classes` | Whether or not categories are mutually exclusive. ~~bool~~                                                                                                                                     |
-| `tok2vec`           | The [`tok2vec`](#tok2vec) layer of the model. ~~Model~~                                                                                                                                        |
-| `nO`                | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
-| **CREATES**         | The model using the architecture. ~~Model[List[Doc], Floats2d]~~                                                                                                                               |
-
-<Accordion title="spacy.TextCatCNN.v1 definition" spaced>
-
-[TextCatCNN.v1](/api/legacy#TextCatCNN_v1) had the exact same signature, but was
-not yet resizable. Since v2, new labels can be added to this component, even
-after training.
-
-</Accordion>
-
 ### spacy.TextCatBOW.v3 {id="TextCatBOW"}
 
 > #### Example Config
diff --git a/website/docs/api/legacy.mdx b/website/docs/api/legacy.mdx
index 32111ce9233..5fdc791c2ad 100644
--- a/website/docs/api/legacy.mdx
+++ b/website/docs/api/legacy.mdx
@@ -162,7 +162,10 @@ network has an internal CNN Tok2Vec layer and uses attention.
 
 Since `spacy.TextCatCNN.v2`, this architecture has become resizable, which means
 that you can add labels to a previously trained textcat. `TextCatCNN` v1 did not
-yet support that.
+yet support that. `TextCatCNN` has been replaced by the more general
+[`TextCatReduce`](/api/architectures#TextCatReduce) layer. `TextCatCNN` is
+identical to `TextCatReduce` with `use_reduce_mean=true`,
+`use_reduce_first=false` and `use_reduce_max=false`.
 
 > #### Example Config
 >
@@ -194,6 +197,51 @@ architecture is usually less accurate than the ensemble, but runs faster.
 | `nO`                | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
 | **CREATES**         | The model using the architecture. ~~Model[List[Doc], Floats2d]~~                                                                                                                               |
 
+### spacy.TextCatCNN.v2 {id="TextCatCNN_v2"}
+
+> #### Example Config
+>
+> ```ini
+> [model]
+> @architectures = "spacy.TextCatCNN.v2"
+> exclusive_classes = false
+> nO = null
+>
+> [model.tok2vec]
+> @architectures = "spacy.HashEmbedCNN.v2"
+> pretrained_vectors = null
+> width = 96
+> depth = 4
+> embed_size = 2000
+> window_size = 1
+> maxout_pieces = 3
+> subword_features = true
+> ```
+
+A neural network model where token vectors are calculated using a CNN. The
+vectors are mean pooled and used as features in a feed-forward network. This
+architecture is usually less accurate than the ensemble, but runs faster.
+
+`TextCatCNN` has been replaced by the more general
+[`TextCatReduce`](/api/architectures#TextCatReduce) layer. `TextCatCNN` is
+identical to `TextCatReduce` with `use_reduce_mean=true`,
+`use_reduce_first=false` and `use_reduce_max=false`.
+
+| Name                | Description                                                                                                                                                                                    |
+| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `exclusive_classes` | Whether or not categories are mutually exclusive. ~~bool~~                                                                                                                                     |
+| `tok2vec`           | The [`tok2vec`](#tok2vec) layer of the model. ~~Model~~                                                                                                                                        |
+| `nO`                | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
+| **CREATES**         | The model using the architecture. ~~Model[List[Doc], Floats2d]~~                                                                                                                               |
+
+<Accordion title="spacy.TextCatCNN.v1 definition" spaced>
+
+[TextCatCNN.v1](/api/legacy#TextCatCNN_v1) had the exact same signature, but was
+not yet resizable. Since v2, new labels can be added to this component, even
+after training.
+
+</Accordion>
+
 ### spacy.TextCatBOW.v1 {id="TextCatBOW_v1"}
 
 Since `spacy.TextCatBOW.v2`, this architecture has become resizable, which means

From 5fa33ce8ce465715a62b301bdfcb4fd5896c2f52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?= <me@danieldk.eu>
Date: Fri, 8 Dec 2023 10:39:28 +0100
Subject: [PATCH 05/14] Add back a test for TextCatCNN.v2

---
 spacy/tests/pipeline/test_textcat.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 78573cb9b4b..3eee067ebe7 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -701,6 +701,9 @@ def test_overfitting_IO_multi():
         # ENSEMBLE V2
         ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "ngram_size": 1, "no_output_layer": False}}),
         ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatEnsemble.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "linear_model": {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": True, "ngram_size": 5, "no_output_layer": False}}),
+        # CNN V2 (legacy)
+        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
+        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
         # REDUCE V1
         ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True, "use_reduce_first": True, "use_reduce_max": True, "use_reduce_mean": True}),
         ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False, "use_reduce_first": True, "use_reduce_max": True, "use_reduce_mean": True}),

From 09800bd7c63b9d3c52dcd7510f6f17303f2048ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?= <me@danieldk.eu>
Date: Fri, 8 Dec 2023 11:03:04 +0100
Subject: [PATCH 06/14] Replace TextCatCNN in pipe configurations and templates

---
 spacy/cli/templates/quickstart_training.jinja | 10 ++++++++--
 spacy/pipeline/textcat.py                     |  5 ++++-
 spacy/pipeline/textcat_multilabel.py          |  5 ++++-
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index 2817147f3e9..5d5c1770aa1 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -279,8 +279,11 @@ no_output_layer = false
 
 {% else -%}
 [components.textcat.model]
-@architectures = "spacy.TextCatCNN.v2"
+@architectures = "spacy.TextCatReduce.v1"
 exclusive_classes = true
+use_reduce_first = false
+use_reduce_max = false
+use_reduce_mean = true
 nO = null
 
 [components.textcat.model.tok2vec]
@@ -317,8 +320,11 @@ no_output_layer = false
 
 {% else -%}
 [components.textcat_multilabel.model]
-@architectures = "spacy.TextCatCNN.v2"
+@architectures = "spacy.TextCatReduce.v1"
 exclusive_classes = false
+use_reduce_first = false
+use_reduce_max = false
+use_reduce_mean = true
 nO = null
 
 [components.textcat_multilabel.model.tok2vec]
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index 43a335c4ac7..964a772c7cd 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -55,8 +55,11 @@
 
 single_label_cnn_config = """
 [model]
-@architectures = "spacy.TextCatCNN.v2"
+@architectures = "spacy.TextCatReduce.v1"
 exclusive_classes = true
+use_reduce_first = false
+use_reduce_max = false
+use_reduce_mean = true
 
 [model.tok2vec]
 @architectures = "spacy.HashEmbedCNN.v2"
diff --git a/spacy/pipeline/textcat_multilabel.py b/spacy/pipeline/textcat_multilabel.py
index c917cc61078..1183aaff3ac 100644
--- a/spacy/pipeline/textcat_multilabel.py
+++ b/spacy/pipeline/textcat_multilabel.py
@@ -53,8 +53,11 @@
 
 multi_label_cnn_config = """
 [model]
-@architectures = "spacy.TextCatCNN.v2"
+@architectures = "spacy.TextCatReduce.v1"
 exclusive_classes = false
+use_reduce_first = false
+use_reduce_max = false
+use_reduce_mean = true
 
 [model.tok2vec]
 @architectures = "spacy.HashEmbedCNN.v2"

From e310ddfd4a768b8294a52934c35e81d7b1656d25 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?= <me@danieldk.eu>
Date: Fri, 8 Dec 2023 12:03:18 +0100
Subject: [PATCH 07/14] Add an infobox to the `TextCatReduce` section with a
 `TextCatCNN` anchor

---
 website/docs/api/architectures.mdx | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/website/docs/api/architectures.mdx b/website/docs/api/architectures.mdx
index 643d6614046..1e5f505127d 100644
--- a/website/docs/api/architectures.mdx
+++ b/website/docs/api/architectures.mdx
@@ -1084,6 +1084,14 @@ A classifier that pools token hidden representations of each `Doc` using first,
 max or mean reduction and then applies a classification layer. Reductions are
 concatenated when multiple reductions are used.
 
+<Infobox variant="warning" title="Relation to TextCatCNN" id="TextCatCNN">
+
+`TextCatReduce` is a generalization of the older
+[`TextCatCNN`](/api/legacy#TextCatCNN_v2) model. `TextCatCNN` always uses a mean
+reduction, whereas `TextCatReduce` also supports first/max reductions.
+
+</Infobox>
+
 | Name                | Description                                                                                                                                                                                    |
 | ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `exclusive_classes` | Whether or not categories are mutually exclusive. ~~bool~~                                                                                                                                     |

From 7669a450ec31e4b98224df1f1794c429665603dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?= <me@danieldk.eu>
Date: Mon, 11 Dec 2023 10:13:53 +0100
Subject: [PATCH 08/14] Add last reduction (`use_reduce_last`)

---
 spacy/cli/templates/quickstart_training.jinja |  2 ++
 spacy/errors.py                               |  3 ++-
 spacy/ml/models/textcat.py                    |  7 +++++++
 spacy/pipeline/textcat.py                     |  1 +
 spacy/pipeline/textcat_multilabel.py          |  1 +
 spacy/tests/pipeline/test_textcat.py          | 12 ++++++------
 spacy/tests/test_models.py                    |  1 +
 website/docs/api/architectures.mdx            |  2 ++
 website/docs/api/legacy.mdx                   |  4 ++--
 9 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index 5d5c1770aa1..f2b9777cd73 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -282,6 +282,7 @@ no_output_layer = false
 @architectures = "spacy.TextCatReduce.v1"
 exclusive_classes = true
 use_reduce_first = false
+use_reduce_last = false
 use_reduce_max = false
 use_reduce_mean = true
 nO = null
@@ -323,6 +324,7 @@ no_output_layer = false
 @architectures = "spacy.TextCatReduce.v1"
 exclusive_classes = false
 use_reduce_first = false
+use_reduce_last = false
 use_reduce_max = false
 use_reduce_mean = true
 nO = null
diff --git a/spacy/errors.py b/spacy/errors.py
index 28f34e26605..2455c2f8628 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -985,7 +985,8 @@ class Errors(metaclass=ErrorsWithCodes):
              "but only callbacks with one or three parameters are supported")
     E1056 = ("The `TextCatBOW` architecture expects a length of at least 1, was {length}.")
     E1057 = ("The `TextCatReduce` architecture must be used with at least one reduction. "
-             "Please enable one of `use_reduce_first`, `use_reduce_max` or `use_reduce_mean`.")
+             "Please enable one of `use_reduce_first`, `reduce_last`, `use_reduce_max` or "
+             "`use_reduce_mean`.")
 
 
 # Deprecated model shortcuts, only used in errors and warnings
diff --git a/spacy/ml/models/textcat.py b/spacy/ml/models/textcat.py
index 95362d45bcb..e0dcb47aa85 100644
--- a/spacy/ml/models/textcat.py
+++ b/spacy/ml/models/textcat.py
@@ -18,6 +18,7 @@
     concatenate,
     list2ragged,
     reduce_first,
+    reduce_last,
     reduce_max,
     reduce_mean,
     reduce_sum,
@@ -55,6 +56,7 @@ def build_simple_cnn_text_classifier(
         tok2vec=tok2vec,
         exclusive_classes=exclusive_classes,
         use_reduce_first=False,
+        use_reduce_last=False,
         use_reduce_max=False,
         use_reduce_mean=True,
         nO=nO,
@@ -214,6 +216,7 @@ def build_reduce_text_classifier(
     tok2vec: Model,
     exclusive_classes: bool,
     use_reduce_first: bool,
+    use_reduce_last: bool,
     use_reduce_max: bool,
     use_reduce_mean: bool,
     nO: Optional[int] = None,
@@ -227,6 +230,8 @@ def build_reduce_text_classifier(
     exclusive_classes (bool): Whether or not classes are mutually exclusive.
     use_reduce_first (bool): Pool by using the hidden representation of the
         first token of a `Doc`.
+    use_reduce_first (bool): Pool by using the hidden representation of the
+        last token of a `Doc`.
     use_reduce_max (bool): Pool by taking the maximum values of the hidden
         representations of a `Doc`.
     use_reduce_mean (bool): Pool by taking the mean of all hidden
@@ -238,6 +243,8 @@ def build_reduce_text_classifier(
     reductions = []
     if use_reduce_first:
         reductions.append(reduce_first())
+    if use_reduce_last:
+        reductions.append(reduce_last())
     if use_reduce_max:
         reductions.append(reduce_max())
     if use_reduce_mean:
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index 964a772c7cd..ae227017a9f 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -58,6 +58,7 @@
 @architectures = "spacy.TextCatReduce.v1"
 exclusive_classes = true
 use_reduce_first = false
+use_reduce_last = false
 use_reduce_max = false
 use_reduce_mean = true
 
diff --git a/spacy/pipeline/textcat_multilabel.py b/spacy/pipeline/textcat_multilabel.py
index 1183aaff3ac..2f8d5e60437 100644
--- a/spacy/pipeline/textcat_multilabel.py
+++ b/spacy/pipeline/textcat_multilabel.py
@@ -56,6 +56,7 @@
 @architectures = "spacy.TextCatReduce.v1"
 exclusive_classes = false
 use_reduce_first = false
+use_reduce_last = false
 use_reduce_max = false
 use_reduce_mean = true
 
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 3eee067ebe7..5dff8d12455 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -457,8 +457,8 @@ def test_no_resize(name, textcat_config):
         ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
         ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
         # CNN
-        ("textcat", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True, "use_reduce_first": True, "use_reduce_max": True, "use_reduce_mean": True}),
-        ("textcat_multilabel", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False, "use_reduce_first": True, "use_reduce_max": True, "use_reduce_mean": True}),
+        ("textcat", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True, "use_reduce_first": True, "use_reduce_last": True, "use_reduce_max": True, "use_reduce_mean": True}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False, "use_reduce_first": True, "use_reduce_last": True, "use_reduce_max": True, "use_reduce_mean": True}),
     ],
 )
 # fmt: on
@@ -486,8 +486,8 @@ def test_resize(name, textcat_config):
         ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "no_output_layer": False, "ngram_size": 3}),
         ("textcat_multilabel", {"@architectures": "spacy.TextCatBOW.v3", "exclusive_classes": False, "no_output_layer": True, "ngram_size": 3}),
         # REDUCE
-        ("textcat", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True, "use_reduce_first": True, "use_reduce_max": True, "use_reduce_mean": True}),
-        ("textcat_multilabel", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False, "use_reduce_first": True, "use_reduce_max": True, "use_reduce_mean": True}),
+        ("textcat", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True, "use_reduce_first": True, "use_reduce_last": True, "use_reduce_max": True, "use_reduce_mean": True}),
+        ("textcat_multilabel", {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False, "use_reduce_first": True, "use_reduce_last": True, "use_reduce_max": True, "use_reduce_mean": True}),
     ],
 )
 # fmt: on
@@ -705,8 +705,8 @@ def test_overfitting_IO_multi():
         ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True}),
         ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatCNN.v2", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False}),
         # REDUCE V1
-        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True, "use_reduce_first": True, "use_reduce_max": True, "use_reduce_mean": True}),
-        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False, "use_reduce_first": True, "use_reduce_max": True, "use_reduce_mean": True}),
+        ("textcat", TRAIN_DATA_SINGLE_LABEL, {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": True, "use_reduce_first": True, "use_reduce_last": True, "use_reduce_max": True, "use_reduce_mean": True}),
+        ("textcat_multilabel", TRAIN_DATA_MULTI_LABEL, {"@architectures": "spacy.TextCatReduce.v1", "tok2vec": DEFAULT_TOK2VEC_MODEL, "exclusive_classes": False, "use_reduce_first": True, "use_reduce_last": True, "use_reduce_max": True, "use_reduce_mean": True}),
     ],
 )
 # fmt: on
diff --git a/spacy/tests/test_models.py b/spacy/tests/test_models.py
index ce033a27465..5ce9da50801 100644
--- a/spacy/tests/test_models.py
+++ b/spacy/tests/test_models.py
@@ -296,6 +296,7 @@ def test_textcat_reduce_invalid_args():
             tok2vec=tok2vec,
             exclusive_classes=False,
             use_reduce_first=False,
+            use_reduce_last=False,
             use_reduce_max=False,
             use_reduce_mean=False,
         )
diff --git a/website/docs/api/architectures.mdx b/website/docs/api/architectures.mdx
index 1e5f505127d..63f723a28cf 100644
--- a/website/docs/api/architectures.mdx
+++ b/website/docs/api/architectures.mdx
@@ -1065,6 +1065,7 @@ the others, but may not be as accurate, especially if texts are short.
 > @architectures = "spacy.TextCatReduce.v1"
 > exclusive_classes = false
 > use_reduce_first = false
+> use_reduce_last = false
 > use_reduce_max = false
 > use_reduce_mean = true
 > nO = null
@@ -1097,6 +1098,7 @@ reduction, whereas `TextCatReduce` also supports first/max reductions.
 | `exclusive_classes` | Whether or not categories are mutually exclusive. ~~bool~~                                                                                                                                     |
 | `tok2vec`           | The [`tok2vec`](#tok2vec) layer of the model. ~~Model~~                                                                                                                                        |
 | `use_reduce_first`  | Pool by using the hidden representation of the first token of a `Doc`. ~~bool~~                                                                                                                |
+| `use_reduce_last`   | Pool by using the hidden representation of the last token of a `Doc`. ~~bool~~                                                                                                                 |
 | `use_reduce_max`    | Pool by taking the maximum values of the hidden representations of a `Doc`. ~~bool~~                                                                                                           |
 | `use_reduce_mean`   | Pool by taking the mean of all hidden representations of a `Doc`. ~~bool~~                                                                                                                     |
 | `nO`                | Output dimension, determined by the number of different labels. If not set, the [`TextCategorizer`](/api/textcategorizer) component will set it when `initialize` is called. ~~Optional[int]~~ |
diff --git a/website/docs/api/legacy.mdx b/website/docs/api/legacy.mdx
index 5fdc791c2ad..b44df538766 100644
--- a/website/docs/api/legacy.mdx
+++ b/website/docs/api/legacy.mdx
@@ -165,7 +165,7 @@ that you can add labels to a previously trained textcat. `TextCatCNN` v1 did not
 yet support that. `TextCatCNN` has been replaced by the more general
 [`TextCatReduce`](/api/architectures#TextCatReduce) layer. `TextCatCNN` is
 identical to `TextCatReduce` with `use_reduce_mean=true`,
-`use_reduce_first=false` and `use_reduce_max=false`.
+`use_reduce_first=false`, `use_reduce_last=false` and `use_reduce_max=false`.
 
 > #### Example Config
 >
@@ -225,7 +225,7 @@ architecture is usually less accurate than the ensemble, but runs faster.
 `TextCatCNN` has been replaced by the more general
 [`TextCatReduce`](/api/architectures#TextCatReduce) layer. `TextCatCNN` is
 identical to `TextCatReduce` with `use_reduce_mean=true`,
-`use_reduce_first=false` and `use_reduce_max=false`.
+`use_reduce_first=false`, `use_reduce_last=false` and `use_reduce_max=false`.
 
 | Name                | Description                                                                                                                                                                                    |
 | ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |

From a53c482bdfabe89743cabe80b79b167e5724ceed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?= <me@danieldk.eu>
Date: Mon, 11 Dec 2023 10:33:26 +0100
Subject: [PATCH 09/14] Remove non-working TextCatCNN Netlify redirect

---
 netlify.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/netlify.toml b/netlify.toml
index 27a15fa2522..ddcd0ca6c51 100644
--- a/netlify.toml
+++ b/netlify.toml
@@ -55,7 +55,6 @@ redirects = [
     {from = "/models/comparison", to = "/models", force = true},
     {from = "/api/#section-cython", to = "/api/cython", force = true},
     {from = "/api/#cython", to = "/api/cython", force = true},
-    {from = "/api/architectures#TextCatCNN", to = "/api/legacy#TextCatCNN_v2", force = true},
     {from = "/api/sentencesegmenter", to="/api/sentencizer"},
     {from = "/universe", to = "/universe/project/:id", query = {id = ":id"}, force = true},
     {from = "/universe", to = "/universe/category/:category", query = {category = ":category"}, force = true},

From 87816d1cc2f91464d04d8df32715e98ad16817d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?= <me@danieldk.eu>
Date: Mon, 11 Dec 2023 16:56:52 +0100
Subject: [PATCH 10/14] Revert layer changes for the quickstart

---
 spacy/cli/templates/quickstart_training.jinja | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index f2b9777cd73..424570124d2 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -279,12 +279,8 @@ no_output_layer = false
 
 {% else -%}
 [components.textcat.model]
-@architectures = "spacy.TextCatReduce.v1"
+@architectures = "spacy.TextCatCNN.v2"
 exclusive_classes = true
-use_reduce_first = false
-use_reduce_last = false
-use_reduce_max = false
-use_reduce_mean = true
 nO = null
 
 [components.textcat.model.tok2vec]
@@ -321,12 +317,8 @@ no_output_layer = false
 
 {% else -%}
 [components.textcat_multilabel.model]
-@architectures = "spacy.TextCatReduce.v1"
-exclusive_classes = false
-use_reduce_first = false
-use_reduce_last = false
-use_reduce_max = false
-use_reduce_mean = true
+@architectures = "spacy.TextCatCNN.v2"
+exclusive_classes = true
 nO = null
 
 [components.textcat_multilabel.model.tok2vec]

From 65a1dd4c1a30c544290bca0abe5092265f4aa792 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?= <me@danieldk.eu>
Date: Mon, 11 Dec 2023 16:57:44 +0100
Subject: [PATCH 11/14] Revert one more quickstart change

---
 spacy/cli/templates/quickstart_training.jinja | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index 424570124d2..2817147f3e9 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -318,7 +318,7 @@ no_output_layer = false
 {% else -%}
 [components.textcat_multilabel.model]
 @architectures = "spacy.TextCatCNN.v2"
-exclusive_classes = true
+exclusive_classes = false
 nO = null
 
 [components.textcat_multilabel.model.tok2vec]

From 684bd5e677af5e86fc0c49a4f245b605ef068099 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Wed, 20 Dec 2023 17:21:55 +0100
Subject: [PATCH 12/14] Remove unused import

---
 spacy/tests/test_models.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/spacy/tests/test_models.py b/spacy/tests/test_models.py
index 5ce9da50801..5228b4544fd 100644
--- a/spacy/tests/test_models.py
+++ b/spacy/tests/test_models.py
@@ -26,7 +26,6 @@
     build_Tok2Vec_model,
 )
 from spacy.ml.staticvectors import StaticVectors
-from spacy.pipeline import tok2vec
 from spacy.util import registry
 
 

From 3d86539e23749ef1c33c6ff6040f54759d37f719 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Wed, 20 Dec 2023 17:22:10 +0100
Subject: [PATCH 13/14] Fix docstring

---
 spacy/ml/models/textcat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/ml/models/textcat.py b/spacy/ml/models/textcat.py
index e0dcb47aa85..93929bd4ec9 100644
--- a/spacy/ml/models/textcat.py
+++ b/spacy/ml/models/textcat.py
@@ -230,7 +230,7 @@ def build_reduce_text_classifier(
     exclusive_classes (bool): Whether or not classes are mutually exclusive.
     use_reduce_first (bool): Pool by using the hidden representation of the
         first token of a `Doc`.
-    use_reduce_first (bool): Pool by using the hidden representation of the
+    use_reduce_last (bool): Pool by using the hidden representation of the
         last token of a `Doc`.
     use_reduce_max (bool): Pool by taking the maximum values of the hidden
         representations of a `Doc`.

From 288f623cf283be4ba3414590b9df70dc1a062cf4 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Wed, 20 Dec 2023 17:23:44 +0100
Subject: [PATCH 14/14] Fix setting name in error message

---
 spacy/errors.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 2455c2f8628..b6108dd0ff7 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -984,9 +984,9 @@ class Errors(metaclass=ErrorsWithCodes):
     E1055 = ("The 'replace_listener' callback expects {num_params} parameters, "
              "but only callbacks with one or three parameters are supported")
     E1056 = ("The `TextCatBOW` architecture expects a length of at least 1, was {length}.")
-    E1057 = ("The `TextCatReduce` architecture must be used with at least one reduction. "
-             "Please enable one of `use_reduce_first`, `reduce_last`, `use_reduce_max` or "
-             "`use_reduce_mean`.")
+    E1057 = ("The `TextCatReduce` architecture must be used with at least one "
+             "reduction. Please enable one of `use_reduce_first`, "
+             "`use_reduce_last`, `use_reduce_max` or `use_reduce_mean`.")
 
 
 # Deprecated model shortcuts, only used in errors and warnings