Use Case: Extraction set temperature to 0, qualify a statement (langchain-ai#18672)

Minor changes:
1) Set temperature to 0 (important)
2) Better qualify one of the statements with confidence
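The first change is the important one because sampling temperature scales the logits before the softmax: as temperature approaches 0, the probability mass collapses onto the highest-logit token, so decoding becomes effectively greedy and the extraction output becomes repeatable. A toy pure-Python sketch of the effect (illustrative only, not LangChain code):

```python
import math

def sample_probs(logits, temperature):
    """Softmax over logits scaled by temperature.

    As temperature approaches 0, probability mass concentrates on the
    largest logit, so decoding becomes effectively greedy/deterministic.
    """
    if temperature <= 0:
        # Limit case: all mass on the argmax token.
        best = max(range(len(logits)), key=lambda i: logits[i])
        return [1.0 if i == best else 0.0 for i in range(len(logits))]
    scaled = [x / temperature for x in logits]
    m = max(scaled)  # subtract the max for numerical stability
    exps = [math.exp(x - m) for x in scaled]
    total = sum(exps)
    return [e / total for e in exps]

print(sample_probs([2.0, 1.0, 0.5], 0))    # → [1.0, 0.0, 0.0]
print(sample_probs([2.0, 1.0, 0.5], 1.0))  # mass spread across all tokens
```

With `temperature=0`, repeated runs of the same extraction prompt should yield the same structured output, which is usually what you want when the goal is recovering facts rather than generating varied text.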
eyurtsev authored and Dave Bechberger committed Mar 29, 2024
1 parent 45cfd97 commit baf953b
Showing 5 changed files with 46 additions and 65 deletions.
10 changes: 1 addition & 9 deletions docs/docs/use_cases/extraction/how_to/examples.ipynb
Original file line number Diff line number Diff line change
@@ -430,14 +430,6 @@
" }\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d18bb013",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -456,7 +448,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.11.2"
}
},
"nbformat": 4,
10 changes: 1 addition & 9 deletions docs/docs/use_cases/extraction/how_to/handle_long_text.ipynb
@@ -394,14 +394,6 @@
"* Large chunk overlap may cause the same information to be extracted twice, so be prepared to de-duplicate!\n",
"* LLMs can make up data. If looking for a single fact across a large text and using a brute force approach, you may end up getting more made up data."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5f9685f-9d68-4155-a78c-0cb50821e21f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -420,7 +412,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.11.2"
}
},
"nbformat": 4,
22 changes: 11 additions & 11 deletions docs/docs/use_cases/extraction/how_to/parse.ipynb
@@ -32,7 +32,7 @@
"source": [
"from langchain_anthropic.chat_models import ChatAnthropic\n",
"\n",
"model = ChatAnthropic(model_name=\"claude-3-sonnet-20240229\")"
"model = ChatAnthropic(model_name=\"claude-3-sonnet-20240229\", temperature=0)"
]
},
{
@@ -59,7 +59,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 2,
"id": "497eb023-c043-443d-ac62-2d4ea85fe1b0",
"metadata": {},
"outputs": [],
@@ -111,7 +111,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 3,
"id": "20b99ffb-a114-49a9-a7be-154c525f8ada",
"metadata": {},
"outputs": [],
@@ -121,7 +121,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 4,
"id": "4f3a66ce-de19-4571-9e54-67504ae3fba7",
"metadata": {},
"outputs": [
@@ -149,7 +149,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 5,
"id": "3a46b5fd-9242-4b8c-a4e2-3f04fc19b3a4",
"metadata": {},
"outputs": [
@@ -159,7 +159,7 @@
"People(people=[Person(name='Anna', height_in_meters=1.83)])"
]
},
"execution_count": 11,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -183,7 +183,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 6,
"id": "b1f11912-c1bb-4a2a-a482-79bf3996961f",
"metadata": {},
"outputs": [],
@@ -253,15 +253,15 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 7,
"id": "cda52ef5-a354-47a7-9c25-45153c2389e2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"System: Answer the user query. Output your answer as JSON that matches the given schema: ```json\n",
"System: Answer the user query. Output your answer as JSON that matches the given schema: ```json\n",
"{'title': 'People', 'description': 'Identifying information about all people in a text.', 'type': 'object', 'properties': {'people': {'title': 'People', 'type': 'array', 'items': {'$ref': '#/definitions/Person'}}}, 'required': ['people'], 'definitions': {'Person': {'title': 'Person', 'description': 'Information about a person.', 'type': 'object', 'properties': {'name': {'title': 'Name', 'description': 'The name of the person', 'type': 'string'}, 'height_in_meters': {'title': 'Height In Meters', 'description': 'The height of the person expressed in meters.', 'type': 'number'}}, 'required': ['name', 'height_in_meters']}}}\n",
"```. Make sure to wrap the answer in ```json and ``` tags\n",
"Human: Anna is 23 years old and she is 6 feet tall\n"
@@ -275,7 +275,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 8,
"id": "993dc61a-229d-4795-a746-0d17df86b5c0",
"metadata": {},
"outputs": [
Expand All @@ -285,7 +285,7 @@
"[{'people': [{'name': 'Anna', 'height_in_meters': 1.83}]}]"
]
},
"execution_count": 18,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
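The parse.ipynb hunks above come from a prompt-based parsing guide: the system prompt instructs the model to wrap its JSON answer in json code fences, and a downstream parser pulls the JSON back out, yielding output like `[{'people': [{'name': 'Anna', 'height_in_meters': 1.83}]}]`. A minimal hedged sketch of that parsing step (the notebook's actual parser may differ):

```python
import json
import re

def extract_json(message_text):
    """Pull every fenced ``json`` block out of a model reply and parse it.

    Mirrors the contract the notebook's system prompt sets up ("wrap the
    answer in json tags"); returns a list because the model may emit
    several blocks.
    """
    pattern = r"```json(.*?)```"  # DOTALL lets the block span multiple lines
    matches = re.findall(pattern, message_text, re.DOTALL)
    return [json.loads(match.strip()) for match in matches]

reply = "Result:\n```json\n{\"people\": [{\"name\": \"Anna\", \"height_in_meters\": 1.83}]}\n```"
print(extract_json(reply))
# → [{'people': [{'name': 'Anna', 'height_in_meters': 1.83}]}]
```

Note that `json.loads` will raise on malformed output, so a production parser would typically catch `json.JSONDecodeError` and retry or skip the block.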
12 changes: 2 additions & 10 deletions docs/docs/use_cases/extraction/index.ipynb
@@ -34,7 +34,7 @@
"\n",
"- **Tool/Function Calling** Mode: Some LLMs support a *tool or function calling* mode. These LLMs can structure output according to a given **schema**. Generally, this approach is the easiest to work with and is expected to yield good results.\n",
"\n",
"- **JSON Mode**: Some LLMs can be forced to output valid JSON. This is similar to the **tool/function calling** approach, except that the schema is provided as part of the prompt. Generally, our intuition is that this performs worse than a **tool/function calling** approach.\n",
"- **JSON Mode**: Some LLMs can be forced to output valid JSON. This is similar to the **tool/function calling** approach, except that the schema is provided as part of the prompt. Generally, our intuition is that this performs worse than a **tool/function calling** approach, but don't trust us and verify for your own use case!\n",
"\n",
"- **Prompting Based**: LLMs that can follow instructions well can be instructed to generate text in a desired format. The generated text can be parsed downstream using existing [Output Parsers](/docs/modules/model_io/output_parsers/) or using [custom parsers](/docs/modules/model_io/output_parsers/custom) into a structured format like JSON. This approach can be used with LLMs that **do not support** JSON mode or tool/function calling modes. This approach is more broadly applicable, though may yield worse results than models that have been fine-tuned for extraction or function calling.\n",
"\n",
@@ -71,14 +71,6 @@
"* [OpenAI's function and tool calling](https://platform.openai.com/docs/guides/function-calling)\n",
"* For example, see [OpenAI's JSON mode](https://platform.openai.com/docs/guides/text-generation/json-mode)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e171cab",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@@ -97,7 +89,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.11.2"
}
},
"nbformat": 4,
57 changes: 31 additions & 26 deletions docs/docs/use_cases/extraction/quickstart.ipynb
@@ -68,7 +68,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 7,
"id": "c141084c-fb94-4093-8d6a-81175d688e40",
"metadata": {},
"outputs": [],
@@ -91,9 +91,11 @@
" # Having a good description can help improve extraction results.\n",
" name: Optional[str] = Field(..., description=\"The name of the person\")\n",
" hair_color: Optional[str] = Field(\n",
" ..., description=\"The color of the person's eyes if known\"\n",
" ..., description=\"The color of the person's hair if known\"\n",
" )\n",
" height_in_meters: Optional[str] = Field(..., description=\"Height in METERs\")"
" height_in_meters: Optional[str] = Field(\n",
" ..., description=\"Height measured in meters\"\n",
" )"
]
},
{
@@ -117,14 +119,13 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 8,
"id": "a5e490f6-35ad-455e-8ae4-2bae021583ff",
"metadata": {},
"outputs": [],
"source": [
"from typing import Optional\n",
"\n",
"from langchain.chains import create_structured_output_runnable\n",
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
"from langchain_openai import ChatOpenAI\n",
@@ -162,14 +163,14 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 31,
"id": "04d846a6-d5cb-4009-ac19-61e3aac0177e",
"metadata": {},
"outputs": [],
"source": [
"from langchain_mistralai import ChatMistralAI\n",
"\n",
"llm = ChatMistralAI(model=\"mistral-large-latest\")\n",
"llm = ChatMistralAI(model=\"mistral-large-latest\", temperature=0)\n",
"\n",
"runnable = prompt | llm.with_structured_output(schema=Person)"
]
@@ -184,17 +185,17 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 32,
"id": "13165ac8-a1dc-44ce-a6ed-f52b577473e4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Person(name='Alan Smith', hair_color='blond', height_in_meters='1.83')"
"Person(name='Alan Smith', hair_color='blond', height_in_meters='1.8288')"
]
},
"execution_count": 4,
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
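The expected output in the hunk above changes from `'1.83'` to `'1.8288'` because the model now converts the prompt's "6 feet" to meters without rounding. A quick sanity check of that conversion (plain Python, for illustration only):

```python
FEET_TO_METERS = 0.3048  # one foot is exactly 0.3048 m by definition

def feet_to_meters(feet):
    # Round to 4 decimal places, matching the precision shown in the notebook.
    return round(feet * FEET_TO_METERS, 4)

print(feet_to_meters(6))  # → 1.8288
```

So `1.8288` is the exact conversion, while the earlier `1.83` was the same value rounded to two decimals.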
@@ -232,7 +233,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 33,
"id": "591a0c16-7a17-4883-91ee-0d6d2fdb265c",
"metadata": {},
"outputs": [],
@@ -255,9 +256,11 @@
" # Having a good description can help improve extraction results.\n",
" name: Optional[str] = Field(..., description=\"The name of the person\")\n",
" hair_color: Optional[str] = Field(\n",
" ..., description=\"The color of the person's eyes if known\"\n",
" ..., description=\"The color of the person's hair if known\"\n",
" )\n",
" height_in_meters: Optional[str] = Field(\n",
" ..., description=\"Height measured in meters\"\n",
" )\n",
" height_in_meters: Optional[str] = Field(..., description=\"Height in meters\")\n",
"\n",
"\n",
"class Data(BaseModel):\n",
@@ -267,26 +270,36 @@
" people: List[Person]"
]
},
{
"cell_type": "markdown",
"id": "5f5cda33-fd7b-481e-956a-703f45e40e1d",
"metadata": {},
"source": [
":::{.callout-important}\n",
"Extraction might not be perfect here. Please continue to see how to use **Reference Examples** to improve the quality of extraction, and see the **guidelines** section!\n",
":::"
]
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 34,
"id": "cf7062cc-1d1d-4a37-9122-509d1b87f0a6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Data(people=[Person(name='Jeff', hair_color='black', height_in_meters='2'), Person(name='Anna', hair_color=None, height_in_meters=None)])"
"Data(people=[Person(name='Jeff', hair_color=None, height_in_meters=None), Person(name='Anna', hair_color=None, height_in_meters=None)])"
]
},
"execution_count": 12,
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"runnable = prompt | llm.with_structured_output(schema=Data)\n",
"text = \"My name is Jeff and I am 2 meters. I have black hair. Anna has the same color hair as me.\"\n",
"text = \"My name is Jeff, my hair is black and i am 6 feet tall. Anna has the same color hair as me.\"\n",
"runnable.invoke({\"text\": text})"
]
},
@@ -318,14 +331,6 @@
"- [Use a Parsing Approach](/docs/use_cases/extraction/how_to/parse): Use a prompt based approach to extract with models that do not support **tool/function calling**.\n",
"- [Guidelines](/docs/use_cases/extraction/guidelines): Guidelines for getting good performance on extraction tasks."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "082fc1af",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -344,7 +349,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.11.2"
}
},
"nbformat": 4,
