Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

⚡️ Speed up get_default_document_variable_name() by 21% in libs/langchain/langchain/chains/combine_documents/refine.py #32

Open
wants to merge 1 commit into
base: master
Choose a base branch
from

Conversation

codeflash-ai[bot]
Copy link

@codeflash-ai codeflash-ai bot commented Feb 16, 2024

📄 get_default_document_variable_name() in libs/langchain/langchain/chains/combine_documents/refine.py

📈 Performance went up by 21% (0.21x faster)

⏱️ Runtime went down from 7.50μs to 6.20μs

Explanation and details

(click to show)

The provided code can be optimized by eliminating the duplicated assignment of llm_chain_variables that appears in both the if and the else branch. Instead, assign it once before checking the condition. Here's the optimized version.

This minor optimization helps eliminate the redundant step of calculating llm_chain_variables twice when document_variable_name is present in values. This can speed up execution slightly, especially if accessing values["initial_llm_chain"].prompt.input_variables is computationally expensive.

Correctness verification

The new optimized code was tested for correctness. The results are listed below.

✅ 0 Passed − ⚙️ Existing Unit Tests

✅ 0 Passed − 🎨 Inspired Regression Tests

✅ 10 Passed − 🌀 Generated Regression Tests

(click to show generated tests)
# imports
import pytest  # used for our unit tests
from pydantic import root_validator, BaseModel, ValidationError

# function to test

class BaseCombineDocumentsChain(BaseModel):
    """Stub pydantic model that declares no fields or methods of its own."""
from langchain.chains.combine_documents.refine import RefineDocumentsChain

# helper class to simulate the structure expected by the function
class MockPrompt:
    """Minimal prompt stand-in that only carries ``input_variables``."""

    def __init__(self, input_variables):
        # Stored exactly as given: no copying, no validation.
        self.input_variables = input_variables

class MockLLMChain:
    """Minimal chain stand-in that only exposes a ``prompt`` attribute."""

    def __init__(self, prompt):
        # Stored exactly as given; ``None`` is accepted and kept as-is.
        self.prompt = prompt

# unit tests

@pytest.fixture
def single_variable_llm_chain():
    """Provide validator input whose prompt declares exactly one variable."""
    prompt = MockPrompt(["variable"])
    chain = MockLLMChain(prompt)
    return {"initial_llm_chain": chain}

@pytest.fixture
def multiple_variable_llm_chain():
    """Provide validator input whose prompt declares two variables."""
    prompt = MockPrompt(["var1", "var2"])
    chain = MockLLMChain(prompt)
    return {"initial_llm_chain": chain}

@pytest.fixture
def no_variable_llm_chain():
    """Provide validator input whose prompt declares no variables at all."""
    prompt = MockPrompt([])
    chain = MockLLMChain(prompt)
    return {"initial_llm_chain": chain}

def test_single_variable_without_document_variable_name(single_variable_llm_chain):
    """Scenario 1: with one prompt variable and no explicit name, that variable is used."""
    validated = RefineDocumentsChain.get_default_document_variable_name(
        single_variable_llm_chain
    )
    assert validated["document_variable_name"] == "variable"

def test_multiple_variables_without_document_variable_name(multiple_variable_llm_chain):
    """Scenario 2: several prompt variables and no explicit name must be rejected."""
    with pytest.raises(ValueError):
        RefineDocumentsChain.get_default_document_variable_name(
            multiple_variable_llm_chain
        )

def test_no_variables_without_document_variable_name(no_variable_llm_chain):
    """Scenario 3: zero prompt variables and no explicit name must be rejected."""
    with pytest.raises(ValueError):
        RefineDocumentsChain.get_default_document_variable_name(
            no_variable_llm_chain
        )

def test_document_variable_name_provided_and_exists(single_variable_llm_chain):
    """Scenario 4: an explicit name matching the prompt variable is kept."""
    single_variable_llm_chain.update(document_variable_name="variable")
    validated = RefineDocumentsChain.get_default_document_variable_name(
        single_variable_llm_chain
    )
    assert validated["document_variable_name"] == "variable"

def test_document_variable_name_provided_but_does_not_exist(multiple_variable_llm_chain):
    """Scenario 5: an explicit name absent from the prompt variables is rejected."""
    multiple_variable_llm_chain.update({"document_variable_name": "nonexistent"})
    with pytest.raises(ValueError):
        RefineDocumentsChain.get_default_document_variable_name(
            multiple_variable_llm_chain
        )

def test_values_missing_initial_llm_chain_key():
    """Scenario 6: input without an ``initial_llm_chain`` key raises ``KeyError``."""
    values_without_chain = {}
    with pytest.raises(KeyError):
        RefineDocumentsChain.get_default_document_variable_name(values_without_chain)

def test_initial_llm_chain_missing_prompt_attribute():
    """Scenario 7: a chain whose ``prompt`` is ``None`` raises ``AttributeError``."""
    promptless_chain = MockLLMChain(None)
    values = {"initial_llm_chain": promptless_chain}
    with pytest.raises(AttributeError):
        RefineDocumentsChain.get_default_document_variable_name(values)

def test_initial_llm_chain_prompt_missing_input_variables_attribute():
    """Scenario 7 (continued): the prompt has no ``input_variables`` attribute.

    A bare ``object()`` stands in for the prompt because it genuinely lacks
    the attribute. ``MockPrompt(None)`` is not suitable here: it still
    defines ``input_variables`` (set to ``None``), so looking the attribute
    up succeeds and the access itself cannot raise ``AttributeError``.
    """
    prompt_without_variables = object()
    values = {"initial_llm_chain": MockLLMChain(prompt_without_variables)}
    with pytest.raises(AttributeError):
        RefineDocumentsChain.get_default_document_variable_name(values)

def test_input_variables_not_a_list(single_variable_llm_chain):
    """Scenario 8: ``input_variables`` is a string instead of a list.

    The string ``"not_a_list"`` supports ``len()`` and has ten characters,
    so it is not a type error to measure it.
    NOTE(review): the validator is expected to treat this like any other
    "not exactly one variable" input (compare scenarios 2 and 3) and raise
    ``ValueError`` rather than ``TypeError`` -- confirm against the
    validator's implementation.
    """
    single_variable_llm_chain["initial_llm_chain"].prompt.input_variables = "not_a_list"
    with pytest.raises(ValueError):
        RefineDocumentsChain.get_default_document_variable_name(single_variable_llm_chain)

def test_input_variables_list_with_non_string_elements():
    """Scenario 8 (continued): ``input_variables`` holds non-string items.

    The list ``[1, 2, 3]`` has three elements and no explicit
    ``document_variable_name`` is supplied, which is the same situation as
    scenario 2 (multiple variables).
    NOTE(review): the validator is expected to reject this with
    ``ValueError`` based on the element count, without inspecting element
    types -- confirm against the validator's implementation.
    """
    values = {"initial_llm_chain": MockLLMChain(MockPrompt([1, 2, 3]))}
    with pytest.raises(ValueError):
        RefineDocumentsChain.get_default_document_variable_name(values)

def test_empty_values_dictionary():
    """Scenario 9: a completely empty input dictionary raises ``KeyError``."""
    empty_values = {}
    with pytest.raises(KeyError):
        RefineDocumentsChain.get_default_document_variable_name(empty_values)

def test_document_variable_name_is_none(multiple_variable_llm_chain):
    """Scenario 10: an explicit name of ``None`` is rejected with ``ValueError``."""
    multiple_variable_llm_chain.update(document_variable_name=None)
    with pytest.raises(ValueError):
        RefineDocumentsChain.get_default_document_variable_name(
            multiple_variable_llm_chain
        )

def test_document_variable_name_is_empty_string(multiple_variable_llm_chain):
    """Scenario 10 (continued): an empty-string name is rejected with ``ValueError``."""
    multiple_variable_llm_chain.update(document_variable_name="")
    with pytest.raises(ValueError):
        RefineDocumentsChain.get_default_document_variable_name(
            multiple_variable_llm_chain
        )

@codeflash-ai codeflash-ai bot added the ⚡️ codeflash Optimization PR opened by CodeFlash AI label Feb 16, 2024
@codeflash-ai codeflash-ai bot requested a review from aphexcx February 16, 2024 16:11
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
⚡️ codeflash Optimization PR opened by CodeFlash AI
Projects
None yet
Development

Successfully merging this pull request may close these issues.

0 participants