Skip to content

LLM Parsers Reference

sqldeps.llm_parsers

LLM-based SQL parsers for dependency extraction.

This package provides integrations with various LLM providers for extracting SQL dependencies, with a common interface and factory function.

DeepseekExtractor

Bases: BaseSQLExtractor

DeepSeek-based SQL dependency extractor.

Attributes:

Name Type Description
ENV_VAR_NAME

Environment variable name for the API key

client

OpenAI client instance configured for DeepSeek API

Source code in sqldeps/llm_parsers/deepseek.py
class DeepseekExtractor(BaseSQLExtractor):
    """SQL dependency extractor that talks to the DeepSeek chat API.

    DeepSeek is reached through the OpenAI client, pointed at
    ``https://api.deepseek.com``.

    Attributes:
        ENV_VAR_NAME: Name of the environment variable consulted for the API
            key when none is passed to the constructor
        client: OpenAI client instance configured for the DeepSeek API
    """

    # Environment variable expected to hold the DeepSeek key
    ENV_VAR_NAME = "DEEPSEEK_API_KEY"

    def __init__(
        self,
        model: str = "deepseek-chat",
        params: dict | None = None,
        api_key: str | None = None,
        prompt_path: Path | None = None,
    ) -> None:
        """Set up the extractor and its DeepSeek client.

        Args:
            model: DeepSeek model name to use
            params: Additional parameters for the API
            api_key: DeepSeek API key; when falsy, the value of the
                ``ENV_VAR_NAME`` environment variable is used instead
            prompt_path: Path to custom prompt YAML file

        Raises:
            ValueError: If no key is passed and the environment variable is
                unset or empty
        """
        super().__init__(model, params, prompt_path=prompt_path)

        # An explicitly passed key takes precedence over the environment.
        resolved_key = api_key if api_key else os.getenv(self.ENV_VAR_NAME)
        if resolved_key:
            self.client = OpenAI(
                api_key=resolved_key, base_url="https://api.deepseek.com"
            )
        else:
            raise ValueError(
                "No API key provided. Either pass api_key parameter or set "
                f"{self.ENV_VAR_NAME} environment variable."
            )

    def _query_llm(self, user_prompt: str) -> str:
        """Send the prompt to DeepSeek and return the reply text.

        The request carries the configured system prompt followed by the
        user prompt, asks for a JSON-object response format, disables
        streaming, and forwards every entry of ``self.params`` as an extra
        keyword argument.

        Args:
            user_prompt: Generated prompt to send to DeepSeek

        Returns:
            Message content of the first choice in the DeepSeek response
        """
        send_request = self.client.chat.completions.create
        request = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": self.prompts["system_prompt"]},
                {"role": "user", "content": user_prompt},
            ],
            "response_format": {"type": "json_object"},
            "stream": False,
        }
        completion = send_request(**request, **self.params)

        first_choice = completion.choices[0]
        return first_choice.message.content

__init__(model='deepseek-chat', params=None, api_key=None, prompt_path=None)

Initialize DeepSeek extractor.

Parameters:

Name Type Description Default
model str

DeepSeek model name to use

'deepseek-chat'
params dict | None

Additional parameters for the API

None
api_key str | None

DeepSeek API key (defaults to environment variable)

None
prompt_path Path | None

Path to custom prompt YAML file

None

Raises:

Type Description
ValueError

If API key is not provided

Source code in sqldeps/llm_parsers/deepseek.py
def __init__(
    self,
    model: str = "deepseek-chat",
    params: dict | None = None,
    api_key: str | None = None,
    prompt_path: Path | None = None,
) -> None:
    """Set up the extractor and its DeepSeek client.

    Args:
        model: DeepSeek model name to use
        params: Additional parameters for the API
        api_key: DeepSeek API key; when falsy, the value of the
            ``ENV_VAR_NAME`` environment variable is used instead
        prompt_path: Path to custom prompt YAML file

    Raises:
        ValueError: If no key is passed and the environment variable is
            unset or empty
    """
    super().__init__(model, params, prompt_path=prompt_path)

    # An explicitly passed key takes precedence over the environment.
    resolved_key = api_key if api_key else os.getenv(self.ENV_VAR_NAME)
    if resolved_key:
        self.client = OpenAI(
            api_key=resolved_key, base_url="https://api.deepseek.com"
        )
    else:
        raise ValueError(
            "No API key provided. Either pass api_key parameter or set "
            f"{self.ENV_VAR_NAME} environment variable."
        )

GroqExtractor

Bases: BaseSQLExtractor

Groq-based SQL dependency extractor.

Attributes:

Name Type Description
ENV_VAR_NAME

Environment variable name for the API key

client

Groq client instance

Source code in sqldeps/llm_parsers/groq.py
class GroqExtractor(BaseSQLExtractor):
    """SQL dependency extractor that talks to the Groq chat API.

    Attributes:
        ENV_VAR_NAME: Name of the environment variable consulted for the API
            key when none is passed to the constructor
        client: Groq client instance
    """

    # Environment variable expected to hold the Groq key
    ENV_VAR_NAME = "GROQ_API_KEY"

    def __init__(
        self,
        model: str = "llama-3.3-70b-versatile",
        params: dict | None = None,
        api_key: str | None = None,
        prompt_path: Path | None = None,
    ) -> None:
        """Set up the extractor and its Groq client.

        Args:
            model: Groq model name to use
            params: Additional parameters for the API
            api_key: Groq API key; when falsy, the value of the
                ``ENV_VAR_NAME`` environment variable is used instead
            prompt_path: Path to custom prompt YAML file

        Raises:
            ValueError: If no key is passed and the environment variable is
                unset or empty
        """
        super().__init__(model, params, prompt_path=prompt_path)

        # An explicitly passed key takes precedence over the environment.
        resolved_key = api_key if api_key else os.getenv(self.ENV_VAR_NAME)
        if resolved_key:
            self.client = Groq(api_key=resolved_key)
        else:
            raise ValueError(
                "No API key provided. Either pass api_key parameter or set "
                f"{self.ENV_VAR_NAME} environment variable."
            )

    def _query_llm(self, user_prompt: str) -> str:
        """Send the prompt to Groq and return the reply text.

        The request carries the configured system prompt followed by the
        user prompt, asks for a JSON-object response format, and forwards
        every entry of ``self.params`` as an extra keyword argument.

        Args:
            user_prompt: Generated prompt to send to Groq

        Returns:
            Message content of the first choice in the Groq response
        """
        send_request = self.client.chat.completions.create
        request = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": self.prompts["system_prompt"]},
                {"role": "user", "content": user_prompt},
            ],
            "response_format": {"type": "json_object"},
        }
        completion = send_request(**request, **self.params)

        first_choice = completion.choices[0]
        return first_choice.message.content

__init__(model='llama-3.3-70b-versatile', params=None, api_key=None, prompt_path=None)

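Initialize Groq extractor.

Takes the same arguments as the other extractors on this page: model (default 'llama-3.3-70b-versatile'), params, api_key (falls back to the GROQ_API_KEY environment variable when not given), and prompt_path. Raises ValueError if no API key is provided by either route.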

Source code in sqldeps/llm_parsers/groq.py
def __init__(
    self,
    model: str = "llama-3.3-70b-versatile",
    params: dict | None = None,
    api_key: str | None = None,
    prompt_path: Path | None = None,
) -> None:
    """Set up the extractor and its Groq client.

    Args:
        model: Groq model name to use
        params: Additional parameters for the API
        api_key: Groq API key; when falsy, the value of the
            ``ENV_VAR_NAME`` environment variable is used instead
        prompt_path: Path to custom prompt YAML file

    Raises:
        ValueError: If no key is passed and the environment variable is
            unset or empty
    """
    super().__init__(model, params, prompt_path=prompt_path)

    # An explicitly passed key takes precedence over the environment.
    resolved_key = api_key if api_key else os.getenv(self.ENV_VAR_NAME)
    if resolved_key:
        self.client = Groq(api_key=resolved_key)
    else:
        raise ValueError(
            "No API key provided. Either pass api_key parameter or set "
            f"{self.ENV_VAR_NAME} environment variable."
        )

OpenaiExtractor

Bases: BaseSQLExtractor

OpenAI-based SQL dependency extractor.

Attributes:

Name Type Description
ENV_VAR_NAME

Environment variable name for the API key

client

OpenAI client instance

Source code in sqldeps/llm_parsers/openai.py
class OpenaiExtractor(BaseSQLExtractor):
    """SQL dependency extractor that talks to the OpenAI chat API.

    Attributes:
        ENV_VAR_NAME: Name of the environment variable consulted for the API
            key when none is passed to the constructor
        client: OpenAI client instance
    """

    # Environment variable expected to hold the OpenAI key
    ENV_VAR_NAME = "OPENAI_API_KEY"

    def __init__(
        self,
        model: str = "gpt-4o",
        params: dict | None = None,
        api_key: str | None = None,
        prompt_path: Path | None = None,
    ) -> None:
        """Set up the extractor and its OpenAI client.

        Args:
            model: OpenAI model name to use
            params: Additional parameters for the API
            api_key: OpenAI API key; when falsy, the value of the
                ``ENV_VAR_NAME`` environment variable is used instead
            prompt_path: Path to custom prompt YAML file

        Raises:
            ValueError: If no key is passed and the environment variable is
                unset or empty
        """
        super().__init__(model, params, prompt_path=prompt_path)

        # An explicitly passed key takes precedence over the environment.
        resolved_key = api_key if api_key else os.getenv(self.ENV_VAR_NAME)
        if resolved_key:
            self.client = OpenAI(api_key=resolved_key)
        else:
            raise ValueError(
                "No API key provided. Either pass api_key parameter or set "
                f"{self.ENV_VAR_NAME} environment variable."
            )

    def _query_llm(self, user_prompt: str) -> str:
        """Send the prompt to OpenAI and return the reply text.

        The request carries the configured system prompt followed by the
        user prompt, asks for a JSON-object response format, and forwards
        every entry of ``self.params`` as an extra keyword argument.

        Args:
            user_prompt: Generated prompt to send to OpenAI

        Returns:
            Message content of the first choice in the OpenAI response
        """
        send_request = self.client.chat.completions.create
        request = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": self.prompts["system_prompt"]},
                {"role": "user", "content": user_prompt},
            ],
            "response_format": {"type": "json_object"},
        }
        completion = send_request(**request, **self.params)

        first_choice = completion.choices[0]
        return first_choice.message.content

__init__(model='gpt-4o', params=None, api_key=None, prompt_path=None)

Initialize OpenAI extractor.

Parameters:

Name Type Description Default
model str

OpenAI model name to use

'gpt-4o'
params dict | None

Additional parameters for the API

None
api_key str | None

OpenAI API key (defaults to environment variable)

None
prompt_path Path | None

Path to custom prompt YAML file

None

Raises:

Type Description
ValueError

If API key is not provided

Source code in sqldeps/llm_parsers/openai.py
def __init__(
    self,
    model: str = "gpt-4o",
    params: dict | None = None,
    api_key: str | None = None,
    prompt_path: Path | None = None,
) -> None:
    """Set up the extractor and its OpenAI client.

    Args:
        model: OpenAI model name to use
        params: Additional parameters for the API
        api_key: OpenAI API key; when falsy, the value of the
            ``ENV_VAR_NAME`` environment variable is used instead
        prompt_path: Path to custom prompt YAML file

    Raises:
        ValueError: If no key is passed and the environment variable is
            unset or empty
    """
    super().__init__(model, params, prompt_path=prompt_path)

    # An explicitly passed key takes precedence over the environment.
    resolved_key = api_key if api_key else os.getenv(self.ENV_VAR_NAME)
    if resolved_key:
        self.client = OpenAI(api_key=resolved_key)
    else:
        raise ValueError(
            "No API key provided. Either pass api_key parameter or set "
            f"{self.ENV_VAR_NAME} environment variable."
        )

create_extractor(framework='groq', model=None, params=None, prompt_path=None)

Create an appropriate SQL extractor based on the specified framework.

Parameters:

Name Type Description Default
framework str

The LLM framework to use ("groq", "openai", or "deepseek")

'groq'
model str | None

The model name within the selected framework (uses default if None)

None
params dict | None

Additional parameters to pass to the LLM API

None
prompt_path Path | None

Path to a custom prompt YAML file

None

Returns:

Type Description
BaseSQLExtractor

An instance of the appropriate SQL extractor

Raises:

Type Description
ValueError

If an unsupported framework is specified

Source code in sqldeps/llm_parsers/__init__.py
def create_extractor(
    framework: str = "groq",
    model: str | None = None,
    params: dict | None = None,
    prompt_path: Path | None = None,
) -> BaseSQLExtractor:
    """Build the SQL extractor registered for the requested framework.

    The framework name is lower-cased and looked up in ``DEFAULTS``; the
    matching entry supplies the extractor class and its fallback model.

    Args:
        framework: The LLM framework to use ("groq", "openai", or "deepseek")
        model: The model name within the selected framework; when falsy, the
            framework's default model is used
        params: Additional parameters to pass to the LLM API
        prompt_path: Path to a custom prompt YAML file

    Returns:
        An instance of the extractor class registered for the framework

    Raises:
        ValueError: If the lower-cased framework is not a key of ``DEFAULTS``
    """
    framework = framework.lower()

    if framework in DEFAULTS:
        entry = DEFAULTS[framework]
        extractor_cls = entry["class"]
        # A caller-supplied model wins; otherwise use the framework default.
        chosen_model = model if model else entry["model"]
        return extractor_cls(
            model=chosen_model, params=params, prompt_path=prompt_path
        )

    supported = ", ".join(DEFAULTS.keys())
    raise ValueError(
        f"Unsupported framework: {framework}. " f"Must be one of: {supported}"
    )