> ## Documentation Index
> Fetch the complete documentation index at: https://docs.maia.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# Postgres Vector Upsert

export const ComponentMetadata = ({warehouses, unsupportedWarehouses = [], componentType, connectionInputs, connectionOutputs}) => {
  const allWarehouses = [...warehouses.map(w => ({
    name: w,
    supported: true
  })), ...unsupportedWarehouses.map(w => ({
    name: w,
    supported: false
  }))];
  return <div style={{
    background: 'var(--colors-background-light, #f9fafb)',
    border: '1px solid var(--colors-border-default, #e5e7eb)',
    borderRadius: '12px',
    padding: '20px 28px',
    marginBottom: '28px',
    boxShadow: '0 1px 4px rgba(0,0,0,0.10)'
  }}>
      <table style={{
    width: '100%',
    borderCollapse: 'collapse'
  }}>
        <tbody>
          <tr>
            <td style={{
    fontWeight: '600',
    paddingRight: '32px',
    paddingBottom: '14px',
    whiteSpace: 'nowrap',
    verticalAlign: 'middle',
    width: '180px'
  }}>Project Availability</td>
            <td style={{
    paddingBottom: '14px',
    verticalAlign: 'middle'
  }}>
              <div style={{
    display: 'flex',
    flexWrap: 'wrap',
    gap: '8px'
  }}>
                {allWarehouses.map((w, i) => <span key={i} style={{
    background: w.supported ? '#dcfce7' : '#fee2e2',
    color: w.supported ? '#15803d' : '#b91c1c',
    border: `1px solid ${w.supported ? '#bbf7d0' : '#fca5a5'}`,
    borderRadius: '9999px',
    padding: '3px 12px',
    fontSize: '0.85rem',
    fontWeight: '500',
    whiteSpace: 'nowrap'
  }}>
                    {w.name} {w.supported ? '✅' : '❌'}
                  </span>)}
              </div>
            </td>
          </tr>
          <tr>
            <td style={{
    fontWeight: '600',
    paddingRight: '32px',
    paddingBottom: '14px',
    whiteSpace: 'nowrap',
    verticalAlign: 'middle'
  }}>Component Type</td>
            <td style={{
    paddingBottom: '14px',
    verticalAlign: 'middle'
  }}>{componentType}</td>
          </tr>
          <tr>
            <td style={{
    fontWeight: '600',
    paddingRight: '32px',
    paddingBottom: '14px',
    whiteSpace: 'nowrap',
    verticalAlign: 'middle'
  }}>Connection Inputs</td>
            <td style={{
    paddingBottom: '14px',
    verticalAlign: 'middle'
  }}>{connectionInputs}</td>
          </tr>
          <tr>
            <td style={{
    fontWeight: '600',
    paddingRight: '32px',
    whiteSpace: 'nowrap',
    verticalAlign: 'middle'
  }}>Connection Outputs</td>
            <td style={{
    verticalAlign: 'middle'
  }}>{connectionOutputs}</td>
          </tr>
        </tbody>
      </table>
    </div>;
};

<ComponentMetadata warehouses={["Snowflake", "Databricks", "Amazon Redshift"]} componentType="Orchestration" connectionInputs="One" connectionOutputs="Unlimited" />

<Info>
  Production use of this feature is available for specific editions only. [Contact our sales team](https://www.matillion.com/contact) for more information.
</Info>

Postgres Vector Upsert is an orchestration component that lets you convert text data stored in your cloud data warehouse into embeddings and then store these embeddings as vectors in your Postgres vector database.

## Video example

<iframe width="560" height="315" src="https://www.youtube.com/embed/YEzr6yqDaGQ?si=x3HNadgxAE-TDIVl&enablejsapi=1" title="YouTube video player" frameBorder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share; fullscreen" referrerPolicy="strict-origin-when-cross-origin" allowFullScreen />

## Properties

<ResponseField name="Name" type="string" required>
  A human-readable name for the component.
</ResponseField>

Select your cloud data warehouse.

<Tabs>
  <Tab title="Snowflake">
    <ResponseField name="Database" type="drop-down" required>
      The Snowflake database. The special value `[Environment Default]` uses the database defined in the environment. Read [Databases, Tables and Views - Overview](https://docs.snowflake.com/en/guides-overview-db) to learn more.
    </ResponseField>

    {/* <!-- param-start:[source.snowflake.schema] | warehouses: [snowflake] --> */}

    <ResponseField name="Schema" type="drop-down" required>
      The Snowflake schema. The special value `[Environment Default]` uses the schema defined in the environment. Read [Database, Schema, and Share DDL](https://docs.snowflake.com/en/sql-reference/ddl-database.html) to learn more.
    </ResponseField>

    {/* <!-- param-start:[source.snowflake.table] | warehouses: [snowflake] --> */}

    <ResponseField name="Table" type="string" required>
      The Snowflake table that holds your source data.
    </ResponseField>
  </Tab>

  <Tab title="Databricks">
    <ResponseField name="Catalog" type="drop-down" required>
      Select a [Databricks Unity Catalog](https://docs.databricks.com/en/data-governance/unity-catalog/index.html). The special value `[Environment Default]` uses the catalog defined in the environment. Selecting a catalog will determine which databases are available in the next parameter.
    </ResponseField>

    {/* <!-- param-start:[source.databricks.schema] | warehouses: [databricks] --> */}

    <ResponseField name="Schema (Database)" type="drop-down" required>
      The Databricks schema. The special value `[Environment Default]` uses the schema defined in the environment. Read [Create and manage schemas](https://docs.databricks.com/en/data-governance/unity-catalog/create-schemas.html) to learn more.
    </ResponseField>

    {/* <!-- param-start:[source.databricks.table] | warehouses: [databricks] --> */}

    <ResponseField name="Table" type="drop-down" required>
      The Databricks table that holds your source data.
    </ResponseField>
  </Tab>

  <Tab title="Amazon Redshift">
    <ResponseField name="Schema" type="drop-down" required>
      The Amazon Redshift schema. The special value `[Environment Default]` uses the schema defined in the environment. Read [Schemas](https://docs.aws.amazon.com/redshift/latest/dg/r_Schemas_and_tables.html) to learn more.

      For more information on using multiple schemas, read [Schemas](https://docs.aws.amazon.com/redshift/latest/dg/r_Schemas_and_tables.html).
    </ResponseField>

    {/* <!-- param-start:[source.redshift.table] | warehouses: [redshift] --> */}

    <ResponseField name="Table" type="drop-down" required>
      An existing Redshift table to use as the input.
    </ResponseField>
  </Tab>
</Tabs>

<ResponseField name="Key Column" type="drop-down" required>
  Set a column as the primary key.
</ResponseField>

{/* <!-- param-start:[source.snowflake.textColumn, source.redshift.textColumn, source.databricks.textColumn] | warehouses: [snowflake, databricks, redshift] --> */}

<ResponseField name="Text Column" type="drop-down" required>
  The column of data to convert into embeddings to then be upserted into your Postgres vector database.
</ResponseField>

{/* <!-- param-start:[source.snowflake.limit, source.redshift.limit, source.databricks.limit] | warehouses: [snowflake, databricks, redshift] --> */}

<ResponseField name="Limit" type="integer">
  Set a limit for the number of rows to load from the table. The default is 1000.
</ResponseField>

{/* <!-- param-start:[embeddingGenerator.embeddingProviderType] | warehouses: [snowflake, databricks, redshift] --> */}

<ResponseField name="Embedding Provider" type="drop-down" required>
  The embedding provider is the API service used to convert your text data into vectors. Choose either [OpenAI](https://platform.openai.com/docs/guides/embeddings) or [Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/titan-embedding-models.html). The embedding provider receives a piece of text (e.g. "How do I log in?") and returns a vector.

  Choose your provider:
</ResponseField>

<Tabs>
  <Tab title="OpenAI">
    {/* <!-- param-start:[embeddingGenerator.openAI.apiKey] | warehouses: [snowflake, databricks, redshift] --> */}

    <ResponseField name="OpenAI API Key" type="drop-down" required>
      Use the drop-down menu to select the corresponding secret definition that denotes the value of your OpenAI API key.

      Read [Secrets and secret definitions](/docs/guides/secrets-and-secret-definitions) to learn how to create a new secret definition.

      To create a new OpenAI API key:

      1. Log in to [OpenAI](https://platform.openai.com/).
      2. Click your avatar in the top-right of the UI.
      3. Click **View API keys**.
      4. Click **+ Create new secret key**.
      5. Give a name for your new secret key and click **Create secret key**.
      6. Copy your new secret key and save it. Then click **Done**.
    </ResponseField>

    {/* <!-- param-end:[embeddingGenerator.openAI.apiKey] --> */}

    {/* <!-- param-start:[embeddingGenerator.openAI.model] | warehouses: [snowflake, databricks, redshift] --> */}

    <ResponseField name="Embedding Model" type="drop-down" required>
      Select an [embedding model](https://platform.openai.com/docs/guides/embeddings).

      Currently supports:

      | Model                  | Dimension |
      | ---------------------- | --------- |
      | text-embedding-ada-002 | 1536      |
      | text-embedding-3-small | 1536      |
      | text-embedding-3-large | 3072      |
    </ResponseField>

    {/* <!-- param-end:[embeddingGenerator.openAI.model] --> */}

    {/* <!-- param-start:[embeddingGenerator.embeddingBatchSize] | warehouses: [snowflake, databricks, redshift] --> */}

    <ResponseField name="API Batch Size" type="integer" required>
      Set the [size of array of data per API call](https://platform.openai.com/docs/api-reference/embeddings/create#embeddings-create-input). The default size is 10. When set to 10, 1000 rows would therefore require 100 API calls.

      You may wish to reduce this number if your rows contain a high volume of data and, conversely, increase it if your rows contain a low volume of data.
    </ResponseField>

    {/* <!-- param-end:[embeddingGenerator.embeddingBatchSize] --> */}
  </Tab>

  <Tab title="Amazon Bedrock">
    {/* <!-- param-start:[embeddingGenerator.aws.region] | warehouses: [snowflake, databricks, redshift] --> */}

    <ResponseField name="Region" type="drop-down" required>
      Select your [AWS region](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/Concepts.RegionsAndAvailabilityZones.html#Concepts.RegionsAndAvailabilityZones.Regions).
    </ResponseField>

    {/* <!-- param-end:[embeddingGenerator.aws.region] --> */}

    {/* <!-- param-start:[embeddingGenerator.aws.model] | warehouses: [snowflake, databricks, redshift] --> */}

    <ResponseField name="Embedding Model" type="drop-down" required>
      Select an embedding model.

      Currently supports:

      | Model                                                                                                                     | Dimension |
      | ------------------------------------------------------------------------------------------------------------------------- | --------- |
      | [Titan Embeddings G1 - Text](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-embed-text.html) | 1536      |
    </ResponseField>

    {/* <!-- param-end:[embeddingGenerator.aws.model] --> */}
  </Tab>
</Tabs>

<ResponseField name="Host" type="string" required>
  Your Postgres hostname.
</ResponseField>

{/* <!-- param-start:[pgVectorDB.port] | warehouses: [snowflake, databricks, redshift] --> */}

<ResponseField name="Port" type="string" required>
  The TCP port number the Postgres server listens on. The default is `5432`.
</ResponseField>

{/* <!-- param-start:[pgVectorDB.database] | warehouses: [snowflake, databricks, redshift] --> */}

<ResponseField name="Database" type="string" required>
  The name of your Postgres database.
</ResponseField>

{/* <!-- param-start:[pgVectorDB.username] | warehouses: [snowflake, databricks, redshift] --> */}

<ResponseField name="Username" type="string" required>
  Your Postgres username.
</ResponseField>

{/* <!-- param-start:[pgVectorDB.password] | warehouses: [snowflake, databricks, redshift] --> */}

<ResponseField name="Password" type="drop-down" required>
  Use the drop-down menu to select the corresponding secret definition that denotes the value of your Postgres password.

  Read [Secrets and secret definitions](/docs/guides/secrets-and-secret-definitions) to learn how to create a new secret definition.
</ResponseField>

{/* <!-- param-start:[pgVectorDB.schema] | warehouses: [snowflake, databricks, redshift] --> */}

<ResponseField name="Schema" type="drop-down" required>
  The Postgres schema. The available schemas are determined by the Postgres database you have provided.
</ResponseField>

{/* <!-- param-start:[pgVectorDB.table] | warehouses: [snowflake, databricks, redshift] --> */}

<ResponseField name="Table" type="drop-down" required>
  The Postgres table to upsert your data into. The available tables are determined by the Postgres schema you have selected.
</ResponseField>

{/* <!-- param-start:[pgVectorDB.keyColumnName] | warehouses: [snowflake, databricks, redshift] --> */}

<ResponseField name="Key Column Name" type="drop-down" required>
  The column in your table to use as the key column.
</ResponseField>

{/* <!-- param-start:[pgVectorDB.textColumnName] | warehouses: [snowflake, databricks, redshift] --> */}

<ResponseField name="Text Column Name" type="drop-down" required>
  The column in your table with your original text data.
</ResponseField>

{/* <!-- param-start:[pgVectorDB.embeddingColumnName] | warehouses: [snowflake, databricks, redshift] --> */}

<ResponseField name="Embedding Column Name" type="drop-down" required>
  The column in your table used to store your embeddings.
</ResponseField>

{/* <!-- param-start:[pgVectorDB.connectionOptions] | warehouses: [snowflake, databricks, redshift] --> */}

<ResponseField name="Connection Options" type="column editor" required>
  * **Parameter:** A JDBC [Postgres parameter](https://jdbc.postgresql.org/documentation/use/#connection-parameters) supported by the database driver.
  * **Value:** A value for the given parameter.
</ResponseField>
