diff --git a/api/core/model_runtime/model_providers/_position.yaml b/api/core/model_runtime/model_providers/_position.yaml
index 97116978c..049ad67a7 100644
--- a/api/core/model_runtime/model_providers/_position.yaml
+++ b/api/core/model_runtime/model_providers/_position.yaml
@@ -2,6 +2,7 @@
- anthropic
- azure_openai
- google
+- nvidia
- cohere
- bedrock
- togetherai
diff --git a/api/core/model_runtime/model_providers/nvidia/__init__.py b/api/core/model_runtime/model_providers/nvidia/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/api/core/model_runtime/model_providers/nvidia/_assets/icon_l_en.png b/api/core/model_runtime/model_providers/nvidia/_assets/icon_l_en.png
new file mode 100644
index 000000000..5a7f42e61
Binary files /dev/null and b/api/core/model_runtime/model_providers/nvidia/_assets/icon_l_en.png differ
diff --git a/api/core/model_runtime/model_providers/nvidia/_assets/icon_s_en.svg b/api/core/model_runtime/model_providers/nvidia/_assets/icon_s_en.svg
new file mode 100644
index 000000000..9fc02f916
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/_assets/icon_s_en.svg
@@ -0,0 +1,3 @@
+
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
new file mode 100644
index 000000000..78ab4cb93
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
@@ -0,0 +1,4 @@
+- google/gemma-7b
+- meta/llama2-70b
+- mistralai/mixtral-8x7b-instruct-v0.1
+- fuyu-8b
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/fuyu-8b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/fuyu-8b.yaml
new file mode 100644
index 000000000..49749bba9
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/fuyu-8b.yaml
@@ -0,0 +1,27 @@
+model: fuyu-8b
+label:
+ zh_Hans: fuyu-8b
+ en_US: fuyu-8b
+model_type: llm
+features:
+ - agent-thought
+ - vision
+model_properties:
+ mode: chat
+ context_size: 16000
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ default: 0.2
+ min: 0.1
+ max: 1
+ - name: top_p
+ use_template: top_p
+ default: 0.7
+ min: 0.1
+ max: 1
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 1024
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/gemma-7b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/gemma-7b.yaml
new file mode 100644
index 000000000..c50dad4f1
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/gemma-7b.yaml
@@ -0,0 +1,30 @@
+model: google/gemma-7b
+label:
+ zh_Hans: google/gemma-7b
+ en_US: google/gemma-7b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 8192
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llama2-70b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/llama2-70b.yaml
new file mode 100644
index 000000000..46422cbdb
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/llama2-70b.yaml
@@ -0,0 +1,30 @@
+model: meta/llama2-70b
+label:
+ zh_Hans: meta/llama2-70b
+ en_US: meta/llama2-70b
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llm.py b/api/core/model_runtime/model_providers/nvidia/llm/llm.py
new file mode 100644
index 000000000..5d05e606b
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/llm.py
@@ -0,0 +1,247 @@
+import json
+from collections.abc import Generator
+from typing import Optional, Union
+
+import requests
+from yarl import URL
+
+from core.model_runtime.entities.llm_entities import LLMMode, LLMResult
+from core.model_runtime.entities.message_entities import (
+ PromptMessage,
+ PromptMessageContentType,
+ PromptMessageFunction,
+ PromptMessageTool,
+ UserPromptMessage,
+)
+from core.model_runtime.errors.invoke import InvokeError
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
+from core.model_runtime.utils import helper
+
+
+class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
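+    # models mapped to a non-empty suffix are served from a model-specific
+    # ai.api.nvidia.com URL ('server_url'); models mapped to '' use the shared
+    # OpenAI-compatible endpoint at integrate.api.nvidia.com ('endpoint_url')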
+ MODEL_SUFFIX_MAP = {
+ 'fuyu-8b': 'vlm/adept/fuyu-8b',
+ 'mistralai/mixtral-8x7b-instruct-v0.1': '',
+ 'google/gemma-7b': '',
+ 'meta/llama2-70b': ''
+ }
+
+ def _invoke(self, model: str, credentials: dict,
+ prompt_messages: list[PromptMessage], model_parameters: dict,
+ tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
+ stream: bool = True, user: Optional[str] = None) \
+ -> Union[LLMResult, Generator]:
+
+ self._add_custom_parameters(credentials, model)
+ prompt_messages = self._transform_prompt_messages(prompt_messages)
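+        # stop words and the user id are cleared here and not forwarded to the
+        # NVIDIA endpoints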
+ stop = []
+ user = None
+
+ return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
+
+ def _transform_prompt_messages(self, prompt_messages: list[PromptMessage]) -> list[PromptMessage]:
+ """
+ Handle Image transform
+ """
+ for i, p in enumerate(prompt_messages):
+ if isinstance(p, UserPromptMessage) and isinstance(p.content, list):
+ content = p.content
+ content_text = ''
+ for prompt_content in content:
+ if prompt_content.type == PromptMessageContentType.TEXT:
+ content_text += prompt_content.data
+ else:
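+                        # non-text (image) content is inlined into the prompt as an
+                        # HTML <img> tag, which the fuyu-8b VLM endpoint accepts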
+                        content_text += f'<img src="{prompt_content.data}" />'
+
+ prompt_message = UserPromptMessage(
+ content=content_text
+ )
+ prompt_messages[i] = prompt_message
+ return prompt_messages
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ self._add_custom_parameters(credentials, model)
+ self._validate_credentials(model, credentials)
+
+ def _add_custom_parameters(self, credentials: dict, model: str) -> None:
+ credentials['mode'] = 'chat'
+
+ if self.MODEL_SUFFIX_MAP[model]:
+ credentials['server_url'] = f'https://ai.api.nvidia.com/v1/{self.MODEL_SUFFIX_MAP[model]}'
+            credentials.pop('endpoint_url', None)
+ else:
+ credentials['endpoint_url'] = 'https://integrate.api.nvidia.com/v1'
+
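+        # the OpenAI-compatible base splits streamed chunks on this delimiter;
+        # NVIDIA's stream separates chunks with a single newline rather than a blank line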
+ credentials['stream_mode_delimiter'] = '\n'
+
+ def _validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials using requests to ensure compatibility with all providers following OpenAI's API standard.
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ headers = {
+ 'Content-Type': 'application/json'
+ }
+
+ api_key = credentials.get('api_key')
+ if api_key:
+ headers["Authorization"] = f"Bearer {api_key}"
+
+ endpoint_url = credentials['endpoint_url'] if 'endpoint_url' in credentials else None
+ if endpoint_url and not endpoint_url.endswith('/'):
+ endpoint_url += '/'
+ server_url = credentials['server_url'] if 'server_url' in credentials else None
+
+ # prepare the payload for a simple ping to the model
+ data = {
+ 'model': model,
+ 'max_tokens': 5
+ }
+
+ completion_type = LLMMode.value_of(credentials['mode'])
+
+ if completion_type is LLMMode.CHAT:
+ data['messages'] = [
+ {
+ "role": "user",
+ "content": "ping"
+ },
+ ]
+ if 'endpoint_url' in credentials:
+ endpoint_url = str(URL(endpoint_url) / 'chat' / 'completions')
+ elif 'server_url' in credentials:
+ endpoint_url = server_url
+ elif completion_type is LLMMode.COMPLETION:
+ data['prompt'] = 'ping'
+ if 'endpoint_url' in credentials:
+ endpoint_url = str(URL(endpoint_url) / 'completions')
+ elif 'server_url' in credentials:
+ endpoint_url = server_url
+ else:
+ raise ValueError("Unsupported completion type for model configuration.")
+
+ # send a post request to validate the credentials
+ response = requests.post(
+ endpoint_url,
+ headers=headers,
+ json=data,
+ timeout=(10, 60)
+ )
+
+ if response.status_code != 200:
+ raise CredentialsValidateFailedError(
+ f'Credentials validation failed with status code {response.status_code}')
+
+ try:
+ json_result = response.json()
+ except json.JSONDecodeError as e:
+ raise CredentialsValidateFailedError('Credentials validation failed: JSON decode error')
+ except CredentialsValidateFailedError:
+ raise
+ except Exception as ex:
+ raise CredentialsValidateFailedError(f'An error occurred during credentials validation: {str(ex)}')
+
+ def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict,
+ tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
+ stream: bool = True, \
+ user: Optional[str] = None) -> Union[LLMResult, Generator]:
+ """
+ Invoke llm completion model
+
+ :param model: model name
+ :param credentials: credentials
+ :param prompt_messages: prompt messages
+ :param model_parameters: model parameters
+ :param stop: stop words
+ :param stream: is stream response
+ :param user: unique user id
+ :return: full response or stream response chunk generator result
+ """
+ headers = {
+ 'Content-Type': 'application/json',
+ 'Accept-Charset': 'utf-8',
+ }
+
+ api_key = credentials.get('api_key')
+ if api_key:
+ headers['Authorization'] = f'Bearer {api_key}'
+
+ if stream:
+ headers['Accept'] = 'text/event-stream'
+
+ endpoint_url = credentials['endpoint_url'] if 'endpoint_url' in credentials else None
+ if endpoint_url and not endpoint_url.endswith('/'):
+ endpoint_url += '/'
+ server_url = credentials['server_url'] if 'server_url' in credentials else None
+
+ data = {
+ "model": model,
+ "stream": stream,
+ **model_parameters
+ }
+
+ completion_type = LLMMode.value_of(credentials['mode'])
+
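+        # model-specific 'server_url' endpoints are complete invoke URLs and are used
+        # as-is; the shared 'endpoint_url' still needs the completions path appended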
+ if completion_type is LLMMode.CHAT:
+ if 'endpoint_url' in credentials:
+ endpoint_url = str(URL(endpoint_url) / 'chat' / 'completions')
+ elif 'server_url' in credentials:
+ endpoint_url = server_url
+ data['messages'] = [self._convert_prompt_message_to_dict(m) for m in prompt_messages]
+ elif completion_type is LLMMode.COMPLETION:
+            data['prompt'] = prompt_messages[0].content
+ if 'endpoint_url' in credentials:
+ endpoint_url = str(URL(endpoint_url) / 'completions')
+ elif 'server_url' in credentials:
+ endpoint_url = server_url
+ else:
+ raise ValueError("Unsupported completion type for model configuration.")
+
+
+ # annotate tools with names, descriptions, etc.
+ function_calling_type = credentials.get('function_calling_type', 'no_call')
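+        # 'function_call' uses the legacy OpenAI functions field, while 'tool_call'
+        # uses the newer tools / tool_choice fields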
+ formatted_tools = []
+ if tools:
+ if function_calling_type == 'function_call':
+ data['functions'] = [{
+ "name": tool.name,
+ "description": tool.description,
+ "parameters": tool.parameters
+ } for tool in tools]
+ elif function_calling_type == 'tool_call':
+ data["tool_choice"] = "auto"
+
+ for tool in tools:
+ formatted_tools.append(helper.dump_model(PromptMessageFunction(function=tool)))
+
+ data["tools"] = formatted_tools
+
+ if stop:
+ data["stop"] = stop
+
+ if user:
+ data["user"] = user
+
+ response = requests.post(
+ endpoint_url,
+ headers=headers,
+ json=data,
+ timeout=(10, 60),
+ stream=stream
+ )
+
+ if response.encoding is None or response.encoding == 'ISO-8859-1':
+ response.encoding = 'utf-8'
+
+ if not response.ok:
+ raise InvokeError(f"API request failed with status code {response.status_code}: {response.text}")
+
+ if stream:
+ return self._handle_generate_stream_response(model, credentials, response, prompt_messages)
+
+ return self._handle_generate_response(model, credentials, response, prompt_messages)
diff --git a/api/core/model_runtime/model_providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml b/api/core/model_runtime/model_providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml
new file mode 100644
index 000000000..fbd8cc268
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/llm/mistralai_mixtral-8x7b-instruct-v0.1.yaml
@@ -0,0 +1,30 @@
+model: mistralai/mixtral-8x7b-instruct-v0.1
+label:
+ zh_Hans: mistralai/mixtral-8x7b-instruct-v0.1
+ en_US: mistralai/mixtral-8x7b-instruct-v0.1
+model_type: llm
+features:
+ - agent-thought
+model_properties:
+ mode: chat
+ context_size: 32768
+parameter_rules:
+ - name: temperature
+ use_template: temperature
+ - name: top_p
+ use_template: top_p
+ - name: max_tokens
+ use_template: max_tokens
+ default: 512
+ min: 1
+ max: 1024
+ - name: frequency_penalty
+ use_template: frequency_penalty
+ min: -2
+ max: 2
+ default: 0
+ - name: presence_penalty
+ use_template: presence_penalty
+ min: -2
+ max: 2
+ default: 0
diff --git a/api/core/model_runtime/model_providers/nvidia/nvidia.py b/api/core/model_runtime/model_providers/nvidia/nvidia.py
new file mode 100644
index 000000000..e83f8badb
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/nvidia.py
@@ -0,0 +1,30 @@
+import logging
+
+from core.model_runtime.entities.model_entities import ModelType
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.model_provider import ModelProvider
+
+logger = logging.getLogger(__name__)
+
+
+class NVIDIAProvider(ModelProvider):
+
+ def validate_provider_credentials(self, credentials: dict) -> None:
+ """
+ Validate provider credentials
+ if validate failed, raise exception
+
+ :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
+ """
+ try:
+ model_instance = self.get_model_instance(ModelType.LLM)
+
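+            # validate the API key with a minimal LLM call against
+            # mixtral-8x7b-instruct, one of the provider's predefined models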
+ model_instance.validate_credentials(
+ model='mistralai/mixtral-8x7b-instruct-v0.1',
+ credentials=credentials
+ )
+ except CredentialsValidateFailedError as ex:
+ raise ex
+ except Exception as ex:
+ logger.exception(f'{self.get_provider_schema().provider} credentials validate failed')
+ raise ex
diff --git a/api/core/model_runtime/model_providers/nvidia/nvidia.yaml b/api/core/model_runtime/model_providers/nvidia/nvidia.yaml
new file mode 100644
index 000000000..c3c316321
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/nvidia.yaml
@@ -0,0 +1,30 @@
+provider: nvidia
+label:
+ en_US: NVIDIA
+icon_small:
+ en_US: icon_s_en.svg
+icon_large:
+ en_US: icon_l_en.png
+background: "#FFFFFF"
+help:
+ title:
+ en_US: Get your API Key from NVIDIA
+ zh_Hans: 从 NVIDIA 获取 API Key
+ url:
+ en_US: https://build.nvidia.com/explore/discover
+supported_model_types:
+ - llm
+ - text-embedding
+ - rerank
+configurate_methods:
+ - predefined-model
+provider_credential_schema:
+ credential_form_schemas:
+ - variable: api_key
+ label:
+ en_US: API Key
+ type: secret-input
+ required: true
+ placeholder:
+ zh_Hans: 在此输入您的 API Key
+ en_US: Enter your API Key
diff --git a/api/core/model_runtime/model_providers/nvidia/rerank/__init__.py b/api/core/model_runtime/model_providers/nvidia/rerank/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/api/core/model_runtime/model_providers/nvidia/rerank/rerank-qa-mistral-4b.yaml b/api/core/model_runtime/model_providers/nvidia/rerank/rerank-qa-mistral-4b.yaml
new file mode 100644
index 000000000..7703ca21a
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/rerank/rerank-qa-mistral-4b.yaml
@@ -0,0 +1,4 @@
+model: nv-rerank-qa-mistral-4b:1
+model_type: rerank
+model_properties:
+ context_size: 8192
diff --git a/api/core/model_runtime/model_providers/nvidia/rerank/rerank.py b/api/core/model_runtime/model_providers/nvidia/rerank/rerank.py
new file mode 100644
index 000000000..9d33f55bc
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/rerank/rerank.py
@@ -0,0 +1,112 @@
+from math import exp
+from typing import Optional
+
+import requests
+
+from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
+from core.model_runtime.errors.invoke import (
+ InvokeAuthorizationError,
+ InvokeBadRequestError,
+ InvokeConnectionError,
+ InvokeError,
+ InvokeRateLimitError,
+ InvokeServerUnavailableError,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.rerank_model import RerankModel
+
+
+class NvidiaRerankModel(RerankModel):
+ """
+ Model class for NVIDIA rerank model.
+ """
+
+ def _sigmoid(self, logit: float) -> float:
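+        # the reranking endpoint returns raw logits; squash them into (0, 1)
+        # so they can be used as normalized relevance scores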
+ return 1/(1+exp(-logit))
+
+ def _invoke(self, model: str, credentials: dict,
+ query: str, docs: list[str], score_threshold: Optional[float] = None, top_n: Optional[int] = None,
+ user: Optional[str] = None) -> RerankResult:
+ """
+ Invoke rerank model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param query: search query
+ :param docs: docs for reranking
+ :param score_threshold: score threshold
+ :param top_n: top n documents to return
+ :param user: unique user id
+ :return: rerank result
+ """
+ if len(docs) == 0:
+ return RerankResult(model=model, docs=[])
+
+ try:
+ invoke_url = "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking"
+
+ headers = {
+ "Authorization": f"Bearer {credentials.get('api_key')}",
+ "Accept": "application/json",
+ }
+ payload = {
+ "model": model,
+ "query": {"text": query},
+ "passages": [{"text": doc} for doc in docs],
+ }
+
+ session = requests.Session()
+ response = session.post(invoke_url, headers=headers, json=payload)
+ response.raise_for_status()
+ results = response.json()
+
+ rerank_documents = []
+ for result in results['rankings']:
+ index = result['index']
+ logit = result['logit']
+ rerank_document = RerankDocument(
+ index=index,
+ text=docs[index],
+ score=self._sigmoid(logit),
+ )
+
+ rerank_documents.append(rerank_document)
+
+ return RerankResult(model=model, docs=rerank_documents)
+ except requests.HTTPError as e:
+ raise InvokeServerUnavailableError(str(e))
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ self._invoke(
+ model=model,
+ credentials=credentials,
+ query="What is the GPU memory bandwidth of H100 SXM?",
+ docs=[
+ "Example doc 1",
+ "Example doc 2",
+ "Example doc 3",
+ ],
+ )
+ except Exception as ex:
+ raise CredentialsValidateFailedError(str(ex))
+
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+ """
+ Map model invoke error to unified error
+ """
+ return {
+ InvokeConnectionError: [requests.ConnectionError],
+ InvokeServerUnavailableError: [requests.HTTPError],
+ InvokeRateLimitError: [],
+ InvokeAuthorizationError: [requests.HTTPError],
+ InvokeBadRequestError: [requests.RequestException]
+ }
diff --git a/api/core/model_runtime/model_providers/nvidia/text_embedding/__init__.py b/api/core/model_runtime/model_providers/nvidia/text_embedding/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/api/core/model_runtime/model_providers/nvidia/text_embedding/embed-qa-4.yaml b/api/core/model_runtime/model_providers/nvidia/text_embedding/embed-qa-4.yaml
new file mode 100644
index 000000000..a9b5e25c3
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/text_embedding/embed-qa-4.yaml
@@ -0,0 +1,5 @@
+model: NV-Embed-QA
+model_type: text-embedding
+model_properties:
+ context_size: 512
+ max_chunks: 1
diff --git a/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
new file mode 100644
index 000000000..a2adef400
--- /dev/null
+++ b/api/core/model_runtime/model_providers/nvidia/text_embedding/text_embedding.py
@@ -0,0 +1,172 @@
+import time
+from json import JSONDecodeError, dumps
+from typing import Optional
+
+from requests import post
+
+from core.model_runtime.entities.model_entities import PriceType
+from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
+from core.model_runtime.errors.invoke import (
+ InvokeAuthorizationError,
+ InvokeBadRequestError,
+ InvokeConnectionError,
+ InvokeError,
+ InvokeRateLimitError,
+ InvokeServerUnavailableError,
+)
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
+
+
+class NvidiaTextEmbeddingModel(TextEmbeddingModel):
+ """
+ Model class for Nvidia text embedding model.
+ """
+ api_base: str = 'https://ai.api.nvidia.com/v1/retrieval/nvidia/embeddings'
+ models: list[str] = ['NV-Embed-QA']
+
+ def _invoke(self, model: str, credentials: dict,
+ texts: list[str], user: Optional[str] = None) \
+ -> TextEmbeddingResult:
+ """
+ Invoke text embedding model
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :param user: unique user id
+ :return: embeddings result
+ """
+ api_key = credentials['api_key']
+ if model not in self.models:
+ raise InvokeBadRequestError('Invalid model name')
+ if not api_key:
+ raise CredentialsValidateFailedError('api_key is required')
+ url = self.api_base
+ headers = {
+ 'Authorization': 'Bearer ' + api_key,
+ 'Content-Type': 'application/json'
+ }
+
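+        # NV-Embed-QA is declared with max_chunks: 1, so each request embeds a
+        # single text; only the first element of `texts` is sent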
+ data = {
+ 'model': model,
+ 'input': texts[0],
+ 'input_type': 'query'
+ }
+
+ try:
+ response = post(url, headers=headers, data=dumps(data))
+ except Exception as e:
+ raise InvokeConnectionError(str(e))
+
+ if response.status_code != 200:
+ try:
+ resp = response.json()
+ msg = resp['detail']
+ if response.status_code == 401:
+ raise InvokeAuthorizationError(msg)
+ elif response.status_code == 429:
+ raise InvokeRateLimitError(msg)
+ elif response.status_code == 500:
+ raise InvokeServerUnavailableError(msg)
+ else:
+ raise InvokeError(msg)
+ except JSONDecodeError as e:
+ raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")
+
+ try:
+ resp = response.json()
+ embeddings = resp['data']
+ usage = resp['usage']
+ except Exception as e:
+ raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")
+
+ usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage['total_tokens'])
+
+ result = TextEmbeddingResult(
+ model=model,
+ embeddings=[[
+ float(data) for data in x['embedding']
+ ] for x in embeddings],
+ usage=usage
+ )
+
+ return result
+
+ def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
+ """
+ Get number of tokens for given prompt messages
+
+ :param model: model name
+ :param credentials: model credentials
+ :param texts: texts to embed
+ :return:
+ """
+ num_tokens = 0
+ for text in texts:
+            # use the GPT-2 tokenizer to approximate the token count
+ num_tokens += self._get_num_tokens_by_gpt2(text)
+ return num_tokens
+
+ def validate_credentials(self, model: str, credentials: dict) -> None:
+ """
+ Validate model credentials
+
+ :param model: model name
+ :param credentials: model credentials
+ :return:
+ """
+ try:
+ self._invoke(model=model, credentials=credentials, texts=['ping'])
+ except InvokeAuthorizationError:
+ raise CredentialsValidateFailedError('Invalid api key')
+
+ @property
+ def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
+ return {
+ InvokeConnectionError: [
+ InvokeConnectionError
+ ],
+ InvokeServerUnavailableError: [
+ InvokeServerUnavailableError
+ ],
+ InvokeRateLimitError: [
+ InvokeRateLimitError
+ ],
+ InvokeAuthorizationError: [
+ InvokeAuthorizationError
+ ],
+ InvokeBadRequestError: [
+ KeyError
+ ]
+ }
+
+ def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
+ """
+ Calculate response usage
+
+ :param model: model name
+ :param credentials: model credentials
+ :param tokens: input tokens
+ :return: usage
+ """
+ # get input price info
+ input_price_info = self.get_price(
+ model=model,
+ credentials=credentials,
+ price_type=PriceType.INPUT,
+ tokens=tokens
+ )
+
+ # transform usage
+ usage = EmbeddingUsage(
+ tokens=tokens,
+ total_tokens=tokens,
+ unit_price=input_price_info.unit_price,
+ price_unit=input_price_info.unit,
+ total_price=input_price_info.total_amount,
+ currency=input_price_info.currency,
+ latency=time.perf_counter() - self.started_at
+ )
+
+ return usage