From da25b919804fce335a21478159a2c6e422958789 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9D=9E=E6=B3=95=E6=93=8D=E4=BD=9C?= <hjlarry@163.com>
Date: Tue, 15 Oct 2024 19:13:43 +0800
Subject: [PATCH] fix: remove the stream option of zhipu and gemini (#9319)

---
 .../model_providers/google/llm/gemini-1.5-flash-001.yaml | 9 ---------
 .../model_providers/google/llm/gemini-1.5-flash-002.yaml | 9 ---------
 .../google/llm/gemini-1.5-flash-8b-exp-0827.yaml         | 9 ---------
 .../google/llm/gemini-1.5-flash-8b-exp-0924.yaml         | 9 ---------
 .../google/llm/gemini-1.5-flash-exp-0827.yaml            | 9 ---------
 .../google/llm/gemini-1.5-flash-latest.yaml              | 9 ---------
 .../model_providers/google/llm/gemini-1.5-flash.yaml     | 9 ---------
 .../model_providers/google/llm/gemini-1.5-pro-001.yaml   | 9 ---------
 .../model_providers/google/llm/gemini-1.5-pro-002.yaml   | 9 ---------
 .../google/llm/gemini-1.5-pro-exp-0801.yaml              | 9 ---------
 .../google/llm/gemini-1.5-pro-exp-0827.yaml              | 9 ---------
 .../google/llm/gemini-1.5-pro-latest.yaml                | 9 ---------
 .../model_providers/google/llm/gemini-1.5-pro.yaml       | 9 ---------
 .../model_providers/google/llm/gemini-pro-vision.yaml    | 9 ---------
 .../model_providers/google/llm/gemini-pro.yaml           | 9 ---------
 .../model_providers/zhipuai/llm/chatglm_turbo.yaml       | 9 ---------
 .../model_providers/zhipuai/llm/glm-4-0520.yaml          | 9 ---------
 .../model_providers/zhipuai/llm/glm-4-air.yaml           | 9 ---------
 .../model_providers/zhipuai/llm/glm-4-airx.yaml          | 9 ---------
 .../model_providers/zhipuai/llm/glm-4-flash.yaml         | 9 ---------
 .../model_providers/zhipuai/llm/glm_3_turbo.yaml         | 9 ---------
 .../model_runtime/model_providers/zhipuai/llm/glm_4.yaml | 9 ---------
 .../model_providers/zhipuai/llm/glm_4_long.yaml          | 9 ---------
 .../model_providers/zhipuai/llm/glm_4_plus.yaml          | 9 ---------
 .../model_providers/zhipuai/llm/glm_4v.yaml              | 9 ---------
 .../model_providers/zhipuai/llm/glm_4v_plus.yaml         | 9 ---------
 26 files changed, 234 deletions(-)

diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml
index d84e9937e..8d8cd2484 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-001.yaml
@@ -32,15 +32,6 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
-  - name: stream
-    label:
-      zh_Hans: 流式输出
-      en_US: Stream
-    type: boolean
-    help:
-      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
-      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
-    default: false
 pricing:
   input: '0.00'
   output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml
index 2ff70564b..ae6b85cb2 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-002.yaml
@@ -32,15 +32,6 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
-  - name: stream
-    label:
-      zh_Hans: 流式输出
-      en_US: Stream
-    type: boolean
-    help:
-      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
-      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
-    default: false
 pricing:
   input: '0.00'
   output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
index 4e0209890..bbc697e93 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0827.yaml
@@ -32,15 +32,6 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
-  - name: stream
-    label:
-      zh_Hans: 流式输出
-      en_US: Stream
-    type: boolean
-    help:
-      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
-      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
-    default: false
 pricing:
   input: '0.00'
   output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml
index 2aea8149f..890faf8c3 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-8b-exp-0924.yaml
@@ -32,15 +32,6 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
-  - name: stream
-    label:
-      zh_Hans: 流式输出
-      en_US: Stream
-    type: boolean
-    help:
-      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
-      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
-    default: false
 pricing:
   input: '0.00'
   output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
index faabc5e4d..c5695e5dd 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-exp-0827.yaml
@@ -32,15 +32,6 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
-  - name: stream
-    label:
-      zh_Hans: 流式输出
-      en_US: Stream
-    type: boolean
-    help:
-      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
-      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
-    default: false
 pricing:
   input: '0.00'
   output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
index a22fcca94..d1c264c3a 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash-latest.yaml
@@ -32,15 +32,6 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
-  - name: stream
-    label:
-      zh_Hans: 流式输出
-      en_US: Stream
-    type: boolean
-    help:
-      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
-      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
-    default: false
 pricing:
   input: '0.00'
   output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml
index dfd55c3a9..6b794e9be 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-flash.yaml
@@ -32,15 +32,6 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
-  - name: stream
-    label:
-      zh_Hans: 流式输出
-      en_US: Stream
-    type: boolean
-    help:
-      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
-      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
-    default: false
 pricing:
   input: '0.00'
   output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml
index a1feff171..9ac5e3ad1 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-001.yaml
@@ -32,15 +32,6 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
-  - name: stream
-    label:
-      zh_Hans: 流式输出
-      en_US: Stream
-    type: boolean
-    help:
-      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
-      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
-    default: false
 pricing:
   input: '0.00'
   output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml
index 9ae07a06c..f1d01d076 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-002.yaml
@@ -32,15 +32,6 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
-  - name: stream
-    label:
-      zh_Hans: 流式输出
-      en_US: Stream
-    type: boolean
-    help:
-      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
-      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
-    default: false
 pricing:
   input: '0.00'
   output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
index 97c68f7a1..0a918e0d7 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0801.yaml
@@ -32,15 +32,6 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
-  - name: stream
-    label:
-      zh_Hans: 流式输出
-      en_US: Stream
-    type: boolean
-    help:
-      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
-      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
-    default: false
 pricing:
   input: '0.00'
   output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
index 860e4816a..7452ce46e 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-exp-0827.yaml
@@ -32,15 +32,6 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
-  - name: stream
-    label:
-      zh_Hans: 流式输出
-      en_US: Stream
-    type: boolean
-    help:
-      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
-      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
-    default: false
 pricing:
   input: '0.00'
   output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
index d1bf7d269..65c2d97e9 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro-latest.yaml
@@ -32,15 +32,6 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
-  - name: stream
-    label:
-      zh_Hans: 流式输出
-      en_US: Stream
-    type: boolean
-    help:
-      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
-      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
-    default: false
 pricing:
   input: '0.00'
   output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml
index bdd70b34a..12620b57b 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-1.5-pro.yaml
@@ -32,15 +32,6 @@ parameter_rules:
     max: 8192
   - name: response_format
     use_template: response_format
-  - name: stream
-    label:
-      zh_Hans: 流式输出
-      en_US: Stream
-    type: boolean
-    help:
-      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
-      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
-    default: false
 pricing:
   input: '0.00'
   output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
index 2d213d56a..075e484e4 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro-vision.yaml
@@ -27,15 +27,6 @@ parameter_rules:
     default: 4096
     min: 1
     max: 4096
-  - name: stream
-    label:
-      zh_Hans: 流式输出
-      en_US: Stream
-    type: boolean
-    help:
-      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
-      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
-    default: false
 pricing:
   input: '0.00'
   output: '0.00'
diff --git a/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml b/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
index e2f487c1e..4e9f59e7d 100644
--- a/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-pro.yaml
@@ -31,15 +31,6 @@ parameter_rules:
     max: 2048
   - name: response_format
     use_template: response_format
-  - name: stream
-    label:
-      zh_Hans: 流式输出
-      en_US: Stream
-    type: boolean
-    help:
-      zh_Hans: 流式输出允许模型在生成文本的过程中逐步返回结果，而不是一次性生成全部结果后再返回。
-      en_US: Streaming output allows the model to return results incrementally as it generates text, rather than generating all the results at once.
-    default: false
 pricing:
   input: '0.00'
   output: '0.00'
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/chatglm_turbo.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/chatglm_turbo.yaml
index 18a0adec6..049fae6c1 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/chatglm_turbo.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/chatglm_turbo.yaml
@@ -28,15 +28,6 @@ parameter_rules:
       zh_Hans: do_sample 为 true 时启用采样策略，do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
       en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
     default: true
-  - name: stream
-    label:
-      zh_Hans: 流处理
-      en_US: Event Stream
-    type: boolean
-    help:
-      zh_Hans: 使用同步调用时，此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true，模型将通过标准 Event Stream ，逐块返回模型生成内容。Event Stream 结束时会返回一条data：[DONE]消息。注意：在模型流式输出生成内容的过程中，我们会分批对模型生成内容进行检测，当检测到违法及不良信息时，API会返回错误码（1301）。开发者识别到错误码（1301），应及时采取（清屏、重启对话）等措施删除生成内容，并确保不将含有违法及不良信息的内容传递给模型继续生成，避免其造成负面影响。
-      en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data：[DONE] message will be sent at the end of the Event Stream.Note：During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
-    default: false
   - name: return_type
     label:
       zh_Hans: 回复类型
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
index 7fcf69220..7c8da51d1 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-0520.yaml
@@ -32,15 +32,6 @@ parameter_rules:
       zh_Hans: do_sample 为 true 时启用采样策略，do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
       en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
     default: true
-  - name: stream
-    label:
-      zh_Hans: 流处理
-      en_US: Event Stream
-    type: boolean
-    help:
-      zh_Hans: 使用同步调用时，此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true，模型将通过标准 Event Stream ，逐块返回模型生成内容。Event Stream 结束时会返回一条data：[DONE]消息。注意：在模型流式输出生成内容的过程中，我们会分批对模型生成内容进行检测，当检测到违法及不良信息时，API会返回错误码（1301）。开发者识别到错误码（1301），应及时采取（清屏、重启对话）等措施删除生成内容，并确保不将含有违法及不良信息的内容传递给模型继续生成，避免其造成负面影响。
-      en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data：[DONE] message will be sent at the end of the Event Stream.Note：During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
-    default: false
   - name: max_tokens
     use_template: max_tokens
     default: 1024
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
index fcd7c7768..7a7b4b089 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-air.yaml
@@ -32,15 +32,6 @@ parameter_rules:
       zh_Hans: do_sample 为 true 时启用采样策略，do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
       en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
     default: true
-  - name: stream
-    label:
-      zh_Hans: 流处理
-      en_US: Event Stream
-    type: boolean
-    help:
-      zh_Hans: 使用同步调用时，此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true，模型将通过标准 Event Stream ，逐块返回模型生成内容。Event Stream 结束时会返回一条data：[DONE]消息。注意：在模型流式输出生成内容的过程中，我们会分批对模型生成内容进行检测，当检测到违法及不良信息时，API会返回错误码（1301）。开发者识别到错误码（1301），应及时采取（清屏、重启对话）等措施删除生成内容，并确保不将含有违法及不良信息的内容传递给模型继续生成，避免其造成负面影响。
-      en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data：[DONE] message will be sent at the end of the Event Stream.Note：During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
-    default: false
   - name: max_tokens
     use_template: max_tokens
     default: 1024
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
index c9ae5abf1..09ad84280 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-airx.yaml
@@ -32,15 +32,6 @@ parameter_rules:
       zh_Hans: do_sample 为 true 时启用采样策略，do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
       en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
     default: true
-  - name: stream
-    label:
-      zh_Hans: 流处理
-      en_US: Event Stream
-    type: boolean
-    help:
-      zh_Hans: 使用同步调用时，此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true，模型将通过标准 Event Stream ，逐块返回模型生成内容。Event Stream 结束时会返回一条data：[DONE]消息。注意：在模型流式输出生成内容的过程中，我们会分批对模型生成内容进行检测，当检测到违法及不良信息时，API会返回错误码（1301）。开发者识别到错误码（1301），应及时采取（清屏、重启对话）等措施删除生成内容，并确保不将含有违法及不良信息的内容传递给模型继续生成，避免其造成负面影响。
-      en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data：[DONE] message will be sent at the end of the Event Stream.Note：During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
-    default: false
   - name: max_tokens
     use_template: max_tokens
     default: 1024
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
index 98c4f72c7..aee82a060 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm-4-flash.yaml
@@ -32,15 +32,6 @@ parameter_rules:
       zh_Hans: do_sample 为 true 时启用采样策略，do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
       en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
     default: true
-  - name: stream
-    label:
-      zh_Hans: 流处理
-      en_US: Event Stream
-    type: boolean
-    help:
-      zh_Hans: 使用同步调用时，此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true，模型将通过标准 Event Stream ，逐块返回模型生成内容。Event Stream 结束时会返回一条data：[DONE]消息。注意：在模型流式输出生成内容的过程中，我们会分批对模型生成内容进行检测，当检测到违法及不良信息时，API会返回错误码（1301）。开发者识别到错误码（1301），应及时采取（清屏、重启对话）等措施删除生成内容，并确保不将含有违法及不良信息的内容传递给模型继续生成，避免其造成负面影响。
-      en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data：[DONE] message will be sent at the end of the Event Stream.Note：During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
-    default: false
   - name: max_tokens
     use_template: max_tokens
     default: 1024
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
index 0b5391ce2..791a77ba1 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_3_turbo.yaml
@@ -32,15 +32,6 @@ parameter_rules:
       zh_Hans: do_sample 为 true 时启用采样策略，do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
       en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
     default: true
-  - name: stream
-    label:
-      zh_Hans: 流处理
-      en_US: Event Stream
-    type: boolean
-    help:
-      zh_Hans: 使用同步调用时，此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true，模型将通过标准 Event Stream ，逐块返回模型生成内容。Event Stream 结束时会返回一条data：[DONE]消息。注意：在模型流式输出生成内容的过程中，我们会分批对模型生成内容进行检测，当检测到违法及不良信息时，API会返回错误码（1301）。开发者识别到错误码（1301），应及时采取（清屏、重启对话）等措施删除生成内容，并确保不将含有违法及不良信息的内容传递给模型继续生成，避免其造成负面影响。
-      en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data：[DONE] message will be sent at the end of the Event Stream.Note：During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
-    default: false
   - name: max_tokens
     use_template: max_tokens
     default: 1024
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
index 62f453fb7..13ed1e49c 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4.yaml
@@ -32,15 +32,6 @@ parameter_rules:
       zh_Hans: do_sample 为 true 时启用采样策略，do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
       en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
     default: true
-  - name: stream
-    label:
-      zh_Hans: 流处理
-      en_US: Event Stream
-    type: boolean
-    help:
-      zh_Hans: 使用同步调用时，此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true，模型将通过标准 Event Stream ，逐块返回模型生成内容。Event Stream 结束时会返回一条data：[DONE]消息。注意：在模型流式输出生成内容的过程中，我们会分批对模型生成内容进行检测，当检测到违法及不良信息时，API会返回错误码（1301）。开发者识别到错误码（1301），应及时采取（清屏、重启对话）等措施删除生成内容，并确保不将含有违法及不良信息的内容传递给模型继续生成，避免其造成负面影响。
-      en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data：[DONE] message will be sent at the end of the Event Stream.Note：During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
-    default: false
   - name: max_tokens
     use_template: max_tokens
     default: 1024
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
index 350b080c3..badcee22d 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_long.yaml
@@ -35,15 +35,6 @@ parameter_rules:
       zh_Hans: do_sample 为 true 时启用采样策略，do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
       en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
     default: true
-  - name: stream
-    label:
-      zh_Hans: 流处理
-      en_US: Event Stream
-    type: boolean
-    help:
-      zh_Hans: 使用同步调用时，此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true，模型将通过标准 Event Stream ，逐块返回模型生成内容。Event Stream 结束时会返回一条data：[DONE]消息。注意：在模型流式输出生成内容的过程中，我们会分批对模型生成内容进行检测，当检测到违法及不良信息时，API会返回错误码（1301）。开发者识别到错误码（1301），应及时采取（清屏、重启对话）等措施删除生成内容，并确保不将含有违法及不良信息的内容传递给模型继续生成，避免其造成负面影响。
-      en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data：[DONE] message will be sent at the end of the Event Stream.Note：During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
-    default: false
   - name: max_tokens
     use_template: max_tokens
     default: 1024
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
index 2d7ebd71c..e2f785e1b 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4_plus.yaml
@@ -32,15 +32,6 @@ parameter_rules:
       zh_Hans: do_sample 为 true 时启用采样策略，do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
       en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
     default: true
-  - name: stream
-    label:
-      zh_Hans: 流处理
-      en_US: Event Stream
-    type: boolean
-    help:
-      zh_Hans: 使用同步调用时，此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true，模型将通过标准 Event Stream ，逐块返回模型生成内容。Event Stream 结束时会返回一条data：[DONE]消息。注意：在模型流式输出生成内容的过程中，我们会分批对模型生成内容进行检测，当检测到违法及不良信息时，API会返回错误码（1301）。开发者识别到错误码（1301），应及时采取（清屏、重启对话）等措施删除生成内容，并确保不将含有违法及不良信息的内容传递给模型继续生成，避免其造成负面影响。
-      en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data：[DONE] message will be sent at the end of the Event Stream.Note：During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
-    default: false
   - name: max_tokens
     use_template: max_tokens
     default: 1024
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
index 3a1120ff3..3baa29830 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v.yaml
@@ -30,15 +30,6 @@ parameter_rules:
       zh_Hans: do_sample 为 true 时启用采样策略，do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
       en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
     default: true
-  - name: stream
-    label:
-      zh_Hans: 流处理
-      en_US: Event Stream
-    type: boolean
-    help:
-      zh_Hans: 使用同步调用时，此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true，模型将通过标准 Event Stream ，逐块返回模型生成内容。Event Stream 结束时会返回一条data：[DONE]消息。注意：在模型流式输出生成内容的过程中，我们会分批对模型生成内容进行检测，当检测到违法及不良信息时，API会返回错误码（1301）。开发者识别到错误码（1301），应及时采取（清屏、重启对话）等措施删除生成内容，并确保不将含有违法及不良信息的内容传递给模型继续生成，避免其造成负面影响。
-      en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data：[DONE] message will be sent at the end of the Event Stream.Note：During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
-    default: false
   - name: max_tokens
     use_template: max_tokens
     default: 1024
diff --git a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
index 14b9623e5..91550ceee 100644
--- a/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
+++ b/api/core/model_runtime/model_providers/zhipuai/llm/glm_4v_plus.yaml
@@ -30,15 +30,6 @@ parameter_rules:
       zh_Hans: do_sample 为 true 时启用采样策略，do_sample 为 false 时采样策略 temperature、top_p 将不生效。默认值为 true。
       en_US: When `do_sample` is set to true, the sampling strategy is enabled. When `do_sample` is set to false, the sampling strategies such as `temperature` and `top_p` will not take effect. The default value is true.
     default: true
-  - name: stream
-    label:
-      zh_Hans: 流处理
-      en_US: Event Stream
-    type: boolean
-    help:
-      zh_Hans: 使用同步调用时，此参数应当设置为 fasle 或者省略。表示模型生成完所有内容后一次性返回所有内容。默认值为 false。如果设置为 true，模型将通过标准 Event Stream ，逐块返回模型生成内容。Event Stream 结束时会返回一条data：[DONE]消息。注意：在模型流式输出生成内容的过程中，我们会分批对模型生成内容进行检测，当检测到违法及不良信息时，API会返回错误码（1301）。开发者识别到错误码（1301），应及时采取（清屏、重启对话）等措施删除生成内容，并确保不将含有违法及不良信息的内容传递给模型继续生成，避免其造成负面影响。
-      en_US: When using synchronous invocation, this parameter should be set to false or omitted. It indicates that the model will return all the generated content at once after the generation is complete. The default value is false. If set to true, the model will return the generated content in chunks via the standard Event Stream. A data：[DONE] message will be sent at the end of the Event Stream.Note：During the model's streaming output process, we will batch check the generated content. If illegal or harmful information is detected, the API will return an error code (1301). Developers who identify error code (1301) should promptly take actions such as clearing the screen or restarting the conversation to delete the generated content. They should also ensure that no illegal or harmful content is passed back to the model for continued generation to avoid negative impacts.
-    default: false
   - name: max_tokens
     use_template: max_tokens
     default: 1024