fix: 依赖更新

Misuse of ServerConfig.PublicKeyCallback may cause authorization bypass in golang.org/x/crypto
Unstructured has Path Traversal via Malicious MSG Attachment that Allows Arbitrary File Write #106
@isaacs/brace-expansion has Uncontrolled Resource Consumption #107
jwt-go allows excessive memory allocation during header parsing #24
golang.org/x/crypto Vulnerable to Denial of Service (DoS) via Slow or Incomplete Key Exchange #29
部分文件修复
This commit is contained in:
npc0-hue
2026-02-04 18:10:06 +08:00
parent 76d648245c
commit 8c9e7652ec
19 changed files with 322 additions and 95 deletions
+13 -22
View File
@@ -4,13 +4,14 @@ from typing import TYPE_CHECKING, final
from typing_extensions import override
from configs import dify_config
from core.file import file_manager
from core.helper import ssrf_proxy
from core.file.file_manager import file_manager
from core.helper.code_executor.code_executor import CodeExecutor
from core.helper.code_executor.code_node_provider import CodeNodeProvider
from core.helper.ssrf_proxy import ssrf_proxy
from core.tools.tool_file_manager import ToolFileManager
from core.workflow.entities.graph_config import NodeConfigDict
from core.workflow.enums import NodeType
from core.workflow.graph import NodeFactory
from core.workflow.graph.graph import NodeFactory
from core.workflow.nodes.base.node import Node
from core.workflow.nodes.code.code_node import CodeNode
from core.workflow.nodes.code.limits import CodeNodeLimits
@@ -22,7 +23,6 @@ from core.workflow.nodes.template_transform.template_renderer import (
Jinja2TemplateRenderer,
)
from core.workflow.nodes.template_transform.template_transform_node import TemplateTransformNode
from libs.typing import is_str, is_str_dict
if TYPE_CHECKING:
from core.workflow.entities import GraphInitParams
@@ -47,9 +47,9 @@ class DifyNodeFactory(NodeFactory):
code_providers: Sequence[type[CodeNodeProvider]] | None = None,
code_limits: CodeNodeLimits | None = None,
template_renderer: Jinja2TemplateRenderer | None = None,
http_request_http_client: HttpClientProtocol = ssrf_proxy,
http_request_http_client: HttpClientProtocol | None = None,
http_request_tool_file_manager_factory: Callable[[], ToolFileManager] = ToolFileManager,
http_request_file_manager: FileManagerProtocol = file_manager,
http_request_file_manager: FileManagerProtocol | None = None,
) -> None:
self.graph_init_params = graph_init_params
self.graph_runtime_state = graph_runtime_state
@@ -68,12 +68,12 @@ class DifyNodeFactory(NodeFactory):
max_object_array_length=dify_config.CODE_MAX_OBJECT_ARRAY_LENGTH,
)
self._template_renderer = template_renderer or CodeExecutorJinja2TemplateRenderer()
self._http_request_http_client = http_request_http_client
self._http_request_http_client = http_request_http_client or ssrf_proxy
self._http_request_tool_file_manager_factory = http_request_tool_file_manager_factory
self._http_request_file_manager = http_request_file_manager
self._http_request_file_manager = http_request_file_manager or file_manager
@override
def create_node(self, node_config: dict[str, object]) -> Node:
def create_node(self, node_config: NodeConfigDict) -> Node:
"""
Create a Node instance from node configuration data using the traditional mapping.
@@ -82,23 +82,14 @@ class DifyNodeFactory(NodeFactory):
:raises ValueError: if node type is unknown or configuration is invalid
"""
# Get node_id from config
node_id = node_config.get("id")
if not is_str(node_id):
raise ValueError("Node config missing id")
node_id = node_config["id"]
# Get node type from config
node_data = node_config.get("data", {})
if not is_str_dict(node_data):
raise ValueError(f"Node {node_id} missing data information")
node_type_str = node_data.get("type")
if not is_str(node_type_str):
raise ValueError(f"Node {node_id} missing or invalid type information")
node_data = node_config["data"]
try:
node_type = NodeType(node_type_str)
node_type = NodeType(node_data["type"])
except ValueError:
raise ValueError(f"Unknown node type: {node_type_str}")
raise ValueError(f"Unknown node type: {node_data['type']}")
# Get node class
node_mapping = NODE_TYPE_CLASSES_MAPPING.get(node_type)
+1
View File
@@ -17,3 +17,4 @@ __all__ = [
"FileUploadConfig",
"ImageConfig",
]
+19
View File
@@ -104,6 +104,8 @@ def download(f: File, /):
):
return _download_file_content(f.storage_key)
elif f.transfer_method == FileTransferMethod.REMOTE_URL:
if f.remote_url is None:
raise ValueError("Missing file remote_url")
response = ssrf_proxy.get(f.remote_url, follow_redirects=True)
response.raise_for_status()
return response.content
@@ -134,6 +136,8 @@ def _download_file_content(path: str, /):
def _get_encoded_string(f: File, /):
match f.transfer_method:
case FileTransferMethod.REMOTE_URL:
if f.remote_url is None:
raise ValueError("Missing file remote_url")
response = ssrf_proxy.get(f.remote_url, follow_redirects=True)
response.raise_for_status()
data = response.content
@@ -164,3 +168,18 @@ def _to_url(f: File, /):
return sign_tool_file(tool_file_id=f.related_id, extension=f.extension)
else:
raise ValueError(f"Unsupported transfer method: {f.transfer_method}")
class FileManager:
    """
    Adapter exposing file manager helpers behind FileManagerProtocol.

    This is intentionally a thin wrapper over the existing module-level functions so callers can inject it
    where a protocol-typed file manager is expected.
    """

    def download(self, f: File, /) -> bytes:
        """
        Return the result of the module-level ``download`` function for ``f``.

        No instance state is read; the call is forwarded unchanged, so any exception raised by the
        module-level function propagates to the caller.
        """
        # Inside a method body the bare name resolves to the module-level function, not to this method.
        return download(f)


# Shared module-level instance of the adapter.
file_manager = FileManager()
+58 -10
View File
@@ -4,8 +4,10 @@ Proxy requests to avoid SSRF
import logging
import time
from typing import Any, TypeAlias
import httpx
from pydantic import TypeAdapter, ValidationError
from configs import dify_config
from core.helper.http_client_pooling import get_pooled_http_client
@@ -18,6 +20,9 @@ SSRF_DEFAULT_MAX_RETRIES = dify_config.SSRF_DEFAULT_MAX_RETRIES
BACKOFF_FACTOR = 0.5
STATUS_FORCELIST = [429, 500, 502, 503, 504]
Headers: TypeAlias = dict[str, str]
_HEADERS_ADAPTER = TypeAdapter(Headers)
_SSL_VERIFIED_POOL_KEY = "ssrf:verified"
_SSL_UNVERIFIED_POOL_KEY = "ssrf:unverified"
_SSRF_CLIENT_LIMITS = httpx.Limits(
@@ -76,7 +81,7 @@ def _get_ssrf_client(ssl_verify_enabled: bool) -> httpx.Client:
)
def _get_user_provided_host_header(headers: dict | None) -> str | None:
def _get_user_provided_host_header(headers: Headers | None) -> str | None:
"""
Extract the user-provided Host header from the headers dict.
@@ -92,7 +97,7 @@ def _get_user_provided_host_header(headers: dict | None) -> str | None:
return None
def _inject_trace_headers(headers: dict | None) -> dict:
def _inject_trace_headers(headers: Headers | None) -> Headers:
"""
Inject W3C traceparent header for distributed tracing.
@@ -125,7 +130,7 @@ def _inject_trace_headers(headers: dict | None) -> dict:
return headers
def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
def make_request(method: str, url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response:
# Convert requests-style allow_redirects to httpx-style follow_redirects
if "allow_redirects" in kwargs:
allow_redirects = kwargs.pop("allow_redirects")
@@ -142,10 +147,15 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
# prioritize per-call option, which can be switched on and off inside the HTTP node on the web UI
verify_option = kwargs.pop("ssl_verify", dify_config.HTTP_REQUEST_NODE_SSL_VERIFY)
if not isinstance(verify_option, bool):
raise ValueError("ssl_verify must be a boolean")
client = _get_ssrf_client(verify_option)
# Inject traceparent header for distributed tracing (when OTEL is not enabled)
headers = kwargs.get("headers") or {}
try:
headers: Headers = _HEADERS_ADAPTER.validate_python(kwargs.get("headers") or {})
except ValidationError as e:
raise ValueError("headers must be a mapping of string keys to string values") from e
headers = _inject_trace_headers(headers)
kwargs["headers"] = headers
@@ -198,25 +208,63 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
raise MaxRetriesExceededError(f"Reached maximum retries ({max_retries}) for URL {url}")
def get(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
def get(url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response:
return make_request("GET", url, max_retries=max_retries, **kwargs)
def post(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
def post(url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response:
return make_request("POST", url, max_retries=max_retries, **kwargs)
def put(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
def put(url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response:
return make_request("PUT", url, max_retries=max_retries, **kwargs)
def patch(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
def patch(url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response:
return make_request("PATCH", url, max_retries=max_retries, **kwargs)
def delete(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
def delete(url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response:
return make_request("DELETE", url, max_retries=max_retries, **kwargs)
def head(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
def head(url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response:
return make_request("HEAD", url, max_retries=max_retries, **kwargs)
class SSRFProxy:
    """
    Adapter exposing SSRF-protected HTTP helpers behind HttpClientProtocol.

    This is intentionally a thin wrapper over the existing module-level functions so callers can inject it
    where a protocol-typed HTTP client is expected.
    """

    @property
    def max_retries_exceeded_error(self) -> type[Exception]:
        """Return the module-level ``max_retries_exceeded_error`` exception type."""
        # NOTE(review): the module-level name is defined outside this view — confirm it exists.
        return max_retries_exceeded_error

    @property
    def request_error(self) -> type[Exception]:
        """Return the module-level ``request_error`` exception type."""
        # NOTE(review): the module-level name is defined outside this view — confirm it exists.
        return request_error

    # Each method below forwards its arguments unchanged to the module-level helper of the same name.
    # Inside a method body the bare name (e.g. ``get``) resolves to the module-level function.

    def get(self, url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response:
        """Forward to the module-level ``get`` helper."""
        return get(url=url, max_retries=max_retries, **kwargs)

    def head(self, url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response:
        """Forward to the module-level ``head`` helper."""
        return head(url=url, max_retries=max_retries, **kwargs)

    def post(self, url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response:
        """Forward to the module-level ``post`` helper."""
        return post(url=url, max_retries=max_retries, **kwargs)

    def put(self, url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response:
        """Forward to the module-level ``put`` helper."""
        return put(url=url, max_retries=max_retries, **kwargs)

    def delete(self, url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response:
        """Forward to the module-level ``delete`` helper."""
        return delete(url=url, max_retries=max_retries, **kwargs)

    def patch(self, url: str, max_retries: int = SSRF_DEFAULT_MAX_RETRIES, **kwargs: Any) -> httpx.Response:
        """Forward to the module-level ``patch`` helper."""
        return patch(url=url, max_retries=max_retries, **kwargs)


# Shared module-level instance of the adapter.
ssrf_proxy = SSRFProxy()
@@ -0,0 +1,24 @@
from __future__ import annotations
import sys
from pydantic import TypeAdapter, with_config
if sys.version_info >= (3, 12):
from typing import TypedDict
else:
from typing_extensions import TypedDict
# pydantic config extra="allow": keys beyond the declared ones are permitted rather than rejected.
@with_config(extra="allow")
class NodeConfigData(TypedDict):
    """Typed shape of a node config's ``data`` entry; only the ``type`` key is declared."""

    # Node type identifier as a plain string.
    type: str
# pydantic config extra="allow": keys beyond the declared ones are permitted rather than rejected.
@with_config(extra="allow")
class NodeConfigDict(TypedDict):
    """Typed shape of a single node configuration: an ``id`` plus a ``data`` payload."""

    # Node identifier.
    id: str
    # Node payload; must carry at least a string ``type`` (see NodeConfigData).
    data: NodeConfigData


# Module-level pydantic TypeAdapter for NodeConfigDict, built once at import time.
NodeConfigDictAdapter = TypeAdapter(NodeConfigDict)
+1 -1
View File
@@ -115,7 +115,7 @@ class DefaultValue(BaseModel):
@model_validator(mode="after")
def validate_value_type(self) -> DefaultValue:
# Type validation configuration
type_validators = {
type_validators: dict[DefaultValueType, dict[str, Any]] = {
DefaultValueType.STRING: {
"type": str,
"converter": lambda x: x,
+30 -2
View File
@@ -5,5 +5,33 @@ from core.workflow.nodes.base.node import Node
LATEST_VERSION = "latest"
# Mapping is built by Node.get_node_type_classes_mapping(), which imports and walks core.workflow.nodes
NODE_TYPE_CLASSES_MAPPING: Mapping[NodeType, Mapping[str, type[Node]]] = Node.get_node_type_classes_mapping()
class _LazyNodeTypeClassesMapping(Mapping[NodeType, Mapping[str, type[Node]]]):
    """
    Read-only mapping that defers building the node-type table until it is first used.

    Node.get_node_type_classes_mapping() is invoked by the first lookup, iteration, length or
    membership query instead of at import time. This avoids a circular import when modules under
    core.workflow.nodes (e.g. node_factory) import from node_mapping.
    """

    # Holds the built mapping once _get() has run; None until then.
    _cache: Mapping[NodeType, Mapping[str, type[Node]]] | None = None

    def _get(self) -> Mapping[NodeType, Mapping[str, type[Node]]]:
        """Return the underlying mapping, building and storing it on first use."""
        resolved = self._cache
        if resolved is None:
            resolved = Node.get_node_type_classes_mapping()
            self._cache = resolved
        return resolved

    def __getitem__(self, key: NodeType) -> Mapping[str, type[Node]]:
        """Look up the version-to-class mapping for ``key`` in the underlying mapping."""
        return self._get()[key]

    def __iter__(self):
        """Iterate over the keys of the underlying mapping."""
        underlying = self._get()
        return iter(underlying)

    def __len__(self) -> int:
        """Return the number of entries in the underlying mapping."""
        underlying = self._get()
        return len(underlying)

    def __contains__(self, key: object) -> bool:
        """Report whether ``key`` is present in the underlying mapping."""
        underlying = self._get()
        return key in underlying


# The real mapping is built on first access (not at import) to avoid a circular import with
# node_factory and other dependents.
NODE_TYPE_CLASSES_MAPPING: Mapping[NodeType, Mapping[str, type[Node]]] = _LazyNodeTypeClassesMapping()
@@ -21,19 +21,39 @@ def upgrade():
conn = op.get_bind()
inspector = Inspector.from_engine(conn)
tables = inspector.get_table_names()
if 'system_integration_extend' in tables:
if 'system_integration_extend' not in tables:
return
existing_columns = {c['name'] for c in inspector.get_columns('system_integration_extend')}
columns_to_add = []
if 'test' not in existing_columns:
columns_to_add.append(
sa.Column('test', sa.Boolean(), server_default=sa.text('false'), nullable=True, comment='是否测试链接联通性')
)
if 'config' not in existing_columns:
columns_to_add.append(sa.Column('config', sa.Text(), nullable=True, comment='其他配置'))
if 'app_id' not in existing_columns:
columns_to_add.append(sa.Column('app_id', sa.Text(), nullable=True, comment='应用ID'))
if columns_to_add:
with op.batch_alter_table('system_integration_extend', schema=None) as batch_op:
batch_op.add_column(sa.Column('test', sa.Boolean(), server_default=sa.text('false'), nullable=True, comment='是否测试链接联通性'))
batch_op.add_column(sa.Column('config', sa.Text(), nullable=True, comment='其他配置'))
batch_op.add_column(sa.Column('app_id', sa.Text(), nullable=True, comment='应用ID'))
for col in columns_to_add:
batch_op.add_column(col)
# ### end Alembic commands ###
def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('system_integration_extend', schema=None) as batch_op:
batch_op.drop_column('app_id')
batch_op.drop_column('config')
batch_op.drop_column('test')
conn = op.get_bind()
inspector = Inspector.from_engine(conn)
if 'system_integration_extend' not in inspector.get_table_names():
return
existing_columns = {c['name'] for c in inspector.get_columns('system_integration_extend')}
columns_to_drop = [c for c in ('app_id', 'config', 'test') if c in existing_columns]
if columns_to_drop:
with op.batch_alter_table('system_integration_extend', schema=None) as batch_op:
for col_name in columns_to_drop:
batch_op.drop_column(col_name)
# ### end Alembic commands ###
+3 -2
View File
@@ -81,7 +81,6 @@ dependencies = [
"starlette==0.49.1",
"tiktoken~=0.9.0",
"transformers~=4.56.1",
"unstructured[docx,epub,md,ppt,pptx]~=0.16.1",
"yarl~=1.18.3",
"webvtt-py~=0.5.1",
"sseclient-py~=1.8.0",
@@ -100,7 +99,9 @@ dependencies = [
"alibabacloud-dingtalk~=2.1.32",
"ldap3~=2.9.1",
"pypinyin~=0.53.0",
"flask-restful~=0.3.10"
"flask-restful~=0.3.10",
# Unstructured has Path Traversal via Malicious MSG Attachment that Allows Arbitrary File Write #106
"unstructured[docx,epub,md,ppt,pptx]~=0.18.18"
##### stop extend ######
]
# Before adding new dependency, consider place it in
Generated
+6 -5
View File
@@ -1690,7 +1690,7 @@ requires-dist = [
{ name = "tiktoken", specifier = "~=0.9.0" },
{ name = "tokenizers", specifier = "~=0.22.0" },
{ name = "transformers", specifier = "~=4.56.1" },
{ name = "unstructured", extras = ["docx", "epub", "md", "ppt", "pptx"], specifier = "~=0.16.1" },
{ name = "unstructured", extras = ["docx", "epub", "md", "ppt", "pptx"], specifier = "~=0.18.18" },
{ name = "validators", specifier = ">=0.34.0" },
{ name = "weave", specifier = ">=0.52.16" },
{ name = "weaviate-client", specifier = "==4.17.0" },
@@ -6866,12 +6866,12 @@ wheels = [
[[package]]
name = "unstructured"
version = "0.16.25"
version = "0.18.31"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "backoff" },
{ name = "beautifulsoup4" },
{ name = "chardet" },
{ name = "charset-normalizer" },
{ name = "dataclasses-json" },
{ name = "emoji" },
{ name = "filetype" },
@@ -6879,6 +6879,7 @@ dependencies = [
{ name = "langdetect" },
{ name = "lxml" },
{ name = "nltk" },
{ name = "numba" },
{ name = "numpy" },
{ name = "psutil" },
{ name = "python-iso639" },
@@ -6891,9 +6892,9 @@ dependencies = [
{ name = "unstructured-client" },
{ name = "wrapt" },
]
sdist = { url = "https://files.pythonhosted.org/packages/64/31/98c4c78e305d1294888adf87fd5ee30577a4c393951341ca32b43f167f1e/unstructured-0.16.25.tar.gz", hash = "sha256:73b9b0f51dbb687af572ecdb849a6811710b9cac797ddeab8ee80fa07d8aa5e6", size = 1683097, upload-time = "2025-03-07T11:19:39.507Z" }
sdist = { url = "https://files.pythonhosted.org/packages/a9/5f/64285bd69a538bc28753f1423fcaa9d64cd79a9e7c097171b1f0d27e9cdb/unstructured-0.18.31.tar.gz", hash = "sha256:af4bbe32d1894ae6e755f0da6fc0dd307a1d0adeebe0e7cc6278f6cf744339ca", size = 1707700, upload-time = "2026-01-27T15:33:05.378Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/12/4f/ad08585b5c8a33c82ea119494c4d3023f4796958c56e668b15cc282ec0a0/unstructured-0.16.25-py3-none-any.whl", hash = "sha256:14719ccef2830216cf1c5bf654f75e2bf07b17ca5dcee9da5ac74618130fd337", size = 1769286, upload-time = "2025-03-07T11:19:37.299Z" },
{ url = "https://files.pythonhosted.org/packages/c8/4a/9c43f39d9e443c9bc3f2e379b305bca27110adc653b071221b3132c18de5/unstructured-0.18.31-py3-none-any.whl", hash = "sha256:fab4641176cb9b192ed38048758aa0d9843121d03626d18f42275afb31e5b2d3", size = 1794889, upload-time = "2026-01-27T15:33:03.136Z" },
]
[package.optional-dependencies]