Merge tag '1.8.1' into merge-tag-1.8.1

# Conflicts:
#	.gitignore
#	README.md
#	api/.env.example
#	api/Dockerfile
#	api/commands.py
#	api/configs/app_config.py
#	api/controllers/console/__init__.py
#	api/controllers/console/apikey.py
#	api/controllers/console/app/statistic.py
#	api/controllers/service_api/app/app.py
#	api/controllers/service_api/app/audio.py
#	api/controllers/service_api/app/completion.py
#	api/controllers/service_api/app/conversation.py
#	api/controllers/service_api/app/file.py
#	api/controllers/service_api/app/message.py
#	api/controllers/service_api/app/workflow.py
#	api/controllers/service_api/wraps.py
#	api/controllers/web/completion.py
#	api/core/app/apps/advanced_chat/app_generator.py
#	api/core/app/apps/advanced_chat/generate_task_pipeline.py
#	api/core/app/apps/agent_chat/app_generator.py
#	api/core/app/apps/workflow/app_generator.py
#	api/core/app/apps/workflow/generate_task_pipeline.py
#	api/core/app/task_pipeline/workflow_cycle_manage.py
#	api/core/helper/code_executor/code_executor.py
#	api/core/tools/builtin_tool/providers/code/tools/simple_code.py
#	api/core/workflow/nodes/code/code_node.py
#	api/docker/entrypoint.sh
#	api/events/event_handlers/__init__.py
#	api/extensions/ext_celery.py
#	api/extensions/ext_commands.py
#	api/models/model.py
#	api/models/workflow.py
#	api/poetry.lock
#	api/pyproject.toml
#	api/services/app_service.py
#	api/services/feature_service.py
#	api/services/workspace_service.py
#	web/.env.example
#	web/Dockerfile
#	web/app/(commonLayout)/apps/Apps.tsx
#	web/app/components/apps/app-card.tsx
#	web/app/components/base/chat/embedded-chatbot/index.tsx
#	web/app/components/base/mermaid/index.tsx
#	web/app/components/develop/index.tsx
#	web/app/components/develop/secret-key/secret-key-modal.tsx
#	web/app/components/develop/secret-key/style.module.css
#	web/app/components/develop/template/template.zh.mdx
#	web/app/components/explore/app-list/index.tsx
#	web/app/components/explore/category.tsx
#	web/app/components/explore/sidebar/index.tsx
#	web/app/components/header/account-dropdown/index.tsx
#	web/app/components/header/index.tsx
#	web/app/components/share/utils.ts
#	web/app/layout.tsx
#	web/app/signin/components/mail-and-password-auth.tsx
#	web/app/signin/normal-form.tsx
#	web/app/signin/page.module.css
#	web/context/app-context.tsx
#	web/i18n/i18next-config.ts
#	web/i18n/ja-JP/login.ts
#	web/i18n/ko-KR/login.ts
#
    if dify_config.WORKFLOW_LOG_CLEANUP_ENABLED:
        # 2:00 AM every day
        imports.append("schedule.clean_workflow_runlogs_precise")
        beat_schedule["clean_workflow_runlogs_precise"] = {
            "task": "schedule.clean_workflow_runlogs_precise.clean_workflow_runlogs_precise",
            "schedule": crontab(minute="0", hour="2"),
        }
#	web/package.json
#	web/pnpm-lock.yaml
#	web/types/feature.ts
This commit is contained in:
npc0-hue
2025-09-25 15:55:13 +08:00
3566 changed files with 237942 additions and 62178 deletions
@@ -0,0 +1,49 @@
import time
import click
import app
from extensions.ext_database import db
from models.account import TenantPluginAutoUpgradeStrategy
from tasks.process_tenant_plugin_autoupgrade_check_task import process_tenant_plugin_autoupgrade_check_task
AUTO_UPGRADE_MINIMAL_CHECKING_INTERVAL = 15 * 60  # 15 minutes


@app.celery.task(queue="plugin")
def check_upgradable_plugin_task():
    """Enqueue plugin auto-upgrade checks for strategies due in the current window.

    The window starts 30 seconds before the current second-of-day and spans
    ``AUTO_UPGRADE_MINIMAL_CHECKING_INTERVAL`` seconds. It is evaluated modulo
    one day, so upgrade times on either side of midnight are matched as well.
    Strategies whose setting is ``DISABLED`` are ignored. For every matching
    strategy one ``process_tenant_plugin_autoupgrade_check_task`` is enqueued.
    """
    seconds_per_day = 86400

    click.echo(click.style("Start check upgradable plugin.", fg="green"))
    start_at = time.perf_counter()

    # The 30 second head start can make this value negative right after midnight.
    now_seconds_of_day = time.time() % seconds_per_day - 30  # we assume the tz is UTC
    click.echo(click.style(f"Now seconds of day: {now_seconds_of_day}", fg="green"))

    window_end = now_seconds_of_day + AUTO_UPGRADE_MINIMAL_CHECKING_INTERVAL
    upgrade_time = TenantPluginAutoUpgradeStrategy.upgrade_time_of_day
    in_window = (upgrade_time >= now_seconds_of_day) & (upgrade_time < window_end)

    # Wrap the window around midnight; without this, times just before 24:00
    # (window starts below 0) or just after 00:00 (window ends past the day) are missed.
    # NOTE(review): assumes upgrade_time_of_day is stored as seconds in [0, 86400) - confirm.
    if now_seconds_of_day < 0:
        in_window = in_window | (upgrade_time >= now_seconds_of_day + seconds_per_day)
    elif window_end > seconds_per_day:
        in_window = in_window | (upgrade_time < window_end - seconds_per_day)

    strategies = (
        db.session.query(TenantPluginAutoUpgradeStrategy)
        .where(
            in_window,
            TenantPluginAutoUpgradeStrategy.strategy_setting
            != TenantPluginAutoUpgradeStrategy.StrategySetting.DISABLED,
        )
        .all()
    )

    for strategy in strategies:
        process_tenant_plugin_autoupgrade_check_task.delay(
            strategy.tenant_id,
            strategy.strategy_setting,
            strategy.upgrade_time_of_day,
            strategy.upgrade_mode,
            strategy.exclude_plugins,
            strategy.include_plugins,
        )

    end_at = time.perf_counter()
    click.echo(
        click.style(
            f"Checked upgradable plugin success latency: {end_at - start_at}",
            fg="green",
        )
    )
+5 -5
View File
@@ -3,7 +3,7 @@ import time
import click
from sqlalchemy import text
from werkzeug.exceptions import NotFound
from sqlalchemy.exc import SQLAlchemyError
import app
from configs import dify_config
@@ -21,14 +21,14 @@ def clean_embedding_cache_task():
try:
embedding_ids = (
db.session.query(Embedding.id)
.filter(Embedding.created_at < thirty_days_ago)
.where(Embedding.created_at < thirty_days_ago)
.order_by(Embedding.created_at.desc())
.limit(100)
.all()
)
embedding_ids = [embedding_id[0] for embedding_id in embedding_ids]
except NotFound:
break
except SQLAlchemyError:
raise
if embedding_ids:
for embedding_id in embedding_ids:
db.session.execute(
@@ -39,4 +39,4 @@ def clean_embedding_cache_task():
else:
break
end_at = time.perf_counter()
click.echo(click.style("Cleaned embedding cache from db success latency: {}".format(end_at - start_at), fg="green"))
click.echo(click.style(f"Cleaned embedding cache from db success latency: {end_at - start_at}", fg="green"))
+24 -16
View File
@@ -1,8 +1,9 @@
import datetime
import logging
import time
import click
from werkzeug.exceptions import NotFound
from sqlalchemy.exc import SQLAlchemyError
import app
from configs import dify_config
@@ -20,6 +21,8 @@ from models.model import (
from models.web import SavedMessage
from services.feature_service import FeatureService
logger = logging.getLogger(__name__)
@app.celery.task(queue="dataset")
def clean_messages():
@@ -31,22 +34,27 @@ def clean_messages():
while True:
try:
# Main query with join and filter
# FIXME:for mypy no paginate method error
messages = (
db.session.query(Message) # type: ignore
.filter(Message.created_at < plan_sandbox_clean_message_day)
db.session.query(Message)
.where(Message.created_at < plan_sandbox_clean_message_day)
.order_by(Message.created_at.desc())
.limit(100)
.all()
)
except NotFound:
break
except SQLAlchemyError:
raise
if not messages:
break
for message in messages:
plan_sandbox_clean_message_day = message.created_at
app = App.query.filter_by(id=message.app_id).first()
app = db.session.query(App).filter_by(id=message.app_id).first()
if not app:
logger.warning(
"Expected App record to exist, but none was found, app_id=%s, message_id=%s",
message.app_id,
message.id,
)
continue
features_cache_key = f"features:{app.tenant_id}"
plan_cache = redis_client.get(features_cache_key)
if plan_cache is None:
@@ -57,25 +65,25 @@ def clean_messages():
plan = plan_cache.decode()
if plan == "sandbox":
# clean related message
db.session.query(MessageFeedback).filter(MessageFeedback.message_id == message.id).delete(
db.session.query(MessageFeedback).where(MessageFeedback.message_id == message.id).delete(
synchronize_session=False
)
db.session.query(MessageAnnotation).filter(MessageAnnotation.message_id == message.id).delete(
db.session.query(MessageAnnotation).where(MessageAnnotation.message_id == message.id).delete(
synchronize_session=False
)
db.session.query(MessageChain).filter(MessageChain.message_id == message.id).delete(
db.session.query(MessageChain).where(MessageChain.message_id == message.id).delete(
synchronize_session=False
)
db.session.query(MessageAgentThought).filter(MessageAgentThought.message_id == message.id).delete(
db.session.query(MessageAgentThought).where(MessageAgentThought.message_id == message.id).delete(
synchronize_session=False
)
db.session.query(MessageFile).filter(MessageFile.message_id == message.id).delete(
db.session.query(MessageFile).where(MessageFile.message_id == message.id).delete(
synchronize_session=False
)
db.session.query(SavedMessage).filter(SavedMessage.message_id == message.id).delete(
db.session.query(SavedMessage).where(SavedMessage.message_id == message.id).delete(
synchronize_session=False
)
db.session.query(Message).filter(Message.id == message.id).delete()
db.session.query(Message).where(Message.id == message.id).delete()
db.session.commit()
end_at = time.perf_counter()
click.echo(click.style("Cleaned messages from db success latency: {}".format(end_at - start_at), fg="green"))
click.echo(click.style(f"Cleaned messages from db success latency: {end_at - start_at}", fg="green"))
+129 -160
View File
@@ -1,9 +1,10 @@
import datetime
import time
from typing import Optional, TypedDict
import click
from sqlalchemy import func
from werkzeug.exceptions import NotFound
from sqlalchemy import func, select
from sqlalchemy.exc import SQLAlchemyError
import app
from configs import dify_config
@@ -14,175 +15,143 @@ from models.dataset import Dataset, DatasetAutoDisableLog, DatasetQuery, Documen
from services.feature_service import FeatureService
class CleanupConfig(TypedDict):
    """Settings for one cleanup pass over unused datasets."""

    # Cutoff timestamp the pass compares dataset, document and query dates against.
    clean_day: datetime.datetime
    # When set, only datasets whose tenant plan equals this value are cleaned.
    plan_filter: Optional[str]
    # Whether to add DatasetAutoDisableLog rows before disabling the documents.
    add_logs: bool
@app.celery.task(queue="dataset")
def clean_unused_datasets_task():
click.echo(click.style("Start clean unused datasets indexes.", fg="green"))
plan_sandbox_clean_day_setting = dify_config.PLAN_SANDBOX_CLEAN_DAY_SETTING
plan_pro_clean_day_setting = dify_config.PLAN_PRO_CLEAN_DAY_SETTING
start_at = time.perf_counter()
plan_sandbox_clean_day = datetime.datetime.now() - datetime.timedelta(days=plan_sandbox_clean_day_setting)
plan_pro_clean_day = datetime.datetime.now() - datetime.timedelta(days=plan_pro_clean_day_setting)
while True:
try:
# Subquery for counting new documents
document_subquery_new = (
db.session.query(Document.dataset_id, func.count(Document.id).label("document_count"))
.filter(
Document.indexing_status == "completed",
Document.enabled == True,
Document.archived == False,
Document.updated_at > plan_sandbox_clean_day,
)
.group_by(Document.dataset_id)
.subquery()
)
# Subquery for counting old documents
document_subquery_old = (
db.session.query(Document.dataset_id, func.count(Document.id).label("document_count"))
.filter(
Document.indexing_status == "completed",
Document.enabled == True,
Document.archived == False,
Document.updated_at < plan_sandbox_clean_day,
)
.group_by(Document.dataset_id)
.subquery()
)
# Define cleanup configurations
cleanup_configs: list[CleanupConfig] = [
{
"clean_day": datetime.datetime.now() - datetime.timedelta(days=dify_config.PLAN_SANDBOX_CLEAN_DAY_SETTING),
"plan_filter": None,
"add_logs": True,
},
{
"clean_day": datetime.datetime.now() - datetime.timedelta(days=dify_config.PLAN_PRO_CLEAN_DAY_SETTING),
"plan_filter": "sandbox",
"add_logs": False,
},
]
# Main query with join and filter
datasets = (
Dataset.query.outerjoin(document_subquery_new, Dataset.id == document_subquery_new.c.dataset_id)
.outerjoin(document_subquery_old, Dataset.id == document_subquery_old.c.dataset_id)
.filter(
Dataset.created_at < plan_sandbox_clean_day,
func.coalesce(document_subquery_new.c.document_count, 0) == 0,
func.coalesce(document_subquery_old.c.document_count, 0) > 0,
)
.order_by(Dataset.created_at.desc())
.paginate(page=1, per_page=50)
)
for config in cleanup_configs:
clean_day = config["clean_day"]
plan_filter = config["plan_filter"]
add_logs = config["add_logs"]
except NotFound:
break
if datasets.items is None or len(datasets.items) == 0:
break
for dataset in datasets:
dataset_query = (
db.session.query(DatasetQuery)
.filter(DatasetQuery.created_at > plan_sandbox_clean_day, DatasetQuery.dataset_id == dataset.id)
.all()
)
if not dataset_query or len(dataset_query) == 0:
try:
# add auto disable log
documents = (
db.session.query(Document)
.filter(
Document.dataset_id == dataset.id,
Document.enabled == True,
Document.archived == False,
)
.all()
page = 1
while True:
try:
# Subquery for counting new documents
document_subquery_new = (
db.session.query(Document.dataset_id, func.count(Document.id).label("document_count"))
.where(
Document.indexing_status == "completed",
Document.enabled == True,
Document.archived == False,
Document.updated_at > clean_day,
)
for document in documents:
dataset_auto_disable_log = DatasetAutoDisableLog(
tenant_id=dataset.tenant_id,
dataset_id=dataset.id,
document_id=document.id,
)
db.session.add(dataset_auto_disable_log)
# remove index
index_processor = IndexProcessorFactory(dataset.doc_form).init_index_processor()
index_processor.clean(dataset, None)
.group_by(Document.dataset_id)
.subquery()
)
# update document
update_params = {Document.enabled: False}
Document.query.filter_by(dataset_id=dataset.id).update(update_params)
db.session.commit()
click.echo(click.style("Cleaned unused dataset {} from db success!".format(dataset.id), fg="green"))
except Exception as e:
click.echo(
click.style("clean dataset index error: {} {}".format(e.__class__.__name__, str(e)), fg="red")
# Subquery for counting old documents
document_subquery_old = (
db.session.query(Document.dataset_id, func.count(Document.id).label("document_count"))
.where(
Document.indexing_status == "completed",
Document.enabled == True,
Document.archived == False,
Document.updated_at < clean_day,
)
while True:
try:
# Subquery for counting new documents
document_subquery_new = (
db.session.query(Document.dataset_id, func.count(Document.id).label("document_count"))
.filter(
Document.indexing_status == "completed",
Document.enabled == True,
Document.archived == False,
Document.updated_at > plan_pro_clean_day,
.group_by(Document.dataset_id)
.subquery()
)
.group_by(Document.dataset_id)
.subquery()
)
# Subquery for counting old documents
document_subquery_old = (
db.session.query(Document.dataset_id, func.count(Document.id).label("document_count"))
.filter(
Document.indexing_status == "completed",
Document.enabled == True,
Document.archived == False,
Document.updated_at < plan_pro_clean_day,
)
.group_by(Document.dataset_id)
.subquery()
)
# Main query with join and filter
datasets = (
Dataset.query.outerjoin(document_subquery_new, Dataset.id == document_subquery_new.c.dataset_id)
.outerjoin(document_subquery_old, Dataset.id == document_subquery_old.c.dataset_id)
.filter(
Dataset.created_at < plan_pro_clean_day,
func.coalesce(document_subquery_new.c.document_count, 0) == 0,
func.coalesce(document_subquery_old.c.document_count, 0) > 0,
)
.order_by(Dataset.created_at.desc())
.paginate(page=1, per_page=50)
)
except NotFound:
break
if datasets.items is None or len(datasets.items) == 0:
break
for dataset in datasets:
dataset_query = (
db.session.query(DatasetQuery)
.filter(DatasetQuery.created_at > plan_pro_clean_day, DatasetQuery.dataset_id == dataset.id)
.all()
)
if not dataset_query or len(dataset_query) == 0:
try:
features_cache_key = f"features:{dataset.tenant_id}"
plan_cache = redis_client.get(features_cache_key)
if plan_cache is None:
features = FeatureService.get_features(dataset.tenant_id)
redis_client.setex(features_cache_key, 600, features.billing.subscription.plan)
plan = features.billing.subscription.plan
else:
plan = plan_cache.decode()
if plan == "sandbox":
# remove index
index_processor = IndexProcessorFactory(dataset.doc_form).init_index_processor()
index_processor.clean(dataset, None)
# update document
update_params = {Document.enabled: False}
Document.query.filter_by(dataset_id=dataset.id).update(update_params)
db.session.commit()
click.echo(
click.style("Cleaned unused dataset {} from db success!".format(dataset.id), fg="green")
)
except Exception as e:
click.echo(
click.style("clean dataset index error: {} {}".format(e.__class__.__name__, str(e)), fg="red")
# Main query with join and filter
stmt = (
select(Dataset)
.outerjoin(document_subquery_new, Dataset.id == document_subquery_new.c.dataset_id)
.outerjoin(document_subquery_old, Dataset.id == document_subquery_old.c.dataset_id)
.where(
Dataset.created_at < clean_day,
func.coalesce(document_subquery_new.c.document_count, 0) == 0,
func.coalesce(document_subquery_old.c.document_count, 0) > 0,
)
.order_by(Dataset.created_at.desc())
)
datasets = db.paginate(stmt, page=page, per_page=50, error_out=False)
except SQLAlchemyError:
raise
if datasets is None or datasets.items is None or len(datasets.items) == 0:
break
for dataset in datasets:
dataset_query = (
db.session.query(DatasetQuery)
.where(DatasetQuery.created_at > clean_day, DatasetQuery.dataset_id == dataset.id)
.all()
)
if not dataset_query or len(dataset_query) == 0:
try:
should_clean = True
# Check plan filter if specified
if plan_filter:
features_cache_key = f"features:{dataset.tenant_id}"
plan_cache = redis_client.get(features_cache_key)
if plan_cache is None:
features = FeatureService.get_features(dataset.tenant_id)
redis_client.setex(features_cache_key, 600, features.billing.subscription.plan)
plan = features.billing.subscription.plan
else:
plan = plan_cache.decode()
should_clean = plan == plan_filter
if should_clean:
# Add auto disable log if required
if add_logs:
documents = (
db.session.query(Document)
.where(
Document.dataset_id == dataset.id,
Document.enabled == True,
Document.archived == False,
)
.all()
)
for document in documents:
dataset_auto_disable_log = DatasetAutoDisableLog(
tenant_id=dataset.tenant_id,
dataset_id=dataset.id,
document_id=document.id,
)
db.session.add(dataset_auto_disable_log)
# Remove index
index_processor = IndexProcessorFactory(dataset.doc_form).init_index_processor()
index_processor.clean(dataset, None)
# Update document
db.session.query(Document).filter_by(dataset_id=dataset.id).update(
{Document.enabled: False}
)
db.session.commit()
click.echo(click.style(f"Cleaned unused dataset {dataset.id} from db success!", fg="green"))
except Exception as e:
click.echo(click.style(f"clean dataset index error: {e.__class__.__name__} {str(e)}", fg="red"))
page += 1
end_at = time.perf_counter()
click.echo(click.style("Cleaned unused dataset from db success latency: {}".format(end_at - start_at), fg="green"))
click.echo(click.style(f"Cleaned unused dataset from db success latency: {end_at - start_at}", fg="green"))
@@ -0,0 +1,155 @@
import datetime
import logging
import time
import click
import app
from configs import dify_config
from extensions.ext_database import db
from models.model import (
AppAnnotationHitHistory,
Conversation,
Message,
MessageAgentThought,
MessageAnnotation,
MessageChain,
MessageFeedback,
MessageFile,
)
from models.workflow import ConversationVariable, WorkflowAppLog, WorkflowNodeExecutionModel, WorkflowRun
logger = logging.getLogger(__name__)
# Number of consecutive failed batches after which the cleanup run is aborted.
MAX_RETRIES = 3
# Maximum number of expired workflow runs selected (and deleted) per batch.
BATCH_SIZE = dify_config.WORKFLOW_LOG_CLEANUP_BATCH_SIZE
@app.celery.task(queue="dataset")
def clean_workflow_runlogs_precise():
    """Clean expired workflow run logs with retry mechanism and complete message cascade.

    Workflow runs created more than ``WORKFLOW_LOG_RETENTION_DAYS`` days ago are
    deleted in batches of ``BATCH_SIZE`` via ``_delete_batch_with_retry``. A failed
    batch is retried after a growing pause; once ``MAX_RETRIES`` consecutive
    batches have failed the run stops for this invocation.

    Raises:
        Exception: any unexpected error is logged, the session is rolled back,
            and the error is re-raised.
    """
    click.echo(click.style("Start clean workflow run logs (precise mode with complete cascade).", fg="green"))
    start_at = time.perf_counter()

    retention_days = dify_config.WORKFLOW_LOG_RETENTION_DAYS
    cutoff_date = datetime.datetime.now() - datetime.timedelta(days=retention_days)

    try:
        total_workflow_runs = db.session.query(WorkflowRun).where(WorkflowRun.created_at < cutoff_date).count()
        if total_workflow_runs == 0:
            logger.info("No expired workflow run logs found")
            return

        logger.info("Found %s expired workflow run logs to clean", total_workflow_runs)

        total_deleted = 0
        failed_batches = 0  # consecutive failures; reset after every successful batch

        while True:
            workflow_runs = (
                db.session.query(WorkflowRun.id).where(WorkflowRun.created_at < cutoff_date).limit(BATCH_SIZE).all()
            )
            if not workflow_runs:
                break

            workflow_run_ids = [run.id for run in workflow_runs]

            if _delete_batch_with_retry(workflow_run_ids, failed_batches):
                total_deleted += len(workflow_run_ids)
                failed_batches = 0
                continue

            failed_batches += 1
            if failed_batches >= MAX_RETRIES:
                logger.error("Failed to delete batch after %s retries, aborting cleanup for today", MAX_RETRIES)
                break

            # Incremental back-off of 5 minutes per consecutive failure. The failure
            # that reaches MAX_RETRIES aborts above instead of sleeping. The failed
            # rows were not deleted, so the next query selects them again.
            retry_delay_minutes = failed_batches * 5
            logger.warning("Batch deletion failed, retrying in %s minutes...", retry_delay_minutes)
            time.sleep(retry_delay_minutes * 60)

        logger.info("Cleanup completed: %s expired workflow run logs deleted", total_deleted)

    except Exception:
        db.session.rollback()
        logger.exception("Unexpected error in workflow log cleanup")
        raise

    end_at = time.perf_counter()
    execution_time = end_at - start_at
    click.echo(click.style(f"Cleaned workflow run logs from db success latency: {execution_time:.2f}s", fg="green"))
def _delete_batch_with_retry(workflow_run_ids: list[str], attempt_count: int) -> bool:
    """Delete one batch of workflow runs together with every record hanging off them.

    Args:
        workflow_run_ids: ids of the workflow runs to remove.
        attempt_count: number of failures seen so far; only used in the error log.

    Returns:
        True when the batch was deleted and committed, False when any step raised
        (the session is rolled back and the exception is logged).
    """
    # Tables that reference a message through ``message_id``, in deletion order.
    message_child_models = (
        AppAnnotationHitHistory,
        MessageAgentThought,
        MessageChain,
        MessageFile,
        MessageAnnotation,
        MessageFeedback,
    )
    # Tables that reference a workflow run through ``workflow_run_id``, in deletion order.
    run_child_models = (WorkflowAppLog, WorkflowNodeExecutionModel)

    try:
        with db.session.begin_nested():
            message_rows = (
                db.session.query(Message.id, Message.conversation_id)
                .where(Message.workflow_run_id.in_(workflow_run_ids))
                .all()
            )
            message_id_list = [row.id for row in message_rows]
            conversation_id_list = list({row.conversation_id for row in message_rows if row.conversation_id})

            if message_id_list:
                for child_model in message_child_models:
                    child_rows = db.session.query(child_model).where(child_model.message_id.in_(message_id_list))
                    child_rows.delete(synchronize_session=False)

                run_messages = db.session.query(Message).where(Message.workflow_run_id.in_(workflow_run_ids))
                run_messages.delete(synchronize_session=False)

            for run_child_model in run_child_models:
                run_child_rows = db.session.query(run_child_model).where(
                    run_child_model.workflow_run_id.in_(workflow_run_ids)
                )
                run_child_rows.delete(synchronize_session=False)

            if conversation_id_list:
                conversation_variables = db.session.query(ConversationVariable).where(
                    ConversationVariable.conversation_id.in_(conversation_id_list)
                )
                conversation_variables.delete(synchronize_session=False)

                conversations = db.session.query(Conversation).where(Conversation.id.in_(conversation_id_list))
                conversations.delete(synchronize_session=False)

            # The runs themselves go last, after everything that points at them.
            expired_runs = db.session.query(WorkflowRun).where(WorkflowRun.id.in_(workflow_run_ids))
            expired_runs.delete(synchronize_session=False)

        db.session.commit()
    except Exception:
        db.session.rollback()
        logger.exception("Batch deletion failed (attempt %s)", attempt_count + 1)
        return False
    else:
        return True
+4 -2
View File
@@ -19,7 +19,9 @@ def create_tidb_serverless_task():
while True:
try:
# check the number of idle tidb serverless
idle_tidb_serverless_number = TidbAuthBinding.query.filter(TidbAuthBinding.active == False).count()
idle_tidb_serverless_number = (
db.session.query(TidbAuthBinding).where(TidbAuthBinding.active == False).count()
)
if idle_tidb_serverless_number >= tidb_serverless_number:
break
# create tidb serverless
@@ -31,7 +33,7 @@ def create_tidb_serverless_task():
break
end_at = time.perf_counter()
click.echo(click.style("Create tidb serverless task success latency: {}".format(end_at - start_at), fg="green"))
click.echo(click.style(f"Create tidb serverless task success latency: {end_at - start_at}", fg="green"))
def create_clusters(batch_size):
+25 -19
View File
@@ -3,16 +3,18 @@ import time
from collections import defaultdict
import click
from flask import render_template # type: ignore
import app
from configs import dify_config
from extensions.ext_database import db
from extensions.ext_mail import mail
from libs.email_i18n import EmailType, get_email_i18n_service
from models.account import Account, Tenant, TenantAccountJoin
from models.dataset import Dataset, DatasetAutoDisableLog
from services.feature_service import FeatureService
logger = logging.getLogger(__name__)
@app.celery.task(queue="dataset")
def mail_clean_document_notify_task():
@@ -24,12 +26,14 @@ def mail_clean_document_notify_task():
if not mail.is_inited():
return
logging.info(click.style("Start send document clean notify mail", fg="green"))
logger.info(click.style("Start send document clean notify mail", fg="green"))
start_at = time.perf_counter()
# send document clean notify mail
try:
dataset_auto_disable_logs = DatasetAutoDisableLog.query.filter(DatasetAutoDisableLog.notified == False).all()
dataset_auto_disable_logs = (
db.session.query(DatasetAutoDisableLog).where(DatasetAutoDisableLog.notified == False).all()
)
# group by tenant_id
dataset_auto_disable_logs_map: dict[str, list[DatasetAutoDisableLog]] = defaultdict(list)
for dataset_auto_disable_log in dataset_auto_disable_logs:
@@ -43,14 +47,16 @@ def mail_clean_document_notify_task():
if plan != "sandbox":
knowledge_details = []
# check tenant
tenant = Tenant.query.filter(Tenant.id == tenant_id).first()
tenant = db.session.query(Tenant).where(Tenant.id == tenant_id).first()
if not tenant:
continue
# check current owner
current_owner_join = TenantAccountJoin.query.filter_by(tenant_id=tenant.id, role="owner").first()
current_owner_join = (
db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, role="owner").first()
)
if not current_owner_join:
continue
account = Account.query.filter(Account.id == current_owner_join.account_id).first()
account = db.session.query(Account).where(Account.id == current_owner_join.account_id).first()
if not account:
continue
@@ -63,19 +69,21 @@ def mail_clean_document_notify_task():
)
for dataset_id, document_ids in dataset_auto_dataset_map.items():
dataset = Dataset.query.filter(Dataset.id == dataset_id).first()
dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first()
if dataset:
document_count = len(document_ids)
knowledge_details.append(rf"Knowledge base {dataset.name}: {document_count} documents")
if knowledge_details:
html_content = render_template(
"clean_document_job_mail_template-US.html",
userName=account.email,
knowledge_details=knowledge_details,
url=url,
)
mail.send(
to=account.email, subject="Dify Knowledge base auto disable notification", html=html_content
email_service = get_email_i18n_service()
email_service.send_email(
email_type=EmailType.DOCUMENT_CLEAN_NOTIFY,
language_code="en-US",
to=account.email,
template_context={
"userName": account.email,
"knowledge_details": knowledge_details,
"url": url,
},
)
# update notified to True
@@ -83,8 +91,6 @@ def mail_clean_document_notify_task():
dataset_auto_disable_log.notified = True
db.session.commit()
end_at = time.perf_counter()
logging.info(
click.style("Send document clean notify mail succeeded: latency: {}".format(end_at - start_at), fg="green")
)
logger.info(click.style(f"Send document clean notify mail succeeded: latency: {end_at - start_at}", fg="green"))
except Exception:
logging.exception("Send document clean notify mail failed")
logger.exception("Send document clean notify mail failed")
+73
View File
@@ -0,0 +1,73 @@
import logging
from datetime import datetime
import click
from kombu.utils.url import parse_url # type: ignore
from redis import Redis
import app
from configs import dify_config
from extensions.ext_database import db
from libs.email_i18n import EmailType, get_email_i18n_service
# Parse the Celery broker URL and build a dedicated Redis client from its parts;
# queue_monitor_task uses this client to read queue lengths.
# NOTE(review): this assumes CELERY_BROKER_URL points at Redis - confirm.
redis_config = parse_url(dify_config.CELERY_BROKER_URL)
# Missing URL parts fall back to localhost, port 6379, no password and database 1.
celery_redis = Redis(
host=redis_config.get("hostname") or "localhost",
port=redis_config.get("port") or 6379,
password=redis_config.get("password") or None,
db=int(redis_config.get("virtual_host")) if redis_config.get("virtual_host") else 1,
)
logger = logging.getLogger(__name__)
@app.celery.task(queue="monitor")
def queue_monitor_task():
    """Check the length of the ``dataset`` queue and send alert mails when it is too long.

    Does nothing when ``QUEUE_MONITOR_THRESHOLD`` is not configured. When the
    queue length reaches the threshold, a warning is logged and an alert email
    is sent to every address in the comma-separated ``QUEUE_MONITOR_ALERT_EMAILS``
    setting. Failures are logged and never propagate; a failure for one
    recipient does not stop the remaining recipients.
    """
    queue_name = "dataset"
    threshold = dify_config.QUEUE_MONITOR_THRESHOLD

    if threshold is None:
        logger.warning(click.style("QUEUE_MONITOR_THRESHOLD is not configured, skipping monitoring", fg="yellow"))
        return

    try:
        queue_length = celery_redis.llen(queue_name)
        logger.info(click.style(f"Start monitor {queue_name}", fg="green"))

        if queue_length is None:
            logger.error(
                click.style(f"Failed to get queue length for {queue_name} - Redis may be unavailable", fg="red")
            )
            return

        logger.info(click.style(f"Queue length: {queue_length}", fg="green"))

        if queue_length >= threshold:
            warning_msg = f"Queue {queue_name} task count exceeded the limit.: {queue_length}/{threshold}"
            # Use the module logger like every other call in this task.
            logger.warning(click.style(warning_msg, fg="red"))

            alert_emails = dify_config.QUEUE_MONITOR_ALERT_EMAILS
            if alert_emails:
                # Tolerate "a@x, b@x," style settings: trim blanks, drop empty entries.
                to_list = [address.strip() for address in alert_emails.split(",") if address.strip()]
                email_service = get_email_i18n_service()
                # One timestamp for the whole alert, taken when the threshold breach is detected.
                current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                for to in to_list:
                    try:
                        email_service.send_email(
                            email_type=EmailType.QUEUE_MONITOR_ALERT,
                            language_code="en-US",
                            to=to,
                            template_context={
                                "queue_name": queue_name,
                                "queue_length": queue_length,
                                "threshold": threshold,
                                "alert_time": current_time,
                            },
                        )
                    except Exception:
                        logger.exception(click.style("Exception occurred during sending email", fg="red"))

    except Exception:
        logger.exception(click.style("Exception occurred during queue monitoring", fg="red"))
    finally:
        # Runs on the early return above too.
        if db.session.is_active:
            db.session.close()
@@ -5,6 +5,7 @@ import click
import app
from configs import dify_config
from core.rag.datasource.vdb.tidb_on_qdrant.tidb_service import TidbService
from extensions.ext_database import db
from models.dataset import TidbAuthBinding
@@ -14,9 +15,11 @@ def update_tidb_serverless_status_task():
start_at = time.perf_counter()
try:
# check the number of idle tidb serverless
tidb_serverless_list = TidbAuthBinding.query.filter(
TidbAuthBinding.active == False, TidbAuthBinding.status == "CREATING"
).all()
tidb_serverless_list = (
db.session.query(TidbAuthBinding)
.where(TidbAuthBinding.active == False, TidbAuthBinding.status == "CREATING")
.all()
)
if len(tidb_serverless_list) == 0:
return
# update tidb serverless status
@@ -26,9 +29,7 @@ def update_tidb_serverless_status_task():
click.echo(click.style(f"Error: {e}", fg="red"))
end_at = time.perf_counter()
click.echo(
click.style("Update tidb serverless status task success latency: {}".format(end_at - start_at), fg="green")
)
click.echo(click.style(f"Update tidb serverless status task success latency: {end_at - start_at}", fg="green"))
def update_clusters(tidb_serverless_list: list[TidbAuthBinding]):