mirror of
https://github.com/YFGaia/dify-plus.git
synced 2026-06-14 20:41:21 +08:00
Merge tag '1.8.1' into merge-tag-1.8.1
# Conflicts:
# .gitignore
# README.md
# api/.env.example
# api/Dockerfile
# api/commands.py
# api/configs/app_config.py
# api/controllers/console/__init__.py
# api/controllers/console/apikey.py
# api/controllers/console/app/statistic.py
# api/controllers/service_api/app/app.py
# api/controllers/service_api/app/audio.py
# api/controllers/service_api/app/completion.py
# api/controllers/service_api/app/conversation.py
# api/controllers/service_api/app/file.py
# api/controllers/service_api/app/message.py
# api/controllers/service_api/app/workflow.py
# api/controllers/service_api/wraps.py
# api/controllers/web/completion.py
# api/core/app/apps/advanced_chat/app_generator.py
# api/core/app/apps/advanced_chat/generate_task_pipeline.py
# api/core/app/apps/agent_chat/app_generator.py
# api/core/app/apps/workflow/app_generator.py
# api/core/app/apps/workflow/generate_task_pipeline.py
# api/core/app/task_pipeline/workflow_cycle_manage.py
# api/core/helper/code_executor/code_executor.py
# api/core/tools/builtin_tool/providers/code/tools/simple_code.py
# api/core/workflow/nodes/code/code_node.py
# api/docker/entrypoint.sh
# api/events/event_handlers/__init__.py
# api/extensions/ext_celery.py
# api/extensions/ext_commands.py
# api/models/model.py
# api/models/workflow.py
# api/poetry.lock
# api/pyproject.toml
# api/services/app_service.py
# api/services/feature_service.py
# api/services/workspace_service.py
# web/.env.example
# web/Dockerfile
# web/app/(commonLayout)/apps/Apps.tsx
# web/app/components/apps/app-card.tsx
# web/app/components/base/chat/embedded-chatbot/index.tsx
# web/app/components/base/mermaid/index.tsx
# web/app/components/develop/index.tsx
# web/app/components/develop/secret-key/secret-key-modal.tsx
# web/app/components/develop/secret-key/style.module.css
# web/app/components/develop/template/template.zh.mdx
# web/app/components/explore/app-list/index.tsx
# web/app/components/explore/category.tsx
# web/app/components/explore/sidebar/index.tsx
# web/app/components/header/account-dropdown/index.tsx
# web/app/components/header/index.tsx
# web/app/components/share/utils.ts
# web/app/layout.tsx
# web/app/signin/components/mail-and-password-auth.tsx
# web/app/signin/normal-form.tsx
# web/app/signin/page.module.css
# web/context/app-context.tsx
# web/i18n/i18next-config.ts
# web/i18n/ja-JP/login.ts
# web/i18n/ko-KR/login.ts
#
if dify_config.WORKFLOW_LOG_CLEANUP_ENABLED:
# 2:00 AM every day
imports.append("schedule.clean_workflow_runlogs_precise")
beat_schedule["clean_workflow_runlogs_precise"] = {
"task": "schedule.clean_workflow_runlogs_precise.clean_workflow_runlogs_precise",
"schedule": crontab(minute="0", hour="2"),
} web/package.json
# web/pnpm-lock.yaml
# web/types/feature.ts
This commit is contained in:
@@ -0,0 +1,49 @@
|
||||
import time
|
||||
|
||||
import click
|
||||
|
||||
import app
|
||||
from extensions.ext_database import db
|
||||
from models.account import TenantPluginAutoUpgradeStrategy
|
||||
from tasks.process_tenant_plugin_autoupgrade_check_task import process_tenant_plugin_autoupgrade_check_task
|
||||
|
||||
AUTO_UPGRADE_MINIMAL_CHECKING_INTERVAL = 15 * 60 # 15 minutes
|
||||
|
||||
|
||||
@app.celery.task(queue="plugin")
|
||||
def check_upgradable_plugin_task():
|
||||
click.echo(click.style("Start check upgradable plugin.", fg="green"))
|
||||
start_at = time.perf_counter()
|
||||
|
||||
now_seconds_of_day = time.time() % 86400 - 30 # we assume the tz is UTC
|
||||
click.echo(click.style(f"Now seconds of day: {now_seconds_of_day}", fg="green"))
|
||||
|
||||
strategies = (
|
||||
db.session.query(TenantPluginAutoUpgradeStrategy)
|
||||
.where(
|
||||
TenantPluginAutoUpgradeStrategy.upgrade_time_of_day >= now_seconds_of_day,
|
||||
TenantPluginAutoUpgradeStrategy.upgrade_time_of_day
|
||||
< now_seconds_of_day + AUTO_UPGRADE_MINIMAL_CHECKING_INTERVAL,
|
||||
TenantPluginAutoUpgradeStrategy.strategy_setting
|
||||
!= TenantPluginAutoUpgradeStrategy.StrategySetting.DISABLED,
|
||||
)
|
||||
.all()
|
||||
)
|
||||
|
||||
for strategy in strategies:
|
||||
process_tenant_plugin_autoupgrade_check_task.delay(
|
||||
strategy.tenant_id,
|
||||
strategy.strategy_setting,
|
||||
strategy.upgrade_time_of_day,
|
||||
strategy.upgrade_mode,
|
||||
strategy.exclude_plugins,
|
||||
strategy.include_plugins,
|
||||
)
|
||||
|
||||
end_at = time.perf_counter()
|
||||
click.echo(
|
||||
click.style(
|
||||
f"Checked upgradable plugin success latency: {end_at - start_at}",
|
||||
fg="green",
|
||||
)
|
||||
)
|
||||
@@ -3,7 +3,7 @@ import time
|
||||
|
||||
import click
|
||||
from sqlalchemy import text
|
||||
from werkzeug.exceptions import NotFound
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
|
||||
import app
|
||||
from configs import dify_config
|
||||
@@ -21,14 +21,14 @@ def clean_embedding_cache_task():
|
||||
try:
|
||||
embedding_ids = (
|
||||
db.session.query(Embedding.id)
|
||||
.filter(Embedding.created_at < thirty_days_ago)
|
||||
.where(Embedding.created_at < thirty_days_ago)
|
||||
.order_by(Embedding.created_at.desc())
|
||||
.limit(100)
|
||||
.all()
|
||||
)
|
||||
embedding_ids = [embedding_id[0] for embedding_id in embedding_ids]
|
||||
except NotFound:
|
||||
break
|
||||
except SQLAlchemyError:
|
||||
raise
|
||||
if embedding_ids:
|
||||
for embedding_id in embedding_ids:
|
||||
db.session.execute(
|
||||
@@ -39,4 +39,4 @@ def clean_embedding_cache_task():
|
||||
else:
|
||||
break
|
||||
end_at = time.perf_counter()
|
||||
click.echo(click.style("Cleaned embedding cache from db success latency: {}".format(end_at - start_at), fg="green"))
|
||||
click.echo(click.style(f"Cleaned embedding cache from db success latency: {end_at - start_at}", fg="green"))
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
import datetime
|
||||
import logging
|
||||
import time
|
||||
|
||||
import click
|
||||
from werkzeug.exceptions import NotFound
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
|
||||
import app
|
||||
from configs import dify_config
|
||||
@@ -20,6 +21,8 @@ from models.model import (
|
||||
from models.web import SavedMessage
|
||||
from services.feature_service import FeatureService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@app.celery.task(queue="dataset")
|
||||
def clean_messages():
|
||||
@@ -31,22 +34,27 @@ def clean_messages():
|
||||
while True:
|
||||
try:
|
||||
# Main query with join and filter
|
||||
# FIXME:for mypy no paginate method error
|
||||
messages = (
|
||||
db.session.query(Message) # type: ignore
|
||||
.filter(Message.created_at < plan_sandbox_clean_message_day)
|
||||
db.session.query(Message)
|
||||
.where(Message.created_at < plan_sandbox_clean_message_day)
|
||||
.order_by(Message.created_at.desc())
|
||||
.limit(100)
|
||||
.all()
|
||||
)
|
||||
|
||||
except NotFound:
|
||||
break
|
||||
except SQLAlchemyError:
|
||||
raise
|
||||
if not messages:
|
||||
break
|
||||
for message in messages:
|
||||
plan_sandbox_clean_message_day = message.created_at
|
||||
app = App.query.filter_by(id=message.app_id).first()
|
||||
app = db.session.query(App).filter_by(id=message.app_id).first()
|
||||
if not app:
|
||||
logger.warning(
|
||||
"Expected App record to exist, but none was found, app_id=%s, message_id=%s",
|
||||
message.app_id,
|
||||
message.id,
|
||||
)
|
||||
continue
|
||||
features_cache_key = f"features:{app.tenant_id}"
|
||||
plan_cache = redis_client.get(features_cache_key)
|
||||
if plan_cache is None:
|
||||
@@ -57,25 +65,25 @@ def clean_messages():
|
||||
plan = plan_cache.decode()
|
||||
if plan == "sandbox":
|
||||
# clean related message
|
||||
db.session.query(MessageFeedback).filter(MessageFeedback.message_id == message.id).delete(
|
||||
db.session.query(MessageFeedback).where(MessageFeedback.message_id == message.id).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
db.session.query(MessageAnnotation).filter(MessageAnnotation.message_id == message.id).delete(
|
||||
db.session.query(MessageAnnotation).where(MessageAnnotation.message_id == message.id).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
db.session.query(MessageChain).filter(MessageChain.message_id == message.id).delete(
|
||||
db.session.query(MessageChain).where(MessageChain.message_id == message.id).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
db.session.query(MessageAgentThought).filter(MessageAgentThought.message_id == message.id).delete(
|
||||
db.session.query(MessageAgentThought).where(MessageAgentThought.message_id == message.id).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
db.session.query(MessageFile).filter(MessageFile.message_id == message.id).delete(
|
||||
db.session.query(MessageFile).where(MessageFile.message_id == message.id).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
db.session.query(SavedMessage).filter(SavedMessage.message_id == message.id).delete(
|
||||
db.session.query(SavedMessage).where(SavedMessage.message_id == message.id).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
db.session.query(Message).filter(Message.id == message.id).delete()
|
||||
db.session.query(Message).where(Message.id == message.id).delete()
|
||||
db.session.commit()
|
||||
end_at = time.perf_counter()
|
||||
click.echo(click.style("Cleaned messages from db success latency: {}".format(end_at - start_at), fg="green"))
|
||||
click.echo(click.style(f"Cleaned messages from db success latency: {end_at - start_at}", fg="green"))
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import datetime
|
||||
import time
|
||||
from typing import Optional, TypedDict
|
||||
|
||||
import click
|
||||
from sqlalchemy import func
|
||||
from werkzeug.exceptions import NotFound
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
|
||||
import app
|
||||
from configs import dify_config
|
||||
@@ -14,175 +15,143 @@ from models.dataset import Dataset, DatasetAutoDisableLog, DatasetQuery, Documen
|
||||
from services.feature_service import FeatureService
|
||||
|
||||
|
||||
class CleanupConfig(TypedDict):
|
||||
clean_day: datetime.datetime
|
||||
plan_filter: Optional[str]
|
||||
add_logs: bool
|
||||
|
||||
|
||||
@app.celery.task(queue="dataset")
|
||||
def clean_unused_datasets_task():
|
||||
click.echo(click.style("Start clean unused datasets indexes.", fg="green"))
|
||||
plan_sandbox_clean_day_setting = dify_config.PLAN_SANDBOX_CLEAN_DAY_SETTING
|
||||
plan_pro_clean_day_setting = dify_config.PLAN_PRO_CLEAN_DAY_SETTING
|
||||
start_at = time.perf_counter()
|
||||
plan_sandbox_clean_day = datetime.datetime.now() - datetime.timedelta(days=plan_sandbox_clean_day_setting)
|
||||
plan_pro_clean_day = datetime.datetime.now() - datetime.timedelta(days=plan_pro_clean_day_setting)
|
||||
while True:
|
||||
try:
|
||||
# Subquery for counting new documents
|
||||
document_subquery_new = (
|
||||
db.session.query(Document.dataset_id, func.count(Document.id).label("document_count"))
|
||||
.filter(
|
||||
Document.indexing_status == "completed",
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
Document.updated_at > plan_sandbox_clean_day,
|
||||
)
|
||||
.group_by(Document.dataset_id)
|
||||
.subquery()
|
||||
)
|
||||
|
||||
# Subquery for counting old documents
|
||||
document_subquery_old = (
|
||||
db.session.query(Document.dataset_id, func.count(Document.id).label("document_count"))
|
||||
.filter(
|
||||
Document.indexing_status == "completed",
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
Document.updated_at < plan_sandbox_clean_day,
|
||||
)
|
||||
.group_by(Document.dataset_id)
|
||||
.subquery()
|
||||
)
|
||||
# Define cleanup configurations
|
||||
cleanup_configs: list[CleanupConfig] = [
|
||||
{
|
||||
"clean_day": datetime.datetime.now() - datetime.timedelta(days=dify_config.PLAN_SANDBOX_CLEAN_DAY_SETTING),
|
||||
"plan_filter": None,
|
||||
"add_logs": True,
|
||||
},
|
||||
{
|
||||
"clean_day": datetime.datetime.now() - datetime.timedelta(days=dify_config.PLAN_PRO_CLEAN_DAY_SETTING),
|
||||
"plan_filter": "sandbox",
|
||||
"add_logs": False,
|
||||
},
|
||||
]
|
||||
|
||||
# Main query with join and filter
|
||||
datasets = (
|
||||
Dataset.query.outerjoin(document_subquery_new, Dataset.id == document_subquery_new.c.dataset_id)
|
||||
.outerjoin(document_subquery_old, Dataset.id == document_subquery_old.c.dataset_id)
|
||||
.filter(
|
||||
Dataset.created_at < plan_sandbox_clean_day,
|
||||
func.coalesce(document_subquery_new.c.document_count, 0) == 0,
|
||||
func.coalesce(document_subquery_old.c.document_count, 0) > 0,
|
||||
)
|
||||
.order_by(Dataset.created_at.desc())
|
||||
.paginate(page=1, per_page=50)
|
||||
)
|
||||
for config in cleanup_configs:
|
||||
clean_day = config["clean_day"]
|
||||
plan_filter = config["plan_filter"]
|
||||
add_logs = config["add_logs"]
|
||||
|
||||
except NotFound:
|
||||
break
|
||||
if datasets.items is None or len(datasets.items) == 0:
|
||||
break
|
||||
for dataset in datasets:
|
||||
dataset_query = (
|
||||
db.session.query(DatasetQuery)
|
||||
.filter(DatasetQuery.created_at > plan_sandbox_clean_day, DatasetQuery.dataset_id == dataset.id)
|
||||
.all()
|
||||
)
|
||||
if not dataset_query or len(dataset_query) == 0:
|
||||
try:
|
||||
# add auto disable log
|
||||
documents = (
|
||||
db.session.query(Document)
|
||||
.filter(
|
||||
Document.dataset_id == dataset.id,
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
)
|
||||
.all()
|
||||
page = 1
|
||||
while True:
|
||||
try:
|
||||
# Subquery for counting new documents
|
||||
document_subquery_new = (
|
||||
db.session.query(Document.dataset_id, func.count(Document.id).label("document_count"))
|
||||
.where(
|
||||
Document.indexing_status == "completed",
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
Document.updated_at > clean_day,
|
||||
)
|
||||
for document in documents:
|
||||
dataset_auto_disable_log = DatasetAutoDisableLog(
|
||||
tenant_id=dataset.tenant_id,
|
||||
dataset_id=dataset.id,
|
||||
document_id=document.id,
|
||||
)
|
||||
db.session.add(dataset_auto_disable_log)
|
||||
# remove index
|
||||
index_processor = IndexProcessorFactory(dataset.doc_form).init_index_processor()
|
||||
index_processor.clean(dataset, None)
|
||||
.group_by(Document.dataset_id)
|
||||
.subquery()
|
||||
)
|
||||
|
||||
# update document
|
||||
update_params = {Document.enabled: False}
|
||||
|
||||
Document.query.filter_by(dataset_id=dataset.id).update(update_params)
|
||||
db.session.commit()
|
||||
click.echo(click.style("Cleaned unused dataset {} from db success!".format(dataset.id), fg="green"))
|
||||
except Exception as e:
|
||||
click.echo(
|
||||
click.style("clean dataset index error: {} {}".format(e.__class__.__name__, str(e)), fg="red")
|
||||
# Subquery for counting old documents
|
||||
document_subquery_old = (
|
||||
db.session.query(Document.dataset_id, func.count(Document.id).label("document_count"))
|
||||
.where(
|
||||
Document.indexing_status == "completed",
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
Document.updated_at < clean_day,
|
||||
)
|
||||
while True:
|
||||
try:
|
||||
# Subquery for counting new documents
|
||||
document_subquery_new = (
|
||||
db.session.query(Document.dataset_id, func.count(Document.id).label("document_count"))
|
||||
.filter(
|
||||
Document.indexing_status == "completed",
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
Document.updated_at > plan_pro_clean_day,
|
||||
.group_by(Document.dataset_id)
|
||||
.subquery()
|
||||
)
|
||||
.group_by(Document.dataset_id)
|
||||
.subquery()
|
||||
)
|
||||
|
||||
# Subquery for counting old documents
|
||||
document_subquery_old = (
|
||||
db.session.query(Document.dataset_id, func.count(Document.id).label("document_count"))
|
||||
.filter(
|
||||
Document.indexing_status == "completed",
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
Document.updated_at < plan_pro_clean_day,
|
||||
)
|
||||
.group_by(Document.dataset_id)
|
||||
.subquery()
|
||||
)
|
||||
|
||||
# Main query with join and filter
|
||||
datasets = (
|
||||
Dataset.query.outerjoin(document_subquery_new, Dataset.id == document_subquery_new.c.dataset_id)
|
||||
.outerjoin(document_subquery_old, Dataset.id == document_subquery_old.c.dataset_id)
|
||||
.filter(
|
||||
Dataset.created_at < plan_pro_clean_day,
|
||||
func.coalesce(document_subquery_new.c.document_count, 0) == 0,
|
||||
func.coalesce(document_subquery_old.c.document_count, 0) > 0,
|
||||
)
|
||||
.order_by(Dataset.created_at.desc())
|
||||
.paginate(page=1, per_page=50)
|
||||
)
|
||||
|
||||
except NotFound:
|
||||
break
|
||||
if datasets.items is None or len(datasets.items) == 0:
|
||||
break
|
||||
for dataset in datasets:
|
||||
dataset_query = (
|
||||
db.session.query(DatasetQuery)
|
||||
.filter(DatasetQuery.created_at > plan_pro_clean_day, DatasetQuery.dataset_id == dataset.id)
|
||||
.all()
|
||||
)
|
||||
if not dataset_query or len(dataset_query) == 0:
|
||||
try:
|
||||
features_cache_key = f"features:{dataset.tenant_id}"
|
||||
plan_cache = redis_client.get(features_cache_key)
|
||||
if plan_cache is None:
|
||||
features = FeatureService.get_features(dataset.tenant_id)
|
||||
redis_client.setex(features_cache_key, 600, features.billing.subscription.plan)
|
||||
plan = features.billing.subscription.plan
|
||||
else:
|
||||
plan = plan_cache.decode()
|
||||
if plan == "sandbox":
|
||||
# remove index
|
||||
index_processor = IndexProcessorFactory(dataset.doc_form).init_index_processor()
|
||||
index_processor.clean(dataset, None)
|
||||
|
||||
# update document
|
||||
update_params = {Document.enabled: False}
|
||||
|
||||
Document.query.filter_by(dataset_id=dataset.id).update(update_params)
|
||||
db.session.commit()
|
||||
click.echo(
|
||||
click.style("Cleaned unused dataset {} from db success!".format(dataset.id), fg="green")
|
||||
)
|
||||
except Exception as e:
|
||||
click.echo(
|
||||
click.style("clean dataset index error: {} {}".format(e.__class__.__name__, str(e)), fg="red")
|
||||
# Main query with join and filter
|
||||
stmt = (
|
||||
select(Dataset)
|
||||
.outerjoin(document_subquery_new, Dataset.id == document_subquery_new.c.dataset_id)
|
||||
.outerjoin(document_subquery_old, Dataset.id == document_subquery_old.c.dataset_id)
|
||||
.where(
|
||||
Dataset.created_at < clean_day,
|
||||
func.coalesce(document_subquery_new.c.document_count, 0) == 0,
|
||||
func.coalesce(document_subquery_old.c.document_count, 0) > 0,
|
||||
)
|
||||
.order_by(Dataset.created_at.desc())
|
||||
)
|
||||
|
||||
datasets = db.paginate(stmt, page=page, per_page=50, error_out=False)
|
||||
|
||||
except SQLAlchemyError:
|
||||
raise
|
||||
|
||||
if datasets is None or datasets.items is None or len(datasets.items) == 0:
|
||||
break
|
||||
|
||||
for dataset in datasets:
|
||||
dataset_query = (
|
||||
db.session.query(DatasetQuery)
|
||||
.where(DatasetQuery.created_at > clean_day, DatasetQuery.dataset_id == dataset.id)
|
||||
.all()
|
||||
)
|
||||
|
||||
if not dataset_query or len(dataset_query) == 0:
|
||||
try:
|
||||
should_clean = True
|
||||
|
||||
# Check plan filter if specified
|
||||
if plan_filter:
|
||||
features_cache_key = f"features:{dataset.tenant_id}"
|
||||
plan_cache = redis_client.get(features_cache_key)
|
||||
if plan_cache is None:
|
||||
features = FeatureService.get_features(dataset.tenant_id)
|
||||
redis_client.setex(features_cache_key, 600, features.billing.subscription.plan)
|
||||
plan = features.billing.subscription.plan
|
||||
else:
|
||||
plan = plan_cache.decode()
|
||||
should_clean = plan == plan_filter
|
||||
|
||||
if should_clean:
|
||||
# Add auto disable log if required
|
||||
if add_logs:
|
||||
documents = (
|
||||
db.session.query(Document)
|
||||
.where(
|
||||
Document.dataset_id == dataset.id,
|
||||
Document.enabled == True,
|
||||
Document.archived == False,
|
||||
)
|
||||
.all()
|
||||
)
|
||||
for document in documents:
|
||||
dataset_auto_disable_log = DatasetAutoDisableLog(
|
||||
tenant_id=dataset.tenant_id,
|
||||
dataset_id=dataset.id,
|
||||
document_id=document.id,
|
||||
)
|
||||
db.session.add(dataset_auto_disable_log)
|
||||
|
||||
# Remove index
|
||||
index_processor = IndexProcessorFactory(dataset.doc_form).init_index_processor()
|
||||
index_processor.clean(dataset, None)
|
||||
|
||||
# Update document
|
||||
db.session.query(Document).filter_by(dataset_id=dataset.id).update(
|
||||
{Document.enabled: False}
|
||||
)
|
||||
db.session.commit()
|
||||
click.echo(click.style(f"Cleaned unused dataset {dataset.id} from db success!", fg="green"))
|
||||
except Exception as e:
|
||||
click.echo(click.style(f"clean dataset index error: {e.__class__.__name__} {str(e)}", fg="red"))
|
||||
|
||||
page += 1
|
||||
|
||||
end_at = time.perf_counter()
|
||||
click.echo(click.style("Cleaned unused dataset from db success latency: {}".format(end_at - start_at), fg="green"))
|
||||
click.echo(click.style(f"Cleaned unused dataset from db success latency: {end_at - start_at}", fg="green"))
|
||||
|
||||
@@ -0,0 +1,155 @@
|
||||
import datetime
|
||||
import logging
|
||||
import time
|
||||
|
||||
import click
|
||||
|
||||
import app
|
||||
from configs import dify_config
|
||||
from extensions.ext_database import db
|
||||
from models.model import (
|
||||
AppAnnotationHitHistory,
|
||||
Conversation,
|
||||
Message,
|
||||
MessageAgentThought,
|
||||
MessageAnnotation,
|
||||
MessageChain,
|
||||
MessageFeedback,
|
||||
MessageFile,
|
||||
)
|
||||
from models.workflow import ConversationVariable, WorkflowAppLog, WorkflowNodeExecutionModel, WorkflowRun
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
MAX_RETRIES = 3
|
||||
BATCH_SIZE = dify_config.WORKFLOW_LOG_CLEANUP_BATCH_SIZE
|
||||
|
||||
|
||||
@app.celery.task(queue="dataset")
|
||||
def clean_workflow_runlogs_precise():
|
||||
"""Clean expired workflow run logs with retry mechanism and complete message cascade"""
|
||||
|
||||
click.echo(click.style("Start clean workflow run logs (precise mode with complete cascade).", fg="green"))
|
||||
start_at = time.perf_counter()
|
||||
|
||||
retention_days = dify_config.WORKFLOW_LOG_RETENTION_DAYS
|
||||
cutoff_date = datetime.datetime.now() - datetime.timedelta(days=retention_days)
|
||||
|
||||
try:
|
||||
total_workflow_runs = db.session.query(WorkflowRun).where(WorkflowRun.created_at < cutoff_date).count()
|
||||
if total_workflow_runs == 0:
|
||||
logger.info("No expired workflow run logs found")
|
||||
return
|
||||
logger.info("Found %s expired workflow run logs to clean", total_workflow_runs)
|
||||
|
||||
total_deleted = 0
|
||||
failed_batches = 0
|
||||
batch_count = 0
|
||||
|
||||
while True:
|
||||
workflow_runs = (
|
||||
db.session.query(WorkflowRun.id).where(WorkflowRun.created_at < cutoff_date).limit(BATCH_SIZE).all()
|
||||
)
|
||||
|
||||
if not workflow_runs:
|
||||
break
|
||||
|
||||
workflow_run_ids = [run.id for run in workflow_runs]
|
||||
batch_count += 1
|
||||
|
||||
success = _delete_batch_with_retry(workflow_run_ids, failed_batches)
|
||||
|
||||
if success:
|
||||
total_deleted += len(workflow_run_ids)
|
||||
failed_batches = 0
|
||||
else:
|
||||
failed_batches += 1
|
||||
if failed_batches >= MAX_RETRIES:
|
||||
logger.error("Failed to delete batch after %s retries, aborting cleanup for today", MAX_RETRIES)
|
||||
break
|
||||
else:
|
||||
# Calculate incremental delay times: 5, 10, 15 minutes
|
||||
retry_delay_minutes = failed_batches * 5
|
||||
logger.warning("Batch deletion failed, retrying in %s minutes...", retry_delay_minutes)
|
||||
time.sleep(retry_delay_minutes * 60)
|
||||
continue
|
||||
|
||||
logger.info("Cleanup completed: %s expired workflow run logs deleted", total_deleted)
|
||||
|
||||
except Exception:
|
||||
db.session.rollback()
|
||||
logger.exception("Unexpected error in workflow log cleanup")
|
||||
raise
|
||||
|
||||
end_at = time.perf_counter()
|
||||
execution_time = end_at - start_at
|
||||
click.echo(click.style(f"Cleaned workflow run logs from db success latency: {execution_time:.2f}s", fg="green"))
|
||||
|
||||
|
||||
def _delete_batch_with_retry(workflow_run_ids: list[str], attempt_count: int) -> bool:
|
||||
"""Delete a single batch with a retry mechanism and complete cascading deletion"""
|
||||
try:
|
||||
with db.session.begin_nested():
|
||||
message_data = (
|
||||
db.session.query(Message.id, Message.conversation_id)
|
||||
.where(Message.workflow_run_id.in_(workflow_run_ids))
|
||||
.all()
|
||||
)
|
||||
message_id_list = [msg.id for msg in message_data]
|
||||
conversation_id_list = list({msg.conversation_id for msg in message_data if msg.conversation_id})
|
||||
if message_id_list:
|
||||
db.session.query(AppAnnotationHitHistory).where(
|
||||
AppAnnotationHitHistory.message_id.in_(message_id_list)
|
||||
).delete(synchronize_session=False)
|
||||
|
||||
db.session.query(MessageAgentThought).where(MessageAgentThought.message_id.in_(message_id_list)).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
|
||||
db.session.query(MessageChain).where(MessageChain.message_id.in_(message_id_list)).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
|
||||
db.session.query(MessageFile).where(MessageFile.message_id.in_(message_id_list)).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
|
||||
db.session.query(MessageAnnotation).where(MessageAnnotation.message_id.in_(message_id_list)).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
|
||||
db.session.query(MessageFeedback).where(MessageFeedback.message_id.in_(message_id_list)).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
|
||||
db.session.query(Message).where(Message.workflow_run_id.in_(workflow_run_ids)).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
|
||||
db.session.query(WorkflowAppLog).where(WorkflowAppLog.workflow_run_id.in_(workflow_run_ids)).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
|
||||
db.session.query(WorkflowNodeExecutionModel).where(
|
||||
WorkflowNodeExecutionModel.workflow_run_id.in_(workflow_run_ids)
|
||||
).delete(synchronize_session=False)
|
||||
|
||||
if conversation_id_list:
|
||||
db.session.query(ConversationVariable).where(
|
||||
ConversationVariable.conversation_id.in_(conversation_id_list)
|
||||
).delete(synchronize_session=False)
|
||||
|
||||
db.session.query(Conversation).where(Conversation.id.in_(conversation_id_list)).delete(
|
||||
synchronize_session=False
|
||||
)
|
||||
|
||||
db.session.query(WorkflowRun).where(WorkflowRun.id.in_(workflow_run_ids)).delete(synchronize_session=False)
|
||||
|
||||
db.session.commit()
|
||||
return True
|
||||
|
||||
except Exception:
|
||||
db.session.rollback()
|
||||
logger.exception("Batch deletion failed (attempt %s)", attempt_count + 1)
|
||||
return False
|
||||
@@ -19,7 +19,9 @@ def create_tidb_serverless_task():
|
||||
while True:
|
||||
try:
|
||||
# check the number of idle tidb serverless
|
||||
idle_tidb_serverless_number = TidbAuthBinding.query.filter(TidbAuthBinding.active == False).count()
|
||||
idle_tidb_serverless_number = (
|
||||
db.session.query(TidbAuthBinding).where(TidbAuthBinding.active == False).count()
|
||||
)
|
||||
if idle_tidb_serverless_number >= tidb_serverless_number:
|
||||
break
|
||||
# create tidb serverless
|
||||
@@ -31,7 +33,7 @@ def create_tidb_serverless_task():
|
||||
break
|
||||
|
||||
end_at = time.perf_counter()
|
||||
click.echo(click.style("Create tidb serverless task success latency: {}".format(end_at - start_at), fg="green"))
|
||||
click.echo(click.style(f"Create tidb serverless task success latency: {end_at - start_at}", fg="green"))
|
||||
|
||||
|
||||
def create_clusters(batch_size):
|
||||
|
||||
@@ -3,16 +3,18 @@ import time
|
||||
from collections import defaultdict
|
||||
|
||||
import click
|
||||
from flask import render_template # type: ignore
|
||||
|
||||
import app
|
||||
from configs import dify_config
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_mail import mail
|
||||
from libs.email_i18n import EmailType, get_email_i18n_service
|
||||
from models.account import Account, Tenant, TenantAccountJoin
|
||||
from models.dataset import Dataset, DatasetAutoDisableLog
|
||||
from services.feature_service import FeatureService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@app.celery.task(queue="dataset")
|
||||
def mail_clean_document_notify_task():
|
||||
@@ -24,12 +26,14 @@ def mail_clean_document_notify_task():
|
||||
if not mail.is_inited():
|
||||
return
|
||||
|
||||
logging.info(click.style("Start send document clean notify mail", fg="green"))
|
||||
logger.info(click.style("Start send document clean notify mail", fg="green"))
|
||||
start_at = time.perf_counter()
|
||||
|
||||
# send document clean notify mail
|
||||
try:
|
||||
dataset_auto_disable_logs = DatasetAutoDisableLog.query.filter(DatasetAutoDisableLog.notified == False).all()
|
||||
dataset_auto_disable_logs = (
|
||||
db.session.query(DatasetAutoDisableLog).where(DatasetAutoDisableLog.notified == False).all()
|
||||
)
|
||||
# group by tenant_id
|
||||
dataset_auto_disable_logs_map: dict[str, list[DatasetAutoDisableLog]] = defaultdict(list)
|
||||
for dataset_auto_disable_log in dataset_auto_disable_logs:
|
||||
@@ -43,14 +47,16 @@ def mail_clean_document_notify_task():
|
||||
if plan != "sandbox":
|
||||
knowledge_details = []
|
||||
# check tenant
|
||||
tenant = Tenant.query.filter(Tenant.id == tenant_id).first()
|
||||
tenant = db.session.query(Tenant).where(Tenant.id == tenant_id).first()
|
||||
if not tenant:
|
||||
continue
|
||||
# check current owner
|
||||
current_owner_join = TenantAccountJoin.query.filter_by(tenant_id=tenant.id, role="owner").first()
|
||||
current_owner_join = (
|
||||
db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, role="owner").first()
|
||||
)
|
||||
if not current_owner_join:
|
||||
continue
|
||||
account = Account.query.filter(Account.id == current_owner_join.account_id).first()
|
||||
account = db.session.query(Account).where(Account.id == current_owner_join.account_id).first()
|
||||
if not account:
|
||||
continue
|
||||
|
||||
@@ -63,19 +69,21 @@ def mail_clean_document_notify_task():
|
||||
)
|
||||
|
||||
for dataset_id, document_ids in dataset_auto_dataset_map.items():
|
||||
dataset = Dataset.query.filter(Dataset.id == dataset_id).first()
|
||||
dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first()
|
||||
if dataset:
|
||||
document_count = len(document_ids)
|
||||
knowledge_details.append(rf"Knowledge base {dataset.name}: {document_count} documents")
|
||||
if knowledge_details:
|
||||
html_content = render_template(
|
||||
"clean_document_job_mail_template-US.html",
|
||||
userName=account.email,
|
||||
knowledge_details=knowledge_details,
|
||||
url=url,
|
||||
)
|
||||
mail.send(
|
||||
to=account.email, subject="Dify Knowledge base auto disable notification", html=html_content
|
||||
email_service = get_email_i18n_service()
|
||||
email_service.send_email(
|
||||
email_type=EmailType.DOCUMENT_CLEAN_NOTIFY,
|
||||
language_code="en-US",
|
||||
to=account.email,
|
||||
template_context={
|
||||
"userName": account.email,
|
||||
"knowledge_details": knowledge_details,
|
||||
"url": url,
|
||||
},
|
||||
)
|
||||
|
||||
# update notified to True
|
||||
@@ -83,8 +91,6 @@ def mail_clean_document_notify_task():
|
||||
dataset_auto_disable_log.notified = True
|
||||
db.session.commit()
|
||||
end_at = time.perf_counter()
|
||||
logging.info(
|
||||
click.style("Send document clean notify mail succeeded: latency: {}".format(end_at - start_at), fg="green")
|
||||
)
|
||||
logger.info(click.style(f"Send document clean notify mail succeeded: latency: {end_at - start_at}", fg="green"))
|
||||
except Exception:
|
||||
logging.exception("Send document clean notify mail failed")
|
||||
logger.exception("Send document clean notify mail failed")
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
import click
|
||||
from kombu.utils.url import parse_url # type: ignore
|
||||
from redis import Redis
|
||||
|
||||
import app
|
||||
from configs import dify_config
|
||||
from extensions.ext_database import db
|
||||
from libs.email_i18n import EmailType, get_email_i18n_service
|
||||
|
||||
redis_config = parse_url(dify_config.CELERY_BROKER_URL)
|
||||
celery_redis = Redis(
|
||||
host=redis_config.get("hostname") or "localhost",
|
||||
port=redis_config.get("port") or 6379,
|
||||
password=redis_config.get("password") or None,
|
||||
db=int(redis_config.get("virtual_host")) if redis_config.get("virtual_host") else 1,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@app.celery.task(queue="monitor")
|
||||
def queue_monitor_task():
|
||||
queue_name = "dataset"
|
||||
threshold = dify_config.QUEUE_MONITOR_THRESHOLD
|
||||
|
||||
if threshold is None:
|
||||
logger.warning(click.style("QUEUE_MONITOR_THRESHOLD is not configured, skipping monitoring", fg="yellow"))
|
||||
return
|
||||
|
||||
try:
|
||||
queue_length = celery_redis.llen(f"{queue_name}")
|
||||
logger.info(click.style(f"Start monitor {queue_name}", fg="green"))
|
||||
|
||||
if queue_length is None:
|
||||
logger.error(
|
||||
click.style(f"Failed to get queue length for {queue_name} - Redis may be unavailable", fg="red")
|
||||
)
|
||||
return
|
||||
|
||||
logger.info(click.style(f"Queue length: {queue_length}", fg="green"))
|
||||
|
||||
if queue_length >= threshold:
|
||||
warning_msg = f"Queue {queue_name} task count exceeded the limit.: {queue_length}/{threshold}"
|
||||
logging.warning(click.style(warning_msg, fg="red"))
|
||||
alert_emails = dify_config.QUEUE_MONITOR_ALERT_EMAILS
|
||||
if alert_emails:
|
||||
to_list = alert_emails.split(",")
|
||||
email_service = get_email_i18n_service()
|
||||
for to in to_list:
|
||||
try:
|
||||
current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
email_service.send_email(
|
||||
email_type=EmailType.QUEUE_MONITOR_ALERT,
|
||||
language_code="en-US",
|
||||
to=to,
|
||||
template_context={
|
||||
"queue_name": queue_name,
|
||||
"queue_length": queue_length,
|
||||
"threshold": threshold,
|
||||
"alert_time": current_time,
|
||||
},
|
||||
)
|
||||
except Exception:
|
||||
logger.exception(click.style("Exception occurred during sending email", fg="red"))
|
||||
|
||||
except Exception:
|
||||
logger.exception(click.style("Exception occurred during queue monitoring", fg="red"))
|
||||
finally:
|
||||
if db.session.is_active:
|
||||
db.session.close()
|
||||
@@ -5,6 +5,7 @@ import click
|
||||
import app
|
||||
from configs import dify_config
|
||||
from core.rag.datasource.vdb.tidb_on_qdrant.tidb_service import TidbService
|
||||
from extensions.ext_database import db
|
||||
from models.dataset import TidbAuthBinding
|
||||
|
||||
|
||||
@@ -14,9 +15,11 @@ def update_tidb_serverless_status_task():
|
||||
start_at = time.perf_counter()
|
||||
try:
|
||||
# check the number of idle tidb serverless
|
||||
tidb_serverless_list = TidbAuthBinding.query.filter(
|
||||
TidbAuthBinding.active == False, TidbAuthBinding.status == "CREATING"
|
||||
).all()
|
||||
tidb_serverless_list = (
|
||||
db.session.query(TidbAuthBinding)
|
||||
.where(TidbAuthBinding.active == False, TidbAuthBinding.status == "CREATING")
|
||||
.all()
|
||||
)
|
||||
if len(tidb_serverless_list) == 0:
|
||||
return
|
||||
# update tidb serverless status
|
||||
@@ -26,9 +29,7 @@ def update_tidb_serverless_status_task():
|
||||
click.echo(click.style(f"Error: {e}", fg="red"))
|
||||
|
||||
end_at = time.perf_counter()
|
||||
click.echo(
|
||||
click.style("Update tidb serverless status task success latency: {}".format(end_at - start_at), fg="green")
|
||||
)
|
||||
click.echo(click.style(f"Update tidb serverless status task success latency: {end_at - start_at}", fg="green"))
|
||||
|
||||
|
||||
def update_clusters(tidb_serverless_list: list[TidbAuthBinding]):
|
||||
|
||||
Reference in New Issue
Block a user