Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add baidu obs storage #9024

Merged
merged 1 commit into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion api/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ DB_DATABASE=dify

# Storage configuration
# use for store upload files, private keys...
# storage type: local, s3, azure-blob, google-storage, tencent-cos, huawei-obs, volcengine-tos
# storage type: local, s3, azure-blob, google-storage, tencent-cos, huawei-obs, volcengine-tos, baidu-obs
STORAGE_TYPE=local
STORAGE_LOCAL_PATH=storage
S3_USE_AWS_MANAGED_IAM=false
Expand Down Expand Up @@ -79,6 +79,12 @@ HUAWEI_OBS_SECRET_KEY=your-secret-key
HUAWEI_OBS_ACCESS_KEY=your-access-key
HUAWEI_OBS_SERVER=your-server-url

# Baidu OBS Storage Configuration
BAIDU_OBS_BUCKET_NAME=your-bucket-name
BAIDU_OBS_SECRET_KEY=your-secret-key
BAIDU_OBS_ACCESS_KEY=your-access-key
BAIDU_OBS_ENDPOINT=your-server-url

# OCI Storage configuration
OCI_ENDPOINT=your-endpoint
OCI_BUCKET_NAME=your-bucket-name
Expand Down
8 changes: 5 additions & 3 deletions api/configs/middleware/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from configs.middleware.storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
from configs.middleware.storage.amazon_s3_storage_config import S3StorageConfig
from configs.middleware.storage.azure_blob_storage_config import AzureBlobStorageConfig
from configs.middleware.storage.baidu_obs_storage_config import BaiduOBSStorageConfig
from configs.middleware.storage.google_cloud_storage_config import GoogleCloudStorageConfig
from configs.middleware.storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
from configs.middleware.storage.oci_storage_config import OCIStorageConfig
Expand Down Expand Up @@ -200,12 +201,13 @@ class MiddlewareConfig(
StorageConfig,
AliyunOSSStorageConfig,
AzureBlobStorageConfig,
BaiduOBSStorageConfig,
GoogleCloudStorageConfig,
TencentCloudCOSStorageConfig,
HuaweiCloudOBSStorageConfig,
VolcengineTOSStorageConfig,
S3StorageConfig,
OCIStorageConfig,
S3StorageConfig,
TencentCloudCOSStorageConfig,
VolcengineTOSStorageConfig,
# configs of vdb and vdb providers
VectorStoreConfig,
AnalyticdbConfig,
Expand Down
29 changes: 29 additions & 0 deletions api/configs/middleware/storage/baidu_obs_storage_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from typing import Optional

from pydantic import BaseModel, Field


class BaiduOBSStorageConfig(BaseModel):
"""
Configuration settings for Baidu Object Storage Service (OBS)
"""

BAIDU_OBS_BUCKET_NAME: Optional[str] = Field(
description="Name of the Baidu OBS bucket to store and retrieve objects (e.g., 'my-obs-bucket')",
default=None,
)

BAIDU_OBS_ACCESS_KEY: Optional[str] = Field(
description="Access Key ID for authenticating with Baidu OBS",
default=None,
)

BAIDU_OBS_SECRET_KEY: Optional[str] = Field(
description="Secret Access Key for authenticating with Baidu OBS",
default=None,
)

BAIDU_OBS_ENDPOINT: Optional[str] = Field(
description="URL of the Baidu OSS endpoint for your chosen region (e.g., 'https://.bj.bcebos.com')",
default=None,
)
3 changes: 3 additions & 0 deletions api/extensions/ext_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from extensions.storage.aliyun_storage import AliyunStorage
from extensions.storage.azure_storage import AzureStorage
from extensions.storage.baidu_storage import BaiduStorage
from extensions.storage.google_storage import GoogleStorage
from extensions.storage.huawei_storage import HuaweiStorage
from extensions.storage.local_storage import LocalStorage
Expand Down Expand Up @@ -35,6 +36,8 @@ def init_app(self, app: Flask):
self.storage_runner = OCIStorage(app=app)
elif storage_type == "huawei-obs":
self.storage_runner = HuaweiStorage(app=app)
elif storage_type == "baidu-obs":
self.storage_runner = BaiduStorage(app=app)
elif storage_type == "volcengine-tos":
self.storage_runner = VolcengineStorage(app=app)
else:
Expand Down
60 changes: 60 additions & 0 deletions api/extensions/storage/baidu_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import base64
import hashlib
from collections.abc import Generator

from baidubce.auth.bce_credentials import BceCredentials
from baidubce.bce_client_configuration import BceClientConfiguration
from baidubce.services.bos.bos_client import BosClient
from flask import Flask

from extensions.storage.base_storage import BaseStorage


class BaiduStorage(BaseStorage):
"""Implementation for baidu obs storage."""

def __init__(self, app: Flask):
super().__init__(app)
app_config = self.app.config
self.bucket_name = app_config.get("BAIDU_OBS_BUCKET_NAME")
client_config = BceClientConfiguration(
credentials=BceCredentials(
access_key_id=app_config.get("BAIDU_OBS_ACCESS_KEY"),
secret_access_key=app_config.get("BAIDU_OBS_SECRET_KEY"),
),
endpoint=app_config.get("BAIDU_OBS_ENDPOINT"),
)

self.client = BosClient(config=client_config)

def save(self, filename, data):
md5 = hashlib.md5()
md5.update(data)
content_md5 = base64.standard_b64encode(md5.digest())
self.client.put_object(
bucket_name=self.bucket_name, key=filename, data=data, content_length=len(data), content_md5=content_md5
)

def load_once(self, filename: str) -> bytes:
response = self.client.get_object(bucket_name=self.bucket_name, key=filename)
return response.data.read()

def load_stream(self, filename: str) -> Generator:
def generate(filename: str = filename) -> Generator:
response = self.client.get_object(bucket_name=self.bucket_name, key=filename).data
while chunk := response.read(4096):
yield chunk

return generate()

def download(self, filename, target_filepath):
self.client.get_object_to_file(bucket_name=self.bucket_name, key=filename, file_name=target_filepath)

def exists(self, filename):
res = self.client.get_object_meta_data(bucket_name=self.bucket_name, key=filename)
if res is None:
return False
return True

def delete(self, filename):
self.client.delete_object(bucket_name=self.bucket_name, key=filename)
Loading