diff --git a/backend/infra/impl/oceanbase/oceanbase.go b/backend/infra/impl/oceanbase/oceanbase.go index ab3242ed..914a6015 100644 --- a/backend/infra/impl/oceanbase/oceanbase.go +++ b/backend/infra/impl/oceanbase/oceanbase.go @@ -64,15 +64,16 @@ func (c *OceanBaseClient) BatchInsertVectors(ctx context.Context, collectionName } func (c *OceanBaseClient) DeleteVector(ctx context.Context, collectionName string, vectorID string) error { - return c.official.GetDB().WithContext(ctx).Exec("DELETE FROM "+collectionName+" WHERE vector_id = ?", vectorID).Error + return c.official.GetDB().WithContext(ctx).Table(collectionName).Where("vector_id = ?", vectorID).Delete(nil).Error } func (c *OceanBaseClient) InitDatabase(ctx context.Context) error { - return c.official.GetDB().WithContext(ctx).Exec("SELECT 1").Error + var result int + return c.official.GetDB().WithContext(ctx).Raw("SELECT 1").Scan(&result).Error } func (c *OceanBaseClient) DropCollection(ctx context.Context, collectionName string) error { - return c.official.GetDB().WithContext(ctx).Exec("DROP TABLE IF EXISTS " + collectionName).Error + return c.official.GetDB().WithContext(ctx).Migrator().DropTable(collectionName) } type SearchStrategy interface { diff --git a/backend/infra/impl/oceanbase/oceanbase_official.go b/backend/infra/impl/oceanbase/oceanbase_official.go index 03a94079..d5477d7e 100644 --- a/backend/infra/impl/oceanbase/oceanbase_official.go +++ b/backend/infra/impl/oceanbase/oceanbase_official.go @@ -43,6 +43,15 @@ type VectorResult struct { CreatedAt time.Time `json:"created_at"` } +type VectorRecord struct { + VectorID string `gorm:"column:vector_id;primaryKey"` + Content string `gorm:"column:content;type:text;not null"` + Metadata string `gorm:"column:metadata;type:json"` + Embedding string `gorm:"column:embedding;type:vector;not null"` + CreatedAt time.Time `gorm:"column:created_at;type:timestamp;default:CURRENT_TIMESTAMP"` + UpdatedAt time.Time 
`gorm:"column:updated_at;type:timestamp;default:CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP"` +} + type CollectionInfo struct { Name string `json:"name"` Dimension int `json:"dimension"` @@ -83,21 +92,23 @@ func (c *OceanBaseOfficialClient) setVectorParameters() error { } func (c *OceanBaseOfficialClient) CreateCollection(ctx context.Context, collectionName string, dimension int) error { - createTableSQL := fmt.Sprintf(` - CREATE TABLE IF NOT EXISTS %s ( - vector_id VARCHAR(255) PRIMARY KEY, - content TEXT NOT NULL, - metadata JSON, - embedding VECTOR(%d) NOT NULL, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, - INDEX idx_created_at (created_at), - INDEX idx_content (content(100)) - ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci - `, collectionName, dimension) - - if err := c.db.WithContext(ctx).Exec(createTableSQL).Error; err != nil { - return fmt.Errorf("failed to create table: %v", err) + if !c.db.WithContext(ctx).Migrator().HasTable(collectionName) { + createTableSQL := fmt.Sprintf(` + CREATE TABLE IF NOT EXISTS %s ( + vector_id VARCHAR(255) PRIMARY KEY, + content TEXT NOT NULL, + metadata JSON, + embedding VECTOR(%d) NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + INDEX idx_created_at (created_at), + INDEX idx_content (content(100)) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci + `, collectionName, dimension) + + if err := c.db.WithContext(ctx).Exec(createTableSQL).Error; err != nil { + return fmt.Errorf("failed to create table: %v", err) + } } createIndexSQL := fmt.Sprintf(` @@ -136,30 +147,19 @@ func (c *OceanBaseOfficialClient) InsertVectors(ctx context.Context, collectionN } func (c *OceanBaseOfficialClient) insertBatch(ctx context.Context, collectionName string, batch []VectorResult) error { - placeholders := make([]string, len(batch)) - 
values := make([]interface{}, 0, len(batch)*5) - - for j, vector := range batch { - placeholders[j] = "(?, ?, ?, ?, NOW())" - values = append(values, - vector.VectorID, - vector.Content, - vector.Metadata, - c.vectorToString(vector.Embedding), - ) + records := make([]VectorRecord, len(batch)) + for i, vector := range batch { + records[i] = VectorRecord{ + VectorID: vector.VectorID, + Content: vector.Content, + Metadata: vector.Metadata, + Embedding: c.vectorToString(vector.Embedding), + CreatedAt: time.Now(), + UpdatedAt: time.Now(), + } } - sql := fmt.Sprintf(` - INSERT INTO %s (vector_id, content, metadata, embedding, created_at) - VALUES %s - ON DUPLICATE KEY UPDATE - content = VALUES(content), - metadata = VALUES(metadata), - embedding = VALUES(embedding), - updated_at = NOW() - `, collectionName, strings.Join(placeholders, ",")) - - return c.db.WithContext(ctx).Exec(sql, values...).Error + return c.db.WithContext(ctx).Table(collectionName).Save(&records).Error } func (c *OceanBaseOfficialClient) SearchVectors( @@ -341,24 +341,28 @@ func (c *OceanBaseOfficialClient) DebugCollectionData(ctx context.Context, colle log.Printf("[Debug] Collection '%s' exists with %d vectors", collectionName, count) log.Printf("[Debug] Sample data from collection '%s':", collectionName) - rows, err := c.db.WithContext(ctx).Raw(` - SELECT vector_id, content, created_at - FROM ` + collectionName + ` - ORDER BY created_at DESC - LIMIT 5 - `).Rows() + var samples []struct { + VectorID string `gorm:"column:vector_id"` + Content string `gorm:"column:content"` + CreatedAt time.Time `gorm:"column:created_at"` + } + + err := c.db.WithContext(ctx).Table(collectionName). + Select("vector_id, content, created_at"). + Order("created_at DESC"). + Limit(5). 
+ Find(&samples).Error + if err != nil { log.Printf("[Debug] Failed to get sample data: %v", err) } else { - defer rows.Close() - for rows.Next() { - var vectorID, content string - var createdAt time.Time - if err := rows.Scan(&vectorID, &content, &createdAt); err != nil { - log.Printf("[Debug] Failed to scan sample row: %v", err) - continue + for _, sample := range samples { + contentPreview := sample.Content + if len(contentPreview) > 50 { + contentPreview = contentPreview[:50] } - log.Printf("[Debug] Sample: ID=%s, Content=%s, Created=%s", vectorID, content[:min(50, len(content))], createdAt) + log.Printf("[Debug] Sample: ID=%s, Content=%s, Created=%s", + sample.VectorID, contentPreview, sample.CreatedAt) } } diff --git a/docker/docker-compose-oceanbase.yml b/docker/docker-compose-oceanbase.yml index a23c3e7a..d5238bb0 100755 --- a/docker/docker-compose-oceanbase.yml +++ b/docker/docker-compose-oceanbase.yml @@ -252,6 +252,7 @@ services: OB_DATAFILE_SIZE: 1G OB_SYS_PASSWORD: ${OCEANBASE_PASSWORD:-coze123} OB_TENANT_PASSWORD: ${OCEANBASE_PASSWORD:-coze123} + OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-cozeAi} ports: - '2881:2881' volumes: diff --git a/docker/docker-compose-oceanbase_debug.yml b/docker/docker-compose-oceanbase_debug.yml index 98eb459f..4ecff68e 100644 --- a/docker/docker-compose-oceanbase_debug.yml +++ b/docker/docker-compose-oceanbase_debug.yml @@ -345,6 +345,7 @@ services: OB_DATAFILE_SIZE: 1G OB_SYS_PASSWORD: ${OCEANBASE_PASSWORD:-coze123} OB_TENANT_PASSWORD: ${OCEANBASE_PASSWORD:-coze123} + OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-cozeAi} profiles: ['middleware'] env_file: *env_file ports: diff --git a/docs/oceanbase-integration-guide-en.md b/docs/oceanbase-integration-guide-en.md index bcd4cb65..456d32d4 100644 --- a/docs/oceanbase-integration-guide-en.md +++ b/docs/oceanbase-integration-guide-en.md @@ -225,6 +225,266 @@ docker logs coze-oceanbase | grep "slow query" mysql -h localhost -P 2881 -u root -p -e "SHOW PROCESSLIST;" ``` +## Helm 
Deployment Guide (Kubernetes) + +### 1. Environment Preparation + +Ensure the following tools are installed: + +- Kubernetes cluster (recommended: k3s or kind) +- Helm 3.x +- kubectl + +### 2. Install Dependencies + +#### Install cert-manager + +```bash +# Add cert-manager Helm repository +helm repo add jetstack https://charts.jetstack.io +helm repo update + +# Install cert-manager +kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.16.2/cert-manager.yaml + +# Wait for cert-manager to be ready +kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=cert-manager -n cert-manager --timeout=300s +``` + +#### Install ob-operator + +```bash +# Add ob-operator Helm repository +helm repo add ob-operator https://oceanbase.github.io/ob-operator/ +helm repo update + +# Install ob-operator +helm install ob-operator ob-operator/ob-operator --set reporter=cozeAi --namespace=oceanbase-system --create-namespace + +# Wait for ob-operator to be ready +kubectl wait --for=condition=ready pod -l control-plane=controller-manager -n oceanbase-system --timeout=300s +``` + +### 3. 
Deploy OceanBase + +#### Using the Integrated Helm Chart + +```bash +# Deploy the complete Coze Studio application (including OceanBase) +helm install coze-studio helm/charts/opencoze \ + --set oceanbase.enabled=true \ + --namespace coze-studio \ + --create-namespace + +# Or deploy only the OceanBase component +helm install oceanbase-only helm/charts/opencoze \ + --set oceanbase.enabled=true \ + --set mysql.enabled=false \ + --set redis.enabled=false \ + --set minio.enabled=false \ + --set elasticsearch.enabled=false \ + --set milvus.enabled=false \ + --set rocketmq.enabled=false \ + --namespace oceanbase \ + --create-namespace +``` + +#### Custom Configuration + +Create an `oceanbase-values.yaml` file: + +```yaml +oceanbase: + enabled: true + port: 2881 + targetPort: 2881 + clusterName: 'cozeAi' + clusterId: 1 + image: + repository: oceanbase/oceanbase-ce + tag: 'latest' + obAgentVersion: '4.2.2-100000042024011120' + monitorEnabled: true + storageClass: '' + observerConfig: + resource: + cpu: 2 + memory: 8Gi + storages: + dataStorage: 10G + redoLogStorage: 5G + logStorage: 5G + monitorResource: + cpu: 100m + memory: 256Mi + generateUserSecrets: true + userSecrets: + root: 'coze123' + monitor: 'coze123' + operator: 'coze123' + proxyro: 'coze123' + topology: + - zone: zone1 + replica: 1 + parameters: + - name: system_memory + value: '4G' + - name: '__min_full_resource_pool_memory' + value: '4294967296' + annotations: {} + backupVolumeEnabled: false +``` + +Deploy with the custom configuration: + +```bash +helm install oceanbase-custom helm/charts/opencoze \ + -f oceanbase-values.yaml \ + --namespace oceanbase \ + --create-namespace +``` + +### 4. Verify Deployment + +```bash +# Check OBCluster status +kubectl get obcluster -n oceanbase + +# Check OceanBase pods +kubectl get pods -n oceanbase + +# Check services +kubectl get svc -n oceanbase + +# View detailed status +kubectl describe obcluster -n oceanbase +``` + +### 5. 
Connection Testing + +#### Port Forwarding + +```bash +# Forward OceanBase port +kubectl port-forward svc/oceanbase-service -n oceanbase 2881:2881 +``` + +#### Connecting with obclient + +```bash +# Connect from within the cluster +kubectl exec -it deployment/oceanbase-obcluster-zone1 -n oceanbase -- obclient -h127.0.0.1 -P2881 -uroot@test -pcoze123 -Dtest + +# Connect from outside the cluster (requires port forwarding) +obclient -h127.0.0.1 -P2881 -uroot@test -pcoze123 -Dtest +``` + +#### Connecting with the MySQL Client + +```bash +# Connect using the MySQL client +mysql -h127.0.0.1 -P2881 -uroot@test -pcoze123 -Dtest +``` + +### 6. Monitoring and Management + +#### View Logs + +```bash +# View OceanBase logs +kubectl logs -f deployment/oceanbase-obcluster-zone1 -n oceanbase + +# View ob-operator logs +kubectl logs -f deployment/oceanbase-controller-manager -n oceanbase-system +``` + +#### Scaling + +```bash +# Scale replica count +kubectl patch obcluster oceanbase-obcluster -n oceanbase --type='merge' -p='{"spec":{"topology":[{"zone":"zone1","replica":2}]}}' + +# Adjust resource configuration +kubectl patch obcluster oceanbase-obcluster -n oceanbase --type='merge' -p='{"spec":{"observer":{"resource":{"cpu":4,"memory":"16Gi"}}}}' +``` + +#### Backup and Recovery + +```bash +# Create backup +kubectl apply -f - <