diff --git a/audiochat/audioclient/audio_chat_client.go b/audiochat/audioclient/audio_chat_client.go index bf745c39c8edd5c0b6b5143aeb0085f57cb30d6e..bdbfe57ba971d078e799eca73638a5c50a616d94 100644 --- a/audiochat/audioclient/audio_chat_client.go +++ b/audiochat/audioclient/audio_chat_client.go @@ -488,7 +488,7 @@ func (a *AudioChatClient) speechContentStream(ctx context.Context, clientMessage // agent 模式 if a.mode == MODE_AGENT { // chunkChan agent mode下,接受外部模型的输出 - chunkChan := make(chan []byte, 100) + chunkChan := make(chan Text, 100) a.speechContentStreamAgent(ctx, clientMessage, s, chunkChan, sentenceChan) } else { // 使用ctx打断大模型输出 @@ -618,7 +618,7 @@ func (a *AudioChatClient) speechContentStream(ctx context.Context, clientMessage } -func (a *AudioChatClient) speechContentStreamAgent(ctx context.Context, clientMessage *ClientMessage, s string, chunkChan chan []byte, sentenceChan chan string) { +func (a *AudioChatClient) speechContentStreamAgent(ctx context.Context, clientMessage *ClientMessage, s string, chunkChan chan Text, sentenceChan chan string) { // 文本传到外部,外部调用大模型,通过通道传回数据 go func() { @@ -652,13 +652,23 @@ func (a *AudioChatClient) speechContentStreamAgent(ctx context.Context, clientMe return } - s1 := string(chunk) + fmt.Println("------------- evnent:", chunk.ContentEvent, " content:", chunk.Content) + + var s1 string + if chunk.ContentEvent == TextContentStart || chunk.ContentEvent == TextContentEnd || chunk.ContentEvent == TextContentProcessing { + s1 = chunk.Content + } + //fmt.Print(s1) smsg := NewServerMessage() smsg.Type = SERVER_SPEECH_TEXT - smsg.Text = Text{Content: s1} + smsg.Text = chunk a.sendData(&smsg) + if s1 == "" { + continue + } + // 开始转语音 // 拼接文本 buffer += s1 diff --git a/audiochat/audioclient/interface.go b/audiochat/audioclient/interface.go index bc26bfb23709e48ee74cf5c7fd08a8da466b4bd0..ee5d58f6ad244d464a88cec6b145ba3f0897710c 100644 --- a/audiochat/audioclient/interface.go +++ 
b/audiochat/audioclient/interface.go @@ -50,5 +50,5 @@ type EventHandler interface { ToolCall(message *ClientMessage, resp *schema.ContentResponse) (*ServerMessage, *schema.ChatMessage) // agent 模式下才会启用这个回调 - SpeechContentStartAgent(ctx context.Context, input string, chunk chan<- []byte) *ServerMessage + SpeechContentStartAgent(ctx context.Context, input string, chunk chan<- Text) *ServerMessage } diff --git a/audiochat/audioclient/options.go b/audiochat/audioclient/options.go index f7703f08fc90f7e0e5dd08a9a7c1369a437fca42..84816264f8dcd046915dd65065d000d9689729f2 100644 --- a/audiochat/audioclient/options.go +++ b/audiochat/audioclient/options.go @@ -69,6 +69,17 @@ const ( VAD_MODE_CLIENT = "client" // 服务端vad模式 VAD_MODE_SERVER = "server" + + // 文本消息类型 + TextSplitStart TextContentEvent = "split_start" + TextSplitProcessing TextContentEvent = "split_processing" + TextSplitEnd TextContentEvent = "split_end" + TextThinkStart TextContentEvent = "think_start" + TextThinkProcessing TextContentEvent = "think_processing" + TextThinkEnd TextContentEvent = "think_end" + TextContentStart TextContentEvent = "content_start" + TextContentProcessing TextContentEvent = "content_processing" + TextContentEnd TextContentEvent = "content_end" ) type ModeType string @@ -79,6 +90,8 @@ type ContextKey string var CtxKey ContextKey = "cancelKey" +type TextContentEvent string + type VadSetting struct { VadMode VadModeType Url string @@ -157,7 +170,8 @@ type ServerMessage struct { ErrorMsg ErrorMsg `json:"error_msg,omitempty"` } type Text struct { - Content string `json:"content"` + Content string `json:"content"` + ContentEvent TextContentEvent `json:"content_event"` } type Item struct { @@ -253,7 +267,7 @@ func (e *DefaultEventHandler) ToolCall(message *ClientMessage, resp *schema.Cont return nil, nil } -func (e *DefaultEventHandler) SpeechContentStartAgent(ctx context.Context, input string, chunk chan<- []byte) *ServerMessage { +func (e *DefaultEventHandler) 
SpeechContentStartAgent(ctx context.Context, input string, chunk chan<- Text) *ServerMessage { return nil } diff --git a/examples/audiochat_example/html/main.go b/examples/audiochat_example/html/main.go index 616c2b1ef82cdefce18a737b54bab30ec5cc3043..8972e5de4fd1a319893bd310c4d5a731ca4178ce 100644 --- a/examples/audiochat_example/html/main.go +++ b/examples/audiochat_example/html/main.go @@ -7,6 +7,7 @@ import ( "log" "net/http" "os" + "resp.kaopuai.com/lib/kpllms/doubao" "time" "github.com/gin-gonic/gin" @@ -114,7 +115,7 @@ func WsHandler(w http.ResponseWriter, r *http.Request) { client := audioclient.NewAudioChatClient(conn, llm, asr, tts, eventHandler, messages, maxMessagesSize, "zh_male_guozhoudege_moon_bigtts", 1.5) // 设置自己实现的日志 client.SetLog(myLog) - client.SetModeType(audioclient.MODE_DEFAULT) + client.SetModeType(audioclient.MODE_AGENT) // 初始化模型调用出参数 client.WithOptions([]kpllms.CallOption{ kpllms.WithTemperature(0.92), @@ -155,7 +156,10 @@ func WsHandler(w http.ResponseWriter, r *http.Request) { }, }), }) - + client.SetVadSetting(&audioclient.VadSetting{ + VadMode: audioclient.VAD_MODE_SERVER, + Url: "", + }) // 处理websocket请求,并回复消息, 使用eventhandler 与外部通讯,可扩展eventhandler client.Process(r) } @@ -231,14 +235,14 @@ func (e *MyEventHandler) ToolCall(message *audioclient.ClientMessage, resp *sche return nil, m } -func (e *MyEventHandler) SpeechContentStartAgent(ctx context.Context, input string, chunkChan chan<- []byte) *audioclient.ServerMessage { +func (e *MyEventHandler) SpeechContentStartAgent(ctx context.Context, input string, chunkChan chan<- audioclient.Text) *audioclient.ServerMessage { - clientId := os.Getenv("ERNIE_API_KEY") - clientSecret := os.Getenv("ERNIE_API_SECRET") - fmt.Println("clientId", clientId) - fmt.Println("clientSecret", clientSecret) + //clientId := os.Getenv("ERNIE_API_KEY") + //clientSecret := os.Getenv("ERNIE_API_SECRET") + //fmt.Println("clientId", clientId) + 
//fmt.Println("clientSecret", clientSecret) - llm, err := ernie.NewChat(ernie.WithClientId(clientId), ernie.WithClientSecret(clientSecret)) + llm, err := doubao.NewBot(doubao.WithModel("bot-20250224132529-pbvq2"), doubao.WithVolcSecretKey("TlRaa09EazBOak13TW1VeE5EWmxPV0UxWkdabU9EQTJPVEJoTlRFMVlXWQ=="), doubao.WithVolcAccessKey("AKLTYTk5MjZiZjQzZDljNGM2Nzk2M2Q2ZGFjMGYzMmE1NTU")) if err != nil { fmt.Println(err) return nil @@ -249,14 +253,53 @@ func (e *MyEventHandler) SpeechContentStartAgent(ctx context.Context, input stri Content: input, }, } - _, err = llm.Chat(ctx, messages, kpllms.WithStreamingFunc(func(ctx context.Context, chunk []byte, innerErr error) error { - chunkChan <- chunk + reasonStart := false + reasonEnd := false + contentStart := false + _, err = llm.Chat(ctx, messages, kpllms.WithReasonStreamingFunc(func(ctx context.Context, content kpllms.ReasonContent, innerErr error) error { + if content.Type == kpllms.ContentTypeReason { + if reasonStart == false { + reasonStart = true + chunkChan <- audioclient.Text{ + Content: "", + ContentEvent: audioclient.TextThinkStart, + } + } + chunkChan <- audioclient.Text{ + Content: content.Content, + ContentEvent: audioclient.TextThinkProcessing, + } + } else { + if reasonEnd == false { + chunkChan <- audioclient.Text{ + Content: "", + ContentEvent: audioclient.TextThinkEnd, + } + reasonEnd = true + } + if contentStart == false { + contentStart = true + chunkChan <- audioclient.Text{ + Content: "", + ContentEvent: audioclient.TextContentStart, + } + } + chunkChan <- audioclient.Text{ + Content: content.Content, + ContentEvent: audioclient.TextContentProcessing, + } + } return nil })) if err != nil { } + chunkChan <- audioclient.Text{ + Content: "", + ContentEvent: audioclient.TextContentEnd, + } + return nil } diff --git a/examples/audiochat_example/static/index.html b/examples/audiochat_example/static/index.html index 2f87a3003fc8ab8d298ddd8694ac49dee973f18f..bc6bea76a58a9f9470012865213d41980439f4ee 100644 --- 
a/examples/audiochat_example/static/index.html +++ b/examples/audiochat_example/static/index.html @@ -21,7 +21,7 @@ <div class="div_class_recordControl"> asr服务器地址(必填): <br> - <input id="wssip" type="text" onchange="addresschange()" style=" width: 100%;height:100%" value="wss://192.168.7.155:8080/yqtest/ws"/> + <input id="wssip" type="text" onchange="addresschange()" style=" width: 100%;height:100%" value="wss://127.0.0.1:8080/yqtest/ws"/> <br> <a id="wsslink" href="#" onclick="window.open('https://192.168.7.155:8080/yqtest/ws', '_blank')"><div id="info_wslink">点此处手工授权wss://192.168.7.155:8080/yqtest/ws</div></a> <br> diff --git a/examples/audiochat_example/static/main.go b/examples/audiochat_example/static/main.go index 004b26dcae9058071d973a3680233e505939acfe..28eac6d03ac25ca35091552f198bfc2e29e0e619 100644 --- a/examples/audiochat_example/static/main.go +++ b/examples/audiochat_example/static/main.go @@ -7,6 +7,7 @@ import ( "log" "net/http" "os" + "resp.kaopuai.com/lib/kpllms/doubao" "time" "github.com/gin-gonic/gin" @@ -114,14 +115,18 @@ func WsHandler(w http.ResponseWriter, r *http.Request) { clientSecret := os.Getenv("ERNIE_API_SECRET") fmt.Println("clientId", clientId) fmt.Println("clientSecret", clientSecret) - + clientId = "123" + clientSecret = "123" llm, err := ernie.NewChat(ernie.WithClientId(clientId), ernie.WithClientSecret(clientSecret)) if err != nil { fmt.Println(err) return } - + asrAppid = "2608469185" + asrToken = "8paEn2-c0kEpG8m_sMrFhavA8FqaIqmY" + ttsAppid = "2608469185" + ttsToken = "8paEn2-c0kEpG8m_sMrFhavA8FqaIqmY" // 语音转文本 asr := volc.NewVolcAsr(asrAppid, asrToken, asrClusterId) @@ -149,7 +154,7 @@ func WsHandler(w http.ResponseWriter, r *http.Request) { client := audioclient.NewAudioChatClient(conn, llm, asr, tts, eventHandler, messages, maxMessagesSize, "zh_male_guozhoudege_moon_bigtts", 1.5) // 设置自己实现的日志 client.SetLog(myLog) - client.SetModeType(audioclient.MODE_DEFAULT) + 
client.SetModeType(audioclient.MODE_AGENT) client.SetVadSetting(&audioclient.VadSetting{ VadMode: audioclient.VAD_MODE_SERVER, Url: "wss://192.168.5.96:10096", @@ -270,14 +275,9 @@ func (e *MyEventHandler) ToolCall(message *audioclient.ClientMessage, resp *sche return nil, m } -func (e *MyEventHandler) SpeechContentStartAgent(ctx context.Context, input string, chunkChan chan<- []byte) *audioclient.ServerMessage { +func (e *MyEventHandler) SpeechContentStartAgent(ctx context.Context, input string, chunkChan chan<- audioclient.Text) *audioclient.ServerMessage { - clientId := os.Getenv("ERNIE_API_KEY") - clientSecret := os.Getenv("ERNIE_API_SECRET") - fmt.Println("clientId", clientId) - fmt.Println("clientSecret", clientSecret) - - llm, err := ernie.NewChat(ernie.WithClientId(clientId), ernie.WithClientSecret(clientSecret)) + llm, err := doubao.NewBot(doubao.WithModel("bot-20250224132529-pbvq2"), doubao.WithVolcSecretKey("TlRaa09EazBOak13TW1VeE5EWmxPV0UxWkdabU9EQTJPVEJoTlRFMVlXWQ=="), doubao.WithVolcAccessKey("AKLTYTk5MjZiZjQzZDljNGM2Nzk2M2Q2ZGFjMGYzMmE1NTU")) if err != nil { fmt.Println(err) return nil @@ -288,12 +288,53 @@ func (e *MyEventHandler) SpeechContentStartAgent(ctx context.Context, input stri Content: input, }, } - _, err = llm.Chat(ctx, messages, kpllms.WithStreamingFunc(func(ctx context.Context, chunk []byte, innerErr error) error { - chunkChan <- chunk + reasonStart := false + reasonEnd := false + contentStart := false + fmt.Println("开始请求。。。。。。。") + _, err = llm.Chat(ctx, messages, kpllms.WithReasonStreamingFunc(func(ctx context.Context, content kpllms.ReasonContent, innerErr error) error { + if content.Type == kpllms.ContentTypeReason { + if reasonStart == false { + reasonStart = true + chunkChan <- audioclient.Text{ + Content: "", + ContentEvent: audioclient.TextThinkStart, + } + } + chunkChan <- audioclient.Text{ + Content: content.Content, + ContentEvent: audioclient.TextThinkProcessing, + } + } else { + if reasonEnd == false { + chunkChan <- 
audioclient.Text{ + Content: "", + ContentEvent: audioclient.TextThinkEnd, + } + reasonEnd = true + } + if contentStart == false { + contentStart = true + chunkChan <- audioclient.Text{ + Content: "", + ContentEvent: audioclient.TextContentStart, + } + } + chunkChan <- audioclient.Text{ + Content: content.Content, + ContentEvent: audioclient.TextContentProcessing, + } + } return nil })) if err != nil { + fmt.Println("请求大模型报错:", err.Error()) + return nil + } + chunkChan <- audioclient.Text{ + Content: "", + ContentEvent: audioclient.TextContentEnd, } return nil diff --git a/examples/doubao_example/doubao_test.go b/examples/doubao_example/doubao_test.go index b512f4c5599c500175c4223e0eb8b606fe57b3e2..63291f8cfcc84f080fe140917d989ade7bc23c44 100644 --- a/examples/doubao_example/doubao_test.go +++ b/examples/doubao_example/doubao_test.go @@ -69,14 +69,14 @@ func TestStream(t *testing.T) { }, } resp, err := llm.Chat(ctx, messages, - kpllms.WithStreamingFunc(func(ctx context.Context, chunk []byte, innerErr error) error { - fmt.Println(string(chunk)) - return nil - }), - //kpllms.WithReasonStreamingFunc(func(ctx context.Context, content kpllms.ReasonContent, innerErr error) error { - // fmt.Println("type: ", content.Type, "content: ", content.Content) + //kpllms.WithStreamingFunc(func(ctx context.Context, chunk []byte, innerErr error) error { + // fmt.Println(string(chunk)) // return nil //}), + kpllms.WithReasonStreamingFunc(func(ctx context.Context, content kpllms.ReasonContent, innerErr error) error { + fmt.Println("type: ", content.Type, "content: ", content.Content) + return nil + }), kpllms.WithTemperature(0.95)) if err != nil { fmt.Println(err.Error())