Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions backend/backend.proto
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ service Backend {
rpc TTSStream(TTSRequest) returns (stream Reply) {}
rpc SoundGeneration(SoundGenerationRequest) returns (Result) {}
rpc TokenizeString(PredictOptions) returns (TokenizationResponse) {}
rpc Detokenize(DetokenizeRequest) returns (DetokenizeResponse) {}
rpc Status(HealthMessage) returns (StatusResponse) {}
rpc Detect(DetectOptions) returns (DetectResponse) {}
rpc FaceVerify(FaceVerifyRequest) returns (FaceVerifyResponse) {}
Expand Down Expand Up @@ -491,6 +492,14 @@ message TokenizationResponse {
repeated int32 tokens = 2;
}

// DetokenizeRequest is the input of the Backend.Detokenize RPC.
message DetokenizeRequest {
// Token IDs to convert back into text.
repeated int32 tokens = 1;
}

// DetokenizeResponse is the output of the Backend.Detokenize RPC.
message DetokenizeResponse {
// Text produced from the requested token IDs.
string content = 1;
}

message MemoryUsageData {
uint64 total = 1;
map<string, uint64> breakdown = 2;
Expand Down
15 changes: 15 additions & 0 deletions backend/cpp/llama-cpp/grpc-server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3167,6 +3167,21 @@ class BackendServiceImpl final : public backend::Backend::Service {
return grpc::Status::OK;
}

// Detokenize converts the token IDs carried by the request back into text.
//
// The pieces of all tokens are concatenated in request order and stored in
// response->content. Returns:
//   - the checkAuth() status when authentication fails,
//   - FAILED_PRECONDITION when no model is loaded or no llama context is available,
//   - INVALID_ARGUMENT when a token ID lies outside the model vocabulary,
//   - OK otherwise (an empty token list yields empty content).
grpc::Status Detokenize(ServerContext* context, const backend::DetokenizeRequest* request, backend::DetokenizeResponse* response) override {
    auto auth = checkAuth(context);
    if (!auth.ok()) return auth;
    if (params_base.model.path.empty()) {
        return grpc::Status(grpc::StatusCode::FAILED_PRECONDITION, "Model not loaded");
    }

    // Fetch the context once instead of once per token.
    // NOTE(review): presumably this can be null while the server context is
    // not ready; guard defensively rather than dereference it below.
    auto * lctx = ctx_server.get_llama_context();
    if (lctx == nullptr) {
        return grpc::Status(grpc::StatusCode::FAILED_PRECONDITION, "Model not loaded");
    }

    // Token IDs come straight from the client, so validate them against the
    // vocabulary size before handing them to the tokenizer.
    const int32_t n_vocab = llama_vocab_n_tokens(llama_model_get_vocab(llama_get_model(lctx)));

    std::string content;
    for (const auto token : request->tokens()) {
        if (token < 0 || token >= n_vocab) {
            return grpc::Status(grpc::StatusCode::INVALID_ARGUMENT,
                                "Token ID out of vocabulary range: " + std::to_string(token));
        }
        content.append(common_token_to_piece(lctx, token));
    }
    response->set_content(content);
    return grpc::Status::OK;
}

grpc::Status GetMetrics(ServerContext* /*context*/, const backend::MetricsRequest* /*request*/, backend::MetricsResponse* response) override {

// request slots data using task queue
Expand Down
67 changes: 67 additions & 0 deletions core/backend/detokenize.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package backend

import (
"time"

"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/core/trace"
"github.com/mudler/LocalAI/pkg/grpc"
pb "github.com/mudler/LocalAI/pkg/grpc/proto"
"github.com/mudler/LocalAI/pkg/model"
)

// ModelDetokenize loads the backend described by modelConfig and asks it to
// convert tokens back into text.
//
// A load failure is reported through recordModelLoadFailure and returned
// as-is. When appConfig.EnableTracing is set, a backend trace is recorded for
// the call whether it succeeds or fails. On error the returned response is
// the zero value.
func ModelDetokenize(tokens []int32, loader *model.ModelLoader, modelConfig config.ModelConfig, appConfig *config.ApplicationConfig) (schema.DetokenizeResponse, error) {

	var inferenceModel grpc.Backend
	var err error

	opts := ModelOptions(modelConfig, appConfig)
	inferenceModel, err = loader.Load(opts...)
	if err != nil {
		recordModelLoadFailure(appConfig, modelConfig.Name, modelConfig.Backend, err, nil)
		return schema.DetokenizeResponse{}, err
	}

	var startTime time.Time
	if appConfig.EnableTracing {
		trace.InitBackendTracingIfEnabled(appConfig.TracingMaxItems)
		startTime = time.Now()
	}

	resp, err := inferenceModel.Detokenize(appConfig.Context, &pb.DetokenizeRequest{Tokens: tokens})

	// The generated getter is nil-safe, so a backend that returns a nil
	// response (with or without an error) yields "" instead of a panic.
	content := resp.GetContent()

	if appConfig.EnableTracing {
		errStr := ""
		if err != nil {
			errStr = err.Error()
		}

		trace.RecordBackendTrace(trace.BackendTrace{
			Timestamp: startTime,
			Duration:  time.Since(startTime),
			// NOTE(review): detokenize calls are recorded under the tokenize
			// trace type; confirm whether a dedicated type is wanted.
			Type:      trace.BackendTraceTokenize,
			ModelName: modelConfig.Name,
			Backend:   modelConfig.Backend,
			Summary:   trace.TruncateString(content, 200),
			Error:     errStr,
			Data: map[string]any{
				"token_count": len(tokens),
				"output_text": trace.TruncateString(content, 1000),
			},
		})
	}

	if err != nil {
		return schema.DetokenizeResponse{}, err
	}

	return schema.DetokenizeResponse{
		Content: content,
	}, nil
}
1 change: 1 addition & 0 deletions core/http/auth/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ var RouteFeatureRegistry = []RouteFeature{

// Tokenize
{"POST", "/v1/tokenize", FeatureTokenize},
{"POST", "/v1/detokenize", FeatureTokenize},

// Rerank
{"POST", "/v1/rerank", FeatureRerank},
Expand Down
36 changes: 36 additions & 0 deletions core/http/endpoints/localai/detokenize.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package localai

import (
"github.com/labstack/echo/v4"
"github.com/mudler/LocalAI/core/backend"
"github.com/mudler/LocalAI/core/config"
"github.com/mudler/LocalAI/core/http/middleware"
"github.com/mudler/LocalAI/core/schema"
"github.com/mudler/LocalAI/pkg/model"
)

// DetokenizeEndpoint exposes a REST API to convert token IDs back to text.
// @Summary Detokenize the input.
// @Tags tokenize
// @Param request body schema.DetokenizeRequest true "Request"
// @Success 200 {object} schema.DetokenizeResponse "Response"
// @Router /v1/detokenize [post]
func DetokenizeEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) echo.HandlerFunc {
	return func(c echo.Context) error {
		// The request and model config are read from the echo context; a
		// missing, wrongly typed, or nil value is treated as a bad request.
		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.DetokenizeRequest)
		// A typed-nil pointer passes the type assertion, so check it
		// explicitly before reading input.Model (mirrors the cfg check below).
		if !ok || input == nil || input.Model == "" {
			return echo.ErrBadRequest
		}

		cfg, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
		if !ok || cfg == nil {
			return echo.ErrBadRequest
		}

		resp, err := backend.ModelDetokenize(input.Tokens, ml, *cfg, appConfig)
		if err != nil {
			return err
		}
		return c.JSON(200, resp)
	}
}
17 changes: 12 additions & 5 deletions core/http/routes/localai.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,11 +291,12 @@ func RegisterLocalAIRoutes(router *echo.Echo,
"reload": "/models/reload",
},
"ai_functions": map[string]string{
"tts": "/tts",
"vad": "/vad",
"video": "/video",
"detection": "/v1/detection",
"tokenize": "/v1/tokenize",
"tts": "/tts",
"vad": "/vad",
"video": "/video",
"detection": "/v1/detection",
"tokenize": "/v1/tokenize",
"detokenize": "/v1/detokenize",
},
"monitoring": monitoringRoutes,
"mcp": map[string]string{
Expand Down Expand Up @@ -364,6 +365,12 @@ func RegisterLocalAIRoutes(router *echo.Echo,
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TOKENIZE)),
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TokenizeRequest) }))

detokenizeHandler := localai.DetokenizeEndpoint(cl, ml, appConfig)
router.POST("/v1/detokenize",
detokenizeHandler,
requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TOKENIZE)),
requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.DetokenizeRequest) }))

// MCP endpoint - supports both streaming and non-streaming modes
// Note: streaming mode is NOT compatible with the OpenAI apis. We have a set which streams more states.
if evaluator != nil && !appConfig.DisableMCP {
Expand Down
9 changes: 9 additions & 0 deletions core/schema/tokenize.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,12 @@ type TokenizeRequest struct {
type TokenizeResponse struct {
Tokens []int32 `json:"tokens"` // token IDs
}

// DetokenizeRequest is the JSON request body accepted by the detokenize endpoint.
type DetokenizeRequest struct {
// Embedded common request fields.
// NOTE(review): presumably this supplies the Model field the endpoint reads — confirm.
BasicModelRequest
Tokens []int32 `json:"tokens"` // token IDs to convert back to text
}

// DetokenizeResponse is the JSON response body returned by the detokenize endpoint.
type DetokenizeResponse struct {
Content string `json:"content"` // detokenized text
}
3 changes: 3 additions & 0 deletions core/services/nodes/health_mock_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,9 @@ func (c *fakeBackendClient) AudioTranscriptionStream(_ context.Context, _ *pb.Tr
func (c *fakeBackendClient) TokenizeString(_ context.Context, _ *pb.PredictOptions, _ ...ggrpc.CallOption) (*pb.TokenizationResponse, error) {
return nil, nil
}
// Detokenize is a no-op stub: it ignores all arguments and returns a nil
// response together with a nil error.
func (c *fakeBackendClient) Detokenize(_ context.Context, _ *pb.DetokenizeRequest, _ ...ggrpc.CallOption) (*pb.DetokenizeResponse, error) {
	var noResponse *pb.DetokenizeResponse
	return noResponse, nil
}
func (c *fakeBackendClient) Status(_ context.Context) (*pb.StatusResponse, error) {
return nil, nil
}
Expand Down
4 changes: 4 additions & 0 deletions core/services/nodes/inflight_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ func (f *fakeGRPCBackend) TokenizeString(_ context.Context, _ *pb.PredictOptions
return &pb.TokenizationResponse{}, nil
}

// Detokenize is a stub that ignores its arguments and always succeeds with an
// empty, non-nil response.
func (f *fakeGRPCBackend) Detokenize(_ context.Context, _ *pb.DetokenizeRequest, _ ...ggrpc.CallOption) (*pb.DetokenizeResponse, error) {
	emptyResponse := new(pb.DetokenizeResponse)
	return emptyResponse, nil
}

func (f *fakeGRPCBackend) Status(_ context.Context) (*pb.StatusResponse, error) {
return &pb.StatusResponse{}, nil
}
Expand Down
2 changes: 1 addition & 1 deletion docs/content/features/authentication.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ When authentication is enabled, the following endpoints require admin role:
**User-Accessible Endpoints (all authenticated users):**
- `POST /v1/chat/completions`, `POST /v1/embeddings`, `POST /v1/completions`
- `POST /v1/images/generations`, `POST /v1/audio/*`, `POST /tts`, `POST /vad`, `POST /video`
- `GET /v1/models`, `POST /v1/tokenize`, `POST /v1/detection`
- `GET /v1/models`, `POST /v1/tokenize`, `POST /v1/detokenize`, `POST /v1/detection`
- `POST /v1/mcp/chat/completions`, `POST /v1/messages`, `POST /v1/responses`
- `POST /stores/*`, `GET /api/cors-proxy`
- `GET /version`, `GET /api/features`, `GET /swagger/*`, `GET /metrics`
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ require (
github.com/mholt/archiver/v3 v3.5.1
github.com/microcosm-cc/bluemonday v1.0.27
github.com/modelcontextprotocol/go-sdk v1.5.0
github.com/mudler/cogito v0.9.5-0.20260315222927-63abdec7189b
github.com/mudler/cogito v0.9.5-0.20260531081147-2c13b6ac29cf
github.com/mudler/edgevpn v0.32.2
github.com/mudler/go-processmanager v0.1.1
github.com/mudler/memory v0.0.0-20260406210934-424c1ecf2cf8
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -972,6 +972,8 @@ github.com/mudler/LocalAGI v0.0.0-20260508125235-37810d918a87 h1:az+2umaD/sT1rRv
github.com/mudler/LocalAGI v0.0.0-20260508125235-37810d918a87/go.mod h1:x77p9W1zKZr+W+UcEwg8/qdp00p4XXOI69wE7WlXZc0=
github.com/mudler/cogito v0.9.5-0.20260315222927-63abdec7189b h1:A74T2Lauvg61KodYqsjTYDY05kPLcW+efVZjd23dghU=
github.com/mudler/cogito v0.9.5-0.20260315222927-63abdec7189b/go.mod h1:6sfja3lcu2nWRzEc0wwqGNu/eCG3EWgij+8s7xyUeQ4=
github.com/mudler/cogito v0.9.5-0.20260531081147-2c13b6ac29cf h1:njoYjFON9qXKcErNvPl1WoxmSzl4zE9Wexyk2YN82FM=
github.com/mudler/cogito v0.9.5-0.20260531081147-2c13b6ac29cf/go.mod h1:6sfja3lcu2nWRzEc0wwqGNu/eCG3EWgij+8s7xyUeQ4=
github.com/mudler/edgevpn v0.32.2 h1:umTPyyZgkom/A81Bk4HbP0p1ZSEU5EFPW3Bg+YPxI8A=
github.com/mudler/edgevpn v0.32.2/go.mod h1:UaMc8MORbcRsAjuO5gVJj9Bn3Nq2AP5U9NTb6epVyv8=
github.com/mudler/go-piper v0.0.0-20241023091659-2494246fd9fc h1:RxwneJl1VgvikiX28EkpdAyL4yQVnJMrbquKospjHyA=
Expand Down
1 change: 1 addition & 0 deletions pkg/grpc/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ type Backend interface {
AudioTranscription(ctx context.Context, in *pb.TranscriptRequest, opts ...grpc.CallOption) (*pb.TranscriptResult, error)
AudioTranscriptionStream(ctx context.Context, in *pb.TranscriptRequest, f func(chunk *pb.TranscriptStreamResponse), opts ...grpc.CallOption) error
TokenizeString(ctx context.Context, in *pb.PredictOptions, opts ...grpc.CallOption) (*pb.TokenizationResponse, error)
Detokenize(ctx context.Context, in *pb.DetokenizeRequest, opts ...grpc.CallOption) (*pb.DetokenizeResponse, error)
Status(ctx context.Context) (*pb.StatusResponse, error)

StoresSet(ctx context.Context, in *pb.StoresSetOptions, opts ...grpc.CallOption) (*pb.Result, error)
Expand Down
4 changes: 4 additions & 0 deletions pkg/grpc/base/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ func (llm *Base) TokenizeString(opts *pb.PredictOptions) (pb.TokenizationRespons
return pb.TokenizationResponse{}, fmt.Errorf("unimplemented")
}

// Detokenize is the default implementation: it always returns an empty
// response and an "unimplemented" error.
func (llm *Base) Detokenize(req *pb.DetokenizeRequest) (pb.DetokenizeResponse, error) {
	var empty pb.DetokenizeResponse
	return empty, fmt.Errorf("unimplemented")
}

func (llm *Base) ModelMetadata(opts *pb.ModelOptions) (*pb.ModelMetadataResponse, error) {
return nil, fmt.Errorf("unimplemented")
}
Expand Down
23 changes: 23 additions & 0 deletions pkg/grpc/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,29 @@ func (c *Client) TokenizeString(ctx context.Context, in *pb.PredictOptions, opts
return res, nil
}

// Detokenize dials the backend, issues the Detokenize RPC with the given
// request and call options, and closes the connection before returning.
//
// Unless the client is in parallel mode, the operation mutex is held for the
// whole call. The busy flag and the wdMark/wdUnMark pair are set on entry and
// cleared on return. Any dial or RPC error is returned with a nil response.
func (c *Client) Detokenize(ctx context.Context, in *pb.DetokenizeRequest, opts ...grpc.CallOption) (*pb.DetokenizeResponse, error) {
	if !c.parallel {
		c.opMutex.Lock()
		defer c.opMutex.Unlock()
	}

	c.setBusy(true)
	defer c.setBusy(false)

	c.wdMark()
	defer c.wdUnMark()

	connection, dialErr := c.dial()
	if dialErr != nil {
		return nil, dialErr
	}
	defer connection.Close()

	reply, callErr := pb.NewBackendClient(connection).Detokenize(ctx, in, opts...)
	if callErr != nil {
		return nil, callErr
	}
	return reply, nil
}

func (c *Client) Status(ctx context.Context) (*pb.StatusResponse, error) {
if !c.parallel {
c.opMutex.Lock()
Expand Down
4 changes: 4 additions & 0 deletions pkg/grpc/embed.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@ func (e *embedBackend) TokenizeString(ctx context.Context, in *pb.PredictOptions
return e.s.TokenizeString(ctx, in)
}

// Detokenize forwards the request to the embedded server. The gRPC call
// options are accepted for interface compatibility but are not used.
func (e *embedBackend) Detokenize(ctx context.Context, in *pb.DetokenizeRequest, opts ...grpc.CallOption) (*pb.DetokenizeResponse, error) {
	reply, err := e.s.Detokenize(ctx, in)
	return reply, err
}

func (e *embedBackend) Status(ctx context.Context) (*pb.StatusResponse, error) {
return e.s.Status(ctx, &pb.HealthMessage{})
}
Expand Down
1 change: 1 addition & 0 deletions pkg/grpc/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type AIModel interface {
TTSStream(*pb.TTSRequest, chan []byte) error
SoundGeneration(*pb.SoundGenerationRequest) error
TokenizeString(*pb.PredictOptions) (pb.TokenizationResponse, error)
Detokenize(*pb.DetokenizeRequest) (pb.DetokenizeResponse, error)
Status() (pb.StatusResponse, error)

StoresSet(*pb.StoresSetOptions) error
Expand Down
12 changes: 12 additions & 0 deletions pkg/grpc/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,18 @@ func (s *server) TokenizeString(ctx context.Context, in *pb.PredictOptions) (*pb
}, err
}

// Detokenize handles the Detokenize RPC by delegating to the wrapped model.
//
// When the model reports that it requires locking, the model lock is held for
// the duration of the call. On failure the error is returned with a nil
// response; on success a pointer to the model's response is returned.
func (s *server) Detokenize(ctx context.Context, in *pb.DetokenizeRequest) (*pb.DetokenizeResponse, error) {
	if s.llm.Locking() {
		s.llm.Lock()
		defer s.llm.Unlock()
	}

	out, err := s.llm.Detokenize(in)
	if err == nil {
		return &out, nil
	}
	return nil, err
}

func (s *server) Status(ctx context.Context, in *pb.HealthMessage) (*pb.StatusResponse, error) {
res, err := s.llm.Status()
if err != nil {
Expand Down
Loading