This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 3064523

Redirect log (#72)
* Redirect log
* New logger option

---------

Co-authored-by: nguyenhoangthuan99 <=>
1 parent c8b3ae2 commit 3064523

File tree

cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.cc
cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.h

2 files changed (+136, -5 lines)
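In short, the commit lets the engine redirect its logs to a rotating file: a new TensorrtllmEngine(int log_option) constructor installs a trantor::AsyncFileLogger writing to logs/cortex when log_option == kFileLoggerOption, and a new SetLoggerOption method, called at the top of LoadModel, reads the optional log_option and log_level fields of the request body to choose the output target and verbosity. A minimal caller-side sketch of a request body that exercises the new fields (hypothetical, not part of the commit; jsoncpp assumed):

    // Hypothetical sketch: SetLoggerOption only reads "log_option" and "log_level".
    Json::Value body;
    body["log_option"] = 0;      // kFileLoggerOption: keep logging to the logs/cortex file
    body["log_level"] = "info";  // trace | debug | info | warn | fatal; anything else -> error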

cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.cc

Lines changed: 60 additions & 2 deletions
@@ -27,6 +27,7 @@ constexpr const int k200OK = 200;
 constexpr const int k400BadRequest = 400;
 constexpr const int k409Conflict = 409;
 constexpr const int k500InternalServerError = 500;
+constexpr const int kFileLoggerOption = 0;
 
 // '<', '|', 'im', '_', 'end', '|', '>', '</s>', '<|im_end|>'
 const std::list<std::vector<int32_t>> kOpenhermesStopWords = {
@@ -111,11 +112,28 @@ void RemoveSpecialTokens(std::vector<int32_t>& v, ModelType model_type) {
   }
 }
 }  // namespace
+TensorrtllmEngine::TensorrtllmEngine(int log_option) {
+  trantor::Logger::setLogLevel(trantor::Logger::kError);
+  if (log_option == kFileLoggerOption) {
+    std::filesystem::create_directories(log_folder);
+    asynce_file_logger_ = std::make_unique<trantor::AsyncFileLogger>();
+    asynce_file_logger_->setFileName(log_base_name);
+    asynce_file_logger_->startLogging();
+    trantor::Logger::setOutputFunction(
+        [&](const char* msg, const uint64_t len) {
+          asynce_file_logger_->output(msg, len);
+        },
+        [&]() { asynce_file_logger_->flush(); });
+    asynce_file_logger_->setFileSizeLimit(max_log_file_size);
+  }
+}
+
 TensorrtllmEngine::~TensorrtllmEngine() {
   model_loaded_ = false;
   if (res_thread_ && res_thread_->joinable()) {
     res_thread_->join();
   }
+  asynce_file_logger_.reset();
 }
 
 void RemoveId(std::vector<int>& vec, int id) {
@@ -364,9 +382,51 @@ void TensorrtllmEngine::HandleChatCompletion(
   return;
 };
 
+void TensorrtllmEngine::SetLoggerOption(const Json::Value& json_body) {
+  if (!json_body["log_option"].isNull()) {
+    int log_option = json_body["log_option"].asInt();
+    if (log_option != kFileLoggerOption) {
+      // Revert to the default trantor logger output function (stdout).
+      trantor::Logger::setOutputFunction(
+          [](const char* msg, const uint64_t len) {
+            fwrite(msg, 1, static_cast<size_t>(len), stdout);
+          },
+          []() { fflush(stdout); });
+    }
+  }
+  logger_ = std::make_shared<TllmFileLogger>();
+  if (!json_body["log_level"].isNull()) {
+    std::string log_level = json_body["log_level"].asString();
+    if (log_level == "trace") {
+      logger_->setLevel(nvinfer1::ILogger::Severity::kINFO);
+      trantor::Logger::setLogLevel(trantor::Logger::kTrace);
+    } else if (log_level == "debug") {
+      trantor::Logger::setLogLevel(trantor::Logger::kDebug);
+      logger_->setLevel(nvinfer1::ILogger::Severity::kINFO);
+    } else if (log_level == "info") {
+      trantor::Logger::setLogLevel(trantor::Logger::kInfo);
+      logger_->setLevel(nvinfer1::ILogger::Severity::kINFO);
+    } else if (log_level == "warn") {
+      trantor::Logger::setLogLevel(trantor::Logger::kWarn);
+      logger_->setLevel(nvinfer1::ILogger::Severity::kWARNING);
+    } else if (log_level == "fatal") {
+      trantor::Logger::setLogLevel(trantor::Logger::kFatal);
+      logger_->setLevel(nvinfer1::ILogger::Severity::kWARNING);
+    } else {
+      trantor::Logger::setLogLevel(trantor::Logger::kError);
+      logger_->setLevel(nvinfer1::ILogger::Severity::kERROR);
+    }
+  } else {
+    logger_->setLevel(nvinfer1::ILogger::Severity::kWARNING);
+  }
+}
+
 void TensorrtllmEngine::LoadModel(
     std::shared_ptr<Json::Value> json_body,
     std::function<void(Json::Value&&, Json::Value&&)>&& callback) {
+  SetLoggerOption(*json_body);
   model::LoadModelRequest request = model::fromJson(json_body);
   if (model_loaded_ && model_type_ == GetModelType(request.model_path)) {
     LOG_INFO << "Model already loaded";
@@ -398,8 +458,6 @@ void TensorrtllmEngine::LoadModel(
   }
   model_id_ = GetModelId(*json_body);
 
-  logger_ = std::make_shared<TllmLogger>();
-  logger_->setLevel(nvinfer1::ILogger::Severity::kINFO);
   initTrtLlmPlugins(logger_.get());
 
   std::filesystem::path tokenizer_model_name = model_dir / "tokenizer.model";
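
Taken together, the .cc changes fix the log destination at construction time and reapply the verbosity on every model load. A hedged sketch of how a host process might drive both (the engine class and kFileLoggerOption come from this repo; the path and the callback body are illustrative):

    // Hypothetical host-side sketch, assuming the engine headers are available.
    auto engine = std::make_unique<tensorrtllm::TensorrtllmEngine>(kFileLoggerOption);
    auto body = std::make_shared<Json::Value>();
    (*body)["model_path"] = "/models/openhermes-7b";  // illustrative path
    (*body)["log_level"] = "warn";                    // trantor kWarn + nvinfer1 kWARNING
    engine->LoadModel(body, [](Json::Value&& status, Json::Value&& result) {
      // By this point LoadModel has already called SetLoggerOption(*body).
    });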

cpp/tensorrt_llm/cortex.tensorrt-llm/src/tensorrt-llm_engine.h

Lines changed: 76 additions & 3 deletions
@@ -16,6 +16,7 @@
 #include "models/chat_completion_request.h"
 #include "models/load_model_request.h"
 #include "sentencepiece_processor.h"
+#include "tensorrt_llm/common/logger.h"
 #include "tensorrt_llm/executor/executor.h"
 #include "tensorrt_llm/plugins/api/tllmPlugin.h"
 #include "tensorrt_llm/runtime/generationInput.h"
@@ -27,13 +28,83 @@
 #include "tensorrt_llm/runtime/tllmLogger.h"
 #include "trantor/utils/ConcurrentTaskQueue.h"
 #include "trantor/utils/Logger.h"
+#include <trantor/utils/AsyncFileLogger.h>
 
 using namespace tensorrt_llm::runtime;
 
 namespace tle = tensorrt_llm::executor;
 
 namespace fs = std::filesystem;
 
+namespace tc = tensorrt_llm::common;
+
+constexpr char log_base_name[] = "logs/cortex";
+constexpr char log_folder[] = "logs";
+constexpr size_t max_log_file_size = 20000000;  // ~20 MB
+
+// This class is inspired by
+// https://github.com/NVIDIA/TensorRT-LLM/blob/main/cpp/tensorrt_llm/runtime/tllmLogger.cpp
+class TllmFileLogger : public nvinfer1::ILogger {
+ public:
+  void log(Severity severity,
+           nvinfer1::AsciiChar const* msg) noexcept override {
+    switch (severity) {
+      case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
+        LOG_ERROR << "[TensorRT-LLM][ERROR] " << msg;
+        break;
+      case nvinfer1::ILogger::Severity::kERROR:
+        LOG_ERROR << "[TensorRT-LLM][ERROR] " << msg;
+        break;
+      case nvinfer1::ILogger::Severity::kWARNING:
+        LOG_WARN << "[TensorRT-LLM][WARN] " << msg;
+        break;
+      case nvinfer1::ILogger::Severity::kINFO:
+        LOG_INFO << "[TensorRT-LLM][INFO] " << msg;
+        break;
+      case nvinfer1::ILogger::Severity::kVERBOSE:
+        LOG_DEBUG << "[TensorRT-LLM][DEBUG] " << msg;
+        break;
+      default:
+        LOG_TRACE << "[TensorRT-LLM][TRACE] " << msg;
+        break;
+    }
+  }
+
+  Severity getLevel() {
+    auto* const logger = tc::Logger::getLogger();
+    switch (logger->getLevel()) {
+      case tc::Logger::Level::ERROR: return nvinfer1::ILogger::Severity::kERROR;
+      case tc::Logger::Level::WARNING: return nvinfer1::ILogger::Severity::kWARNING;
+      case tc::Logger::Level::INFO: return nvinfer1::ILogger::Severity::kINFO;
+      case tc::Logger::Level::DEBUG:
+      case tc::Logger::Level::TRACE: return nvinfer1::ILogger::Severity::kVERBOSE;
+      default: return nvinfer1::ILogger::Severity::kINTERNAL_ERROR;
+    }
+  }
+
+  void setLevel(Severity level) {
+    auto* const logger = tc::Logger::getLogger();
+    switch (level) {
+      case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
+        logger->setLevel(tc::Logger::Level::ERROR);
+        break;
+      case nvinfer1::ILogger::Severity::kERROR:
+        logger->setLevel(tc::Logger::Level::ERROR);
+        break;
+      case nvinfer1::ILogger::Severity::kWARNING:
+        logger->setLevel(tc::Logger::Level::WARNING);
+        break;
+      case nvinfer1::ILogger::Severity::kINFO:
+        logger->setLevel(tc::Logger::Level::INFO);
+        break;
+      case nvinfer1::ILogger::Severity::kVERBOSE:
+        logger->setLevel(tc::Logger::Level::TRACE);
+        break;
+      default:
+        TLLM_THROW("Unsupported severity");
+    }
+  }
+};
+
 struct RuntimeOptions {
   std::string trtEnginePath;
 
@@ -187,7 +258,7 @@ struct InferenceState {
 
   std::string WaitAndPop() {
     std::unique_lock<std::mutex> l(m);
-    cv.wait(l, [this](){return !texts_to_stream.empty();});
+    cv.wait(l, [this]() { return !texts_to_stream.empty(); });
     auto s = texts_to_stream.front();
     texts_to_stream.pop();
     return s;
@@ -228,6 +299,7 @@ namespace tensorrtllm {
 
 class TensorrtllmEngine : public EngineI {
  public:
+  TensorrtllmEngine(int log_option = 0);
   ~TensorrtllmEngine() final;
   // ### Interface ###
   void HandleChatCompletion(
@@ -252,7 +324,7 @@ class TensorrtllmEngine : public EngineI {
   void GetModels(
       std::shared_ptr<Json::Value> json_body,
       std::function<void(Json::Value&&, Json::Value&&)>&& callback) final;
-
+  void SetLoggerOption(const Json::Value& json_body);
  private:
   bool CheckModelLoaded(
       std::function<void(Json::Value&&, Json::Value&&)>& callback);
@@ -288,7 +360,7 @@ class TensorrtllmEngine : public EngineI {
   std::unique_ptr<Tokenizer> cortex_tokenizer_;
   RuntimeOptions runtime_opts_;
   std::unique_ptr<tle::Executor> executor_;
-  std::shared_ptr<TllmLogger> logger_;
+  std::shared_ptr<TllmFileLogger> logger_;
   std::string user_prompt_;
   std::string ai_prompt_;
   std::string system_prompt_;
@@ -300,6 +372,7 @@ class TensorrtllmEngine : public EngineI {
   std::unique_ptr<trantor::ConcurrentTaskQueue> q_;
   ModelType model_type_ = ModelType::kOpenHermes;
   int n_parallel_ = 1;
+  std::unique_ptr<trantor::AsyncFileLogger> asynce_file_logger_;
 };
 
 }  // namespace tensorrtllm
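
The header side is what completes the redirect: TllmFileLogger forwards each TensorRT severity to the matching trantor macro, so once the async file logger is installed in the constructor, TensorRT-LLM messages land in the same logs/cortex file as the engine's own output. A small illustrative exercise of the mapping (the message text is made up):

    TllmFileLogger trt_logger;
    trt_logger.setLevel(nvinfer1::ILogger::Severity::kWARNING);  // tc::Logger -> WARNING
    trt_logger.log(nvinfer1::ILogger::Severity::kERROR,
                   "engine build failed");  // emitted via LOG_ERROR as "[TensorRT-LLM][ERROR] ..."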
