 #include "models/chat_completion_request.h"
 #include "models/load_model_request.h"
 #include "sentencepiece_processor.h"
+#include "tensorrt_llm/common/logger.h"
 #include "tensorrt_llm/executor/executor.h"
 #include "tensorrt_llm/plugins/api/tllmPlugin.h"
 #include "tensorrt_llm/runtime/generationInput.h"
 #include "tensorrt_llm/runtime/tllmLogger.h"
 #include "trantor/utils/ConcurrentTaskQueue.h"
 #include "trantor/utils/Logger.h"
+#include <trantor/utils/AsyncFileLogger.h>
 
 using namespace tensorrt_llm::runtime;
 
 namespace tle = tensorrt_llm::executor;
 
 namespace fs = std::filesystem;
 
+namespace tc = tensorrt_llm::common;
+
+constexpr char log_base_name[] = "logs/cortex";
+constexpr char log_folder[] = "logs";
+constexpr size_t max_log_file_size = 20000000;  // ~20 MB
+
+// This class is inspired by https://github.com/NVIDIA/TensorRT-LLM/blob/main/cpp/tensorrt_llm/runtime/tllmLogger.cpp
+class TllmFileLogger : public nvinfer1::ILogger {
+ public:
+  // Forward each TensorRT-LLM message to the trantor macro of matching severity.
+  void log(Severity severity,
+           nvinfer1::AsciiChar const* msg) noexcept override {
+    switch (severity) {
+      case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
+        LOG_ERROR << "[TensorRT-LLM][ERROR] " << msg;
+        break;
+      case nvinfer1::ILogger::Severity::kERROR:
+        LOG_ERROR << "[TensorRT-LLM][ERROR] " << msg;
+        break;
+      case nvinfer1::ILogger::Severity::kWARNING:
+        LOG_WARN << "[TensorRT-LLM][WARN] " << msg;
+        break;
+      case nvinfer1::ILogger::Severity::kINFO:
+        LOG_INFO << "[TensorRT-LLM][INFO] " << msg;
+        break;
+      case nvinfer1::ILogger::Severity::kVERBOSE:
+        LOG_DEBUG << "[TensorRT-LLM][DEBUG] " << msg;
+        break;
+      default:
+        LOG_TRACE << "[TensorRT-LLM][TRACE] " << msg;
+        break;
+    }
+  }
+
+  // Report the current tensorrt_llm::common logger level as a TensorRT severity.
+  Severity getLevel() {
+    auto* const logger = tc::Logger::getLogger();
+    switch (logger->getLevel()) {
+      case tc::Logger::Level::ERROR:
+        return nvinfer1::ILogger::Severity::kERROR;
+      case tc::Logger::Level::WARNING:
+        return nvinfer1::ILogger::Severity::kWARNING;
+      case tc::Logger::Level::INFO:
+        return nvinfer1::ILogger::Severity::kINFO;
+      case tc::Logger::Level::DEBUG:
+      case tc::Logger::Level::TRACE:
+        return nvinfer1::ILogger::Severity::kVERBOSE;
+      default:
+        return nvinfer1::ILogger::Severity::kINTERNAL_ERROR;
+    }
+  }
+
+  // Propagate a TensorRT severity down to the tensorrt_llm::common logger.
+  void setLevel(Severity level) {
+    auto* const logger = tc::Logger::getLogger();
+    switch (level) {
+      case nvinfer1::ILogger::Severity::kINTERNAL_ERROR:
+        logger->setLevel(tc::Logger::Level::ERROR);
+        break;
+      case nvinfer1::ILogger::Severity::kERROR:
+        logger->setLevel(tc::Logger::Level::ERROR);
+        break;
+      case nvinfer1::ILogger::Severity::kWARNING:
+        logger->setLevel(tc::Logger::Level::WARNING);
+        break;
+      case nvinfer1::ILogger::Severity::kINFO:
+        logger->setLevel(tc::Logger::Level::INFO);
+        break;
+      case nvinfer1::ILogger::Severity::kVERBOSE:
+        logger->setLevel(tc::Logger::Level::TRACE);
+        break;
+      default:
+        TLLM_THROW("Unsupported severity");
+    }
+  }
+};
+
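// A minimal sketch (not from this commit) of how the pieces above might be
// wired together: trantor's LOG_* macros are redirected into an
// AsyncFileLogger that rotates at max_log_file_size, and a TllmFileLogger is
// handed to the TensorRT-LLM plugin initializer so engine messages flow into
// the same file. InitFileLogging is a hypothetical helper; the trantor and
// initTrtLlmPlugins calls are existing public APIs of those libraries.
void InitFileLogging() {
  fs::create_directories(log_folder);  // make sure logs/ exists
  // Static storage so the capture-less lambdas below can keep using it.
  static trantor::AsyncFileLogger file_logger;
  file_logger.setFileName(log_base_name);           // writes logs/cortex.*
  file_logger.setFileSizeLimit(max_log_file_size);  // rotate near ~20 MB
  file_logger.startLogging();
  // Everything logged via LOG_* (including TllmFileLogger::log) goes to disk.
  trantor::Logger::setOutputFunction(
      [](const char* msg, const uint64_t len) { file_logger.output(msg, len); },
      []() { file_logger.flush(); });
  // Initialize the TensorRT-LLM plugins with the file-backed logger.
  static TllmFileLogger trt_logger;
  initTrtLlmPlugins(&trt_logger);
}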
 struct RuntimeOptions {
   std::string trtEnginePath;
 
@@ -187,7 +258,7 @@ struct InferenceState {
 
   std::string WaitAndPop() {
     std::unique_lock<std::mutex> l(m);
-    cv.wait(l, [this](){ return !texts_to_stream.empty();});
+    cv.wait(l, [this]() { return !texts_to_stream.empty(); });
     auto s = texts_to_stream.front();
     texts_to_stream.pop();
     return s;
@@ -228,6 +299,7 @@ namespace tensorrtllm {
 
 class TensorrtllmEngine : public EngineI {
  public:
+  TensorrtllmEngine(int log_option = 0);
   ~TensorrtllmEngine() final;
   // ### Interface ###
   void HandleChatCompletion(
@@ -252,7 +324,7 @@ class TensorrtllmEngine : public EngineI {
   void GetModels(
       std::shared_ptr<Json::Value> json_body,
       std::function<void(Json::Value&&, Json::Value&&)>&& callback) final;
-
+  void SetLoggerOption(const Json::Value& json_body);
  private:
   bool CheckModelLoaded(
       std::function<void(Json::Value&&, Json::Value&&)>& callback);
@@ -288,7 +360,7 @@ class TensorrtllmEngine : public EngineI {
   std::unique_ptr<Tokenizer> cortex_tokenizer_;
   RuntimeOptions runtime_opts_;
   std::unique_ptr<tle::Executor> executor_;
-  std::shared_ptr<TllmLogger> logger_;
+  std::shared_ptr<TllmFileLogger> logger_;
   std::string user_prompt_;
   std::string ai_prompt_;
   std::string system_prompt_;
@@ -300,6 +372,7 @@ class TensorrtllmEngine : public EngineI {
   std::unique_ptr<trantor::ConcurrentTaskQueue> q_;
   ModelType model_type_ = ModelType::kOpenHermes;
   int n_parallel_ = 1;
+  std::unique_ptr<trantor::AsyncFileLogger> async_file_logger_;
 };
 
 } // namespace tensorrtllm
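
// A minimal usage sketch under stated assumptions: this diff does not show
// how log_option or the JSON consumed by SetLoggerOption are interpreted, so
// the value and key below are hypothetical (here assuming 0 = console
// logging, 1 = async file logging).
#include <json/json.h>

int main() {
  // Assumed semantics: construct the engine with file logging enabled.
  tensorrtllm::TensorrtllmEngine engine(/*log_option=*/1);
  Json::Value logger_cfg;
  logger_cfg["log_option"] = 1;  // hypothetical key, not confirmed by the diff
  engine.SetLoggerOption(logger_cfg);
  return 0;
}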