From 78dc0dc7c96973ae3ed1630edb70011755373d81 Mon Sep 17 00:00:00 2001 From: jingwu2121 <98714649+jingwu2121@users.noreply.github.com> Date: Sat, 13 Jul 2024 07:57:51 +0100 Subject: [PATCH 01/23] Update README.md --- README.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index ac8c55a..bb84da5 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ ![teaser](./assets/teaser.png) -## Installation +## ⚙️ Installation - Tested on CUDA11.8 + Ubuntu22.04 + NeRFStudio1.0.0 (NVIDIA RTX A5000 24G) @@ -71,7 +71,7 @@ pip install -e . ns-train -h ``` -## Data +## 🗄️ Data ### Use Our Preprocessed Data @@ -85,13 +85,13 @@ We thank these authors for their great work! ### Customize Your Data -We recommend to pre-process your data to 512x512, and follow [this page](https://docs.nerf.studio/quickstart/custom_dataset.html) to process your data. +We recommend to pre-process your data to 512x512, and following [this page](https://docs.nerf.studio/quickstart/custom_dataset.html) to process your data. -## Get Started +## :arrow_forward: Get Started ![Method](./assets/method.png) ### 1. Train a 3DGS -To get started, you firstly need to train your 3DGS model. We use `splatfacto` from NeRFStudio. +To get started, you first need to train your 3DGS model. We use `splatfacto` from NeRFStudio. ```bash ns-train splatfacto --output-dir {output/folder} --experiment-name EXPEIMENT_NAME nerfstudio-data --data {path/to/your/data} @@ -112,15 +112,15 @@ Please note that the Lang-SAM is optional here. 
If you are editing the environme ns-train gaussctrl --load-checkpoint {output/folder/.../nerfstudio_models/step-000029999.ckpt} --experiment-name EXPEIMENT_NAME --output-dir {output/folder} --pipeline.datamanager.data {path/to/your/data} --pipeline.prompt "YOUR PROMPT" --pipeline.guidance_scale 5 --pipeline.chunk_size {batch size of images during editing} ``` -Here, `--pipeline.guidance_scale` denotes the classifier free guidance used when editing the images. `--pipeline.chunk_size` denotes the number of images edited together during 1 batch. We are using **NVIDIA RTX A5000** GPU (24G), and the maximum chunk size is 3. (~22G) +Here, `--pipeline.guidance_scale` denotes the classifier-free guidance used when editing the images. `--pipeline.chunk_size` denotes the number of images edited together during 1 batch. We are using **NVIDIA RTX A5000** GPU (24G), and the maximum chunk size is 3. (~22G) ### Small Tips -- If your find your editings are not as expected, please check the images edited by ControlNet. -- Normally, conditioning your editing on the good ControlNet editing views is very helpful, which means it is better to choose those good ControlNet editing views as reference views. +- If your editings are not as expected, please check the images edited by ControlNet. +- Normally, conditioning your editing on the good ControlNet editing views is very helpful, which means choosing those good ControlNet editing views as reference views is better. -## Reproduce Our Results +## :wrench: Reproduce Our Results -Experiments in the main paper are inclued in `scripts` folder. To reproduce the results, first train the `splatfacto` model. We take the `bear` case as an example here. +Experiments in the main paper are included in the `scripts` folder. To reproduce the results, first train the `splatfacto` model. We take the `bear` case as an example here. 
```bash ns-train splatfacto --output-dir unedited_models --experiment-name bear nerfstudio-data --data data/bear ``` @@ -132,13 +132,13 @@ ns-train gaussctrl --load-checkpoint {unedited_models/bear/splatfacto/.../nerfst In our experiments, We sampled 40 views randomly from the entire dataset to accelerate the method, which is set in `gc_datamanager.py` by default. We split the entire set into 4 subsets, and randomly sampled 10 images in each subset split. Feel free to decrease/increase the number to see the difference by modifying `--pipeline.datamanager.subset-num` and `--pipeline.datamanager.sampled-views-every-subset`. Set `--pipeline.datamanager.load-all` to `True`, if you want to edit all the images in the dataset. -## View Results Using NeRFStudio Viewer +## :camera: View Results Using NeRFStudio Viewer ```bash ns-viewer --load-config {outputs/.../config.yml} ``` -## Render Your Results -- Render the all the dataset views. +## :movie_camera: Render Your Results +- Render all the dataset views. 
```bash ns-gaussctrl-render dataset --load-config {outputs/.../config.yml} --output_path {render/EXPEIMENT_NAME} ``` @@ -160,4 +160,4 @@ title = {{GaussCtrl: Multi-View Consistent Text-Driven 3D Gaussian Splatting Edi booktitle = {ECCV}, year = {2024}, } -``` \ No newline at end of file +``` From 984cf62d18eb669ed9eb34c21e8f63a2499c8732 Mon Sep 17 00:00:00 2001 From: jingwu2121 <98714649+jingwu2121@users.noreply.github.com> Date: Sat, 13 Jul 2024 08:35:15 +0100 Subject: [PATCH 02/23] Update requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 07df8d8..e847e61 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ diffusers==0.26.0 transformers==4.34.1 -pip install huggingface-hub==0.20.3 \ No newline at end of file +huggingface-hub==0.20.3 From 1e8b4ed67998bab47195e238f1110bf21a5bc3e6 Mon Sep 17 00:00:00 2001 From: jingwu2121 Date: Sat, 13 Jul 2024 07:42:08 +0000 Subject: [PATCH 03/23] rm breakpoint --- gaussctrl/gc_datamanager.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gaussctrl/gc_datamanager.py b/gaussctrl/gc_datamanager.py index 7423d45..fcc7804 100755 --- a/gaussctrl/gc_datamanager.py +++ b/gaussctrl/gc_datamanager.py @@ -86,7 +86,6 @@ def __init__(self, self.step_every = 1 self.edited_image_dict = {} - breakpoint() # Sample data if len(self.train_dataset._dataparser_outputs.image_filenames) <= self.config.subset_num * self.config.sampled_views_every_subset or self.config.load_all: self.cameras = self.train_dataset.cameras From ab1de29829eb1354a249997f1d7f9aec9b677565 Mon Sep 17 00:00:00 2001 From: jingwu2121 Date: Sun, 14 Jul 2024 11:11:23 +0000 Subject: [PATCH 04/23] add control to reference view number --- README.md | 2 +- gaussctrl/gc_pipeline.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index bb84da5..7ea6fe9 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,7 @@ Please note that the 
Lang-SAM is optional here. If you are editing the environme ns-train gaussctrl --load-checkpoint {output/folder/.../nerfstudio_models/step-000029999.ckpt} --experiment-name EXPEIMENT_NAME --output-dir {output/folder} --pipeline.datamanager.data {path/to/your/data} --pipeline.prompt "YOUR PROMPT" --pipeline.guidance_scale 5 --pipeline.chunk_size {batch size of images during editing} ``` -Here, `--pipeline.guidance_scale` denotes the classifier-free guidance used when editing the images. `--pipeline.chunk_size` denotes the number of images edited together during 1 batch. We are using **NVIDIA RTX A5000** GPU (24G), and the maximum chunk size is 3. (~22G) +Here, `--pipeline.guidance_scale` denotes the classifier-free guidance used when editing the images. `--pipeline.chunk_size` denotes the number of images edited together during 1 batch. We are using **NVIDIA RTX A5000** GPU (24G), and the maximum chunk size is 3. (~22G) Control the number of reference views using `--pipeline.ref_view_num`, by default, it is set to 4. ### Small Tips - If your editings are not as expected, please check the images edited by ControlNet. 
diff --git a/gaussctrl/gc_pipeline.py b/gaussctrl/gc_pipeline.py index 61f7bff..3dfacfb 100755 --- a/gaussctrl/gc_pipeline.py +++ b/gaussctrl/gc_pipeline.py @@ -65,6 +65,7 @@ class GaussCtrlPipelineConfig(VanillaPipelineConfig): """Inference steps""" chunk_size: int = 5 """Batch size for image editing, feel free to reduce to fit your GPU""" + ref_view_num: int = 4 class GaussCtrlPipeline(VanillaPipeline): @@ -99,7 +100,7 @@ def __init__( self.negative_prompts = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality' view_num = len(self.datamanager.cameras) - anchors = list(range(0, view_num, view_num // 4)) + [view_num] + anchors = [(view_num * i) // self.config.ref_view_num for i in range(self.config.ref_view_num)] + [view_num] random.seed(13789) self.ref_indices = [random.randint(anchor, anchors[idx+1]) for idx, anchor in enumerate(anchors[:-1])] @@ -150,7 +151,6 @@ def render_reverse(self): def edit_images(self): '''Edit images with ControlNet and AttnAlign''' - # if self.test_mode == "val": # Set up ControlNet and AttnAlign self.pipe.scheduler = self.ddim_scheduler self.pipe.unet.set_attn_processor( @@ -163,7 +163,7 @@ def edit_images(self): print("#############################") CONSOLE.print("Start Editing: ", style="bold yellow") - CONSOLE.print(f"Reference views are {[j+1 for j in self.ref_indices]}, counting from 1", style="bold yellow") + CONSOLE.print(f"Reference views are {[j+1 for j in self.ref_indices]}", style="bold yellow") print("#############################") ref_disparity_list = [] ref_z0_list = [] From 599c1fa7eeaee60ed17e2e47839207f622eb21e2 Mon Sep 17 00:00:00 2001 From: jingwu2121 Date: Sun, 14 Jul 2024 11:12:20 +0000 Subject: [PATCH 05/23] minor --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7ea6fe9..037a0d2 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,9 @@ Please note that the Lang-SAM is optional 
here. If you are editing the environme ns-train gaussctrl --load-checkpoint {output/folder/.../nerfstudio_models/step-000029999.ckpt} --experiment-name EXPEIMENT_NAME --output-dir {output/folder} --pipeline.datamanager.data {path/to/your/data} --pipeline.prompt "YOUR PROMPT" --pipeline.guidance_scale 5 --pipeline.chunk_size {batch size of images during editing} ``` -Here, `--pipeline.guidance_scale` denotes the classifier-free guidance used when editing the images. `--pipeline.chunk_size` denotes the number of images edited together during 1 batch. We are using **NVIDIA RTX A5000** GPU (24G), and the maximum chunk size is 3. (~22G) Control the number of reference views using `--pipeline.ref_view_num`, by default, it is set to 4. +Here, `--pipeline.guidance_scale` denotes the classifier-free guidance used when editing the images. `--pipeline.chunk_size` denotes the number of images edited together during 1 batch. We are using **NVIDIA RTX A5000** GPU (24G), and the maximum chunk size is 3. (~22G) + +Control the number of reference views using `--pipeline.ref_view_num`, by default, it is set to 4. ### Small Tips - If your editings are not as expected, please check the images edited by ControlNet. 
From 4afcf037e59d39a4f64bc04504d6046ba9d7d117 Mon Sep 17 00:00:00 2001 From: jingwu2121 Date: Tue, 16 Jul 2024 08:21:47 +0000 Subject: [PATCH 06/23] minor --- scripts/dinosaur.sh | 2 +- scripts/face.sh | 2 +- scripts/fangzhou.sh | 2 +- scripts/garden.sh | 2 +- scripts/stone_horse.sh | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/dinosaur.sh b/scripts/dinosaur.sh index 0ed9ebb..23f9040 100644 --- a/scripts/dinosaur.sh +++ b/scripts/dinosaur.sh @@ -1,4 +1,4 @@ -# ns-train splatfacto --output-dir unedited_models --experiment-name dinosaur --viewer.quit-on-train-completion True nerfstudio-data --data data/dinosaur +ns-train splatfacto --output-dir unedited_models --experiment-name dinosaur --viewer.quit-on-train-completion True nerfstudio-data --data data/dinosaur ns-train gaussctrl --load-checkpoint unedited_models/dinosaur/splatfacto/2024-07-11_173113/nerfstudio_models/step-000029999.ckpt --experiment-name dinosaur --output-dir outputs --pipeline.datamanager.data data/dinosaur --pipeline.prompt "a photo of a robot dinosaur on the road side" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'dinosaur statue' --viewer.quit-on-train-completion True diff --git a/scripts/face.sh b/scripts/face.sh index 126e257..06d4364 100644 --- a/scripts/face.sh +++ b/scripts/face.sh @@ -1,4 +1,4 @@ -# ns-train splatfacto --output-dir unedited_models --experiment-name face --viewer.quit-on-train-completion True nerfstudio-data --data data/face +ns-train splatfacto --output-dir unedited_models --experiment-name face --viewer.quit-on-train-completion True nerfstudio-data --data data/face ns-train gaussctrl --load-checkpoint unedited_models/face/splatfacto/2024-07-11_173339/nerfstudio_models/step-000029999.ckpt --experiment-name face --output-dir outputs --pipeline.datamanager.data data/face --pipeline.prompt "a photo of a face of a man with a moustache" --pipeline.guidance_scale 3 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' 
--viewer.quit-on-train-completion True diff --git a/scripts/fangzhou.sh b/scripts/fangzhou.sh index 40d30ae..f708500 100644 --- a/scripts/fangzhou.sh +++ b/scripts/fangzhou.sh @@ -1,4 +1,4 @@ -# ns-train splatfacto --output-dir unedited_models --experiment-name fangzhou --viewer.quit-on-train-completion True nerfstudio-data --data data/fangzhou +ns-train splatfacto --output-dir unedited_models --experiment-name fangzhou --viewer.quit-on-train-completion True nerfstudio-data --data data/fangzhou ns-train gaussctrl --load-checkpoint unedited_models/fangzhou/splatfacto/2024-07-11_173620/nerfstudio_models/step-000029999.ckpt --experiment-name fangzhou --output-dir outputs --pipeline.datamanager.data data/fangzhou --pipeline.prompt "a photo of a face of an old man" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True diff --git a/scripts/garden.sh b/scripts/garden.sh index a8f52ae..d1ad929 100644 --- a/scripts/garden.sh +++ b/scripts/garden.sh @@ -1,3 +1,3 @@ -# ns-train splatfacto --output-dir unedited_models --experiment-name garden --viewer.quit-on-train-completion True nerfstudio-data --data data/garden +ns-train splatfacto --output-dir unedited_models --experiment-name garden --viewer.quit-on-train-completion True nerfstudio-data --data data/garden ns-train gaussctrl --load-checkpoint unedited_models/garden/splatfacto/2024-07-11_173647/nerfstudio_models/step-000029999.ckpt --experiment-name garden --output-dir outputs --pipeline.datamanager.data data/garden --pipeline.prompt "a photo of a fake plant on a table in the garden in the snow" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --viewer.quit-on-train-completion True diff --git a/scripts/stone_horse.sh b/scripts/stone_horse.sh index f309e8c..ab0c3b8 100644 --- a/scripts/stone_horse.sh +++ b/scripts/stone_horse.sh @@ -1,4 +1,4 @@ -# ns-train splatfacto --output-dir unedited_models --experiment-name stone_horse 
--viewer.quit-on-train-completion True nerfstudio-data --data data/stone_horse +ns-train splatfacto --output-dir unedited_models --experiment-name stone_horse --viewer.quit-on-train-completion True nerfstudio-data --data data/stone_horse ns-train gaussctrl --load-checkpoint unedited_models/stone_horse/splatfacto/2024-07-11_173710/nerfstudio_models/step-000029999.ckpt --experiment-name stone_horse --output-dir outputs --pipeline.datamanager.data data/stone_horse --pipeline.prompt "a photo of a giraffe in front of the museum" --pipeline.guidance_scale 5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'stone horse' --viewer.quit-on-train-completion True From 01b29eed7e85bab81758580b4117c4954e5d09f0 Mon Sep 17 00:00:00 2001 From: jingwu2121 Date: Tue, 16 Jul 2024 08:24:14 +0000 Subject: [PATCH 07/23] minor --- gaussctrl/gc_pipeline.py | 4 ++-- gaussctrl/utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gaussctrl/gc_pipeline.py b/gaussctrl/gc_pipeline.py index 3dfacfb..1af9da0 100755 --- a/gaussctrl/gc_pipeline.py +++ b/gaussctrl/gc_pipeline.py @@ -154,10 +154,10 @@ def edit_images(self): # Set up ControlNet and AttnAlign self.pipe.scheduler = self.ddim_scheduler self.pipe.unet.set_attn_processor( - processor=utils.CrossFrameAttnProcessor(self_attn_coeff=0.6, + processor=utils.CrossViewAttnProcessor(self_attn_coeff=0.6, unet_chunk_size=2)) self.pipe.controlnet.set_attn_processor( - processor=utils.CrossFrameAttnProcessor(self_attn_coeff=0, + processor=utils.CrossViewAttnProcessor(self_attn_coeff=0, unet_chunk_size=2)) CONSOLE.print("Done Reset Attention Processor", style="bold blue") diff --git a/gaussctrl/utils.py b/gaussctrl/utils.py index 7db56a8..2f99d2a 100755 --- a/gaussctrl/utils.py +++ b/gaussctrl/utils.py @@ -36,7 +36,7 @@ def compute_attn(attn, query, key, value, video_length, ref_frame_index, attenti hidden_states_ref_cross = torch.bmm(attention_probs, value_ref_cross) return hidden_states_ref_cross -class CrossFrameAttnProcessor: 
+class CrossViewAttnProcessor: def __init__(self, self_attn_coeff, unet_chunk_size=2): self.unet_chunk_size = unet_chunk_size self.self_attn_coeff = self_attn_coeff From 3ecfe08377cf9a2217289ca6adaf320b07721983 Mon Sep 17 00:00:00 2001 From: jingwu2121 <98714649+jingwu2121@users.noreply.github.com> Date: Sun, 28 Jul 2024 19:13:22 +0100 Subject: [PATCH 08/23] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 037a0d2..66ef6e9 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@

-

[ECCV 2024] GaussCtrl: Multi-View Consistent Text-Driven 3D Gaussian Splatting Editing

+

🎥 [ECCV 2024] GaussCtrl: Multi-View Consistent Text-Driven 3D Gaussian Splatting Editing

Jing Wu*1 , From f65a7edf9bd90fc03fe2c021bf520863d7af10ea Mon Sep 17 00:00:00 2001 From: Mars <2836635695@qq.com> Date: Tue, 30 Jul 2024 14:29:40 +0800 Subject: [PATCH 09/23] Specify the version of gsplat --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 66ef6e9..90e229c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@

- +

🎥 [ECCV 2024] GaussCtrl: Multi-View Consistent Text-Driven 3D Gaussian Splatting Editing

@@ -51,6 +51,8 @@ GaussCtrl is built upon NeRFStudio, follow [this link](https://docs.nerf.studio/ ```bash pip install nerfstudio==1.0.0 + +pip install gsplat==0.1.3 ``` Install Lang-SAM for mask extraction. From 5dc9b8f1283f3d0a422e19f2407a4cfd999c6842 Mon Sep 17 00:00:00 2001 From: jingwu2121 <98714649+jingwu2121@users.noreply.github.com> Date: Fri, 16 Aug 2024 16:02:11 +0100 Subject: [PATCH 10/23] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 90e229c..fda2dd6 100644 --- a/README.md +++ b/README.md @@ -161,7 +161,7 @@ If you find this code or find the paper useful for your research, please conside @article{gaussctrl2024, author = {Wu, Jing and Bian, Jia-Wang and Li, Xinghui and Wang, Guangrun and Reid, Ian and Torr, Philip and Prisacariu, Victor}, title = {{GaussCtrl: Multi-View Consistent Text-Driven 3D Gaussian Splatting Editing}}, -booktitle = {ECCV}, +journal = {ECCV}, year = {2024}, } ``` From 83cb7bfa812d04519f221dd72f72955243deb2af Mon Sep 17 00:00:00 2001 From: jingwu2121 <98714649+jingwu2121@users.noreply.github.com> Date: Wed, 4 Sep 2024 20:25:16 +0100 Subject: [PATCH 11/23] Update README.md add news, fix sd1-5 issues --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index fda2dd6..88e07a1 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,9 @@ ![teaser](./assets/teaser.png) +## ✨ News +- [9.4.2024] Our original results utilise stable-diffusion-v1-5 from runwayml for editing, which is now unavailable. Please change the diffusion checkpoint to other available models, e.g. `CompVis/stable-diffusion-v1-4`, by using `--pipeline.diffusion_ckpt "CompVis/stable-diffusion-v1-4"`. 
Reproduce our original results by using the checkpoint `--pipeline.diffusion_ckpt "jinggogogo/gaussctrl-sd15"` + ## ⚙️ Installation - Tested on CUDA11.8 + Ubuntu22.04 + NeRFStudio1.0.0 (NVIDIA RTX A5000 24G) From 4049bc7d8d027fb74278db6720eddc211ed5fd87 Mon Sep 17 00:00:00 2001 From: jingwu2121 Date: Wed, 4 Sep 2024 19:35:39 +0000 Subject: [PATCH 12/23] fix runwayml sd1-5 --- .gitignore | 2 ++ gaussctrl/gc_pipeline.py | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 8475fa1..2aa0689 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ processed_data/ gaussctrl/__pycache__ +test.sh +test.py cmd.sh diff --git a/gaussctrl/gc_pipeline.py b/gaussctrl/gc_pipeline.py index 1af9da0..0be3dfa 100755 --- a/gaussctrl/gc_pipeline.py +++ b/gaussctrl/gc_pipeline.py @@ -66,6 +66,9 @@ class GaussCtrlPipelineConfig(VanillaPipelineConfig): chunk_size: int = 5 """Batch size for image editing, feel free to reduce to fit your GPU""" ref_view_num: int = 4 + """Number of reference frames""" + diffusion_ckpt: str = 'CompVis/stable-diffusion-v1-4' + """Diffusion checkpoints""" class GaussCtrlPipeline(VanillaPipeline): @@ -88,11 +91,11 @@ def __init__( self.prompt = self.config.prompt self.pipe_device = 'cuda:0' - self.ddim_scheduler = DDIMScheduler.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="scheduler") - self.ddim_inverser = DDIMInverseScheduler.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="scheduler") + self.ddim_scheduler = DDIMScheduler.from_pretrained(self.config.diffusion_ckpt, subfolder="scheduler") + self.ddim_inverser = DDIMInverseScheduler.from_pretrained(self.config.diffusion_ckpt, subfolder="scheduler") controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-depth") - self.pipe = StableDiffusionControlNetPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", controlnet=controlnet).to(self.device).to(torch.float16) + self.pipe = 
StableDiffusionControlNetPipeline.from_pretrained(self.config.diffusion_ckpt, controlnet=controlnet).to(self.device).to(torch.float16) self.pipe.to(self.pipe_device) added_prompt = 'best quality, extremely detailed' From a8e1e0456735abd65cf32ce92d367cd0479af734 Mon Sep 17 00:00:00 2001 From: jingwu2121 Date: Fri, 6 Sep 2024 10:52:04 +0000 Subject: [PATCH 13/23] upgrade transformers version --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index e847e61..ee8fe29 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ +transformers>=4.38.0 diffusers==0.26.0 -transformers==4.34.1 -huggingface-hub==0.20.3 From e5e09a19e827bd537e1b794ce4eecb8fd0090352 Mon Sep 17 00:00:00 2001 From: jingwu2121 Date: Fri, 13 Sep 2024 11:54:29 +0000 Subject: [PATCH 14/23] minor --- gaussctrl/gc_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gaussctrl/gc_pipeline.py b/gaussctrl/gc_pipeline.py index 0be3dfa..c853711 100755 --- a/gaussctrl/gc_pipeline.py +++ b/gaussctrl/gc_pipeline.py @@ -162,7 +162,7 @@ def edit_images(self): self.pipe.controlnet.set_attn_processor( processor=utils.CrossViewAttnProcessor(self_attn_coeff=0, unet_chunk_size=2)) - CONSOLE.print("Done Reset Attention Processor", style="bold blue") + CONSOLE.print("Done Resetting Attention Processor", style="bold blue") print("#############################") CONSOLE.print("Start Editing: ", style="bold yellow") From 92fe448acc2414c680e643a4dce2177c443268f3 Mon Sep 17 00:00:00 2001 From: jingwu2121 Date: Fri, 13 Sep 2024 13:03:12 +0000 Subject: [PATCH 15/23] fix load_all bug --- gaussctrl/gc_datamanager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gaussctrl/gc_datamanager.py b/gaussctrl/gc_datamanager.py index fcc7804..7e11a24 100755 --- a/gaussctrl/gc_datamanager.py +++ b/gaussctrl/gc_datamanager.py @@ -225,7 +225,7 @@ def next_train(self, step: int) -> Tuple[Cameras, Dict]: data["image"] 
= data["image"].to(self.device) assert len(self.train_dataset.cameras.shape) == 1, "Assumes single batch dimension" - if len(self.train_dataset._dataparser_outputs.image_filenames) <= self.config.subset_num * self.config.sampled_views_every_subset: + if len(self.train_dataset._dataparser_outputs.image_filenames) <= self.config.subset_num * self.config.sampled_views_every_subset or self.config.load_all: camera = self.cameras[image_idx : image_idx + 1].to(self.device) else: camera = self.cameras[image_idx : image_idx + 1][0].to(self.device) From 4a97f0ed80d8dad8c4837ce61db86c401cf90955 Mon Sep 17 00:00:00 2001 From: jingwu2121 Date: Tue, 8 Oct 2024 09:10:05 +0000 Subject: [PATCH 16/23] fix reverse prompt --- gaussctrl/gc_pipeline.py | 12 ++++++++---- scripts/bear.sh | 6 +++--- scripts/dinosaur.sh | 10 +++++----- scripts/face.sh | 16 ++++++++-------- scripts/fangzhou.sh | 16 ++++++++-------- scripts/garden.sh | 2 +- scripts/stone_horse.sh | 4 ++-- 7 files changed, 35 insertions(+), 31 deletions(-) diff --git a/gaussctrl/gc_pipeline.py b/gaussctrl/gc_pipeline.py index c853711..c204f0f 100755 --- a/gaussctrl/gc_pipeline.py +++ b/gaussctrl/gc_pipeline.py @@ -55,8 +55,10 @@ class GaussCtrlPipelineConfig(VanillaPipelineConfig): """specifies the datamanager config""" render_rate: int = 500 """how many gauss steps for gauss training""" - prompt: str = "" + edit_prompt: str = "" """Positive Prompt""" + reverse_prompt: str = "" + """DDIM Inversion Prompt""" langsam_obj: str = "" """The object to be edited""" guidance_scale: float = 5 @@ -89,7 +91,8 @@ def __init__( self.test_mode = test_mode self.langsam = LangSAM() - self.prompt = self.config.prompt + self.edit_prompt = self.config.edit_prompt + self.reverse_prompt = self.config.reverse_prompt self.pipe_device = 'cuda:0' self.ddim_scheduler = DDIMScheduler.from_pretrained(self.config.diffusion_ckpt, subfolder="scheduler") self.ddim_inverser = DDIMInverseScheduler.from_pretrained(self.config.diffusion_ckpt, 
subfolder="scheduler") @@ -99,7 +102,8 @@ def __init__( self.pipe.to(self.pipe_device) added_prompt = 'best quality, extremely detailed' - self.positive_prompt = self.prompt + ', ' + added_prompt + self.positive_prompt = self.edit_prompt + ', ' + added_prompt + self.positive_reverse_prompt = self.reverse_prompt + ', ' + added_prompt self.negative_prompts = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality' view_num = len(self.datamanager.cameras) @@ -135,7 +139,7 @@ def render_reverse(self): disparity = self.depth2disparity_torch(rendered_depth[:,:,0][None]) self.pipe.scheduler = self.ddim_inverser - latent, _ = self.pipe(prompt=self.positive_prompt, # placeholder here, since cfg=0 + latent, _ = self.pipe(prompt=self.positive_reverse_prompt, # placeholder here, since cfg=0 num_inference_steps=self.num_inference_steps, latents=init_latent, image=disparity, return_dict=False, guidance_scale=0, output_type='latent') diff --git a/scripts/bear.sh b/scripts/bear.sh index 4e30067..786aab4 100644 --- a/scripts/bear.sh +++ b/scripts/bear.sh @@ -1,7 +1,7 @@ ns-train splatfacto --output-dir unedited_models --experiment-name bear --viewer.quit-on-train-completion True nerfstudio-data --data data/bear -ns-train gaussctrl --load-checkpoint unedited_models/bear/splatfacto/2024-07-10_170906/nerfstudio_models/step-000029999.ckpt --experiment-name bear --output-dir outputs --pipeline.datamanager.data data/bear --pipeline.prompt "a photo of a polar bear in the forest" --pipeline.guidance_scale 5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'bear' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/bear/splatfacto/2024-07-10_170906/nerfstudio_models/step-000029999.ckpt --experiment-name bear --output-dir outputs --pipeline.datamanager.data data/bear --pipeline.edit_prompt "a photo of a polar bear in the forest" --pipeline.reverse_prompt "a photo of a bear statue in the 
forest" --pipeline.guidance_scale 5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'bear' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/bear/splatfacto/2024-07-10_170906/nerfstudio_models/step-000029999.ckpt --experiment-name bear --output-dir outputs --pipeline.datamanager.data data/bear --pipeline.prompt "a photo of a grizzly bear in the forest" --pipeline.guidance_scale 5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'bear' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/bear/splatfacto/2024-07-10_170906/nerfstudio_models/step-000029999.ckpt --experiment-name bear --output-dir outputs --pipeline.datamanager.data data/bear --pipeline.edit_prompt "a photo of a grizzly bear in the forest" --pipeline.reverse_prompt "a photo of a bear statue in the forest" --pipeline.guidance_scale 5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'bear' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/bear/splatfacto/2024-07-10_170906/nerfstudio_models/step-000029999.ckpt --experiment-name bear --output-dir outputs --pipeline.datamanager.data data/bear --pipeline.prompt "a photo of a golden bear statue in the forest" --pipeline.guidance_scale 5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'bear' --viewer.quit-on-train-completion True \ No newline at end of file +ns-train gaussctrl --load-checkpoint unedited_models/bear/splatfacto/2024-07-10_170906/nerfstudio_models/step-000029999.ckpt --experiment-name bear --output-dir outputs --pipeline.datamanager.data data/bear --pipeline.edit_prompt "a photo of a golden bear statue in the forest" --pipeline.reverse_prompt "a photo of a bear statue in the forest" --pipeline.guidance_scale 5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'bear' --viewer.quit-on-train-completion True \ No newline at end of file diff --git a/scripts/dinosaur.sh b/scripts/dinosaur.sh index 23f9040..20b33b2 100644 --- 
a/scripts/dinosaur.sh +++ b/scripts/dinosaur.sh @@ -1,11 +1,11 @@ ns-train splatfacto --output-dir unedited_models --experiment-name dinosaur --viewer.quit-on-train-completion True nerfstudio-data --data data/dinosaur -ns-train gaussctrl --load-checkpoint unedited_models/dinosaur/splatfacto/2024-07-11_173113/nerfstudio_models/step-000029999.ckpt --experiment-name dinosaur --output-dir outputs --pipeline.datamanager.data data/dinosaur --pipeline.prompt "a photo of a robot dinosaur on the road side" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'dinosaur statue' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/dinosaur/splatfacto/2024-07-11_173113/nerfstudio_models/step-000029999.ckpt --experiment-name dinosaur --output-dir outputs --pipeline.datamanager.data data/dinosaur --pipeline.edit_prompt "a photo of a robot dinosaur on the road side" --pipeline.reverse_prompt "a photo of a dinosaur statue on the road side" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'dinosaur statue' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/dinosaur/splatfacto/2024-07-11_173113/nerfstudio_models/step-000029999.ckpt --experiment-name dinosaur --output-dir outputs --pipeline.datamanager.data data/dinosaur --pipeline.prompt "a photo of a dinosaur statue under the water" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/dinosaur/splatfacto/2024-07-11_173113/nerfstudio_models/step-000029999.ckpt --experiment-name dinosaur --output-dir outputs --pipeline.datamanager.data data/dinosaur --pipeline.edit_prompt "a photo of a dinosaur statue under the water" --pipeline.reverse_prompt "a photo of a dinosaur statue on the road side" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --viewer.quit-on-train-completion True -ns-train gaussctrl 
--load-checkpoint unedited_models/dinosaur/splatfacto/2024-07-11_173113/nerfstudio_models/step-000029999.ckpt --experiment-name dinosaur --output-dir outputs --pipeline.datamanager.data data/dinosaur --pipeline.prompt "a photo of a dinosaur statue in the snow" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/dinosaur/splatfacto/2024-07-11_173113/nerfstudio_models/step-000029999.ckpt --experiment-name dinosaur --output-dir outputs --pipeline.datamanager.data data/dinosaur --pipeline.edit_prompt "a photo of a dinosaur statue in the snow" --pipeline.reverse_prompt "a photo of a dinosaur statue on the road side" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/dinosaur/splatfacto/2024-07-11_173113/nerfstudio_models/step-000029999.ckpt --experiment-name dinosaur --output-dir outputs --pipeline.datamanager.data data/dinosaur --pipeline.prompt "a photo of a dinosaur statue at night" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/dinosaur/splatfacto/2024-07-11_173113/nerfstudio_models/step-000029999.ckpt --experiment-name dinosaur --output-dir outputs --pipeline.datamanager.data data/dinosaur --pipeline.edit_prompt "a photo of a dinosaur statue at night" --pipeline.reverse_prompt "a photo of a dinosaur statue on the road side" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/dinosaur/splatfacto/2024-07-11_173113/nerfstudio_models/step-000029999.ckpt --experiment-name dinosaur --output-dir outputs --pipeline.datamanager.data data/dinosaur --pipeline.prompt "a photo of a dinosaur statue in the storm" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --viewer.quit-on-train-completion 
True +ns-train gaussctrl --load-checkpoint unedited_models/dinosaur/splatfacto/2024-07-11_173113/nerfstudio_models/step-000029999.ckpt --experiment-name dinosaur --output-dir outputs --pipeline.datamanager.data data/dinosaur --pipeline.edit_prompt "a photo of a dinosaur statue in the storm" --pipeline.reverse_prompt "a photo of a dinosaur statue on the road side" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --viewer.quit-on-train-completion True diff --git a/scripts/face.sh b/scripts/face.sh index 06d4364..80608da 100644 --- a/scripts/face.sh +++ b/scripts/face.sh @@ -1,17 +1,17 @@ ns-train splatfacto --output-dir unedited_models --experiment-name face --viewer.quit-on-train-completion True nerfstudio-data --data data/face -ns-train gaussctrl --load-checkpoint unedited_models/face/splatfacto/2024-07-11_173339/nerfstudio_models/step-000029999.ckpt --experiment-name face --output-dir outputs --pipeline.datamanager.data data/face --pipeline.prompt "a photo of a face of a man with a moustache" --pipeline.guidance_scale 3 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/face/splatfacto/2024-07-11_173339/nerfstudio_models/step-000029999.ckpt --experiment-name face --output-dir outputs --pipeline.datamanager.data data/face --pipeline.edit_prompt "a photo of a face of a man with a moustache" --pipeline.reverse_prompt "a photo of a face of a man" --pipeline.guidance_scale 3 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/face/splatfacto/2024-07-11_173339/nerfstudio_models/step-000029999.ckpt --experiment-name face --output-dir outputs --pipeline.datamanager.data data/face --pipeline.prompt "a photo of bronze bust statue of a man" --pipeline.guidance_scale 3 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True +ns-train gaussctrl 
--load-checkpoint unedited_models/face/splatfacto/2024-07-11_173339/nerfstudio_models/step-000029999.ckpt --experiment-name face --output-dir outputs --pipeline.datamanager.data data/face --pipeline.edit_prompt "a photo of bronze bust statue of a man" --pipeline.reverse_prompt "a photo of a face of a man" --pipeline.guidance_scale 3 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/face/splatfacto/2024-07-11_173339/nerfstudio_models/step-000029999.ckpt --experiment-name face --output-dir outputs --pipeline.datamanager.data data/face --pipeline.prompt "a photo of a man wearing a pair of glasses" --pipeline.guidance_scale 3 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/face/splatfacto/2024-07-11_173339/nerfstudio_models/step-000029999.ckpt --experiment-name face --output-dir outputs --pipeline.datamanager.data data/face --pipeline.edit_prompt "a photo of a man wearing a pair of glasses" --pipeline.reverse_prompt "a photo of a face of a man" --pipeline.guidance_scale 3 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/face/splatfacto/2024-07-11_173339/nerfstudio_models/step-000029999.ckpt --experiment-name face --output-dir outputs --pipeline.datamanager.data data/face --pipeline.prompt "a photo of a face of a Jocker with green hair" --pipeline.guidance_scale 5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/face/splatfacto/2024-07-11_173339/nerfstudio_models/step-000029999.ckpt --experiment-name face --output-dir outputs --pipeline.datamanager.data data/face --pipeline.edit_prompt "a photo of a face of a Jocker with green hair" --pipeline.reverse_prompt "a photo of a face of a man" 
--pipeline.guidance_scale 5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/face/splatfacto/2024-07-11_173339/nerfstudio_models/step-000029999.ckpt --experiment-name face --output-dir outputs --pipeline.datamanager.data data/face --pipeline.prompt "a photo of a face of an old man with wrinkles" --pipeline.guidance_scale 3 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/face/splatfacto/2024-07-11_173339/nerfstudio_models/step-000029999.ckpt --experiment-name face --output-dir outputs --pipeline.datamanager.data data/face --pipeline.edit_prompt "a photo of a face of an old man with wrinkles" --pipeline.reverse_prompt "a photo of a face of a man" --pipeline.guidance_scale 3 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/face/splatfacto/2024-07-11_173339/nerfstudio_models/step-000029999.ckpt --experiment-name face --output-dir outputs --pipeline.datamanager.data data/face --pipeline.prompt "a photo of a man wearing a pair of sunglasses" --pipeline.guidance_scale 3 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/face/splatfacto/2024-07-11_173339/nerfstudio_models/step-000029999.ckpt --experiment-name face --output-dir outputs --pipeline.datamanager.data data/face --pipeline.edit_prompt "a photo of a man wearing a pair of sunglasses" --pipeline.reverse_prompt "a photo of a face of a man" --pipeline.guidance_scale 3 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/face/splatfacto/2024-07-11_173339/nerfstudio_models/step-000029999.ckpt --experiment-name face --output-dir outputs 
--pipeline.datamanager.data data/face --pipeline.prompt "a photo of a face of a woman with thick made-up" --pipeline.guidance_scale 3 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/face/splatfacto/2024-07-11_173339/nerfstudio_models/step-000029999.ckpt --experiment-name face --output-dir outputs --pipeline.datamanager.data data/face --pipeline.edit_prompt "a photo of a face of a woman with thick made-up" --pipeline.reverse_prompt "a photo of a face of a man" --pipeline.guidance_scale 3 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/face/splatfacto/2024-07-11_173339/nerfstudio_models/step-000029999.ckpt --experiment-name face --output-dir outputs --pipeline.datamanager.data data/face --pipeline.prompt "a photo of a face of a man with red hair" --pipeline.guidance_scale 5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/face/splatfacto/2024-07-11_173339/nerfstudio_models/step-000029999.ckpt --experiment-name face --output-dir outputs --pipeline.datamanager.data data/face --pipeline.edit_prompt "a photo of a face of a man with red hair" --pipeline.reverse_prompt "a photo of a face of a man" --pipeline.guidance_scale 5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True diff --git a/scripts/fangzhou.sh b/scripts/fangzhou.sh index f708500..d774063 100644 --- a/scripts/fangzhou.sh +++ b/scripts/fangzhou.sh @@ -1,17 +1,17 @@ ns-train splatfacto --output-dir unedited_models --experiment-name fangzhou --viewer.quit-on-train-completion True nerfstudio-data --data data/fangzhou -ns-train gaussctrl --load-checkpoint unedited_models/fangzhou/splatfacto/2024-07-11_173620/nerfstudio_models/step-000029999.ckpt --experiment-name fangzhou --output-dir 
outputs --pipeline.datamanager.data data/fangzhou --pipeline.prompt "a photo of a face of an old man" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/fangzhou/splatfacto/2024-07-11_173620/nerfstudio_models/step-000029999.ckpt --experiment-name fangzhou --output-dir outputs --pipeline.datamanager.data data/fangzhou --pipeline.edit_prompt "a photo of a face of an old man" --pipeline.reverse_prompt "a photo of a face of a man" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/fangzhou/splatfacto/2024-07-11_173620/nerfstudio_models/step-000029999.ckpt --experiment-name fangzhou --output-dir outputs --pipeline.datamanager.data data/fangzhou --pipeline.prompt "a photo of a face of a man with maasai face paint" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/fangzhou/splatfacto/2024-07-11_173620/nerfstudio_models/step-000029999.ckpt --experiment-name fangzhou --output-dir outputs --pipeline.datamanager.data data/fangzhou --pipeline.edit_prompt "a photo of a face of a man with maasai face paint" --pipeline.reverse_prompt "a photo of a face of a man" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/fangzhou/splatfacto/2024-07-11_173620/nerfstudio_models/step-000029999.ckpt --experiment-name fangzhou --output-dir outputs --pipeline.datamanager.data data/fangzhou --pipeline.prompt "a photo of a face of an old lady" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint 
unedited_models/fangzhou/splatfacto/2024-07-11_173620/nerfstudio_models/step-000029999.ckpt --experiment-name fangzhou --output-dir outputs --pipeline.datamanager.data data/fangzhou --pipeline.edit_prompt "a photo of a face of an old lady" --pipeline.reverse_prompt "a photo of a face of a man" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/fangzhou/splatfacto/2024-07-11_173620/nerfstudio_models/step-000029999.ckpt --experiment-name fangzhou --output-dir outputs --pipeline.datamanager.data data/fangzhou --pipeline.prompt "a photo of a face of a man wearing a pair of glasses" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/fangzhou/splatfacto/2024-07-11_173620/nerfstudio_models/step-000029999.ckpt --experiment-name fangzhou --output-dir outputs --pipeline.datamanager.data data/fangzhou --pipeline.edit_prompt "a photo of a face of a man wearing a pair of glasses" --pipeline.reverse_prompt "a photo of a face of a man" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/fangzhou/splatfacto/2024-07-11_173620/nerfstudio_models/step-000029999.ckpt --experiment-name fangzhou --output-dir outputs --pipeline.datamanager.data data/fangzhou --pipeline.prompt "a photo of bronze bust statue of a man" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/fangzhou/splatfacto/2024-07-11_173620/nerfstudio_models/step-000029999.ckpt --experiment-name fangzhou --output-dir outputs --pipeline.datamanager.data data/fangzhou --pipeline.edit_prompt "a photo of bronze bust statue of a man" 
--pipeline.reverse_prompt "a photo of a face of a man" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/fangzhou/splatfacto/2024-07-11_173620/nerfstudio_models/step-000029999.ckpt --experiment-name fangzhou --output-dir outputs --pipeline.datamanager.data data/fangzhou --pipeline.prompt "a photo of a face of a man with chinese opera face paint" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/fangzhou/splatfacto/2024-07-11_173620/nerfstudio_models/step-000029999.ckpt --experiment-name fangzhou --output-dir outputs --pipeline.datamanager.data data/fangzhou --pipeline.edit_prompt "a photo of a face of a man with chinese opera face paint" --pipeline.reverse_prompt "a photo of a face of a man" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/fangzhou/splatfacto/2024-07-11_173620/nerfstudio_models/step-000029999.ckpt --experiment-name fangzhou --output-dir outputs --pipeline.datamanager.data data/fangzhou --pipeline.prompt "a photo of a face of a newborn baby" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/fangzhou/splatfacto/2024-07-11_173620/nerfstudio_models/step-000029999.ckpt --experiment-name fangzhou --output-dir outputs --pipeline.datamanager.data data/fangzhou --pipeline.edit_prompt "a photo of a face of a newborn baby" --pipeline.reverse_prompt "a photo of a face of a man" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint 
unedited_models/fangzhou/splatfacto/2024-07-11_173620/nerfstudio_models/step-000029999.ckpt --experiment-name fangzhou --output-dir outputs --pipeline.datamanager.data data/fangzhou --pipeline.prompt "a photo of a face of a woman with thick make-up" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/fangzhou/splatfacto/2024-07-11_173620/nerfstudio_models/step-000029999.ckpt --experiment-name fangzhou --output-dir outputs --pipeline.datamanager.data data/fangzhou --pipeline.edit_prompt "a photo of a face of a woman with thick make-up" --pipeline.reverse_prompt "a photo of a face of a man" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'man' --viewer.quit-on-train-completion True diff --git a/scripts/garden.sh b/scripts/garden.sh index d1ad929..ae8143f 100644 --- a/scripts/garden.sh +++ b/scripts/garden.sh @@ -1,3 +1,3 @@ ns-train splatfacto --output-dir unedited_models --experiment-name garden --viewer.quit-on-train-completion True nerfstudio-data --data data/garden -ns-train gaussctrl --load-checkpoint unedited_models/garden/splatfacto/2024-07-11_173647/nerfstudio_models/step-000029999.ckpt --experiment-name garden --output-dir outputs --pipeline.datamanager.data data/garden --pipeline.prompt "a photo of a fake plant on a table in the garden in the snow" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/garden/splatfacto/2024-07-11_173647/nerfstudio_models/step-000029999.ckpt --experiment-name garden --output-dir outputs --pipeline.datamanager.data data/garden --pipeline.edit_prompt "a photo of a fake plant on a table in the garden in the snow" --pipeline.reverse_prompt "a photo of a fake plant on a table in the garden" --pipeline.guidance_scale 7.5 --pipeline.chunk_size 3 --viewer.quit-on-train-completion True diff 
--git a/scripts/stone_horse.sh b/scripts/stone_horse.sh index ab0c3b8..b18119a 100644 --- a/scripts/stone_horse.sh +++ b/scripts/stone_horse.sh @@ -1,5 +1,5 @@ ns-train splatfacto --output-dir unedited_models --experiment-name stone_horse --viewer.quit-on-train-completion True nerfstudio-data --data data/stone_horse -ns-train gaussctrl --load-checkpoint unedited_models/stone_horse/splatfacto/2024-07-11_173710/nerfstudio_models/step-000029999.ckpt --experiment-name stone_horse --output-dir outputs --pipeline.datamanager.data data/stone_horse --pipeline.prompt "a photo of a giraffe in front of the museum" --pipeline.guidance_scale 5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'stone horse' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/stone_horse/splatfacto/2024-07-11_173710/nerfstudio_models/step-000029999.ckpt --experiment-name stone_horse --output-dir outputs --pipeline.datamanager.data data/stone_horse --pipeline.edit_prompt "a photo of a giraffe in front of the museum" --pipeline.reverse_prompt "a photo of a stone horse in front of the museum" --pipeline.guidance_scale 5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'stone horse' --viewer.quit-on-train-completion True -ns-train gaussctrl --load-checkpoint unedited_models/stone_horse/splatfacto/2024-07-11_173710/nerfstudio_models/step-000029999.ckpt --experiment-name stone_horse --output-dir outputs --pipeline.datamanager.data data/stone_horse --pipeline.prompt "a photo of a zebra in front of the museum" --pipeline.guidance_scale 5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'stone horse' --viewer.quit-on-train-completion True +ns-train gaussctrl --load-checkpoint unedited_models/stone_horse/splatfacto/2024-07-11_173710/nerfstudio_models/step-000029999.ckpt --experiment-name stone_horse --output-dir outputs --pipeline.datamanager.data data/stone_horse --pipeline.edit_prompt "a photo of a zebra in front of the museum" --pipeline.reverse_prompt "a photo of a 
stone horse in front of the museum" --pipeline.guidance_scale 5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'stone horse' --viewer.quit-on-train-completion True From aa938b27169ef7516f08a3472fd622755387d76f Mon Sep 17 00:00:00 2001 From: jingwu2121 Date: Tue, 8 Oct 2024 09:24:14 +0000 Subject: [PATCH 17/23] update readme --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 88e07a1..5500351 100644 --- a/README.md +++ b/README.md @@ -108,13 +108,13 @@ Once you finish training the `splatfacto` model, the checkpoints will be saved t Start editing your model by running: ```bash -ns-train gaussctrl --load-checkpoint {output/folder/.../nerfstudio_models/step-000029999.ckpt} --experiment-name EXPEIMENT_NAME --output-dir {output/folder} --pipeline.datamanager.data {path/to/your/data} --pipeline.prompt "YOUR PROMPT" --pipeline.guidance_scale 5 --pipeline.chunk_size {batch size of images during editing} --pipeline.langsam_obj 'OBJECT TO BE EDITED' +ns-train gaussctrl --load-checkpoint {output/folder/.../nerfstudio_models/step-000029999.ckpt} --experiment-name EXPERIMENT_NAME --output-dir {output/folder} --pipeline.datamanager.data {path/to/your/data} --pipeline.edit_prompt "YOUR PROMPT" --pipeline.reverse_prompt "PROMPT TO DESCRIBE THE UNEDITED SCENE" --pipeline.guidance_scale 5 --pipeline.chunk_size {batch size of images during editing} --pipeline.langsam_obj 'OBJECT TO BE EDITED' ``` Please note that the Lang-SAM is optional here. If you are editing the environment, please remove this argument.
```bash -ns-train gaussctrl --load-checkpoint {output/folder/.../nerfstudio_models/step-000029999.ckpt} --experiment-name EXPEIMENT_NAME --output-dir {output/folder} --pipeline.datamanager.data {path/to/your/data} --pipeline.prompt "YOUR PROMPT" --pipeline.guidance_scale 5 --pipeline.chunk_size {batch size of images during editing} +ns-train gaussctrl --load-checkpoint {output/folder/.../nerfstudio_models/step-000029999.ckpt} --experiment-name EXPERIMENT_NAME --output-dir {output/folder} --pipeline.datamanager.data {path/to/your/data} --pipeline.edit_prompt "YOUR PROMPT" --pipeline.reverse_prompt "PROMPT TO DESCRIBE THE UNEDITED SCENE" --pipeline.guidance_scale 5 --pipeline.chunk_size {batch size of images during editing} ``` Here, `--pipeline.guidance_scale` denotes the classifier-free guidance used when editing the images. `--pipeline.chunk_size` denotes the number of images edited together during 1 batch. We are using **NVIDIA RTX A5000** GPU (24G), and the maximum chunk size is 3. (~22G) @@ -134,7 +134,7 @@ ns-train splatfacto --output-dir unedited_models --experiment-name bear nerfstud Then edit the 3DGS by running: ```bash -ns-train gaussctrl --load-checkpoint {unedited_models/bear/splatfacto/.../nerfstudio_models/step-000029999.ckpt} --experiment-name bear --output-dir outputs --pipeline.datamanager.data data/bear --pipeline.prompt "a photo of a polar bear in the forest" --pipeline.guidance_scale 5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'bear' +ns-train gaussctrl --load-checkpoint unedited_models/bear/splatfacto/2024-07-10_170906/nerfstudio_models/step-000029999.ckpt --experiment-name bear --output-dir outputs --pipeline.datamanager.data data/bear --pipeline.edit_prompt "a photo of a polar bear in the forest" --pipeline.reverse_prompt "a photo of a bear statue in the forest" --pipeline.guidance_scale 5 --pipeline.chunk_size 3 --pipeline.langsam_obj 'bear' --viewer.quit-on-train-completion True ``` In our experiments, We sampled 40 views randomly from
the entire dataset to accelerate the method, which is set in `gc_datamanager.py` by default. We split the entire set into 4 subsets, and randomly sampled 10 images in each subset split. Feel free to decrease/increase the number to see the difference by modifying `--pipeline.datamanager.subset-num` and `--pipeline.datamanager.sampled-views-every-subset`. Set `--pipeline.datamanager.load-all` to `True`, if you want to edit all the images in the dataset. From 77cb634b2e099a3e5355c78e65ec80e084f5a3ee Mon Sep 17 00:00:00 2001 From: jingwu2121 <98714649+jingwu2121@users.noreply.github.com> Date: Tue, 5 Nov 2024 13:08:36 +0000 Subject: [PATCH 18/23] Revert "Specify the version of gsplat" --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 5500351..f52efa1 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@

- +

🎥 [ECCV 2024] GaussCtrl: Multi-View Consistent Text-Driven 3D Gaussian Splatting Editing

@@ -54,8 +54,6 @@ GaussCtrl is built upon NeRFStudio, follow [this link](https://docs.nerf.studio/ ```bash pip install nerfstudio==1.0.0 - -pip install gsplat==0.1.3 ``` Install Lang-SAM for mask extraction. From 41ca14602a5291c5d7da14c130af4098b33956c5 Mon Sep 17 00:00:00 2001 From: jingwu2121 <98714649+jingwu2121@users.noreply.github.com> Date: Tue, 5 Nov 2024 13:11:44 +0000 Subject: [PATCH 19/23] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index f52efa1..e28a6ee 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,8 @@ GaussCtrl is built upon NeRFStudio, follow [this link](https://docs.nerf.studio/ ```bash pip install nerfstudio==1.0.0 + +pip install gsplat==0.1.2.1 ``` Install Lang-SAM for mask extraction. From d6666dcf128faf01828722c337964a3bafc63b0c Mon Sep 17 00:00:00 2001 From: jingwu2121 <98714649+jingwu2121@users.noreply.github.com> Date: Thu, 7 Nov 2024 13:07:07 +0000 Subject: [PATCH 20/23] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e28a6ee..de15ceb 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,9 @@ GaussCtrl is built upon NeRFStudio, follow [this link](https://docs.nerf.studio/ ```bash pip install nerfstudio==1.0.0 -pip install gsplat==0.1.2.1 +# Try either of these two if one is not working +pip install gsplat==0.1.2 +pip install gsplat==0.1.3 ``` Install Lang-SAM for mask extraction. From a82312cccc5f973a30e413bb06d3e6a612fa337f Mon Sep 17 00:00:00 2001 From: Jing Wu <98714649+jingwu2121@users.noreply.github.com> Date: Thu, 29 May 2025 23:10:06 +0100 Subject: [PATCH 21/23] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index de15ceb..879b3f5 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,7 @@ We thank these authors for their great work! 
### Customize Your Data -We recommend to pre-process your data to 512x512, and following [this page](https://docs.nerf.studio/quickstart/custom_dataset.html) to process your data. +We recommend that you pre-process your data to 512x512 and follow [this page](https://docs.nerf.studio/quickstart/custom_dataset.html) to process it. ## :arrow_forward: Get Started ![Method](./assets/method.png) From 24b2ba67bbafb165bdfca5aee8a019d562567dc4 Mon Sep 17 00:00:00 2001 From: Jing Wu <98714649+jingwu2121@users.noreply.github.com> Date: Thu, 29 May 2025 23:11:09 +0100 Subject: [PATCH 22/23] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 879b3f5..8c6db5c 100644 --- a/README.md +++ b/README.md @@ -125,7 +125,7 @@ Control the number of reference views using `--pipeline.ref_view_num`, by defaul ### Small Tips - If your editings are not as expected, please check the images edited by ControlNet. -- Normally, conditioning your editing on the good ControlNet editing views is very helpful, which means choosing those good ControlNet editing views as reference views is better. +- Empirically, conditioning your editing on the good ControlNet editing views is very helpful, which means choosing those good ControlNet editing views as reference views is better. ## :wrench: Reproduce Our Results From 369990dc5cacba52c1a0a3f296e1a038824ef402 Mon Sep 17 00:00:00 2001 From: manolisfosteris Date: Tue, 10 Mar 2026 14:20:53 +0100 Subject: [PATCH 23/23] Fix LangSAM API: pass lists instead of bare string to predict() The updated LangSAM API expects predict([image], [text]) with lists. Passing a bare string caused it to iterate over characters (e.g. "bear" -> ["b","e","a","r"]), breaking mask prediction entirely. Also handles the new list[dict] return format and guards against empty mask results.
Co-Authored-By: Claude Sonnet 4.6 --- gaussctrl/gc_pipeline.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gaussctrl/gc_pipeline.py b/gaussctrl/gc_pipeline.py index c204f0f..f201888 100755 --- a/gaussctrl/gc_pipeline.py +++ b/gaussctrl/gc_pipeline.py @@ -148,8 +148,11 @@ def render_reverse(self): if self.config.langsam_obj != "": langsam_obj = self.config.langsam_obj langsam_rgb_pil = Image.fromarray((rendered_rgb.cpu().numpy() * 255).astype(np.uint8)) - masks, _, _, _ = self.langsam.predict(langsam_rgb_pil, langsam_obj) - mask_npy = masks.clone().cpu().numpy()[0] * 1 + # The new LangSAM API expects lists; passing a bare string causes it to + # iterate over characters (e.g. "bear" -> ["b","e","a","r"]), breaking batching. + results = self.langsam.predict([langsam_rgb_pil], [langsam_obj]) + result_masks = results[0]["masks"] # new API returns list[dict] + mask_npy = result_masks[0] * 1 if len(result_masks) > 0 else None if self.config.langsam_obj != "": self.update_datasets(cam_idx, rendered_rgb.cpu(), rendered_depth, latent, mask_npy)