mirror of
https://github.com/SillyTavern/SillyTavern-Extras.git
synced 2026-05-01 03:41:24 +00:00
Live2d Init
This commit is contained in:
BIN
live2d/Character Card Guide.png
Normal file
BIN
live2d/Character Card Guide.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 170 KiB |
0
live2d/tha3/__init__.py
Normal file
0
live2d/tha3/__init__.py
Normal file
0
live2d/tha3/app/__init__.py
Normal file
0
live2d/tha3/app/__init__.py
Normal file
628
live2d/tha3/app/app.py
Normal file
628
live2d/tha3/app/app.py
Normal file
@@ -0,0 +1,628 @@
|
||||
import argparse
|
||||
import cv2
|
||||
import os
|
||||
import random
|
||||
import requests
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import wx
|
||||
from PIL import Image
|
||||
from torchvision import transforms
|
||||
from flask import Flask, render_template, Response, send_file, request
|
||||
from flask_cors import CORS
|
||||
from io import BytesIO
|
||||
|
||||
sys.path.append(os.getcwd())
|
||||
from tha3.mocap.ifacialmocap_constants import *
|
||||
from tha3.mocap.ifacialmocap_pose import create_default_ifacialmocap_pose
|
||||
from tha3.mocap.ifacialmocap_pose_converter import IFacialMocapPoseConverter
|
||||
from tha3.mocap.ifacialmocap_poser_converter_25 import create_ifacialmocap_pose_converter
|
||||
from tha3.poser.modes.load_poser import load_poser
|
||||
from tha3.poser.poser import Poser
|
||||
from tha3.util import (
|
||||
torch_linear_to_srgb, resize_PIL_image, extract_PIL_image_from_filelike,
|
||||
extract_pytorch_image_from_PIL_image
|
||||
)
|
||||
from typing import Optional
|
||||
|
||||
# Add the current working directory to the system path
|
||||
sys.path.append(os.getcwd())
|
||||
|
||||
# Global Variables
|
||||
global_source_image = None
|
||||
global_source_image_path = None
|
||||
global_result_image = None
|
||||
global_reload = None
|
||||
is_talking_override = False
|
||||
is_talking = False
|
||||
|
||||
# Flask setup
|
||||
app = Flask(__name__)
|
||||
CORS(app)
|
||||
|
||||
def start_talking():
|
||||
global is_talking_override
|
||||
is_talking_override = True
|
||||
#return send_file(global_source_image_path, mimetype='image/png')
|
||||
return "started"
|
||||
|
||||
def stop_talking():
|
||||
global is_talking_override
|
||||
is_talking_override = False
|
||||
return "stopped"
|
||||
|
||||
def result_feed():
|
||||
def generate():
|
||||
while True:
|
||||
if global_result_image is not None:
|
||||
|
||||
try:
|
||||
# Encode the numpy array to PNG
|
||||
_, buffer = cv2.imencode('.png', global_result_image)
|
||||
except Exception as e:
|
||||
print(f"Error when trying to write image: {e}")
|
||||
|
||||
# Send the PNG image
|
||||
yield (b'--frame\r\n'
|
||||
b'Content-Type: image/png\r\n\r\n' + buffer.tobytes() + b'\r\n')
|
||||
else:
|
||||
time.sleep(0.1)
|
||||
|
||||
return Response(generate(), mimetype='multipart/x-mixed-replace; boundary=frame')
|
||||
|
||||
def live2d_load_url(url):
|
||||
img = None
|
||||
global global_source_image
|
||||
global global_reload
|
||||
response = requests.get(url)
|
||||
try:
|
||||
img = Image.open(BytesIO(response.content))
|
||||
except Image.UnidentifiedImageError:
|
||||
print(f"Could not identify image from URL: {url}")
|
||||
global_reload = img
|
||||
return 'OK'
|
||||
|
||||
def convert_linear_to_srgb(image: torch.Tensor) -> torch.Tensor:
|
||||
rgb_image = torch_linear_to_srgb(image[0:3, :, :])
|
||||
return torch.cat([rgb_image, image[3:4, :, :]], dim=0)
|
||||
|
||||
def launch_gui(device, model):
|
||||
parser = argparse.ArgumentParser(description='uWu Waifu')
|
||||
|
||||
# Add other parser arguments here
|
||||
|
||||
args, unknown = parser.parse_known_args()
|
||||
|
||||
try:
|
||||
poser = load_poser(model, device)
|
||||
pose_converter = create_ifacialmocap_pose_converter()
|
||||
|
||||
app = wx.App()
|
||||
main_frame = MainFrame(poser, pose_converter, device)
|
||||
main_frame.SetSize((750, 600))
|
||||
|
||||
#Lload default image (you can pass args.char if required)
|
||||
full_path = os.path.join(os.getcwd(), "live2d\\tha3\\images\\lambda_00.png")
|
||||
main_frame.load_image(None, full_path)
|
||||
|
||||
#main_frame.Show(True)
|
||||
main_frame.capture_timer.Start(100)
|
||||
main_frame.animation_timer.Start(100)
|
||||
app.MainLoop()
|
||||
|
||||
except RuntimeError as e:
|
||||
print(e)
|
||||
sys.exit()
|
||||
|
||||
class FpsStatistics:
|
||||
def __init__(self):
|
||||
self.count = 100
|
||||
self.fps = []
|
||||
|
||||
def add_fps(self, fps):
|
||||
self.fps.append(fps)
|
||||
while len(self.fps) > self.count:
|
||||
del self.fps[0]
|
||||
|
||||
def get_average_fps(self):
|
||||
if len(self.fps) == 0:
|
||||
return 0.0
|
||||
else:
|
||||
return sum(self.fps) / len(self.fps)
|
||||
|
||||
class MainFrame(wx.Frame):
|
||||
def __init__(self, poser: Poser, pose_converter: IFacialMocapPoseConverter, device: torch.device):
|
||||
super().__init__(None, wx.ID_ANY, "uWu Waifu")
|
||||
self.pose_converter = pose_converter
|
||||
self.poser = poser
|
||||
self.device = device
|
||||
|
||||
self.image_load_counter = 0
|
||||
self.custom_background_image = None # Add this line
|
||||
|
||||
self.sliders = {}
|
||||
self.ifacialmocap_pose = create_default_ifacialmocap_pose()
|
||||
self.source_image_bitmap = wx.Bitmap(self.poser.get_image_size(), self.poser.get_image_size())
|
||||
self.result_image_bitmap = wx.Bitmap(self.poser.get_image_size(), self.poser.get_image_size())
|
||||
self.wx_source_image = None
|
||||
self.torch_source_image = None
|
||||
self.last_pose = None
|
||||
self.fps_statistics = FpsStatistics()
|
||||
self.last_update_time = None
|
||||
|
||||
self.create_ui()
|
||||
|
||||
self.create_timers()
|
||||
self.Bind(wx.EVT_CLOSE, self.on_close)
|
||||
|
||||
self.update_source_image_bitmap()
|
||||
self.update_result_image_bitmap()
|
||||
|
||||
def create_timers(self):
|
||||
self.capture_timer = wx.Timer(self, wx.ID_ANY)
|
||||
self.Bind(wx.EVT_TIMER, self.update_capture_panel, id=self.capture_timer.GetId())
|
||||
self.animation_timer = wx.Timer(self, wx.ID_ANY)
|
||||
self.Bind(wx.EVT_TIMER, self.update_result_image_bitmap, id=self.animation_timer.GetId())
|
||||
|
||||
def on_close(self, event: wx.Event):
|
||||
# Stop the timers
|
||||
self.animation_timer.Stop()
|
||||
self.capture_timer.Stop()
|
||||
|
||||
# Destroy the windows
|
||||
self.Destroy()
|
||||
event.Skip()
|
||||
|
||||
def on_start_capture(self, event: wx.Event):
|
||||
message_dialog = wx.MessageDialog(self, "", "Error!", wx.OK)
|
||||
message_dialog.ShowModal()
|
||||
message_dialog.Destroy()
|
||||
return
|
||||
|
||||
def random_generate_value(self, min, max, origin_value):
|
||||
random_value = random.choice(list(range(min, max, 1))) / 2500.0
|
||||
randomized = origin_value + random_value
|
||||
if randomized > 1.0:
|
||||
randomized = 1.0
|
||||
if randomized < 0:
|
||||
randomized = 0
|
||||
return randomized
|
||||
|
||||
def random_generate_pose(self):
|
||||
global is_talking
|
||||
current_pose = self.ifacialmocap_pose
|
||||
|
||||
# NOTE: randomize mouth
|
||||
for blendshape_name in BLENDSHAPE_NAMES:
|
||||
if "jawOpen" in blendshape_name:
|
||||
if is_talking or is_talking_override:
|
||||
current_pose[blendshape_name] = self.random_generate_value(-5000, 5000, abs(1 - current_pose[blendshape_name]))
|
||||
else:
|
||||
current_pose[blendshape_name] = 0
|
||||
|
||||
# NOTE: randomize head and eye bones
|
||||
#for key in [HEAD_BONE_Y, LEFT_EYE_BONE_X, LEFT_EYE_BONE_Y, LEFT_EYE_BONE_Z, RIGHT_EYE_BONE_X, RIGHT_EYE_BONE_Y]:
|
||||
#current_pose[key] = self.random_generate_value(-20, 20, current_pose[key])
|
||||
|
||||
#Make her blink
|
||||
if random.random() <= 0.03:
|
||||
current_pose["eyeBlinkRight"] = 1
|
||||
current_pose["eyeBlinkLeft"] = 1
|
||||
else:
|
||||
current_pose["eyeBlinkRight"] = 0
|
||||
current_pose["eyeBlinkLeft"] = 0
|
||||
|
||||
|
||||
return current_pose #print(current_pose)
|
||||
|
||||
def read_ifacialmocap_pose(self):
|
||||
if not self.animation_timer.IsRunning():
|
||||
return self.ifacialmocap_pose
|
||||
self.ifacialmocap_pose = self.random_generate_pose()
|
||||
return self.ifacialmocap_pose
|
||||
|
||||
def on_erase_background(self, event: wx.Event):
|
||||
pass
|
||||
|
||||
def create_animation_panel(self, parent):
|
||||
self.animation_panel = wx.Panel(parent, style=wx.RAISED_BORDER)
|
||||
self.animation_panel_sizer = wx.BoxSizer(wx.HORIZONTAL)
|
||||
self.animation_panel.SetSizer(self.animation_panel_sizer)
|
||||
self.animation_panel.SetAutoLayout(1)
|
||||
|
||||
image_size = self.poser.get_image_size()
|
||||
|
||||
# Left Column (Image)
|
||||
self.animation_left_panel = wx.Panel(self.animation_panel, style=wx.SIMPLE_BORDER)
|
||||
self.animation_left_panel_sizer = wx.BoxSizer(wx.VERTICAL)
|
||||
self.animation_left_panel.SetSizer(self.animation_left_panel_sizer)
|
||||
self.animation_left_panel.SetAutoLayout(1)
|
||||
self.animation_panel_sizer.Add(self.animation_left_panel, 1, wx.EXPAND)
|
||||
|
||||
self.result_image_panel = wx.Panel(self.animation_left_panel, size=(image_size, image_size),
|
||||
style=wx.SIMPLE_BORDER)
|
||||
self.result_image_panel.Bind(wx.EVT_PAINT, self.paint_result_image_panel)
|
||||
self.result_image_panel.Bind(wx.EVT_ERASE_BACKGROUND, self.on_erase_background)
|
||||
self.result_image_panel.Bind(wx.EVT_LEFT_DOWN, self.load_image)
|
||||
self.animation_left_panel_sizer.Add(self.result_image_panel, 1, wx.EXPAND)
|
||||
|
||||
separator = wx.StaticLine(self.animation_left_panel, -1, size=(256, 1))
|
||||
self.animation_left_panel_sizer.Add(separator, 0, wx.EXPAND)
|
||||
|
||||
self.fps_text = wx.StaticText(self.animation_left_panel, label="")
|
||||
self.animation_left_panel_sizer.Add(self.fps_text, wx.SizerFlags().Border())
|
||||
|
||||
|
||||
self.animation_left_panel_sizer.Fit(self.animation_left_panel)
|
||||
|
||||
# Right Column (Sliders)
|
||||
|
||||
self.animation_right_panel = wx.Panel(self.animation_panel, style=wx.SIMPLE_BORDER)
|
||||
self.animation_right_panel_sizer = wx.BoxSizer(wx.VERTICAL)
|
||||
self.animation_right_panel.SetSizer(self.animation_right_panel_sizer)
|
||||
self.animation_right_panel.SetAutoLayout(1)
|
||||
self.animation_panel_sizer.Add(self.animation_right_panel, 1, wx.EXPAND)
|
||||
|
||||
separator = wx.StaticLine(self.animation_right_panel, -1, size=(256, 5))
|
||||
self.animation_right_panel_sizer.Add(separator, 0, wx.EXPAND)
|
||||
|
||||
background_text = wx.StaticText(self.animation_right_panel, label="--- Background ---", style=wx.ALIGN_CENTER)
|
||||
self.animation_right_panel_sizer.Add(background_text, 0, wx.EXPAND)
|
||||
|
||||
self.output_background_choice = wx.Choice(
|
||||
self.animation_right_panel,
|
||||
choices=[
|
||||
"TRANSPARENT",
|
||||
"GREEN",
|
||||
"BLUE",
|
||||
"BLACK",
|
||||
"WHITE",
|
||||
"LOADED",
|
||||
"CUSTOM"
|
||||
]
|
||||
)
|
||||
self.output_background_choice.SetSelection(0)
|
||||
self.animation_right_panel_sizer.Add(self.output_background_choice, 0, wx.EXPAND)
|
||||
|
||||
|
||||
#self.pose_converter.init_pose_converter_panel(self.animation_panel) # this changes sliders to breathing on
|
||||
|
||||
#sliders go here
|
||||
|
||||
|
||||
blendshape_groups = {
|
||||
'Eyes': ['eyeLookOutLeft', 'eyeLookOutRight', 'eyeLookDownLeft', 'eyeLookUpLeft', 'eyeWideLeft', 'eyeWideRight'],
|
||||
'Mouth': ['mouthFrownLeft'],
|
||||
'Cheek': ['cheekSquintLeft', 'cheekSquintRight', 'cheekPuff'],
|
||||
'Brow': ['browDownLeft', 'browOuterUpLeft', 'browDownRight', 'browOuterUpRight', 'browInnerUp'],
|
||||
'Eyelash': ['mouthSmileLeft'],
|
||||
'Nose': ['noseSneerLeft', 'noseSneerRight'],
|
||||
'Misc': ['tongueOut']
|
||||
}
|
||||
|
||||
for group_name, variables in blendshape_groups.items():
|
||||
collapsible_pane = wx.CollapsiblePane(self.animation_right_panel, label=group_name, style=wx.CP_DEFAULT_STYLE | wx.CP_NO_TLW_RESIZE)
|
||||
collapsible_pane.Bind(wx.EVT_COLLAPSIBLEPANE_CHANGED, self.on_pane_changed)
|
||||
self.animation_right_panel_sizer.Add(collapsible_pane, 0, wx.EXPAND)
|
||||
pane_sizer = wx.BoxSizer(wx.VERTICAL)
|
||||
collapsible_pane.GetPane().SetSizer(pane_sizer)
|
||||
|
||||
for variable in variables:
|
||||
variable_label = wx.StaticText(collapsible_pane.GetPane(), label=variable)
|
||||
|
||||
# Multiply min and max values by 100 for the slider
|
||||
slider = wx.Slider(
|
||||
collapsible_pane.GetPane(),
|
||||
value=0,
|
||||
minValue=0,
|
||||
maxValue=100,
|
||||
size=(150, -1), # Set the width to 150 and height to default
|
||||
style=wx.SL_HORIZONTAL | wx.SL_LABELS
|
||||
)
|
||||
|
||||
slider.SetName(variable)
|
||||
slider.Bind(wx.EVT_SLIDER, self.on_slider_change)
|
||||
self.sliders[slider.GetId()] = slider
|
||||
|
||||
pane_sizer.Add(variable_label, 0, wx.ALIGN_CENTER | wx.ALL, 5)
|
||||
pane_sizer.Add(slider, 0, wx.EXPAND)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
self.animation_right_panel_sizer.Fit(self.animation_right_panel)
|
||||
self.animation_panel_sizer.Fit(self.animation_panel)
|
||||
|
||||
def on_pane_changed(self, event):
|
||||
# Update the layout when a collapsible pane is expanded or collapsed
|
||||
self.animation_right_panel.Layout()
|
||||
|
||||
def on_slider_change(self, event):
|
||||
slider = event.GetEventObject()
|
||||
value = slider.GetValue() / 100.0 # Divide by 100 to get the actual float value
|
||||
#print(value)
|
||||
slider_name = slider.GetName()
|
||||
self.ifacialmocap_pose[slider_name] = value
|
||||
|
||||
def create_ui(self):
|
||||
#MAke the UI Elements
|
||||
self.main_sizer = wx.BoxSizer(wx.VERTICAL)
|
||||
self.SetSizer(self.main_sizer)
|
||||
self.SetAutoLayout(1)
|
||||
|
||||
self.capture_pose_lock = threading.Lock()
|
||||
|
||||
#Main panel with JPS
|
||||
self.create_animation_panel(self)
|
||||
self.main_sizer.Add(self.animation_panel, wx.SizerFlags(0).Expand().Border(wx.ALL, 5))
|
||||
|
||||
def update_capture_panel(self, event: wx.Event):
|
||||
data = self.ifacialmocap_pose
|
||||
for rotation_name in ROTATION_NAMES:
|
||||
value = data[rotation_name]
|
||||
|
||||
@staticmethod
|
||||
def convert_to_100(x):
|
||||
return int(max(0.0, min(1.0, x)) * 100)
|
||||
|
||||
def paint_source_image_panel(self, event: wx.Event):
|
||||
wx.BufferedPaintDC(self.source_image_panel, self.source_image_bitmap)
|
||||
|
||||
def update_source_image_bitmap(self):
|
||||
dc = wx.MemoryDC()
|
||||
dc.SelectObject(self.source_image_bitmap)
|
||||
if self.wx_source_image is None:
|
||||
self.draw_nothing_yet_string(dc)
|
||||
else:
|
||||
dc.Clear()
|
||||
dc.DrawBitmap(self.wx_source_image, 0, 0, True)
|
||||
del dc
|
||||
|
||||
def draw_nothing_yet_string(self, dc):
|
||||
dc.Clear()
|
||||
font = wx.Font(wx.FontInfo(14).Family(wx.FONTFAMILY_SWISS))
|
||||
dc.SetFont(font)
|
||||
w, h = dc.GetTextExtent("Nothing yet!")
|
||||
dc.DrawText("Nothing yet!", (self.poser.get_image_size() - w) // 2, (self.poser.get_image_size() - h) // 2)
|
||||
|
||||
def paint_result_image_panel(self, event: wx.Event):
|
||||
wx.BufferedPaintDC(self.result_image_panel, self.result_image_bitmap)
|
||||
|
||||
def update_result_image_bitmap(self, event: Optional[wx.Event] = None):
|
||||
|
||||
global global_result_image # Declare global_source_image as a global variable
|
||||
global global_reload
|
||||
|
||||
if global_reload is not None:
|
||||
#print("Global Reload the Image")
|
||||
MainFrame.load_image(self, event=None, file_path=None) # call load_image function here
|
||||
return
|
||||
|
||||
|
||||
|
||||
ifacialmocap_pose = self.read_ifacialmocap_pose()
|
||||
current_pose = self.pose_converter.convert(ifacialmocap_pose)
|
||||
if self.last_pose is not None and self.last_pose == current_pose:
|
||||
return
|
||||
self.last_pose = current_pose
|
||||
|
||||
if self.torch_source_image is None:
|
||||
dc = wx.MemoryDC()
|
||||
dc.SelectObject(self.result_image_bitmap)
|
||||
self.draw_nothing_yet_string(dc)
|
||||
del dc
|
||||
return
|
||||
|
||||
pose = torch.tensor(current_pose, device=self.device, dtype=self.poser.get_dtype())
|
||||
|
||||
|
||||
with torch.no_grad():
|
||||
output_image = self.poser.pose(self.torch_source_image, pose)[0].float()
|
||||
output_image = convert_linear_to_srgb((output_image + 1.0) / 2.0)
|
||||
|
||||
background_choice = self.output_background_choice.GetSelection()
|
||||
if background_choice == 6: # Custom background
|
||||
self.image_load_counter += 1 # Increment the counter
|
||||
if self.image_load_counter <= 1: # Only open the file dialog if the counter is 5 or less
|
||||
file_dialog = wx.FileDialog(self, "Choose a background image", "", "", "*.png", wx.FD_OPEN)
|
||||
if file_dialog.ShowModal() == wx.ID_OK:
|
||||
background_image_path = file_dialog.GetPath()
|
||||
# Load the image and convert it to a torch tensor
|
||||
pil_image = Image.open(background_image_path).convert("RGBA")
|
||||
tensor_image = transforms.ToTensor()(pil_image).to(self.device)
|
||||
# Resize the image to match the output image size
|
||||
tensor_image = F.interpolate(tensor_image.unsqueeze(0), size=output_image.shape[1:], mode="bilinear").squeeze(0)
|
||||
self.custom_background_image = tensor_image # Store the custom background image
|
||||
self.output_background_choice.SetSelection(5)
|
||||
else:
|
||||
# If the user cancelled the dialog or didn't choose a file, reset the choice to "TRANSPARENT"
|
||||
self.output_background_choice.SetSelection(5)
|
||||
else:
|
||||
# Use the stored custom background image
|
||||
output_image = self.blend_with_background(output_image, self.custom_background_image)
|
||||
|
||||
|
||||
else: # Predefined colors
|
||||
self.image_load_counter = 0
|
||||
if background_choice == 0: # Transparent
|
||||
pass
|
||||
elif background_choice == 1: # Green
|
||||
background = torch.zeros(4, output_image.shape[1], output_image.shape[2], device=self.device)
|
||||
background[3, :, :] = 1.0 # set alpha to 1.0
|
||||
background[1, :, :] = 1.0
|
||||
output_image = self.blend_with_background(output_image, background)
|
||||
elif background_choice == 2: # Blue
|
||||
background = torch.zeros(4, output_image.shape[1], output_image.shape[2], device=self.device)
|
||||
background[3, :, :] = 1.0 # set alpha to 1.0
|
||||
background[2, :, :] = 1.0
|
||||
output_image = self.blend_with_background(output_image, background)
|
||||
elif background_choice == 3: # Black
|
||||
background = torch.zeros(4, output_image.shape[1], output_image.shape[2], device=self.device)
|
||||
background[3, :, :] = 1.0 # set alpha to 1.0
|
||||
output_image = self.blend_with_background(output_image, background)
|
||||
elif background_choice == 4: # White
|
||||
background = torch.zeros(4, output_image.shape[1], output_image.shape[2], device=self.device)
|
||||
background[3, :, :] = 1.0 # set alpha to 1.0
|
||||
background[0:3, :, :] = 1.0
|
||||
output_image = self.blend_with_background(output_image, background)
|
||||
elif background_choice == 5: # Saved Image
|
||||
output_image = self.blend_with_background(output_image, self.custom_background_image)
|
||||
else:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
c, h, w = output_image.shape
|
||||
output_image = (255.0 * torch.transpose(output_image.reshape(c, h * w), 0, 1)).reshape(h, w, c).byte()
|
||||
|
||||
|
||||
numpy_image = output_image.detach().cpu().numpy()
|
||||
wx_image = wx.ImageFromBuffer(numpy_image.shape[0],
|
||||
numpy_image.shape[1],
|
||||
numpy_image[:, :, 0:3].tobytes(),
|
||||
numpy_image[:, :, 3].tobytes())
|
||||
wx_bitmap = wx_image.ConvertToBitmap()
|
||||
|
||||
dc = wx.MemoryDC()
|
||||
dc.SelectObject(self.result_image_bitmap)
|
||||
dc.Clear()
|
||||
dc.DrawBitmap(wx_bitmap,
|
||||
(self.poser.get_image_size() - numpy_image.shape[0]) // 2,
|
||||
(self.poser.get_image_size() - numpy_image.shape[1]) // 2, True)
|
||||
|
||||
|
||||
# Assuming numpy_image has shape (height, width, 4) and the channels are in RGB order
|
||||
# Convert color channels from RGB to BGR and keep alpha channel
|
||||
numpy_image_bgra = numpy_image[:, :, [2, 1, 0, 3]]
|
||||
#cv2.imwrite('test2.png', numpy_image_bgra)
|
||||
|
||||
global_result_image = numpy_image_bgra
|
||||
|
||||
|
||||
del dc
|
||||
|
||||
time_now = time.time_ns()
|
||||
if self.last_update_time is not None:
|
||||
elapsed_time = time_now - self.last_update_time
|
||||
fps = 1.0 / (elapsed_time / 10**9)
|
||||
if self.torch_source_image is not None:
|
||||
self.fps_statistics.add_fps(fps)
|
||||
self.fps_text.SetLabelText("FPS = %0.2f" % self.fps_statistics.get_average_fps())
|
||||
self.last_update_time = time_now
|
||||
|
||||
self.Refresh()
|
||||
|
||||
def blend_with_background(self, numpy_image, background):
|
||||
if background is not None:
|
||||
alpha = numpy_image[3:4, :, :]
|
||||
color = numpy_image[0:3, :, :]
|
||||
new_color = color * alpha + (1.0 - alpha) * background[0:3, :, :]
|
||||
return torch.cat([new_color, background[3:4, :, :]], dim=0)
|
||||
else:
|
||||
return numpy_image
|
||||
|
||||
def resize_image(image, size=(512, 512)):
|
||||
image.thumbnail(size, Image.LANCZOS) # Step 1: Resize the image to maintain the aspect ratio with the larger dimension being 512 pixels
|
||||
new_image = Image.new("RGBA", size) # Step 2: Create a new image of size 512x512 with transparency
|
||||
new_image.paste(image, ((size[0] - image.size[0]) // 2,
|
||||
(size[1] - image.size[1]) // 2)) # Step 3: Paste the resized image into the new image, centered
|
||||
return new_image
|
||||
|
||||
def load_image(self, event: wx.Event, file_path=None):
|
||||
|
||||
global global_source_image # Declare global_source_image as a global variable
|
||||
global global_source_image_path # Declare global_source_image as a global variable
|
||||
global global_reload
|
||||
|
||||
if global_reload is not None:
|
||||
file_path = "global_reload"
|
||||
|
||||
#if file_path is None and global_reload is not None:
|
||||
|
||||
if file_path is None:
|
||||
dir_name = "data/images"
|
||||
file_dialog = wx.FileDialog(self, "Choose an image", dir_name, "", "*.png", wx.FD_OPEN)
|
||||
if file_dialog.ShowModal() == wx.ID_OK:
|
||||
file_path = os.path.join(file_dialog.GetDirectory(), file_dialog.GetFilename())
|
||||
file_dialog.Destroy()
|
||||
|
||||
if file_path:
|
||||
try:
|
||||
|
||||
|
||||
if file_path == "global_reload":
|
||||
pil_image = global_reload # use global_reload directly
|
||||
#print("Loading from Var")
|
||||
else:
|
||||
pil_image = resize_PIL_image(
|
||||
extract_PIL_image_from_filelike(file_path),
|
||||
(self.poser.get_image_size(), self.poser.get_image_size()))
|
||||
|
||||
|
||||
w, h = pil_image.size
|
||||
|
||||
if pil_image.size != (512, 512):
|
||||
print("Resizing Char Card to work")
|
||||
pil_image = MainFrame.resize_image(pil_image)
|
||||
|
||||
w, h = pil_image.size
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if pil_image.mode != 'RGBA':
|
||||
self.source_image_string = "Image must have alpha channel!"
|
||||
self.wx_source_image = None
|
||||
self.torch_source_image = None
|
||||
else:
|
||||
self.wx_source_image = wx.Bitmap.FromBufferRGBA(w, h, pil_image.convert("RGBA").tobytes())
|
||||
self.torch_source_image = extract_pytorch_image_from_PIL_image(pil_image) \
|
||||
.to(self.device).to(self.poser.get_dtype())
|
||||
|
||||
global_source_image = self.torch_source_image # Set global_source_image as a global variable
|
||||
|
||||
global_source_image_path = image_path = os.path.join(file_path) #set file path
|
||||
|
||||
self.update_source_image_bitmap()
|
||||
|
||||
|
||||
|
||||
except Exception as error:
|
||||
print("Error:")
|
||||
print(error)
|
||||
#message_dialog = wx.MessageDialog(self, "Could not load image " + file_path, "Poser", wx.OK)
|
||||
#message_dialog.ShowModal()
|
||||
#message_dialog.Destroy()
|
||||
global_reload = None #reset the globe load
|
||||
#print("Reseting Load Variable")
|
||||
self.Refresh()
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description='uWu Waifu')
|
||||
parser.add_argument(
|
||||
'--model',
|
||||
type=str,
|
||||
required=False,
|
||||
default='separable_float',
|
||||
choices=['standard_float', 'separable_float', 'standard_half', 'separable_half'],
|
||||
help='The model to use.'
|
||||
)
|
||||
parser.add_argument('--char', type=str, required=False, help='The path to the character image.')
|
||||
parser.add_argument(
|
||||
'--device',
|
||||
type=str,
|
||||
required=False,
|
||||
default='cuda',
|
||||
choices=['cpu', 'cuda'],
|
||||
help='The device to use for PyTorch ("cuda" for GPU, "cpu" for CPU).'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
# Add the line below to pass the 'args' object to the launch_gui() function
|
||||
launch_gui(device=args.device, model=args.model)
|
||||
62
live2d/tha3/app/endpoints.save
Normal file
62
live2d/tha3/app/endpoints.save
Normal file
@@ -0,0 +1,62 @@
|
||||
@app.route("/load", methods=["POST"])
|
||||
def live2d_load():
|
||||
img = None
|
||||
global global_source_image
|
||||
global global_reload
|
||||
# get variable from POST data IE http://localhost:8000/characters/Aqua.png
|
||||
#curl -X POST -d "live2d_loadchar=http://localhost:8000/characters/Aqua.png" http://localhost:5555/load
|
||||
|
||||
live2d_loadchar = request.form.get('live2d_loadchar')
|
||||
print(live2d_loadchar)
|
||||
# get the image from the url at live2d_loadchar and load it into global_source_variable
|
||||
|
||||
#loads the /Name/live.png vs char Card
|
||||
url = live2d_loadchar.replace('.png', '/live2d.png')
|
||||
|
||||
|
||||
response = requests.get(url)
|
||||
|
||||
try:
|
||||
img = Image.open(BytesIO(response.content))
|
||||
except Image.UnidentifiedImageError:
|
||||
print(f"Could not identify image from URL: {url}")
|
||||
|
||||
global_reload = img
|
||||
return 'OK'
|
||||
|
||||
@app.route('/source_feed')
|
||||
def source_feed():
|
||||
return send_file(global_source_image_path, mimetype='image/png')
|
||||
|
||||
@app.route('/start_talking')
|
||||
def start_talking():
|
||||
global is_talking_override
|
||||
is_talking_override = True
|
||||
#return send_file(global_source_image_path, mimetype='image/png')
|
||||
return "started"
|
||||
|
||||
@app.route('/stop_talking')
|
||||
def stop_talking():
|
||||
global is_talking_override
|
||||
is_talking_override = False
|
||||
return "stopped"
|
||||
|
||||
@app.route('/result_feed')
|
||||
def result_feed():
|
||||
def generate():
|
||||
while True:
|
||||
if global_result_image is not None:
|
||||
|
||||
try:
|
||||
# Encode the numpy array to PNG
|
||||
_, buffer = cv2.imencode('.png', global_result_image)
|
||||
except Exception as e:
|
||||
print(f"Error when trying to write image: {e}")
|
||||
|
||||
# Send the PNG image
|
||||
yield (b'--frame\r\n'
|
||||
b'Content-Type: image/png\r\n\r\n' + buffer.tobytes() + b'\r\n')
|
||||
else:
|
||||
time.sleep(0.1)
|
||||
|
||||
return Response(generate(), mimetype='multipart/x-mixed-replace; boundary=frame')
|
||||
439
live2d/tha3/app/ifacialmocap_puppeteer.py
Normal file
439
live2d/tha3/app/ifacialmocap_puppeteer.py
Normal file
@@ -0,0 +1,439 @@
|
||||
import argparse
|
||||
import os
|
||||
import socket
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
sys.path.append(os.getcwd())
|
||||
|
||||
from tha3.mocap.ifacialmocap_pose import create_default_ifacialmocap_pose
|
||||
from tha3.mocap.ifacialmocap_v2 import IFACIALMOCAP_PORT, IFACIALMOCAP_START_STRING, parse_ifacialmocap_v2_pose, \
|
||||
parse_ifacialmocap_v1_pose
|
||||
from tha3.poser.modes.load_poser import load_poser
|
||||
|
||||
import torch
|
||||
import wx
|
||||
|
||||
from tha3.poser.poser import Poser
|
||||
from tha3.mocap.ifacialmocap_constants import *
|
||||
from tha3.mocap.ifacialmocap_pose_converter import IFacialMocapPoseConverter
|
||||
from tha3.util import torch_linear_to_srgb, resize_PIL_image, extract_PIL_image_from_filelike, \
|
||||
extract_pytorch_image_from_PIL_image
|
||||
|
||||
|
||||
def convert_linear_to_srgb(image: torch.Tensor) -> torch.Tensor:
|
||||
rgb_image = torch_linear_to_srgb(image[0:3, :, :])
|
||||
return torch.cat([rgb_image, image[3:4, :, :]], dim=0)
|
||||
|
||||
|
||||
class FpsStatistics:
|
||||
def __init__(self):
|
||||
self.count = 100
|
||||
self.fps = []
|
||||
|
||||
def add_fps(self, fps):
|
||||
self.fps.append(fps)
|
||||
while len(self.fps) > self.count:
|
||||
del self.fps[0]
|
||||
|
||||
def get_average_fps(self):
|
||||
if len(self.fps) == 0:
|
||||
return 0.0
|
||||
else:
|
||||
return sum(self.fps) / len(self.fps)
|
||||
|
||||
|
||||
class MainFrame(wx.Frame):
|
||||
def __init__(self, poser: Poser, pose_converter: IFacialMocapPoseConverter, device: torch.device):
|
||||
super().__init__(None, wx.ID_ANY, "iFacialMocap Puppeteer (Marigold)")
|
||||
self.pose_converter = pose_converter
|
||||
self.poser = poser
|
||||
self.device = device
|
||||
|
||||
|
||||
self.ifacialmocap_pose = create_default_ifacialmocap_pose()
|
||||
self.source_image_bitmap = wx.Bitmap(self.poser.get_image_size(), self.poser.get_image_size())
|
||||
self.result_image_bitmap = wx.Bitmap(self.poser.get_image_size(), self.poser.get_image_size())
|
||||
self.wx_source_image = None
|
||||
self.torch_source_image = None
|
||||
self.last_pose = None
|
||||
self.fps_statistics = FpsStatistics()
|
||||
self.last_update_time = None
|
||||
|
||||
self.create_receiving_socket()
|
||||
self.create_ui()
|
||||
self.create_timers()
|
||||
self.Bind(wx.EVT_CLOSE, self.on_close)
|
||||
|
||||
self.update_source_image_bitmap()
|
||||
self.update_result_image_bitmap()
|
||||
|
||||
def create_receiving_socket(self):
|
||||
self.receiving_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||
self.receiving_socket.bind(("", IFACIALMOCAP_PORT))
|
||||
self.receiving_socket.setblocking(False)
|
||||
|
||||
def create_timers(self):
|
||||
self.capture_timer = wx.Timer(self, wx.ID_ANY)
|
||||
self.Bind(wx.EVT_TIMER, self.update_capture_panel, id=self.capture_timer.GetId())
|
||||
self.animation_timer = wx.Timer(self, wx.ID_ANY)
|
||||
self.Bind(wx.EVT_TIMER, self.update_result_image_bitmap, id=self.animation_timer.GetId())
|
||||
|
||||
def on_close(self, event: wx.Event):
|
||||
# Stop the timers
|
||||
self.animation_timer.Stop()
|
||||
self.capture_timer.Stop()
|
||||
|
||||
# Close receiving socket
|
||||
self.receiving_socket.close()
|
||||
|
||||
# Destroy the windows
|
||||
self.Destroy()
|
||||
event.Skip()
|
||||
|
||||
def on_start_capture(self, event: wx.Event):
|
||||
capture_device_ip_address = self.capture_device_ip_text_ctrl.GetValue()
|
||||
out_socket = None
|
||||
try:
|
||||
address = (capture_device_ip_address, IFACIALMOCAP_PORT)
|
||||
out_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||
out_socket.sendto(IFACIALMOCAP_START_STRING, address)
|
||||
except Exception as e:
|
||||
message_dialog = wx.MessageDialog(self, str(e), "Error!", wx.OK)
|
||||
message_dialog.ShowModal()
|
||||
message_dialog.Destroy()
|
||||
finally:
|
||||
if out_socket is not None:
|
||||
out_socket.close()
|
||||
|
||||
def read_ifacialmocap_pose(self):
|
||||
if not self.animation_timer.IsRunning():
|
||||
return self.ifacialmocap_pose
|
||||
socket_bytes = None
|
||||
while True:
|
||||
try:
|
||||
socket_bytes = self.receiving_socket.recv(8192)
|
||||
except socket.error as e:
|
||||
break
|
||||
if socket_bytes is not None:
|
||||
socket_string = socket_bytes.decode("utf-8")
|
||||
self.ifacialmocap_pose = parse_ifacialmocap_v2_pose(socket_string)
|
||||
return self.ifacialmocap_pose
|
||||
|
||||
def on_erase_background(self, event: wx.Event):
|
||||
pass
|
||||
|
||||
def create_animation_panel(self, parent):
|
||||
self.animation_panel = wx.Panel(parent, style=wx.RAISED_BORDER)
|
||||
self.animation_panel_sizer = wx.BoxSizer(wx.HORIZONTAL)
|
||||
self.animation_panel.SetSizer(self.animation_panel_sizer)
|
||||
self.animation_panel.SetAutoLayout(1)
|
||||
|
||||
image_size = self.poser.get_image_size()
|
||||
|
||||
if True:
|
||||
self.input_panel = wx.Panel(self.animation_panel, size=(image_size, image_size + 128),
|
||||
style=wx.SIMPLE_BORDER)
|
||||
self.input_panel_sizer = wx.BoxSizer(wx.VERTICAL)
|
||||
self.input_panel.SetSizer(self.input_panel_sizer)
|
||||
self.input_panel.SetAutoLayout(1)
|
||||
self.animation_panel_sizer.Add(self.input_panel, 0, wx.FIXED_MINSIZE)
|
||||
|
||||
self.source_image_panel = wx.Panel(self.input_panel, size=(image_size, image_size), style=wx.SIMPLE_BORDER)
|
||||
self.source_image_panel.Bind(wx.EVT_PAINT, self.paint_source_image_panel)
|
||||
self.source_image_panel.Bind(wx.EVT_ERASE_BACKGROUND, self.on_erase_background)
|
||||
self.input_panel_sizer.Add(self.source_image_panel, 0, wx.FIXED_MINSIZE)
|
||||
|
||||
self.load_image_button = wx.Button(self.input_panel, wx.ID_ANY, "Load Image")
|
||||
self.input_panel_sizer.Add(self.load_image_button, 1, wx.EXPAND)
|
||||
self.load_image_button.Bind(wx.EVT_BUTTON, self.load_image)
|
||||
|
||||
self.input_panel_sizer.Fit(self.input_panel)
|
||||
|
||||
if True:
|
||||
self.pose_converter.init_pose_converter_panel(self.animation_panel)
|
||||
|
||||
if True:
|
||||
self.animation_left_panel = wx.Panel(self.animation_panel, style=wx.SIMPLE_BORDER)
|
||||
self.animation_left_panel_sizer = wx.BoxSizer(wx.VERTICAL)
|
||||
self.animation_left_panel.SetSizer(self.animation_left_panel_sizer)
|
||||
self.animation_left_panel.SetAutoLayout(1)
|
||||
self.animation_panel_sizer.Add(self.animation_left_panel, 0, wx.EXPAND)
|
||||
|
||||
self.result_image_panel = wx.Panel(self.animation_left_panel, size=(image_size, image_size),
|
||||
style=wx.SIMPLE_BORDER)
|
||||
self.result_image_panel.Bind(wx.EVT_PAINT, self.paint_result_image_panel)
|
||||
self.result_image_panel.Bind(wx.EVT_ERASE_BACKGROUND, self.on_erase_background)
|
||||
self.animation_left_panel_sizer.Add(self.result_image_panel, 0, wx.FIXED_MINSIZE)
|
||||
|
||||
separator = wx.StaticLine(self.animation_left_panel, -1, size=(256, 5))
|
||||
self.animation_left_panel_sizer.Add(separator, 0, wx.EXPAND)
|
||||
|
||||
background_text = wx.StaticText(self.animation_left_panel, label="--- Background ---",
|
||||
style=wx.ALIGN_CENTER)
|
||||
self.animation_left_panel_sizer.Add(background_text, 0, wx.EXPAND)
|
||||
|
||||
self.output_background_choice = wx.Choice(
|
||||
self.animation_left_panel,
|
||||
choices=[
|
||||
"TRANSPARENT",
|
||||
"GREEN",
|
||||
"BLUE",
|
||||
"BLACK",
|
||||
"WHITE"
|
||||
])
|
||||
self.output_background_choice.SetSelection(0)
|
||||
self.animation_left_panel_sizer.Add(self.output_background_choice, 0, wx.EXPAND)
|
||||
|
||||
separator = wx.StaticLine(self.animation_left_panel, -1, size=(256, 5))
|
||||
self.animation_left_panel_sizer.Add(separator, 0, wx.EXPAND)
|
||||
|
||||
self.fps_text = wx.StaticText(self.animation_left_panel, label="")
|
||||
self.animation_left_panel_sizer.Add(self.fps_text, wx.SizerFlags().Border())
|
||||
|
||||
self.animation_left_panel_sizer.Fit(self.animation_left_panel)
|
||||
|
||||
self.animation_panel_sizer.Fit(self.animation_panel)
|
||||
|
||||
def create_ui(self):
|
||||
self.main_sizer = wx.BoxSizer(wx.VERTICAL)
|
||||
self.SetSizer(self.main_sizer)
|
||||
self.SetAutoLayout(1)
|
||||
|
||||
self.capture_pose_lock = threading.Lock()
|
||||
|
||||
self.create_connection_panel(self)
|
||||
self.main_sizer.Add(self.connection_panel, wx.SizerFlags(0).Expand().Border(wx.ALL, 5))
|
||||
|
||||
self.create_animation_panel(self)
|
||||
self.main_sizer.Add(self.animation_panel, wx.SizerFlags(0).Expand().Border(wx.ALL, 5))
|
||||
|
||||
self.create_capture_panel(self)
|
||||
self.main_sizer.Add(self.capture_panel, wx.SizerFlags(0).Expand().Border(wx.ALL, 5))
|
||||
|
||||
self.main_sizer.Fit(self)
|
||||
|
||||
def create_connection_panel(self, parent):
|
||||
self.connection_panel = wx.Panel(parent, style=wx.RAISED_BORDER)
|
||||
self.connection_panel_sizer = wx.BoxSizer(wx.HORIZONTAL)
|
||||
self.connection_panel.SetSizer(self.connection_panel_sizer)
|
||||
self.connection_panel.SetAutoLayout(1)
|
||||
|
||||
capture_device_ip_text = wx.StaticText(self.connection_panel, label="Capture Device IP:", style=wx.ALIGN_RIGHT)
|
||||
self.connection_panel_sizer.Add(capture_device_ip_text, wx.SizerFlags(0).FixedMinSize().Border(wx.ALL, 3))
|
||||
|
||||
self.capture_device_ip_text_ctrl = wx.TextCtrl(self.connection_panel, value="192.168.0.1")
|
||||
self.connection_panel_sizer.Add(self.capture_device_ip_text_ctrl, wx.SizerFlags(1).Expand().Border(wx.ALL, 3))
|
||||
|
||||
self.start_capture_button = wx.Button(self.connection_panel, label="START CAPTURE!")
|
||||
self.connection_panel_sizer.Add(self.start_capture_button, wx.SizerFlags(0).FixedMinSize().Border(wx.ALL, 3))
|
||||
self.start_capture_button.Bind(wx.EVT_BUTTON, self.on_start_capture)
|
||||
|
||||
def create_capture_panel(self, parent):
|
||||
self.capture_panel = wx.Panel(parent, style=wx.RAISED_BORDER)
|
||||
self.capture_panel_sizer = wx.FlexGridSizer(cols=5)
|
||||
for i in range(5):
|
||||
self.capture_panel_sizer.AddGrowableCol(i)
|
||||
self.capture_panel.SetSizer(self.capture_panel_sizer)
|
||||
self.capture_panel.SetAutoLayout(1)
|
||||
|
||||
self.rotation_labels = {}
|
||||
self.rotation_value_labels = {}
|
||||
rotation_column_0 = self.create_rotation_column(self.capture_panel, RIGHT_EYE_BONE_ROTATIONS)
|
||||
self.capture_panel_sizer.Add(rotation_column_0, wx.SizerFlags(0).Expand().Border(wx.ALL, 3))
|
||||
rotation_column_1 = self.create_rotation_column(self.capture_panel, LEFT_EYE_BONE_ROTATIONS)
|
||||
self.capture_panel_sizer.Add(rotation_column_1, wx.SizerFlags(0).Expand().Border(wx.ALL, 3))
|
||||
rotation_column_2 = self.create_rotation_column(self.capture_panel, HEAD_BONE_ROTATIONS)
|
||||
self.capture_panel_sizer.Add(rotation_column_2, wx.SizerFlags(0).Expand().Border(wx.ALL, 3))
|
||||
|
||||
def create_rotation_column(self, parent, rotation_names):
|
||||
column_panel = wx.Panel(parent, style=wx.SIMPLE_BORDER)
|
||||
column_panel_sizer = wx.FlexGridSizer(cols=2)
|
||||
column_panel_sizer.AddGrowableCol(1)
|
||||
column_panel.SetSizer(column_panel_sizer)
|
||||
column_panel.SetAutoLayout(1)
|
||||
|
||||
for rotation_name in rotation_names:
|
||||
self.rotation_labels[rotation_name] = wx.StaticText(
|
||||
column_panel, label=rotation_name, style=wx.ALIGN_RIGHT)
|
||||
column_panel_sizer.Add(self.rotation_labels[rotation_name],
|
||||
wx.SizerFlags(1).Expand().Border(wx.ALL, 3))
|
||||
|
||||
self.rotation_value_labels[rotation_name] = wx.TextCtrl(
|
||||
column_panel, style=wx.TE_RIGHT)
|
||||
self.rotation_value_labels[rotation_name].SetValue("0.00")
|
||||
self.rotation_value_labels[rotation_name].Disable()
|
||||
column_panel_sizer.Add(self.rotation_value_labels[rotation_name],
|
||||
wx.SizerFlags(1).Expand().Border(wx.ALL, 3))
|
||||
|
||||
column_panel.GetSizer().Fit(column_panel)
|
||||
return column_panel
|
||||
|
||||
def paint_capture_panel(self, event: wx.Event):
|
||||
self.update_capture_panel(event)
|
||||
|
||||
def update_capture_panel(self, event: wx.Event):
|
||||
data = self.ifacialmocap_pose
|
||||
for rotation_name in ROTATION_NAMES:
|
||||
value = data[rotation_name]
|
||||
self.rotation_value_labels[rotation_name].SetValue("%0.2f" % value)
|
||||
|
||||
@staticmethod
|
||||
def convert_to_100(x):
|
||||
return int(max(0.0, min(1.0, x)) * 100)
|
||||
|
||||
def paint_source_image_panel(self, event: wx.Event):
|
||||
wx.BufferedPaintDC(self.source_image_panel, self.source_image_bitmap)
|
||||
|
||||
def update_source_image_bitmap(self):
|
||||
dc = wx.MemoryDC()
|
||||
dc.SelectObject(self.source_image_bitmap)
|
||||
if self.wx_source_image is None:
|
||||
self.draw_nothing_yet_string(dc)
|
||||
else:
|
||||
dc.Clear()
|
||||
dc.DrawBitmap(self.wx_source_image, 0, 0, True)
|
||||
del dc
|
||||
|
||||
def draw_nothing_yet_string(self, dc):
|
||||
dc.Clear()
|
||||
font = wx.Font(wx.FontInfo(14).Family(wx.FONTFAMILY_SWISS))
|
||||
dc.SetFont(font)
|
||||
w, h = dc.GetTextExtent("Nothing yet!")
|
||||
dc.DrawText("Nothing yet!", (self.poser.get_image_size() - w) // 2, (self.poser.get_image_size() - h) // 2)
|
||||
|
||||
def paint_result_image_panel(self, event: wx.Event):
|
||||
wx.BufferedPaintDC(self.result_image_panel, self.result_image_bitmap)
|
||||
|
||||
def update_result_image_bitmap(self, event: Optional[wx.Event] = None):
|
||||
ifacialmocap_pose = self.read_ifacialmocap_pose()
|
||||
current_pose = self.pose_converter.convert(ifacialmocap_pose)
|
||||
if self.last_pose is not None and self.last_pose == current_pose:
|
||||
return
|
||||
self.last_pose = current_pose
|
||||
|
||||
if self.torch_source_image is None:
|
||||
dc = wx.MemoryDC()
|
||||
dc.SelectObject(self.result_image_bitmap)
|
||||
self.draw_nothing_yet_string(dc)
|
||||
del dc
|
||||
return
|
||||
|
||||
pose = torch.tensor(current_pose, device=self.device, dtype=self.poser.get_dtype())
|
||||
|
||||
with torch.no_grad():
|
||||
output_image = self.poser.pose(self.torch_source_image, pose)[0].float()
|
||||
output_image = convert_linear_to_srgb((output_image + 1.0) / 2.0)
|
||||
|
||||
background_choice = self.output_background_choice.GetSelection()
|
||||
if background_choice == 0:
|
||||
pass
|
||||
else:
|
||||
background = torch.zeros(4, output_image.shape[1], output_image.shape[2], device=self.device)
|
||||
background[3, :, :] = 1.0
|
||||
if background_choice == 1:
|
||||
background[1, :, :] = 1.0
|
||||
output_image = self.blend_with_background(output_image, background)
|
||||
elif background_choice == 2:
|
||||
background[2, :, :] = 1.0
|
||||
output_image = self.blend_with_background(output_image, background)
|
||||
elif background_choice == 3:
|
||||
output_image = self.blend_with_background(output_image, background)
|
||||
else:
|
||||
background[0:3, :, :] = 1.0
|
||||
output_image = self.blend_with_background(output_image, background)
|
||||
|
||||
c, h, w = output_image.shape
|
||||
output_image = 255.0 * torch.transpose(output_image.reshape(c, h * w), 0, 1).reshape(h, w, c)
|
||||
output_image = output_image.byte()
|
||||
|
||||
numpy_image = output_image.detach().cpu().numpy()
|
||||
wx_image = wx.ImageFromBuffer(numpy_image.shape[0],
|
||||
numpy_image.shape[1],
|
||||
numpy_image[:, :, 0:3].tobytes(),
|
||||
numpy_image[:, :, 3].tobytes())
|
||||
wx_bitmap = wx_image.ConvertToBitmap()
|
||||
|
||||
dc = wx.MemoryDC()
|
||||
dc.SelectObject(self.result_image_bitmap)
|
||||
dc.Clear()
|
||||
dc.DrawBitmap(wx_bitmap,
|
||||
(self.poser.get_image_size() - numpy_image.shape[0]) // 2,
|
||||
(self.poser.get_image_size() - numpy_image.shape[1]) // 2, True)
|
||||
del dc
|
||||
|
||||
time_now = time.time_ns()
|
||||
if self.last_update_time is not None:
|
||||
elapsed_time = time_now - self.last_update_time
|
||||
fps = 1.0 / (elapsed_time / 10**9)
|
||||
if self.torch_source_image is not None:
|
||||
self.fps_statistics.add_fps(fps)
|
||||
self.fps_text.SetLabelText("FPS = %0.2f" % self.fps_statistics.get_average_fps())
|
||||
self.last_update_time = time_now
|
||||
|
||||
self.Refresh()
|
||||
|
||||
def blend_with_background(self, numpy_image, background):
|
||||
alpha = numpy_image[3:4, :, :]
|
||||
color = numpy_image[0:3, :, :]
|
||||
new_color = color * alpha + (1.0 - alpha) * background[0:3, :, :]
|
||||
return torch.cat([new_color, background[3:4, :, :]], dim=0)
|
||||
|
||||
def load_image(self, event: wx.Event):
|
||||
dir_name = "data/images"
|
||||
file_dialog = wx.FileDialog(self, "Choose an image", dir_name, "", "*.png", wx.FD_OPEN)
|
||||
if file_dialog.ShowModal() == wx.ID_OK:
|
||||
image_file_name = os.path.join(file_dialog.GetDirectory(), file_dialog.GetFilename())
|
||||
try:
|
||||
pil_image = resize_PIL_image(
|
||||
extract_PIL_image_from_filelike(image_file_name),
|
||||
(self.poser.get_image_size(), self.poser.get_image_size()))
|
||||
w, h = pil_image.size
|
||||
if pil_image.mode != 'RGBA':
|
||||
self.source_image_string = "Image must have alpha channel!"
|
||||
self.wx_source_image = None
|
||||
self.torch_source_image = None
|
||||
else:
|
||||
self.wx_source_image = wx.Bitmap.FromBufferRGBA(w, h, pil_image.convert("RGBA").tobytes())
|
||||
self.torch_source_image = extract_pytorch_image_from_PIL_image(pil_image) \
|
||||
.to(self.device).to(self.poser.get_dtype())
|
||||
self.update_source_image_bitmap()
|
||||
except:
|
||||
message_dialog = wx.MessageDialog(self, "Could not load image " + image_file_name, "Poser", wx.OK)
|
||||
message_dialog.ShowModal()
|
||||
message_dialog.Destroy()
|
||||
file_dialog.Destroy()
|
||||
self.Refresh()
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description='Control characters with movement captured by iFacialMocap.')
|
||||
parser.add_argument(
|
||||
'--model',
|
||||
type=str,
|
||||
required=False,
|
||||
default='standard_float',
|
||||
choices=['standard_float', 'separable_float', 'standard_half', 'separable_half'],
|
||||
help='The model to use.')
|
||||
args = parser.parse_args()
|
||||
|
||||
device = torch.device('cuda')
|
||||
try:
|
||||
poser = load_poser(args.model, device)
|
||||
except RuntimeError as e:
|
||||
print(e)
|
||||
sys.exit()
|
||||
|
||||
from tha3.mocap.ifacialmocap_poser_converter_25 import create_ifacialmocap_pose_converter
|
||||
|
||||
pose_converter = create_ifacialmocap_pose_converter()
|
||||
|
||||
app = wx.App()
|
||||
main_frame = MainFrame(poser, pose_converter, device)
|
||||
main_frame.Show(True)
|
||||
main_frame.capture_timer.Start(10)
|
||||
main_frame.animation_timer.Start(10)
|
||||
app.MainLoop()
|
||||
464
live2d/tha3/app/manual_poser.py
Normal file
464
live2d/tha3/app/manual_poser.py
Normal file
@@ -0,0 +1,464 @@
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from typing import List
|
||||
|
||||
sys.path.append(os.getcwd())
|
||||
|
||||
import PIL.Image
|
||||
import numpy
|
||||
import torch
|
||||
import wx
|
||||
|
||||
from tha3.poser.modes.load_poser import load_poser
|
||||
from tha3.poser.poser import Poser, PoseParameterCategory, PoseParameterGroup
|
||||
from tha3.util import extract_pytorch_image_from_filelike, rgba_to_numpy_image, grid_change_to_numpy_image, \
|
||||
rgb_to_numpy_image, resize_PIL_image, extract_PIL_image_from_filelike, extract_pytorch_image_from_PIL_image
|
||||
|
||||
|
||||
class MorphCategoryControlPanel(wx.Panel):
|
||||
def __init__(self,
|
||||
parent,
|
||||
title: str,
|
||||
pose_param_category: PoseParameterCategory,
|
||||
param_groups: List[PoseParameterGroup]):
|
||||
super().__init__(parent, style=wx.SIMPLE_BORDER)
|
||||
self.pose_param_category = pose_param_category
|
||||
self.sizer = wx.BoxSizer(wx.VERTICAL)
|
||||
self.SetSizer(self.sizer)
|
||||
self.SetAutoLayout(1)
|
||||
|
||||
title_text = wx.StaticText(self, label=title, style=wx.ALIGN_CENTER)
|
||||
self.sizer.Add(title_text, 0, wx.EXPAND)
|
||||
|
||||
self.param_groups = [group for group in param_groups if group.get_category() == pose_param_category]
|
||||
self.choice = wx.Choice(self, choices=[group.get_group_name() for group in self.param_groups])
|
||||
if len(self.param_groups) > 0:
|
||||
self.choice.SetSelection(0)
|
||||
self.choice.Bind(wx.EVT_CHOICE, self.on_choice_updated)
|
||||
self.sizer.Add(self.choice, 0, wx.EXPAND)
|
||||
|
||||
self.left_slider = wx.Slider(self, minValue=-1000, maxValue=1000, value=-1000, style=wx.HORIZONTAL)
|
||||
self.sizer.Add(self.left_slider, 0, wx.EXPAND)
|
||||
|
||||
self.right_slider = wx.Slider(self, minValue=-1000, maxValue=1000, value=-1000, style=wx.HORIZONTAL)
|
||||
self.sizer.Add(self.right_slider, 0, wx.EXPAND)
|
||||
|
||||
self.checkbox = wx.CheckBox(self, label="Show")
|
||||
self.checkbox.SetValue(True)
|
||||
self.sizer.Add(self.checkbox, 0, wx.SHAPED | wx.ALIGN_CENTER)
|
||||
|
||||
self.update_ui()
|
||||
|
||||
self.sizer.Fit(self)
|
||||
|
||||
def update_ui(self):
|
||||
param_group = self.param_groups[self.choice.GetSelection()]
|
||||
if param_group.is_discrete():
|
||||
self.left_slider.Enable(False)
|
||||
self.right_slider.Enable(False)
|
||||
self.checkbox.Enable(True)
|
||||
elif param_group.get_arity() == 1:
|
||||
self.left_slider.Enable(True)
|
||||
self.right_slider.Enable(False)
|
||||
self.checkbox.Enable(False)
|
||||
else:
|
||||
self.left_slider.Enable(True)
|
||||
self.right_slider.Enable(True)
|
||||
self.checkbox.Enable(False)
|
||||
|
||||
def on_choice_updated(self, event: wx.Event):
|
||||
param_group = self.param_groups[self.choice.GetSelection()]
|
||||
if param_group.is_discrete():
|
||||
self.checkbox.SetValue(True)
|
||||
self.update_ui()
|
||||
|
||||
def set_param_value(self, pose: List[float]):
|
||||
if len(self.param_groups) == 0:
|
||||
return
|
||||
selected_morph_index = self.choice.GetSelection()
|
||||
param_group = self.param_groups[selected_morph_index]
|
||||
param_index = param_group.get_parameter_index()
|
||||
if param_group.is_discrete():
|
||||
if self.checkbox.GetValue():
|
||||
for i in range(param_group.get_arity()):
|
||||
pose[param_index + i] = 1.0
|
||||
else:
|
||||
param_range = param_group.get_range()
|
||||
alpha = (self.left_slider.GetValue() + 1000) / 2000.0
|
||||
pose[param_index] = param_range[0] + (param_range[1] - param_range[0]) * alpha
|
||||
if param_group.get_arity() == 2:
|
||||
alpha = (self.right_slider.GetValue() + 1000) / 2000.0
|
||||
pose[param_index + 1] = param_range[0] + (param_range[1] - param_range[0]) * alpha
|
||||
|
||||
|
||||
class SimpleParamGroupsControlPanel(wx.Panel):
|
||||
def __init__(self, parent,
|
||||
pose_param_category: PoseParameterCategory,
|
||||
param_groups: List[PoseParameterGroup]):
|
||||
super().__init__(parent, style=wx.SIMPLE_BORDER)
|
||||
self.sizer = wx.BoxSizer(wx.VERTICAL)
|
||||
self.SetSizer(self.sizer)
|
||||
self.SetAutoLayout(1)
|
||||
|
||||
self.param_groups = [group for group in param_groups if group.get_category() == pose_param_category]
|
||||
for param_group in self.param_groups:
|
||||
assert not param_group.is_discrete()
|
||||
assert param_group.get_arity() == 1
|
||||
|
||||
self.sliders = []
|
||||
for param_group in self.param_groups:
|
||||
static_text = wx.StaticText(
|
||||
self,
|
||||
label=" ------------ %s ------------ " % param_group.get_group_name(), style=wx.ALIGN_CENTER)
|
||||
self.sizer.Add(static_text, 0, wx.EXPAND)
|
||||
range = param_group.get_range()
|
||||
min_value = int(range[0] * 1000)
|
||||
max_value = int(range[1] * 1000)
|
||||
slider = wx.Slider(self, minValue=min_value, maxValue=max_value, value=0, style=wx.HORIZONTAL)
|
||||
self.sizer.Add(slider, 0, wx.EXPAND)
|
||||
self.sliders.append(slider)
|
||||
|
||||
self.sizer.Fit(self)
|
||||
|
||||
def set_param_value(self, pose: List[float]):
|
||||
if len(self.param_groups) == 0:
|
||||
return
|
||||
for param_group_index in range(len(self.param_groups)):
|
||||
param_group = self.param_groups[param_group_index]
|
||||
slider = self.sliders[param_group_index]
|
||||
param_range = param_group.get_range()
|
||||
param_index = param_group.get_parameter_index()
|
||||
alpha = (slider.GetValue() - slider.GetMin()) * 1.0 / (slider.GetMax() - slider.GetMin())
|
||||
pose[param_index] = param_range[0] + (param_range[1] - param_range[0]) * alpha
|
||||
|
||||
|
||||
def convert_output_image_from_torch_to_numpy(output_image):
|
||||
if output_image.shape[2] == 2:
|
||||
h, w, c = output_image.shape
|
||||
numpy_image = torch.transpose(output_image.reshape(h * w, c), 0, 1).reshape(c, h, w)
|
||||
elif output_image.shape[0] == 4:
|
||||
numpy_image = rgba_to_numpy_image(output_image)
|
||||
elif output_image.shape[0] == 3:
|
||||
numpy_image = rgb_to_numpy_image(output_image)
|
||||
elif output_image.shape[0] == 1:
|
||||
c, h, w = output_image.shape
|
||||
alpha_image = torch.cat([output_image.repeat(3, 1, 1) * 2.0 - 1.0, torch.ones(1, h, w)], dim=0)
|
||||
numpy_image = rgba_to_numpy_image(alpha_image)
|
||||
elif output_image.shape[0] == 2:
|
||||
numpy_image = grid_change_to_numpy_image(output_image, num_channels=4)
|
||||
else:
|
||||
raise RuntimeError("Unsupported # image channels: %d" % output_image.shape[0])
|
||||
numpy_image = numpy.uint8(numpy.rint(numpy_image * 255.0))
|
||||
return numpy_image
|
||||
|
||||
|
||||
class MainFrame(wx.Frame):
|
||||
def __init__(self, poser: Poser, device: torch.device):
|
||||
super().__init__(None, wx.ID_ANY, "Poser")
|
||||
self.poser = poser
|
||||
self.dtype = self.poser.get_dtype()
|
||||
self.device = device
|
||||
self.image_size = self.poser.get_image_size()
|
||||
|
||||
self.wx_source_image = None
|
||||
self.torch_source_image = None
|
||||
|
||||
self.main_sizer = wx.BoxSizer(wx.HORIZONTAL)
|
||||
self.SetSizer(self.main_sizer)
|
||||
self.SetAutoLayout(1)
|
||||
self.init_left_panel()
|
||||
self.init_control_panel()
|
||||
self.init_right_panel()
|
||||
self.main_sizer.Fit(self)
|
||||
|
||||
self.timer = wx.Timer(self, wx.ID_ANY)
|
||||
self.Bind(wx.EVT_TIMER, self.update_images, self.timer)
|
||||
|
||||
save_image_id = wx.NewIdRef()
|
||||
self.Bind(wx.EVT_MENU, self.on_save_image, id=save_image_id)
|
||||
accelerator_table = wx.AcceleratorTable([
|
||||
(wx.ACCEL_CTRL, ord('S'), save_image_id)
|
||||
])
|
||||
self.SetAcceleratorTable(accelerator_table)
|
||||
|
||||
self.last_pose = None
|
||||
self.last_output_index = self.output_index_choice.GetSelection()
|
||||
self.last_output_numpy_image = None
|
||||
|
||||
self.wx_source_image = None
|
||||
self.torch_source_image = None
|
||||
self.source_image_bitmap = wx.Bitmap(self.image_size, self.image_size)
|
||||
self.result_image_bitmap = wx.Bitmap(self.image_size, self.image_size)
|
||||
self.source_image_dirty = True
|
||||
|
||||
def init_left_panel(self):
|
||||
self.control_panel = wx.Panel(self, style=wx.SIMPLE_BORDER, size=(self.image_size, -1))
|
||||
self.left_panel = wx.Panel(self, style=wx.SIMPLE_BORDER)
|
||||
left_panel_sizer = wx.BoxSizer(wx.VERTICAL)
|
||||
self.left_panel.SetSizer(left_panel_sizer)
|
||||
self.left_panel.SetAutoLayout(1)
|
||||
|
||||
self.source_image_panel = wx.Panel(self.left_panel, size=(self.image_size, self.image_size),
|
||||
style=wx.SIMPLE_BORDER)
|
||||
self.source_image_panel.Bind(wx.EVT_PAINT, self.paint_source_image_panel)
|
||||
self.source_image_panel.Bind(wx.EVT_ERASE_BACKGROUND, self.on_erase_background)
|
||||
left_panel_sizer.Add(self.source_image_panel, 0, wx.FIXED_MINSIZE)
|
||||
|
||||
self.load_image_button = wx.Button(self.left_panel, wx.ID_ANY, "\nLoad Image\n\n")
|
||||
left_panel_sizer.Add(self.load_image_button, 1, wx.EXPAND)
|
||||
self.load_image_button.Bind(wx.EVT_BUTTON, self.load_image)
|
||||
|
||||
left_panel_sizer.Fit(self.left_panel)
|
||||
self.main_sizer.Add(self.left_panel, 0, wx.FIXED_MINSIZE)
|
||||
|
||||
def on_erase_background(self, event: wx.Event):
|
||||
pass
|
||||
|
||||
def init_control_panel(self):
|
||||
self.control_panel_sizer = wx.BoxSizer(wx.VERTICAL)
|
||||
self.control_panel.SetSizer(self.control_panel_sizer)
|
||||
self.control_panel.SetMinSize(wx.Size(256, 1))
|
||||
|
||||
morph_categories = [
|
||||
PoseParameterCategory.EYEBROW,
|
||||
PoseParameterCategory.EYE,
|
||||
PoseParameterCategory.MOUTH,
|
||||
PoseParameterCategory.IRIS_MORPH
|
||||
]
|
||||
morph_category_titles = {
|
||||
PoseParameterCategory.EYEBROW: " ------------ Eyebrow ------------ ",
|
||||
PoseParameterCategory.EYE: " ------------ Eye ------------ ",
|
||||
PoseParameterCategory.MOUTH: " ------------ Mouth ------------ ",
|
||||
PoseParameterCategory.IRIS_MORPH: " ------------ Iris morphs ------------ ",
|
||||
}
|
||||
self.morph_control_panels = {}
|
||||
for category in morph_categories:
|
||||
param_groups = self.poser.get_pose_parameter_groups()
|
||||
filtered_param_groups = [group for group in param_groups if group.get_category() == category]
|
||||
if len(filtered_param_groups) == 0:
|
||||
continue
|
||||
control_panel = MorphCategoryControlPanel(
|
||||
self.control_panel,
|
||||
morph_category_titles[category],
|
||||
category,
|
||||
self.poser.get_pose_parameter_groups())
|
||||
self.morph_control_panels[category] = control_panel
|
||||
self.control_panel_sizer.Add(control_panel, 0, wx.EXPAND)
|
||||
|
||||
self.non_morph_control_panels = {}
|
||||
non_morph_categories = [
|
||||
PoseParameterCategory.IRIS_ROTATION,
|
||||
PoseParameterCategory.FACE_ROTATION,
|
||||
PoseParameterCategory.BODY_ROTATION,
|
||||
PoseParameterCategory.BREATHING
|
||||
]
|
||||
for category in non_morph_categories:
|
||||
param_groups = self.poser.get_pose_parameter_groups()
|
||||
filtered_param_groups = [group for group in param_groups if group.get_category() == category]
|
||||
if len(filtered_param_groups) == 0:
|
||||
continue
|
||||
control_panel = SimpleParamGroupsControlPanel(
|
||||
self.control_panel,
|
||||
category,
|
||||
self.poser.get_pose_parameter_groups())
|
||||
self.non_morph_control_panels[category] = control_panel
|
||||
self.control_panel_sizer.Add(control_panel, 0, wx.EXPAND)
|
||||
|
||||
self.control_panel_sizer.Fit(self.control_panel)
|
||||
self.main_sizer.Add(self.control_panel, 1, wx.FIXED_MINSIZE)
|
||||
|
||||
def init_right_panel(self):
|
||||
self.right_panel = wx.Panel(self, style=wx.SIMPLE_BORDER)
|
||||
right_panel_sizer = wx.BoxSizer(wx.VERTICAL)
|
||||
self.right_panel.SetSizer(right_panel_sizer)
|
||||
self.right_panel.SetAutoLayout(1)
|
||||
|
||||
self.result_image_panel = wx.Panel(self.right_panel,
|
||||
size=(self.image_size, self.image_size),
|
||||
style=wx.SIMPLE_BORDER)
|
||||
self.result_image_panel.Bind(wx.EVT_PAINT, self.paint_result_image_panel)
|
||||
self.result_image_panel.Bind(wx.EVT_ERASE_BACKGROUND, self.on_erase_background)
|
||||
self.output_index_choice = wx.Choice(
|
||||
self.right_panel,
|
||||
choices=[str(i) for i in range(self.poser.get_output_length())])
|
||||
self.output_index_choice.SetSelection(0)
|
||||
right_panel_sizer.Add(self.result_image_panel, 0, wx.FIXED_MINSIZE)
|
||||
right_panel_sizer.Add(self.output_index_choice, 0, wx.EXPAND)
|
||||
|
||||
self.save_image_button = wx.Button(self.right_panel, wx.ID_ANY, "\nSave Image\n\n")
|
||||
right_panel_sizer.Add(self.save_image_button, 1, wx.EXPAND)
|
||||
self.save_image_button.Bind(wx.EVT_BUTTON, self.on_save_image)
|
||||
|
||||
right_panel_sizer.Fit(self.right_panel)
|
||||
self.main_sizer.Add(self.right_panel, 0, wx.FIXED_MINSIZE)
|
||||
|
||||
def create_param_category_choice(self, param_category: PoseParameterCategory):
|
||||
params = []
|
||||
for param_group in self.poser.get_pose_parameter_groups():
|
||||
if param_group.get_category() == param_category:
|
||||
params.append(param_group.get_group_name())
|
||||
choice = wx.Choice(self.control_panel, choices=params)
|
||||
if len(params) > 0:
|
||||
choice.SetSelection(0)
|
||||
return choice
|
||||
|
||||
def load_image(self, event: wx.Event):
|
||||
dir_name = "data/images"
|
||||
file_dialog = wx.FileDialog(self, "Choose an image", dir_name, "", "*.png", wx.FD_OPEN)
|
||||
if file_dialog.ShowModal() == wx.ID_OK:
|
||||
image_file_name = os.path.join(file_dialog.GetDirectory(), file_dialog.GetFilename())
|
||||
try:
|
||||
pil_image = resize_PIL_image(extract_PIL_image_from_filelike(image_file_name),
|
||||
(self.poser.get_image_size(), self.poser.get_image_size()))
|
||||
w, h = pil_image.size
|
||||
if pil_image.mode != 'RGBA':
|
||||
self.source_image_string = "Image must have alpha channel!"
|
||||
self.wx_source_image = None
|
||||
self.torch_source_image = None
|
||||
else:
|
||||
self.wx_source_image = wx.Bitmap.FromBufferRGBA(w, h, pil_image.convert("RGBA").tobytes())
|
||||
self.torch_source_image = extract_pytorch_image_from_PIL_image(pil_image)\
|
||||
.to(self.device).to(self.dtype)
|
||||
self.source_image_dirty = True
|
||||
self.Refresh()
|
||||
self.Update()
|
||||
except:
|
||||
message_dialog = wx.MessageDialog(self, "Could not load image " + image_file_name, "Poser", wx.OK)
|
||||
message_dialog.ShowModal()
|
||||
message_dialog.Destroy()
|
||||
file_dialog.Destroy()
|
||||
|
||||
def paint_source_image_panel(self, event: wx.Event):
|
||||
wx.BufferedPaintDC(self.source_image_panel, self.source_image_bitmap)
|
||||
|
||||
def paint_result_image_panel(self, event: wx.Event):
|
||||
wx.BufferedPaintDC(self.result_image_panel, self.result_image_bitmap)
|
||||
|
||||
def draw_nothing_yet_string_to_bitmap(self, bitmap):
|
||||
dc = wx.MemoryDC()
|
||||
dc.SelectObject(bitmap)
|
||||
|
||||
dc.Clear()
|
||||
font = wx.Font(wx.FontInfo(14).Family(wx.FONTFAMILY_SWISS))
|
||||
dc.SetFont(font)
|
||||
w, h = dc.GetTextExtent("Nothing yet!")
|
||||
dc.DrawText("Nothing yet!", (self.image_size - w) // 2, (self.image_size - h) // 2)
|
||||
|
||||
del dc
|
||||
|
||||
def get_current_pose(self):
|
||||
current_pose = [0.0 for i in range(self.poser.get_num_parameters())]
|
||||
for morph_control_panel in self.morph_control_panels.values():
|
||||
morph_control_panel.set_param_value(current_pose)
|
||||
for rotation_control_panel in self.non_morph_control_panels.values():
|
||||
rotation_control_panel.set_param_value(current_pose)
|
||||
return current_pose
|
||||
|
||||
def update_images(self, event: wx.Event):
|
||||
current_pose = self.get_current_pose()
|
||||
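# Early-out: skip re-posing when the source image, the pose sliders, and the selected
# output index are all unchanged since the last call.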
if not self.source_image_dirty \
|
||||
and self.last_pose is not None \
|
||||
and self.last_pose == current_pose \
|
||||
and self.last_output_index == self.output_index_choice.GetSelection():
|
||||
return
|
||||
self.last_pose = current_pose
|
||||
self.last_output_index = self.output_index_choice.GetSelection()
|
||||
|
||||
if self.torch_source_image is None:
|
||||
self.draw_nothing_yet_string_to_bitmap(self.source_image_bitmap)
|
||||
self.draw_nothing_yet_string_to_bitmap(self.result_image_bitmap)
|
||||
self.source_image_dirty = False
|
||||
self.Refresh()
|
||||
self.Update()
|
||||
return
|
||||
|
||||
if self.source_image_dirty:
|
||||
dc = wx.MemoryDC()
|
||||
dc.SelectObject(self.source_image_bitmap)
|
||||
dc.Clear()
|
||||
dc.DrawBitmap(self.wx_source_image, 0, 0)
|
||||
self.source_image_dirty = False
|
||||
|
||||
pose = torch.tensor(current_pose, device=self.device, dtype=self.dtype)
|
||||
output_index = self.output_index_choice.GetSelection()
|
||||
with torch.no_grad():
|
||||
output_image = self.poser.pose(self.torch_source_image, pose, output_index)[0].detach().cpu()
|
||||
|
||||
numpy_image = convert_output_image_from_torch_to_numpy(output_image)
|
||||
self.last_output_numpy_image = numpy_image
|
||||
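# Wrap the RGBA numpy output in a wx.Image (separate RGB and alpha buffers). Note that
# shape[0] and shape[1] are passed as width and height; this only lines up because the
# poser output is square.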
wx_image = wx.ImageFromBuffer(
|
||||
numpy_image.shape[0],
|
||||
numpy_image.shape[1],
|
||||
numpy_image[:, :, 0:3].tobytes(),
|
||||
numpy_image[:, :, 3].tobytes())
|
||||
wx_bitmap = wx_image.ConvertToBitmap()
|
||||
|
||||
dc = wx.MemoryDC()
|
||||
dc.SelectObject(self.result_image_bitmap)
|
||||
dc.Clear()
|
||||
dc.DrawBitmap(wx_bitmap,
|
||||
(self.image_size - numpy_image.shape[0]) // 2,
|
||||
(self.image_size - numpy_image.shape[1]) // 2,
|
||||
True)
|
||||
del dc
|
||||
|
||||
self.Refresh()
|
||||
self.Update()
|
||||
|
||||
def on_save_image(self, event: wx.Event):
|
||||
if self.last_output_numpy_image is None:
|
||||
logging.info("There is no output image to save!!!")
|
||||
return
|
||||
|
||||
dir_name = "data/images"
|
||||
file_dialog = wx.FileDialog(self, "Choose an image", dir_name, "", "*.png", wx.FD_SAVE)
|
||||
if file_dialog.ShowModal() == wx.ID_OK:
|
||||
image_file_name = os.path.join(file_dialog.GetDirectory(), file_dialog.GetFilename())
|
||||
try:
|
||||
if os.path.exists(image_file_name):
|
||||
message_dialog = wx.MessageDialog(self, f"Override {image_file_name}?", "Manual Poser",
|
||||
wx.YES_NO | wx.ICON_QUESTION)
|
||||
result = message_dialog.ShowModal()
|
||||
if result == wx.ID_YES:
|
||||
self.save_last_numpy_image(image_file_name)
|
||||
message_dialog.Destroy()
|
||||
else:
|
||||
self.save_last_numpy_image(image_file_name)
|
||||
except:
|
||||
message_dialog = wx.MessageDialog(self, f"Could not save {image_file_name}", "Manual Poser", wx.OK)
|
||||
message_dialog.ShowModal()
|
||||
message_dialog.Destroy()
|
||||
file_dialog.Destroy()
|
||||
|
||||
def save_last_numpy_image(self, image_file_name):
|
||||
numpy_image = self.last_output_numpy_image
|
||||
pil_image = PIL.Image.fromarray(numpy_image, mode='RGBA')
|
||||
os.makedirs(os.path.dirname(image_file_name), exist_ok=True)
|
||||
pil_image.save(image_file_name)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description='Manually pose a character image.')
|
||||
parser.add_argument(
|
||||
'--model',
|
||||
type=str,
|
||||
required=False,
|
||||
default='standard_float',
|
||||
choices=['standard_float', 'separable_float', 'standard_half', 'separable_half'],
|
||||
help='The model to use.')
|
||||
args = parser.parse_args()
|
||||
|
||||
device = torch.device('cuda')
|
||||
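# A CUDA-capable GPU is assumed here; if the poser cannot be loaded onto it, the
# RuntimeError is printed and the program exits.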
try:
|
||||
poser = load_poser(args.model, device)
|
||||
except RuntimeError as e:
|
||||
print(e)
|
||||
sys.exit()
|
||||
|
||||
app = wx.App()
|
||||
main_frame = MainFrame(poser, device)
|
||||
main_frame.Show(True)
|
||||
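# Fire the frame's timer every 30 ms (roughly 33 updates per second); the timer
# presumably drives update_images.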
main_frame.timer.Start(30)
|
||||
app.MainLoop()
|
||||
0
live2d/tha3/compute/__init__.py
Normal file
0
live2d/tha3/compute/__init__.py
Normal file
9
live2d/tha3/compute/cached_computation_func.py
Normal file
9
live2d/tha3/compute/cached_computation_func.py
Normal file
@@ -0,0 +1,9 @@
|
||||
from typing import Callable, Dict, List
|
||||
|
||||
from torch import Tensor
|
||||
from torch.nn import Module
|
||||
|
||||
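# Type aliases for cached computation callbacks: each takes the module dict, the input
# batch, and the per-key output cache, and returns either a single tensor or a list of tensors.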
TensorCachedComputationFunc = Callable[
|
||||
[Dict[str, Module], List[Tensor], Dict[str, List[Tensor]]], Tensor]
|
||||
TensorListCachedComputationFunc = Callable[
|
||||
[Dict[str, Module], List[Tensor], Dict[str, List[Tensor]]], List[Tensor]]
|
||||
43
live2d/tha3/compute/cached_computation_protocol.py
Normal file
43
live2d/tha3/compute/cached_computation_protocol.py
Normal file
@@ -0,0 +1,43 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, List
|
||||
|
||||
from torch import Tensor
|
||||
from torch.nn import Module
|
||||
|
||||
from tha3.compute.cached_computation_func import TensorCachedComputationFunc, TensorListCachedComputationFunc
|
||||
|
||||
|
||||
class CachedComputationProtocol(ABC):
|
||||
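# Memoizes per-key computations: get_output returns the cached result when the key is
# already in `outputs`, otherwise it calls compute_output (supplied by subclasses) and
# stores the result before returning it.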
def get_output(self,
|
||||
key: str,
|
||||
modules: Dict[str, Module],
|
||||
batch: List[Tensor],
|
||||
outputs: Dict[str, List[Tensor]]):
|
||||
if key in outputs:
|
||||
return outputs[key]
|
||||
else:
|
||||
output = self.compute_output(key, modules, batch, outputs)
|
||||
outputs[key] = output
|
||||
return outputs[key]
|
||||
|
||||
@abstractmethod
|
||||
def compute_output(self,
|
||||
key: str,
|
||||
modules: Dict[str, Module],
|
||||
batch: List[Tensor],
|
||||
outputs: Dict[str, List[Tensor]]) -> List[Tensor]:
|
||||
pass
|
||||
|
||||
def get_output_tensor_func(self, key: str, index: int) -> TensorCachedComputationFunc:
|
||||
def func(modules: Dict[str, Module],
|
||||
batch: List[Tensor],
|
||||
outputs: Dict[str, List[Tensor]]):
|
||||
return self.get_output(key, modules, batch, outputs)[index]
|
||||
return func
|
||||
|
||||
def get_output_tensor_list_func(self, key: str) -> TensorListCachedComputationFunc:
|
||||
def func(modules: Dict[str, Module],
|
||||
batch: List[Tensor],
|
||||
outputs: Dict[str, List[Tensor]]):
|
||||
return self.get_output(key, modules, batch, outputs)
|
||||
return func
|
||||
408
live2d/tha3/images/CC-BY-NC.txt
Normal file
408
live2d/tha3/images/CC-BY-NC.txt
Normal file
@@ -0,0 +1,408 @@
|
||||
Attribution-NonCommercial 4.0 International
|
||||
|
||||
=======================================================================
|
||||
|
||||
Creative Commons Corporation ("Creative Commons") is not a law firm and
|
||||
does not provide legal services or legal advice. Distribution of
|
||||
Creative Commons public licenses does not create a lawyer-client or
|
||||
other relationship. Creative Commons makes its licenses and related
|
||||
information available on an "as-is" basis. Creative Commons gives no
|
||||
warranties regarding its licenses, any material licensed under their
|
||||
terms and conditions, or any related information. Creative Commons
|
||||
disclaims all liability for damages resulting from their use to the
|
||||
fullest extent possible.
|
||||
|
||||
Using Creative Commons Public Licenses
|
||||
|
||||
Creative Commons public licenses provide a standard set of terms and
|
||||
conditions that creators and other rights holders may use to share
|
||||
original works of authorship and other material subject to copyright
|
||||
and certain other rights specified in the public license below. The
|
||||
following considerations are for informational purposes only, are not
|
||||
exhaustive, and do not form part of our licenses.
|
||||
|
||||
Considerations for licensors: Our public licenses are
|
||||
intended for use by those authorized to give the public
|
||||
permission to use material in ways otherwise restricted by
|
||||
copyright and certain other rights. Our licenses are
|
||||
irrevocable. Licensors should read and understand the terms
|
||||
and conditions of the license they choose before applying it.
|
||||
Licensors should also secure all rights necessary before
|
||||
applying our licenses so that the public can reuse the
|
||||
material as expected. Licensors should clearly mark any
|
||||
material not subject to the license. This includes other CC-
|
||||
licensed material, or material used under an exception or
|
||||
limitation to copyright. More considerations for licensors:
|
||||
wiki.creativecommons.org/Considerations_for_licensors
|
||||
|
||||
Considerations for the public: By using one of our public
|
||||
licenses, a licensor grants the public permission to use the
|
||||
licensed material under specified terms and conditions. If
|
||||
the licensor's permission is not necessary for any reason--for
|
||||
example, because of any applicable exception or limitation to
|
||||
copyright--then that use is not regulated by the license. Our
|
||||
licenses grant only permissions under copyright and certain
|
||||
other rights that a licensor has authority to grant. Use of
|
||||
the licensed material may still be restricted for other
|
||||
reasons, including because others have copyright or other
|
||||
rights in the material. A licensor may make special requests,
|
||||
such as asking that all changes be marked or described.
|
||||
Although not required by our licenses, you are encouraged to
|
||||
respect those requests where reasonable. More considerations
|
||||
for the public:
|
||||
wiki.creativecommons.org/Considerations_for_licensees
|
||||
|
||||
=======================================================================
|
||||
|
||||
Creative Commons Attribution-NonCommercial 4.0 International Public
|
||||
License
|
||||
|
||||
By exercising the Licensed Rights (defined below), You accept and agree
|
||||
to be bound by the terms and conditions of this Creative Commons
|
||||
Attribution-NonCommercial 4.0 International Public License ("Public
|
||||
License"). To the extent this Public License may be interpreted as a
|
||||
contract, You are granted the Licensed Rights in consideration of Your
|
||||
acceptance of these terms and conditions, and the Licensor grants You
|
||||
such rights in consideration of benefits the Licensor receives from
|
||||
making the Licensed Material available under these terms and
|
||||
conditions.
|
||||
|
||||
|
||||
Section 1 -- Definitions.
|
||||
|
||||
a. Adapted Material means material subject to Copyright and Similar
|
||||
Rights that is derived from or based upon the Licensed Material
|
||||
and in which the Licensed Material is translated, altered,
|
||||
arranged, transformed, or otherwise modified in a manner requiring
|
||||
permission under the Copyright and Similar Rights held by the
|
||||
Licensor. For purposes of this Public License, where the Licensed
|
||||
Material is a musical work, performance, or sound recording,
|
||||
Adapted Material is always produced where the Licensed Material is
|
||||
synched in timed relation with a moving image.
|
||||
|
||||
b. Adapter's License means the license You apply to Your Copyright
|
||||
and Similar Rights in Your contributions to Adapted Material in
|
||||
accordance with the terms and conditions of this Public License.
|
||||
|
||||
c. Copyright and Similar Rights means copyright and/or similar rights
|
||||
closely related to copyright including, without limitation,
|
||||
performance, broadcast, sound recording, and Sui Generis Database
|
||||
Rights, without regard to how the rights are labeled or
|
||||
categorized. For purposes of this Public License, the rights
|
||||
specified in Section 2(b)(1)-(2) are not Copyright and Similar
|
||||
Rights.
|
||||
d. Effective Technological Measures means those measures that, in the
|
||||
absence of proper authority, may not be circumvented under laws
|
||||
fulfilling obligations under Article 11 of the WIPO Copyright
|
||||
Treaty adopted on December 20, 1996, and/or similar international
|
||||
agreements.
|
||||
|
||||
e. Exceptions and Limitations means fair use, fair dealing, and/or
|
||||
any other exception or limitation to Copyright and Similar Rights
|
||||
that applies to Your use of the Licensed Material.
|
||||
|
||||
f. Licensed Material means the artistic or literary work, database,
|
||||
or other material to which the Licensor applied this Public
|
||||
License.
|
||||
|
||||
g. Licensed Rights means the rights granted to You subject to the
|
||||
terms and conditions of this Public License, which are limited to
|
||||
all Copyright and Similar Rights that apply to Your use of the
|
||||
Licensed Material and that the Licensor has authority to license.
|
||||
|
||||
h. Licensor means the individual(s) or entity(ies) granting rights
|
||||
under this Public License.
|
||||
|
||||
i. NonCommercial means not primarily intended for or directed towards
|
||||
commercial advantage or monetary compensation. For purposes of
|
||||
this Public License, the exchange of the Licensed Material for
|
||||
other material subject to Copyright and Similar Rights by digital
|
||||
file-sharing or similar means is NonCommercial provided there is
|
||||
no payment of monetary compensation in connection with the
|
||||
exchange.
|
||||
|
||||
j. Share means to provide material to the public by any means or
|
||||
process that requires permission under the Licensed Rights, such
|
||||
as reproduction, public display, public performance, distribution,
|
||||
dissemination, communication, or importation, and to make material
|
||||
available to the public including in ways that members of the
|
||||
public may access the material from a place and at a time
|
||||
individually chosen by them.
|
||||
|
||||
k. Sui Generis Database Rights means rights other than copyright
|
||||
resulting from Directive 96/9/EC of the European Parliament and of
|
||||
the Council of 11 March 1996 on the legal protection of databases,
|
||||
as amended and/or succeeded, as well as other essentially
|
||||
equivalent rights anywhere in the world.
|
||||
|
||||
l. You means the individual or entity exercising the Licensed Rights
|
||||
under this Public License. Your has a corresponding meaning.
|
||||
|
||||
|
||||
Section 2 -- Scope.
|
||||
|
||||
a. License grant.
|
||||
|
||||
1. Subject to the terms and conditions of this Public License,
|
||||
the Licensor hereby grants You a worldwide, royalty-free,
|
||||
non-sublicensable, non-exclusive, irrevocable license to
|
||||
exercise the Licensed Rights in the Licensed Material to:
|
||||
|
||||
a. reproduce and Share the Licensed Material, in whole or
|
||||
in part, for NonCommercial purposes only; and
|
||||
|
||||
b. produce, reproduce, and Share Adapted Material for
|
||||
NonCommercial purposes only.
|
||||
|
||||
2. Exceptions and Limitations. For the avoidance of doubt, where
|
||||
Exceptions and Limitations apply to Your use, this Public
|
||||
License does not apply, and You do not need to comply with
|
||||
its terms and conditions.
|
||||
|
||||
3. Term. The term of this Public License is specified in Section
|
||||
6(a).
|
||||
|
||||
4. Media and formats; technical modifications allowed. The
|
||||
Licensor authorizes You to exercise the Licensed Rights in
|
||||
all media and formats whether now known or hereafter created,
|
||||
and to make technical modifications necessary to do so. The
|
||||
Licensor waives and/or agrees not to assert any right or
|
||||
authority to forbid You from making technical modifications
|
||||
necessary to exercise the Licensed Rights, including
|
||||
technical modifications necessary to circumvent Effective
|
||||
Technological Measures. For purposes of this Public License,
|
||||
simply making modifications authorized by this Section 2(a)
|
||||
(4) never produces Adapted Material.
|
||||
|
||||
5. Downstream recipients.
|
||||
|
||||
a. Offer from the Licensor -- Licensed Material. Every
|
||||
recipient of the Licensed Material automatically
|
||||
receives an offer from the Licensor to exercise the
|
||||
Licensed Rights under the terms and conditions of this
|
||||
Public License.
|
||||
|
||||
b. No downstream restrictions. You may not offer or impose
|
||||
any additional or different terms or conditions on, or
|
||||
apply any Effective Technological Measures to, the
|
||||
Licensed Material if doing so restricts exercise of the
|
||||
Licensed Rights by any recipient of the Licensed
|
||||
Material.
|
||||
|
||||
6. No endorsement. Nothing in this Public License constitutes or
|
||||
may be construed as permission to assert or imply that You
|
||||
are, or that Your use of the Licensed Material is, connected
|
||||
with, or sponsored, endorsed, or granted official status by,
|
||||
the Licensor or others designated to receive attribution as
|
||||
provided in Section 3(a)(1)(A)(i).
|
||||
|
||||
b. Other rights.
|
||||
|
||||
1. Moral rights, such as the right of integrity, are not
|
||||
licensed under this Public License, nor are publicity,
|
||||
privacy, and/or other similar personality rights; however, to
|
||||
the extent possible, the Licensor waives and/or agrees not to
|
||||
assert any such rights held by the Licensor to the limited
|
||||
extent necessary to allow You to exercise the Licensed
|
||||
Rights, but not otherwise.
|
||||
|
||||
2. Patent and trademark rights are not licensed under this
|
||||
Public License.
|
||||
|
||||
3. To the extent possible, the Licensor waives any right to
|
||||
collect royalties from You for the exercise of the Licensed
|
||||
Rights, whether directly or through a collecting society
|
||||
under any voluntary or waivable statutory or compulsory
|
||||
licensing scheme. In all other cases the Licensor expressly
|
||||
reserves any right to collect such royalties, including when
|
||||
the Licensed Material is used other than for NonCommercial
|
||||
purposes.
|
||||
|
||||
|
||||
Section 3 -- License Conditions.
|
||||
|
||||
Your exercise of the Licensed Rights is expressly made subject to the
|
||||
following conditions.
|
||||
|
||||
a. Attribution.
|
||||
|
||||
1. If You Share the Licensed Material (including in modified
|
||||
form), You must:
|
||||
|
||||
a. retain the following if it is supplied by the Licensor
|
||||
with the Licensed Material:
|
||||
|
||||
i. identification of the creator(s) of the Licensed
|
||||
Material and any others designated to receive
|
||||
attribution, in any reasonable manner requested by
|
||||
the Licensor (including by pseudonym if
|
||||
designated);
|
||||
|
||||
ii. a copyright notice;
|
||||
|
||||
iii. a notice that refers to this Public License;
|
||||
|
||||
iv. a notice that refers to the disclaimer of
|
||||
warranties;
|
||||
|
||||
v. a URI or hyperlink to the Licensed Material to the
|
||||
extent reasonably practicable;
|
||||
|
||||
b. indicate if You modified the Licensed Material and
|
||||
retain an indication of any previous modifications; and
|
||||
|
||||
c. indicate the Licensed Material is licensed under this
|
||||
Public License, and include the text of, or the URI or
|
||||
hyperlink to, this Public License.
|
||||
|
||||
2. You may satisfy the conditions in Section 3(a)(1) in any
|
||||
reasonable manner based on the medium, means, and context in
|
||||
which You Share the Licensed Material. For example, it may be
|
||||
reasonable to satisfy the conditions by providing a URI or
|
||||
hyperlink to a resource that includes the required
|
||||
information.
|
||||
|
||||
3. If requested by the Licensor, You must remove any of the
|
||||
information required by Section 3(a)(1)(A) to the extent
|
||||
reasonably practicable.
|
||||
|
||||
4. If You Share Adapted Material You produce, the Adapter's
|
||||
License You apply must not prevent recipients of the Adapted
|
||||
Material from complying with this Public License.
|
||||
|
||||
|
||||
Section 4 -- Sui Generis Database Rights.
|
||||
|
||||
Where the Licensed Rights include Sui Generis Database Rights that
|
||||
apply to Your use of the Licensed Material:
|
||||
|
||||
a. for the avoidance of doubt, Section 2(a)(1) grants You the right
|
||||
to extract, reuse, reproduce, and Share all or a substantial
|
||||
portion of the contents of the database for NonCommercial purposes
|
||||
only;
|
||||
|
||||
b. if You include all or a substantial portion of the database
|
||||
contents in a database in which You have Sui Generis Database
|
||||
Rights, then the database in which You have Sui Generis Database
|
||||
Rights (but not its individual contents) is Adapted Material; and
|
||||
|
||||
c. You must comply with the conditions in Section 3(a) if You Share
|
||||
all or a substantial portion of the contents of the database.
|
||||
|
||||
For the avoidance of doubt, this Section 4 supplements and does not
|
||||
replace Your obligations under this Public License where the Licensed
|
||||
Rights include other Copyright and Similar Rights.
|
||||
|
||||
|
||||
Section 5 -- Disclaimer of Warranties and Limitation of Liability.
|
||||
|
||||
a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
|
||||
EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
|
||||
AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
|
||||
ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
|
||||
IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
|
||||
WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
|
||||
ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
|
||||
KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
|
||||
ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
|
||||
|
||||
b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
|
||||
TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
|
||||
NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
|
||||
INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
|
||||
COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
|
||||
USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
|
||||
ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
|
||||
DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
|
||||
IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
|
||||
|
||||
c. The disclaimer of warranties and limitation of liability provided
|
||||
above shall be interpreted in a manner that, to the extent
|
||||
possible, most closely approximates an absolute disclaimer and
|
||||
waiver of all liability.
|
||||
|
||||
|
||||
Section 6 -- Term and Termination.
|
||||
|
||||
a. This Public License applies for the term of the Copyright and
|
||||
Similar Rights licensed here. However, if You fail to comply with
|
||||
this Public License, then Your rights under this Public License
|
||||
terminate automatically.
|
||||
|
||||
b. Where Your right to use the Licensed Material has terminated under
|
||||
Section 6(a), it reinstates:
|
||||
|
||||
1. automatically as of the date the violation is cured, provided
|
||||
it is cured within 30 days of Your discovery of the
|
||||
violation; or
|
||||
|
||||
2. upon express reinstatement by the Licensor.
|
||||
|
||||
For the avoidance of doubt, this Section 6(b) does not affect any
|
||||
right the Licensor may have to seek remedies for Your violations
|
||||
of this Public License.
|
||||
|
||||
c. For the avoidance of doubt, the Licensor may also offer the
|
||||
Licensed Material under separate terms or conditions or stop
|
||||
distributing the Licensed Material at any time; however, doing so
|
||||
will not terminate this Public License.
|
||||
|
||||
d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
|
||||
License.
|
||||
|
||||
|
||||
Section 7 -- Other Terms and Conditions.
|
||||
|
||||
a. The Licensor shall not be bound by any additional or different
|
||||
terms or conditions communicated by You unless expressly agreed.
|
||||
|
||||
b. Any arrangements, understandings, or agreements regarding the
|
||||
Licensed Material not stated herein are separate from and
|
||||
independent of the terms and conditions of this Public License.
|
||||
|
||||
|
||||
Section 8 -- Interpretation.
|
||||
|
||||
a. For the avoidance of doubt, this Public License does not, and
|
||||
shall not be interpreted to, reduce, limit, restrict, or impose
|
||||
conditions on any use of the Licensed Material that could lawfully
|
||||
be made without permission under this Public License.
|
||||
|
||||
b. To the extent possible, if any provision of this Public License is
|
||||
deemed unenforceable, it shall be automatically reformed to the
|
||||
minimum extent necessary to make it enforceable. If the provision
|
||||
cannot be reformed, it shall be severed from this Public License
|
||||
without affecting the enforceability of the remaining terms and
|
||||
conditions.
|
||||
|
||||
c. No term or condition of this Public License will be waived and no
|
||||
failure to comply consented to unless expressly agreed to by the
|
||||
Licensor.
|
||||
|
||||
d. Nothing in this Public License constitutes or may be interpreted
|
||||
as a limitation upon, or waiver of, any privileges and immunities
|
||||
that apply to the Licensor or You, including from the legal
|
||||
processes of any jurisdiction or authority.
|
||||
|
||||
=======================================================================
|
||||
|
||||
Creative Commons is not a party to its public
|
||||
licenses. Notwithstanding, Creative Commons may elect to apply one of
|
||||
its public licenses to material it publishes and in those instances
|
||||
will be considered the “Licensor.” The text of the Creative Commons
|
||||
public licenses is dedicated to the public domain under the CC0 Public
|
||||
Domain Dedication. Except for the limited purpose of indicating that
|
||||
material is shared under a Creative Commons public license or as
|
||||
otherwise permitted by the Creative Commons policies published at
|
||||
creativecommons.org/policies, Creative Commons does not authorize the
|
||||
use of the trademark "Creative Commons" or any other trademark or logo
|
||||
of Creative Commons without its prior written consent including,
|
||||
without limitation, in connection with any unauthorized modifications
|
||||
to any of its public licenses or any other arrangements,
|
||||
understandings, or agreements concerning use of licensed material. For
|
||||
the avoidance of doubt, this paragraph does not form part of the
|
||||
public licenses.
|
||||
|
||||
Creative Commons may be contacted at creativecommons.org.
|
||||
|
||||
7
live2d/tha3/images/README.md
Normal file
7
live2d/tha3/images/README.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Crypko Characters
|
||||
|
||||
The files ``crypko_00.png``, ``crypko_01.png``, ..., ``crypko_07.png`` were created with [Crypko](http://crypko.ai). You can make use of these files only according to the [Crypko Guideline](https://crypko.ai/guideline/).
|
||||
|
||||
# Lambda Characters
|
||||
|
||||
The characters in ``lambda_00.png`` and ``lambda_01.png`` were created by [garun](http://twitter.com/garunstudio), a Thai comic artist who has [published in Japan](https://www.amazon.co.jp/dp/4758066116), through a commission by me, Pramook Khungurn. The copyright of the files and the characters, however, belongs to me. The artworks are licensed to the general public under the [Creative Commons Attribution-NonCommercial 4.0 International](https://creativecommons.org/licenses/by-nc/4.0/legalcode) license.
|
||||
BIN
live2d/tha3/images/input_spec.png
Normal file
BIN
live2d/tha3/images/input_spec.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 170 KiB |
BIN
live2d/tha3/images/lambda_00.png
Normal file
BIN
live2d/tha3/images/lambda_00.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 108 KiB |
0
live2d/tha3/mocap/__init__.py
Normal file
0
live2d/tha3/mocap/__init__.py
Normal file
239
live2d/tha3/mocap/ifacialmocap_constants.py
Normal file
239
live2d/tha3/mocap/ifacialmocap_constants.py
Normal file
@@ -0,0 +1,239 @@
|
||||
EYE_LOOK_IN_LEFT = "eyeLookInLeft"
|
||||
EYE_LOOK_OUT_LEFT = "eyeLookOutLeft"
|
||||
EYE_LOOK_DOWN_LEFT = "eyeLookDownLeft"
|
||||
EYE_LOOK_UP_LEFT = "eyeLookUpLeft"
|
||||
EYE_BLINK_LEFT = "eyeBlinkLeft"
|
||||
EYE_SQUINT_LEFT = "eyeSquintLeft"
|
||||
EYE_WIDE_LEFT = "eyeWideLeft"
|
||||
EYE_LOOK_IN_RIGHT = "eyeLookInRight"
|
||||
EYE_LOOK_OUT_RIGHT = "eyeLookOutRight"
|
||||
EYE_LOOK_DOWN_RIGHT = "eyeLookDownRight"
|
||||
EYE_LOOK_UP_RIGHT = "eyeLookUpRight"
|
||||
EYE_BLINK_RIGHT = "eyeBlinkRight"
|
||||
EYE_SQUINT_RIGHT = "eyeSquintRight"
|
||||
EYE_WIDE_RIGHT = "eyeWideRight"
|
||||
BROW_DOWN_LEFT = "browDownLeft"
|
||||
BROW_OUTER_UP_LEFT = "browOuterUpLeft"
|
||||
BROW_DOWN_RIGHT = "browDownRight"
|
||||
BROW_OUTER_UP_RIGHT = "browOuterUpRight"
|
||||
BROW_INNER_UP = "browInnerUp"
|
||||
NOSE_SNEER_LEFT = "noseSneerLeft"
|
||||
NOSE_SNEER_RIGHT = "noseSneerRight"
|
||||
CHEEK_SQUINT_LEFT = "cheekSquintLeft"
|
||||
CHEEK_SQUINT_RIGHT = "cheekSquintRight"
|
||||
CHEEK_PUFF = "cheekPuff"
|
||||
MOUTH_LEFT = "mouthLeft"
|
||||
MOUTH_DIMPLE_LEFT = "mouthDimpleLeft"
|
||||
MOUTH_FROWN_LEFT = "mouthFrownLeft"
|
||||
MOUTH_LOWER_DOWN_LEFT = "mouthLowerDownLeft"
|
||||
MOUTH_PRESS_LEFT = "mouthPressLeft"
|
||||
MOUTH_SMILE_LEFT = "mouthSmileLeft"
|
||||
MOUTH_STRETCH_LEFT = "mouthStretchLeft"
|
||||
MOUTH_UPPER_UP_LEFT = "mouthUpperUpLeft"
|
||||
MOUTH_RIGHT = "mouthRight"
|
||||
MOUTH_DIMPLE_RIGHT = "mouthDimpleRight"
|
||||
MOUTH_FROWN_RIGHT = "mouthFrownRight"
|
||||
MOUTH_LOWER_DOWN_RIGHT = "mouthLowerDownRight"
|
||||
MOUTH_PRESS_RIGHT = "mouthPressRight"
|
||||
MOUTH_SMILE_RIGHT = "mouthSmileRight"
|
||||
MOUTH_STRETCH_RIGHT = "mouthStretchRight"
|
||||
MOUTH_UPPER_UP_RIGHT = "mouthUpperUpRight"
|
||||
MOUTH_CLOSE = "mouthClose"
|
||||
MOUTH_FUNNEL = "mouthFunnel"
|
||||
MOUTH_PUCKER = "mouthPucker"
|
||||
MOUTH_ROLL_LOWER = "mouthRollLower"
|
||||
MOUTH_ROLL_UPPER = "mouthRollUpper"
|
||||
MOUTH_SHRUG_LOWER = "mouthShrugLower"
|
||||
MOUTH_SHRUG_UPPER = "mouthShrugUpper"
|
||||
JAW_LEFT = "jawLeft"
|
||||
JAW_RIGHT = "jawRight"
|
||||
JAW_FORWARD = "jawForward"
|
||||
JAW_OPEN = "jawOpen"
|
||||
TONGUE_OUT = "tongueOut"
|
||||
|
||||
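# All 52 ARKit-style face blendshape names, indexed 0-51 as noted in the trailing comments.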
BLENDSHAPE_NAMES = [
|
||||
EYE_LOOK_IN_LEFT, # 0
|
||||
EYE_LOOK_OUT_LEFT, # 1
|
||||
EYE_LOOK_DOWN_LEFT, # 2
|
||||
EYE_LOOK_UP_LEFT, # 3
|
||||
EYE_BLINK_LEFT, # 4
|
||||
EYE_SQUINT_LEFT, # 5
|
||||
EYE_WIDE_LEFT, # 6
|
||||
EYE_LOOK_IN_RIGHT, # 7
|
||||
EYE_LOOK_OUT_RIGHT, # 8
|
||||
EYE_LOOK_DOWN_RIGHT, # 9
|
||||
EYE_LOOK_UP_RIGHT, # 10
|
||||
EYE_BLINK_RIGHT, # 11
|
||||
EYE_SQUINT_RIGHT, # 12
|
||||
EYE_WIDE_RIGHT, # 13
|
||||
BROW_DOWN_LEFT, # 14
|
||||
BROW_OUTER_UP_LEFT, # 15
|
||||
BROW_DOWN_RIGHT, # 16
|
||||
BROW_OUTER_UP_RIGHT, # 17
|
||||
BROW_INNER_UP, # 18
|
||||
NOSE_SNEER_LEFT, # 19
|
||||
NOSE_SNEER_RIGHT, # 20
|
||||
CHEEK_SQUINT_LEFT, # 21
|
||||
CHEEK_SQUINT_RIGHT, # 22
|
||||
CHEEK_PUFF, # 23
|
||||
MOUTH_LEFT, # 24
|
||||
MOUTH_DIMPLE_LEFT, # 25
|
||||
MOUTH_FROWN_LEFT, # 26
|
||||
MOUTH_LOWER_DOWN_LEFT, # 27
|
||||
MOUTH_PRESS_LEFT, # 28
|
||||
MOUTH_SMILE_LEFT, # 29
|
||||
MOUTH_STRETCH_LEFT, # 30
|
||||
MOUTH_UPPER_UP_LEFT, # 31
|
||||
MOUTH_RIGHT, # 32
|
||||
MOUTH_DIMPLE_RIGHT, # 33
|
||||
MOUTH_FROWN_RIGHT, # 34
|
||||
MOUTH_LOWER_DOWN_RIGHT, # 35
|
||||
MOUTH_PRESS_RIGHT, # 36
|
||||
MOUTH_SMILE_RIGHT, # 37
|
||||
MOUTH_STRETCH_RIGHT, # 38
|
||||
MOUTH_UPPER_UP_RIGHT, # 39
|
||||
MOUTH_CLOSE, # 40
|
||||
MOUTH_FUNNEL, # 41
|
||||
MOUTH_PUCKER, # 42
|
||||
MOUTH_ROLL_LOWER, # 43
|
||||
MOUTH_ROLL_UPPER, # 44
|
||||
MOUTH_SHRUG_LOWER, # 45
|
||||
MOUTH_SHRUG_UPPER, # 46
|
||||
JAW_LEFT, # 47
|
||||
JAW_RIGHT, # 48
|
||||
JAW_FORWARD, # 49
|
||||
JAW_OPEN, # 50
|
||||
TONGUE_OUT, # 51
|
||||
]
|
||||
|
||||
EYE_LEFT_BLENDSHAPES = [
|
||||
EYE_LOOK_IN_LEFT, # 0
|
||||
EYE_LOOK_OUT_LEFT, # 1
|
||||
EYE_LOOK_DOWN_LEFT, # 2
|
||||
EYE_LOOK_UP_LEFT, # 3
|
||||
EYE_BLINK_LEFT, # 4
|
||||
EYE_SQUINT_LEFT, # 5
|
||||
EYE_WIDE_LEFT, # 6
|
||||
]
|
||||
|
||||
EYE_RIGHT_BLENDSHAPES = [
|
||||
EYE_LOOK_IN_RIGHT, # 7
|
||||
EYE_LOOK_OUT_RIGHT, # 8
|
||||
EYE_LOOK_DOWN_RIGHT, # 9
|
||||
EYE_LOOK_UP_RIGHT, # 10
|
||||
EYE_BLINK_RIGHT, # 11
|
||||
EYE_SQUINT_RIGHT, # 12
|
||||
EYE_WIDE_RIGHT, # 13
|
||||
]
|
||||
|
||||
BROW_LEFT_BLENDSHAPES = [
|
||||
BROW_DOWN_LEFT, # 14
|
||||
BROW_OUTER_UP_LEFT, # 15
|
||||
|
||||
]
|
||||
|
||||
BROW_RIGHT_BLENDSHAPES = [
|
||||
BROW_DOWN_RIGHT, # 16
|
||||
BROW_OUTER_UP_RIGHT, # 17
|
||||
|
||||
]
|
||||
|
||||
BROW_BOTH_BLENDSHAPES = [
|
||||
BROW_INNER_UP, # 18
|
||||
]
|
||||
|
||||
NOSE_BLENDSHAPES = [
|
||||
NOSE_SNEER_LEFT, # 19
|
||||
NOSE_SNEER_RIGHT, # 20
|
||||
]
|
||||
|
||||
CHECK_BLENDSHAPES = [
|
||||
CHEEK_SQUINT_LEFT, # 21
|
||||
CHEEK_SQUINT_RIGHT, # 22
|
||||
CHEEK_PUFF, # 23
|
||||
]
|
||||
|
||||
MOUTH_LEFT_BLENDSHAPES = [
|
||||
MOUTH_LEFT, # 24
|
||||
MOUTH_DIMPLE_LEFT, # 25
|
||||
MOUTH_FROWN_LEFT, # 26
|
||||
MOUTH_LOWER_DOWN_LEFT, # 27
|
||||
MOUTH_PRESS_LEFT, # 28
|
||||
MOUTH_SMILE_LEFT, # 29
|
||||
MOUTH_STRETCH_LEFT, # 30
|
||||
MOUTH_UPPER_UP_LEFT, # 31
|
||||
]
|
||||
|
||||
MOUTH_RIGHT_BLENDSHAPES = [
|
||||
MOUTH_RIGHT, # 32
|
||||
MOUTH_DIMPLE_RIGHT, # 33
|
||||
MOUTH_FROWN_RIGHT, # 34
|
||||
MOUTH_LOWER_DOWN_RIGHT, # 35
|
||||
MOUTH_PRESS_RIGHT, # 36
|
||||
MOUTH_SMILE_RIGHT, # 37
|
||||
MOUTH_STRETCH_RIGHT, # 38
|
||||
MOUTH_UPPER_UP_RIGHT, # 39
|
||||
]
|
||||
|
||||
MOUTH_BOTH_BLENDSHAPES = [
|
||||
MOUTH_CLOSE, # 40
|
||||
MOUTH_FUNNEL, # 41
|
||||
MOUTH_PUCKER, # 42
|
||||
MOUTH_ROLL_LOWER, # 43
|
||||
MOUTH_ROLL_UPPER, # 44
|
||||
MOUTH_SHRUG_LOWER, # 45
|
||||
MOUTH_SHRUG_UPPER, # 46
|
||||
]
|
||||
|
||||
JAW_BLENDSHAPES = [
|
||||
JAW_LEFT, # 47
|
||||
JAW_RIGHT, # 48
|
||||
JAW_FORWARD, # 49
|
||||
JAW_OPEN, # 50
|
||||
]
|
||||
|
||||
TONGUE_BLENDSHAPES = [
|
||||
TONGUE_OUT, # 51
|
||||
]
|
||||
|
||||
COLUMN_0_BLENDSHAPES = EYE_RIGHT_BLENDSHAPES + BROW_RIGHT_BLENDSHAPES + [NOSE_SNEER_RIGHT, CHEEK_SQUINT_RIGHT]
|
||||
COLUMN_1_BLENDSHAPES = EYE_LEFT_BLENDSHAPES + BROW_LEFT_BLENDSHAPES + [NOSE_SNEER_LEFT, CHEEK_SQUINT_LEFT]
|
||||
COLUMN_2_BLENDSHAPES = MOUTH_RIGHT_BLENDSHAPES + [JAW_RIGHT]
|
||||
COLUMN_3_BLENDSHAPES = MOUTH_LEFT_BLENDSHAPES + [JAW_LEFT]
|
||||
COLUMN_4_BLENDSHAPES = [BROW_INNER_UP, CHEEK_PUFF] + MOUTH_BOTH_BLENDSHAPES + [JAW_FORWARD, JAW_OPEN, TONGUE_OUT]
|
||||
|
||||
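# Five column groupings (right-side face, left-side face, right mouth/jaw, left mouth/jaw,
# and shared/centre shapes), presumably used to lay blendshape read-outs out in a grid.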
BLENDSHAPE_COLUMNS = [
|
||||
COLUMN_0_BLENDSHAPES,
|
||||
COLUMN_1_BLENDSHAPES,
|
||||
COLUMN_2_BLENDSHAPES,
|
||||
COLUMN_3_BLENDSHAPES,
|
||||
COLUMN_4_BLENDSHAPES,
|
||||
]
|
||||
|
||||
RIGHT_EYE_BONE_X = "rightEyeBoneX"
|
||||
RIGHT_EYE_BONE_Y = "rightEyeBoneY"
|
||||
RIGHT_EYE_BONE_Z = "rightEyeBoneZ"
|
||||
RIGHT_EYE_BONE_ROTATIONS = [RIGHT_EYE_BONE_X, RIGHT_EYE_BONE_Y, RIGHT_EYE_BONE_Z]
|
||||
|
||||
LEFT_EYE_BONE_X = "leftEyeBoneX"
|
||||
LEFT_EYE_BONE_Y = "leftEyeBoneY"
|
||||
LEFT_EYE_BONE_Z = "leftEyeBoneZ"
|
||||
LEFT_EYE_BONE_ROTATIONS = [LEFT_EYE_BONE_X, LEFT_EYE_BONE_Y, LEFT_EYE_BONE_Z]
|
||||
|
||||
HEAD_BONE_X = "headBoneX"
|
||||
HEAD_BONE_Y = "headBoneY"
|
||||
HEAD_BONE_Z = "headBoneZ"
|
||||
HEAD_BONE_ROTATIONS = [HEAD_BONE_X, HEAD_BONE_Y, HEAD_BONE_Z]
|
||||
|
||||
ROTATION_NAMES = RIGHT_EYE_BONE_ROTATIONS + LEFT_EYE_BONE_ROTATIONS + HEAD_BONE_ROTATIONS
|
||||
|
||||
RIGHT_EYE_BONE_QUAT = "rightEyeBoneQuat"
|
||||
LEFT_EYE_BONE_QUAT = "leftEyeBoneQuat"
|
||||
HEAD_BONE_QUAT = "headBoneQuat"
|
||||
QUATERNION_NAMES = [
|
||||
RIGHT_EYE_BONE_QUAT,
|
||||
LEFT_EYE_BONE_QUAT,
|
||||
HEAD_BONE_QUAT
|
||||
]
|
||||
|
||||
IFACIALMOCAP_DATETIME_FORMAT = "%Y/%m/%d-%H:%M:%S.%f"
|
||||
27
live2d/tha3/mocap/ifacialmocap_pose.py
Normal file
27
live2d/tha3/mocap/ifacialmocap_pose.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from tha3.mocap.ifacialmocap_constants import BLENDSHAPE_NAMES, HEAD_BONE_X, HEAD_BONE_Y, HEAD_BONE_Z, \
|
||||
HEAD_BONE_QUAT, LEFT_EYE_BONE_X, LEFT_EYE_BONE_Y, LEFT_EYE_BONE_Z, LEFT_EYE_BONE_QUAT, RIGHT_EYE_BONE_X, \
|
||||
RIGHT_EYE_BONE_Y, RIGHT_EYE_BONE_Z, RIGHT_EYE_BONE_QUAT
|
||||
|
||||
|
||||
def create_default_ifacialmocap_pose():
|
||||
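# Neutral pose: every blendshape at 0.0 and each bone with zero Euler angles plus an
# identity quaternion [0, 0, 0, 1].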
data = {}
|
||||
|
||||
for blendshape_name in BLENDSHAPE_NAMES:
|
||||
data[blendshape_name] = 0.0
|
||||
|
||||
data[HEAD_BONE_X] = 0.0
|
||||
data[HEAD_BONE_Y] = 0.0
|
||||
data[HEAD_BONE_Z] = 0.0
|
||||
data[HEAD_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0]
|
||||
|
||||
data[LEFT_EYE_BONE_X] = 0.0
|
||||
data[LEFT_EYE_BONE_Y] = 0.0
|
||||
data[LEFT_EYE_BONE_Z] = 0.0
|
||||
data[LEFT_EYE_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0]
|
||||
|
||||
data[RIGHT_EYE_BONE_X] = 0.0
|
||||
data[RIGHT_EYE_BONE_Y] = 0.0
|
||||
data[RIGHT_EYE_BONE_Z] = 0.0
|
||||
data[RIGHT_EYE_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0]
|
||||
|
||||
return data
|
||||
12
live2d/tha3/mocap/ifacialmocap_pose_converter.py
Normal file
12
live2d/tha3/mocap/ifacialmocap_pose_converter.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, List
|
||||
|
||||
|
||||
class IFacialMocapPoseConverter(ABC):
|
||||
@abstractmethod
|
||||
def convert(self, ifacialmocap_pose: Dict[str, float]) -> List[float]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def init_pose_converter_panel(self, parent):
|
||||
pass
|
||||
491
live2d/tha3/mocap/ifacialmocap_poser_converter_25.py
Normal file
491
live2d/tha3/mocap/ifacialmocap_poser_converter_25.py
Normal file
@@ -0,0 +1,491 @@
|
||||
import math
|
||||
import time
|
||||
from enum import Enum
|
||||
from typing import Optional, Dict, List
|
||||
|
||||
import numpy
|
||||
import scipy.optimize
|
||||
import wx
|
||||
|
||||
from tha3.mocap.ifacialmocap_constants import MOUTH_SMILE_LEFT, MOUTH_SHRUG_UPPER, MOUTH_SMILE_RIGHT, \
|
||||
BROW_INNER_UP, BROW_OUTER_UP_RIGHT, BROW_OUTER_UP_LEFT, BROW_DOWN_LEFT, BROW_DOWN_RIGHT, EYE_WIDE_LEFT, \
|
||||
EYE_WIDE_RIGHT, EYE_BLINK_LEFT, EYE_BLINK_RIGHT, CHEEK_SQUINT_LEFT, CHEEK_SQUINT_RIGHT, EYE_LOOK_IN_LEFT, \
|
||||
EYE_LOOK_OUT_LEFT, EYE_LOOK_IN_RIGHT, EYE_LOOK_OUT_RIGHT, EYE_LOOK_UP_LEFT, EYE_LOOK_UP_RIGHT, EYE_LOOK_DOWN_RIGHT, \
|
||||
EYE_LOOK_DOWN_LEFT, HEAD_BONE_X, HEAD_BONE_Y, HEAD_BONE_Z, JAW_OPEN, MOUTH_FROWN_LEFT, MOUTH_FROWN_RIGHT, \
|
||||
MOUTH_LOWER_DOWN_LEFT, MOUTH_LOWER_DOWN_RIGHT, MOUTH_FUNNEL, MOUTH_PUCKER
|
||||
from tha3.mocap.ifacialmocap_pose_converter import IFacialMocapPoseConverter
|
||||
from tha3.poser.modes.pose_parameters import get_pose_parameters
|
||||
|
||||
|
||||
class EyebrowDownMode(Enum):
|
||||
TROUBLED = 1
|
||||
ANGRY = 2
|
||||
LOWERED = 3
|
||||
SERIOUS = 4
|
||||
|
||||
|
||||
class WinkMode(Enum):
|
||||
NORMAL = 1
|
||||
RELAXED = 2
|
||||
|
||||
|
||||
def rad_to_deg(rad):
|
||||
return rad * 180.0 / math.pi
|
||||
|
||||
|
||||
def deg_to_rad(deg):
|
||||
return deg * math.pi / 180.0
|
||||
|
||||
|
||||
def clamp(x, min_value, max_value):
|
||||
return max(min_value, min(max_value, x))
|
||||
|
||||
|
||||
class IFacialMocapPoseConverter25Args:
|
||||
def __init__(self,
|
||||
lower_smile_threshold: float = 0.4,
|
||||
upper_smile_threshold: float = 0.6,
|
||||
eyebrow_down_mode: EyebrowDownMode = EyebrowDownMode.ANGRY,
|
||||
wink_mode: WinkMode = WinkMode.NORMAL,
|
||||
eye_surprised_max_value: float = 0.5,
|
||||
eye_wink_max_value: float = 0.8,
|
||||
eyebrow_down_max_value: float = 0.4,
|
||||
cheek_squint_min_value: float = 0.1,
|
||||
cheek_squint_max_value: float = 0.7,
|
||||
eye_rotation_factor: float = 1.0 / 0.75,
|
||||
jaw_open_min_value: float = 0.1,
|
||||
jaw_open_max_value: float = 0.4,
|
||||
mouth_frown_max_value: float = 0.6,
|
||||
mouth_funnel_min_value: float = 0.25,
|
||||
mouth_funnel_max_value: float = 0.5,
|
||||
iris_small_left=0.0,
|
||||
iris_small_right=0.0):
|
||||
self.iris_small_right = iris_small_right
|
||||
self.iris_small_left = iris_small_left
|
||||
self.wink_mode = wink_mode
|
||||
self.mouth_funnel_max_value = mouth_funnel_max_value
|
||||
self.mouth_funnel_min_value = mouth_funnel_min_value
|
||||
self.mouth_frown_max_value = mouth_frown_max_value
|
||||
self.jaw_open_max_value = jaw_open_max_value
|
||||
self.jaw_open_min_value = jaw_open_min_value
|
||||
self.eye_rotation_factor = eye_rotation_factor
|
||||
self.cheek_squint_max_value = cheek_squint_max_value
|
||||
self.cheek_squint_min_value = cheek_squint_min_value
|
||||
self.eyebrow_down_max_value = eyebrow_down_max_value
|
||||
self.eye_blink_max_value = eye_wink_max_value
|
||||
self.eye_wide_max_value = eye_surprised_max_value
|
||||
self.eyebrow_down_mode = eyebrow_down_mode
|
||||
self.lower_smile_threshold = lower_smile_threshold
|
||||
self.upper_smile_threshold = upper_smile_threshold
|
||||
|
||||
|
||||
class IFacialMocapPoseConverter25(IFacialMocapPoseConverter):
|
||||
def __init__(self, args: Optional[IFacialMocapPoseConverter25Args] = None):
|
||||
super().__init__()
|
||||
if args is None:
|
||||
args = IFacialMocapPoseConverter25Args()
|
||||
self.args = args
|
||||
pose_parameters = get_pose_parameters()
|
||||
self.pose_size = 45
|
||||
|
||||
self.eyebrow_troubled_left_index = pose_parameters.get_parameter_index("eyebrow_troubled_left")
|
||||
self.eyebrow_troubled_right_index = pose_parameters.get_parameter_index("eyebrow_troubled_right")
|
||||
self.eyebrow_angry_left_index = pose_parameters.get_parameter_index("eyebrow_angry_left")
|
||||
self.eyebrow_angry_right_index = pose_parameters.get_parameter_index("eyebrow_angry_right")
|
||||
self.eyebrow_happy_left_index = pose_parameters.get_parameter_index("eyebrow_happy_left")
|
||||
self.eyebrow_happy_right_index = pose_parameters.get_parameter_index("eyebrow_happy_right")
|
||||
self.eyebrow_raised_left_index = pose_parameters.get_parameter_index("eyebrow_raised_left")
|
||||
self.eyebrow_raised_right_index = pose_parameters.get_parameter_index("eyebrow_raised_right")
|
||||
self.eyebrow_lowered_left_index = pose_parameters.get_parameter_index("eyebrow_lowered_left")
|
||||
self.eyebrow_lowered_right_index = pose_parameters.get_parameter_index("eyebrow_lowered_right")
|
||||
self.eyebrow_serious_left_index = pose_parameters.get_parameter_index("eyebrow_serious_left")
|
||||
self.eyebrow_serious_right_index = pose_parameters.get_parameter_index("eyebrow_serious_right")
|
||||
|
||||
self.eye_surprised_left_index = pose_parameters.get_parameter_index("eye_surprised_left")
|
||||
self.eye_surprised_right_index = pose_parameters.get_parameter_index("eye_surprised_right")
|
||||
self.eye_wink_left_index = pose_parameters.get_parameter_index("eye_wink_left")
|
||||
self.eye_wink_right_index = pose_parameters.get_parameter_index("eye_wink_right")
|
||||
self.eye_happy_wink_left_index = pose_parameters.get_parameter_index("eye_happy_wink_left")
|
||||
self.eye_happy_wink_right_index = pose_parameters.get_parameter_index("eye_happy_wink_right")
|
||||
self.eye_relaxed_left_index = pose_parameters.get_parameter_index("eye_relaxed_left")
|
||||
self.eye_relaxed_right_index = pose_parameters.get_parameter_index("eye_relaxed_right")
|
||||
self.eye_raised_lower_eyelid_left_index = pose_parameters.get_parameter_index("eye_raised_lower_eyelid_left")
|
||||
self.eye_raised_lower_eyelid_right_index = pose_parameters.get_parameter_index("eye_raised_lower_eyelid_right")
|
||||
|
||||
self.iris_small_left_index = pose_parameters.get_parameter_index("iris_small_left")
|
||||
self.iris_small_right_index = pose_parameters.get_parameter_index("iris_small_right")
|
||||
|
||||
self.iris_rotation_x_index = pose_parameters.get_parameter_index("iris_rotation_x")
|
||||
self.iris_rotation_y_index = pose_parameters.get_parameter_index("iris_rotation_y")
|
||||
|
||||
self.head_x_index = pose_parameters.get_parameter_index("head_x")
|
||||
self.head_y_index = pose_parameters.get_parameter_index("head_y")
|
||||
self.neck_z_index = pose_parameters.get_parameter_index("neck_z")
|
||||
|
||||
self.mouth_aaa_index = pose_parameters.get_parameter_index("mouth_aaa")
|
||||
self.mouth_iii_index = pose_parameters.get_parameter_index("mouth_iii")
|
||||
self.mouth_uuu_index = pose_parameters.get_parameter_index("mouth_uuu")
|
||||
self.mouth_eee_index = pose_parameters.get_parameter_index("mouth_eee")
|
||||
self.mouth_ooo_index = pose_parameters.get_parameter_index("mouth_ooo")
|
||||
|
||||
self.mouth_lowered_corner_left_index = pose_parameters.get_parameter_index("mouth_lowered_corner_left")
|
||||
self.mouth_lowered_corner_right_index = pose_parameters.get_parameter_index("mouth_lowered_corner_right")
|
||||
self.mouth_raised_corner_left_index = pose_parameters.get_parameter_index("mouth_raised_corner_left")
|
||||
self.mouth_raised_corner_right_index = pose_parameters.get_parameter_index("mouth_raised_corner_right")
|
||||
|
||||
self.body_y_index = pose_parameters.get_parameter_index("body_y")
|
||||
self.body_z_index = pose_parameters.get_parameter_index("body_z")
|
||||
self.breathing_index = pose_parameters.get_parameter_index("breathing")
|
||||
|
||||
self.breathing_start_time = time.time()
|
||||
|
||||
self.panel = None
|
||||
|
||||
def init_pose_converter_panel(self, parent):
|
||||
self.panel = wx.Panel(parent, style=wx.SIMPLE_BORDER)
|
||||
self.panel_sizer = wx.BoxSizer(wx.VERTICAL)
|
||||
self.panel.SetSizer(self.panel_sizer)
|
||||
self.panel.SetAutoLayout(1)
|
||||
parent.GetSizer().Add(self.panel, 0, wx.EXPAND)
|
||||
|
||||
if True:
|
||||
eyebrow_down_mode_text = wx.StaticText(self.panel, label=" --- Eyebrow Down Mode --- ",
|
||||
style=wx.ALIGN_CENTER)
|
||||
self.panel_sizer.Add(eyebrow_down_mode_text, 0, wx.EXPAND)
|
||||
|
||||
self.eyebrow_down_mode_choice = wx.Choice(
|
||||
self.panel,
|
||||
choices=[
|
||||
"ANGRY",
|
||||
"TROUBLED",
|
||||
"SERIOUS",
|
||||
"LOWERED",
|
||||
])
|
||||
self.eyebrow_down_mode_choice.SetSelection(0)
|
||||
self.panel_sizer.Add(self.eyebrow_down_mode_choice, 0, wx.EXPAND)
|
||||
self.eyebrow_down_mode_choice.Bind(wx.EVT_CHOICE, self.change_eyebrow_down_mode)
|
||||
|
||||
separator = wx.StaticLine(self.panel, -1, size=(256, 5))
|
||||
self.panel_sizer.Add(separator, 0, wx.EXPAND)
|
||||
|
||||
if True:
|
||||
wink_mode_text = wx.StaticText(self.panel, label=" --- Wink Mode --- ", style=wx.ALIGN_CENTER)
|
||||
self.panel_sizer.Add(wink_mode_text, 0, wx.EXPAND)
|
||||
|
||||
self.wink_mode_choice = wx.Choice(
|
||||
self.panel,
|
||||
choices=[
|
||||
"NORMAL",
|
||||
"RELAXED",
|
||||
])
|
||||
self.wink_mode_choice.SetSelection(0)
|
||||
self.panel_sizer.Add(self.wink_mode_choice, 0, wx.EXPAND)
|
||||
self.wink_mode_choice.Bind(wx.EVT_CHOICE, self.change_wink_mode)
|
||||
|
||||
separator = wx.StaticLine(self.panel, -1, size=(256, 5))
|
||||
self.panel_sizer.Add(separator, 0, wx.EXPAND)
|
||||
|
||||
if True:
|
||||
iris_size_text = wx.StaticText(self.panel, label=" --- Iris Size --- ", style=wx.ALIGN_CENTER)
|
||||
self.panel_sizer.Add(iris_size_text, 0, wx.EXPAND)
|
||||
|
||||
self.iris_left_slider = wx.Slider(self.panel, minValue=0, maxValue=1000, value=0, style=wx.HORIZONTAL)
|
||||
self.panel_sizer.Add(self.iris_left_slider, 0, wx.EXPAND)
|
||||
self.iris_left_slider.Bind(wx.EVT_SLIDER, self.change_iris_size)
|
||||
|
||||
self.iris_right_slider = wx.Slider(self.panel, minValue=0, maxValue=1000, value=0, style=wx.HORIZONTAL)
|
||||
self.panel_sizer.Add(self.iris_right_slider, 0, wx.EXPAND)
|
||||
self.iris_right_slider.Bind(wx.EVT_SLIDER, self.change_iris_size)
|
||||
self.iris_right_slider.Enable(False)
|
||||
|
||||
self.link_left_right_irises = wx.CheckBox(
|
||||
self.panel, label="Use same value for both sides")
|
||||
self.link_left_right_irises.SetValue(True)
|
||||
self.panel_sizer.Add(self.link_left_right_irises, wx.SizerFlags().CenterHorizontal().Border())
|
||||
self.link_left_right_irises.Bind(wx.EVT_CHECKBOX, self.link_left_right_irises_clicked)
|
||||
|
||||
separator = wx.StaticLine(self.panel, -1, size=(256, 5))
|
||||
self.panel_sizer.Add(separator, 0, wx.EXPAND)
|
||||
|
||||
if True:
|
||||
breathing_frequency_text = wx.StaticText(
|
||||
self.panel, label=" --- Breathing --- ", style=wx.ALIGN_CENTER)
|
||||
self.panel_sizer.Add(breathing_frequency_text, 0, wx.EXPAND)
|
||||
|
||||
self.restart_breathing_cycle_button = wx.Button(self.panel, label="Restart Breathing Cycle")
|
||||
self.restart_breathing_cycle_button.Bind(wx.EVT_BUTTON, self.restart_breathing_cycle_clicked)
|
||||
self.panel_sizer.Add(self.restart_breathing_cycle_button, 0, wx.EXPAND)
|
||||
|
||||
self.breathing_frequency_slider = wx.Slider(
|
||||
self.panel, minValue=0, maxValue=60, value=20, style=wx.HORIZONTAL)
|
||||
self.panel_sizer.Add(self.breathing_frequency_slider, 0, wx.EXPAND)
|
||||
|
||||
self.breathing_gauge = wx.Gauge(self.panel, style=wx.GA_HORIZONTAL, range=1000)
|
||||
self.panel_sizer.Add(self.breathing_gauge, 0, wx.EXPAND)
|
||||
|
||||
self.panel_sizer.Fit(self.panel)
|
||||
|
||||
def restart_breathing_cycle_clicked(self, event: wx.Event):
|
||||
self.breathing_start_time = time.time()
|
||||
|
||||
def change_eyebrow_down_mode(self, event: wx.Event):
|
||||
selected_index = self.eyebrow_down_mode_choice.GetSelection()
|
||||
if selected_index == 0:
|
||||
self.args.eyebrow_down_mode = EyebrowDownMode.ANGRY
|
||||
elif selected_index == 1:
|
||||
self.args.eyebrow_down_mode = EyebrowDownMode.TROUBLED
|
||||
elif selected_index == 2:
|
||||
self.args.eyebrow_down_mode = EyebrowDownMode.SERIOUS
|
||||
else:
|
||||
self.args.eyebrow_down_mode = EyebrowDownMode.LOWERED
|
||||
|
||||
def change_wink_mode(self, event: wx.Event):
|
||||
selected_index = self.wink_mode_choice.GetSelection()
|
||||
if selected_index == 0:
|
||||
self.args.wink_mode = WinkMode.NORMAL
|
||||
else:
|
||||
self.args.wink_mode = WinkMode.RELAXED
|
||||
|
||||
def change_iris_size(self, event: wx.Event):
|
||||
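# Slider range 0-1000 maps to 0.0-1.0. When the irises are linked, mirror the left slider
# onto the right one and drive both values from it; otherwise read each slider independently.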
if self.link_left_right_irises.GetValue():
|
||||
left_value = self.iris_left_slider.GetValue()
|
||||
right_value = self.iris_right_slider.GetValue()
|
||||
if left_value != right_value:
|
||||
self.iris_right_slider.SetValue(left_value)
|
||||
self.args.iris_small_left = left_value / 1000.0
|
||||
self.args.iris_small_right = left_value / 1000.0
|
||||
else:
|
||||
self.args.iris_small_left = self.iris_left_slider.GetValue() / 1000.0
|
||||
self.args.iris_small_right = self.iris_right_slider.GetValue() / 1000.0
|
||||
|
||||
def link_left_right_irises_clicked(self, event: wx.Event):
|
||||
if self.link_left_right_irises.GetValue():
|
||||
self.iris_right_slider.Enable(False)
|
||||
else:
|
||||
self.iris_right_slider.Enable(True)
|
||||
self.change_iris_size(event)
|
||||
|
||||
def decompose_head_body_param(self, param, threshold=2.0 / 3):
|
||||
|
||||
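# Split a rotation value between head and body: anything within +/-threshold goes to the
# head alone; the excess beyond the threshold spills over to the body with the same sign
# (e.g. 1.0 with threshold 2/3 becomes head 2/3, body 1/3).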
if abs(param) < threshold:
|
||||
return (param, 0.0)
|
||||
else:
|
||||
if param < 0:
|
||||
sign = -1.0
|
||||
else:
|
||||
sign = 1.0
|
||||
return (threshold * sign, (abs(param) - threshold) * sign)
|
||||
|
||||
breathing_start_time = time.time()
|
||||
def convert(self, ifacialmocap_pose: Dict[str, float]) -> List[float]:
|
||||
pose = [0.0 for i in range(self.pose_size)]
|
||||
|
||||
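# Smile strength: average the left/right mouthSmile blendshapes and add mouthShrugUpper,
# then map it linearly from lower_smile_threshold..upper_smile_threshold onto a 0-1
# smile_degree (clamped at both ends).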
smile_value = \
|
||||
(ifacialmocap_pose[MOUTH_SMILE_LEFT] + ifacialmocap_pose[MOUTH_SMILE_RIGHT]) / 2.0 \
|
||||
+ ifacialmocap_pose[MOUTH_SHRUG_UPPER]
|
||||
if smile_value < self.args.lower_smile_threshold:
|
||||
smile_degree = 0.0
|
||||
elif smile_value > self.args.upper_smile_threshold:
|
||||
smile_degree = 1.0
|
||||
else:
|
||||
smile_degree = (smile_value - self.args.lower_smile_threshold) / (
|
||||
self.args.upper_smile_threshold - self.args.lower_smile_threshold)
|
||||
|
||||
# Eyebrow
|
||||
if True:
|
||||
brow_inner_up = ifacialmocap_pose[BROW_INNER_UP]
|
||||
brow_outer_up_right = ifacialmocap_pose[BROW_OUTER_UP_RIGHT]
|
||||
brow_outer_up_left = ifacialmocap_pose[BROW_OUTER_UP_LEFT]
|
||||
|
||||
brow_up_left = clamp(brow_inner_up + brow_outer_up_left, 0.0, 1.0)
|
||||
brow_up_right = clamp(brow_inner_up + brow_outer_up_right, 0.0, 1.0)
|
||||
pose[self.eyebrow_raised_left_index] = brow_up_left
|
||||
pose[self.eyebrow_raised_right_index] = brow_up_right
|
||||
|
||||
brow_down_left = (1.0 - smile_degree) \
|
||||
* clamp(ifacialmocap_pose[BROW_DOWN_LEFT] / self.args.eyebrow_down_max_value, 0.0, 1.0)
|
||||
brow_down_right = (1.0 - smile_degree) \
|
||||
* clamp(ifacialmocap_pose[BROW_DOWN_RIGHT] / self.args.eyebrow_down_max_value, 0.0, 1.0)
|
||||
if self.args.eyebrow_down_mode == EyebrowDownMode.TROUBLED:
|
||||
pose[self.eyebrow_troubled_left_index] = brow_down_left
|
||||
pose[self.eyebrow_troubled_right_index] = brow_down_right
|
||||
elif self.args.eyebrow_down_mode == EyebrowDownMode.ANGRY:
|
||||
pose[self.eyebrow_angry_left_index] = brow_down_left
|
||||
pose[self.eyebrow_angry_right_index] = brow_down_right
|
||||
elif self.args.eyebrow_down_mode == EyebrowDownMode.LOWERED:
|
||||
pose[self.eyebrow_lowered_left_index] = brow_down_left
|
||||
pose[self.eyebrow_lowered_right_index] = brow_down_right
|
||||
elif self.args.eyebrow_down_mode == EyebrowDownMode.SERIOUS:
|
||||
pose[self.eyebrow_serious_left_index] = brow_down_left
|
||||
pose[self.eyebrow_serious_right_index] = brow_down_right
|
||||
|
||||
brow_happy_value = clamp(smile_value, 0.0, 1.0) * smile_degree
|
||||
pose[self.eyebrow_happy_left_index] = brow_happy_value
|
||||
pose[self.eyebrow_happy_right_index] = brow_happy_value
|
||||
|
||||
# Eye
|
||||
if True:
|
||||
# Surprised
|
||||
pose[self.eye_surprised_left_index] = clamp(
|
||||
ifacialmocap_pose[EYE_WIDE_LEFT] / self.args.eye_wide_max_value, 0.0, 1.0)
|
||||
pose[self.eye_surprised_right_index] = clamp(
|
||||
ifacialmocap_pose[EYE_WIDE_RIGHT] / self.args.eye_wide_max_value, 0.0, 1.0)
|
||||
|
||||
# Wink
|
||||
if self.args.wink_mode == WinkMode.NORMAL:
|
||||
wink_left_index = self.eye_wink_left_index
|
||||
wink_right_index = self.eye_wink_right_index
|
||||
else:
|
||||
wink_left_index = self.eye_relaxed_left_index
|
||||
wink_right_index = self.eye_relaxed_right_index
|
||||
pose[wink_left_index] = (1.0 - smile_degree) * clamp(
|
||||
ifacialmocap_pose[EYE_BLINK_LEFT] / self.args.eye_blink_max_value, 0.0, 1.0)
|
||||
pose[wink_right_index] = (1.0 - smile_degree) * clamp(
|
||||
ifacialmocap_pose[EYE_BLINK_RIGHT] / self.args.eye_blink_max_value, 0.0, 1.0)
|
||||
pose[self.eye_happy_wink_left_index] = smile_degree * clamp(
|
||||
ifacialmocap_pose[EYE_BLINK_LEFT] / self.args.eye_blink_max_value, 0.0, 1.0)
|
||||
pose[self.eye_happy_wink_right_index] = smile_degree * clamp(
|
||||
ifacialmocap_pose[EYE_BLINK_RIGHT] / self.args.eye_blink_max_value, 0.0, 1.0)
|
||||
|
||||
# Lower eyelid
|
||||
cheek_squint_denom = self.args.cheek_squint_max_value - self.args.cheek_squint_min_value
|
||||
pose[self.eye_raised_lower_eyelid_left_index] = \
|
||||
clamp(
|
||||
(ifacialmocap_pose[CHEEK_SQUINT_LEFT] - self.args.cheek_squint_min_value) / cheek_squint_denom,
|
||||
0.0, 1.0)
|
||||
pose[self.eye_raised_lower_eyelid_right_index] = \
|
||||
clamp(
|
||||
(ifacialmocap_pose[CHEEK_SQUINT_RIGHT] - self.args.cheek_squint_min_value) / cheek_squint_denom,
|
||||
0.0, 1.0)
|
||||
|
||||
# Iris rotation
|
||||
if True:
|
||||
eye_rotation_y = (ifacialmocap_pose[EYE_LOOK_IN_LEFT]
|
||||
- ifacialmocap_pose[EYE_LOOK_OUT_LEFT]
|
||||
- ifacialmocap_pose[EYE_LOOK_IN_RIGHT]
|
||||
+ ifacialmocap_pose[EYE_LOOK_OUT_RIGHT]) / 2.0 * self.args.eye_rotation_factor
|
||||
pose[self.iris_rotation_y_index] = clamp(eye_rotation_y, -1.0, 1.0)
|
||||
|
||||
eye_rotation_x = (ifacialmocap_pose[EYE_LOOK_UP_LEFT]
|
||||
+ ifacialmocap_pose[EYE_LOOK_UP_RIGHT]
|
||||
- ifacialmocap_pose[EYE_LOOK_DOWN_LEFT]
|
||||
- ifacialmocap_pose[EYE_LOOK_DOWN_RIGHT]) / 2.0 * self.args.eye_rotation_factor
|
||||
pose[self.iris_rotation_x_index] = clamp(eye_rotation_x, -1.0, 1.0)
|
||||
|
||||
# Iris size
|
||||
if True:
|
||||
pose[self.iris_small_left_index] = self.args.iris_small_left
|
||||
pose[self.iris_small_right_index] = self.args.iris_small_right
|
||||
|
||||
# Head rotation
|
||||
if True:
|
||||
x_param = clamp(-ifacialmocap_pose[HEAD_BONE_X] * 180.0 / math.pi, -15.0, 15.0) / 15.0
|
||||
pose[self.head_x_index] = x_param
|
||||
|
||||
y_param = clamp(-ifacialmocap_pose[HEAD_BONE_Y] * 180.0 / math.pi, -10.0, 10.0) / 10.0
|
||||
pose[self.head_y_index] = y_param
|
||||
pose[self.body_y_index] = y_param
|
||||
|
||||
z_param = clamp(ifacialmocap_pose[HEAD_BONE_Z] * 180.0 / math.pi, -15.0, 15.0) / 15.0
|
||||
pose[self.neck_z_index] = z_param
|
||||
pose[self.body_z_index] = z_param
|
||||
|
||||
# Mouth
|
||||
if True:
|
||||
jaw_open_denom = self.args.jaw_open_max_value - self.args.jaw_open_min_value
|
||||
mouth_open = clamp((ifacialmocap_pose[JAW_OPEN] - self.args.jaw_open_min_value) / jaw_open_denom, 0.0, 1.0)
|
||||
pose[self.mouth_aaa_index] = mouth_open
|
||||
pose[self.mouth_raised_corner_left_index] = clamp(smile_value, 0.0, 1.0)
|
||||
pose[self.mouth_raised_corner_right_index] = clamp(smile_value, 0.0, 1.0)
|
||||
|
||||
is_mouth_open = mouth_open > 0.0
|
||||
if not is_mouth_open:
|
||||
mouth_frown_value = clamp(
|
||||
(ifacialmocap_pose[MOUTH_FROWN_LEFT] + ifacialmocap_pose[
|
||||
MOUTH_FROWN_RIGHT]) / self.args.mouth_frown_max_value, 0.0, 1.0)
|
||||
pose[self.mouth_lowered_corner_left_index] = mouth_frown_value
|
||||
pose[self.mouth_lowered_corner_right_index] = mouth_frown_value
|
||||
else:
|
||||
mouth_lower_down = clamp(
|
||||
ifacialmocap_pose[MOUTH_LOWER_DOWN_LEFT] + ifacialmocap_pose[MOUTH_LOWER_DOWN_RIGHT], 0.0, 1.0)
|
||||
mouth_funnel = ifacialmocap_pose[MOUTH_FUNNEL]
|
||||
mouth_pucker = ifacialmocap_pose[MOUTH_PUCKER]
|
||||
|
||||
mouth_point = [mouth_open, mouth_lower_down, mouth_funnel, mouth_pucker]
|
||||
|
||||
aaa_point = [1.0, 1.0, 0.0, 0.0]
|
||||
iii_point = [0.0, 1.0, 0.0, 0.0]
|
||||
uuu_point = [0.5, 0.3, 0.25, 0.75]
|
||||
ooo_point = [1.0, 0.5, 0.5, 0.4]
|
||||
|
||||
decomp = numpy.array([0, 0, 0, 0])
|
||||
M = numpy.array([
|
||||
aaa_point,
|
||||
iii_point,
|
||||
uuu_point,
|
||||
ooo_point
|
||||
])
|
||||
|
||||
def loss(decomp):
|
||||
return numpy.linalg.norm(numpy.matmul(decomp, M) - mouth_point) \
|
||||
+ 0.01 * numpy.linalg.norm(decomp, ord=1)
|
||||
|
||||
opt_result = scipy.optimize.minimize(
|
||||
loss, decomp, bounds=[(0.0, 1.0), (0.0, 1.0), (0.0, 1.0), (0.0, 1.0)])
|
||||
decomp = opt_result["x"]
|
||||
restricted_decomp = [decomp.item(0), decomp.item(1), decomp.item(2), decomp.item(3)]
|
||||
pose[self.mouth_aaa_index] = restricted_decomp[0]
|
||||
pose[self.mouth_iii_index] = restricted_decomp[1]
|
||||
mouth_funnel_denom = self.args.mouth_funnel_max_value - self.args.mouth_funnel_min_value
|
||||
ooo_alpha = clamp((mouth_funnel - self.args.mouth_funnel_min_value) / mouth_funnel_denom, 0.0, 1.0)
|
||||
uo_value = clamp(restricted_decomp[2] + restricted_decomp[3], 0.0, 1.0)
|
||||
pose[self.mouth_uuu_index] = uo_value * (1.0 - ooo_alpha)
|
||||
pose[self.mouth_ooo_index] = uo_value * ooo_alpha
|
||||
|
||||
#if self.panel is not None:
|
||||
#frequency = self.breathing_frequency_slider.GetValue()
|
||||
frequency = 18 #breathing rate 10-50
|
||||
if frequency == 0:
|
||||
#value = 0.0
|
||||
#pose[self.breathing_index] = value
|
||||
self.breathing_start_time = time.time()
|
||||
else:
|
||||
period = 60.0 / frequency
|
||||
now = time.time()
|
||||
diff = now - self.breathing_start_time
|
||||
frac = (diff % period) / period
|
||||
value = (-math.cos(2 * math.pi * frac) + 1.0) / 2.0
|
||||
pose[self.breathing_index] = value
|
||||
#print("pose", pose[self.breathing_index])
|
||||
#self.breathing_gauge.SetValue(int(1000 * value))
|
||||
|
||||
|
||||
|
||||
return pose
|
||||
|
||||
|
||||
def create_ifacialmocap_pose_converter(
|
||||
args: Optional[IFacialMocapPoseConverter25Args] = None) -> IFacialMocapPoseConverter:
|
||||
return IFacialMocapPoseConverter25(args)
|
||||
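The mouth branch above recovers AAA/III/UUU/OOO weights by solving a small box-constrained least-squares problem with an L1 penalty. A minimal standalone sketch of the same idea, with hypothetical blendshape readings and assuming only numpy and scipy are available:

import numpy
import scipy.optimize

# Hypothetical measured mouth features: [open, lower_down, funnel, pucker].
mouth_point = numpy.array([0.7, 0.4, 0.2, 0.3])

# Rows are the canonical mouth shapes in the same feature space (values copied from the converter above).
M = numpy.array([
    [1.0, 1.0, 0.0, 0.0],    # aaa
    [0.0, 1.0, 0.0, 0.0],    # iii
    [0.5, 0.3, 0.25, 0.75],  # uuu
    [1.0, 0.5, 0.5, 0.4],    # ooo
])

def loss(decomp):
    # Distance to the measured point plus a small L1 penalty that keeps the mix sparse.
    return numpy.linalg.norm(decomp @ M - mouth_point) + 0.01 * numpy.linalg.norm(decomp, ord=1)

result = scipy.optimize.minimize(loss, numpy.zeros(4), bounds=[(0.0, 1.0)] * 4)
print("aaa/iii/uuu/ooo weights:", result.x)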
89
live2d/tha3/mocap/ifacialmocap_v2.py
Normal file
@@ -0,0 +1,89 @@
import math

from tha3.mocap.ifacialmocap_constants import BLENDSHAPE_NAMES, HEAD_BONE_X, HEAD_BONE_Y, HEAD_BONE_Z, \
    RIGHT_EYE_BONE_X, RIGHT_EYE_BONE_Y, RIGHT_EYE_BONE_Z, LEFT_EYE_BONE_X, LEFT_EYE_BONE_Y, LEFT_EYE_BONE_Z, \
    HEAD_BONE_QUAT, LEFT_EYE_BONE_QUAT, RIGHT_EYE_BONE_QUAT

IFACIALMOCAP_PORT = 49983
IFACIALMOCAP_START_STRING = "iFacialMocap_sahuasouryya9218sauhuiayeta91555dy3719|sendDataVersion=v2".encode('utf-8')


def parse_ifacialmocap_v2_pose(ifacialmocap_output):
    output = {}
    parts = ifacialmocap_output.split("|")
    for part in parts:
        part = part.strip()
        if len(part) == 0:
            continue
        if "&" in part:
            components = part.split("&")
            assert len(components) == 2
            key = components[0]
            value = float(components[1]) / 100.0
            if key.endswith("_L"):
                key = key[:-2] + "Left"
            elif key.endswith("_R"):
                key = key[:-2] + "Right"
            if key in BLENDSHAPE_NAMES:
                output[key] = value
        elif part.startswith("=head#"):
            components = part[len("=head#"):].split(",")
            assert len(components) == 6
            output[HEAD_BONE_X] = float(components[0]) * math.pi / 180
            output[HEAD_BONE_Y] = float(components[1]) * math.pi / 180
            output[HEAD_BONE_Z] = float(components[2]) * math.pi / 180
        elif part.startswith("rightEye#"):
            components = part[len("rightEye#"):].split(",")
            output[RIGHT_EYE_BONE_X] = float(components[0]) * math.pi / 180
            output[RIGHT_EYE_BONE_Y] = float(components[1]) * math.pi / 180
            output[RIGHT_EYE_BONE_Z] = float(components[2]) * math.pi / 180
        elif part.startswith("leftEye#"):
            components = part[len("leftEye#"):].split(",")
            output[LEFT_EYE_BONE_X] = float(components[0]) * math.pi / 180
            output[LEFT_EYE_BONE_Y] = float(components[1]) * math.pi / 180
            output[LEFT_EYE_BONE_Z] = float(components[2]) * math.pi / 180
    output[HEAD_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0]
    output[LEFT_EYE_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0]
    output[RIGHT_EYE_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0]
    return output


def parse_ifacialmocap_v1_pose(ifacialmocap_output):
    output = {}
    parts = ifacialmocap_output.split("|")
    for part in parts:
        part = part.strip()
        if len(part) == 0:
            continue
        if part.startswith("=head#"):
            components = part[len("=head#"):].split(",")
            assert len(components) == 6
            output[HEAD_BONE_X] = float(components[0]) * math.pi / 180
            output[HEAD_BONE_Y] = float(components[1]) * math.pi / 180
            output[HEAD_BONE_Z] = float(components[2]) * math.pi / 180
        elif part.startswith("rightEye#"):
            components = part[len("rightEye#"):].split(",")
            output[RIGHT_EYE_BONE_X] = float(components[0]) * math.pi / 180
            output[RIGHT_EYE_BONE_Y] = float(components[1]) * math.pi / 180
            output[RIGHT_EYE_BONE_Z] = float(components[2]) * math.pi / 180
        elif part.startswith("leftEye#"):
            components = part[len("leftEye#"):].split(",")
            output[LEFT_EYE_BONE_X] = float(components[0]) * math.pi / 180
            output[LEFT_EYE_BONE_Y] = float(components[1]) * math.pi / 180
            output[LEFT_EYE_BONE_Z] = float(components[2]) * math.pi / 180
        else:
            components = part.split("-")
            assert len(components) == 2
            key = components[0]
            value = float(components[1]) / 100.0
            if key.endswith("_L"):
                key = key[:-2] + "Left"
            elif key.endswith("_R"):
                key = key[:-2] + "Right"
            if key in BLENDSHAPE_NAMES:
                output[key] = value
    output[HEAD_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0]
    output[LEFT_EYE_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0]
    output[RIGHT_EYE_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0]
    return output
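A hedged usage sketch of the v2 parser above. The packet string and the blendshape key names in it are made up for illustration; only keys that actually appear in BLENDSHAPE_NAMES are kept by the parser.

from tha3.mocap.ifacialmocap_constants import HEAD_BONE_X
from tha3.mocap.ifacialmocap_v2 import parse_ifacialmocap_v2_pose

# Hypothetical v2 packet: blendshapes are "name&percent", head rotation is "=head#..." in degrees.
sample_packet = "jawOpen&25|mouthSmile_L&40|=head#5.0,-3.0,1.5,0.0,0.0,0.0|rightEye#2.0,0.0,0.0|leftEye#2.0,0.0,0.0"
pose = parse_ifacialmocap_v2_pose(sample_packet)
print(pose.get("jawOpen"))    # 0.25, if "jawOpen" is in BLENDSHAPE_NAMES
print(pose[HEAD_BONE_X])      # head pitch converted to radians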
396
live2d/tha3/models/LICENSE.txt
Normal file
@@ -0,0 +1,396 @@
|
||||
Attribution 4.0 International
|
||||
|
||||
=======================================================================
|
||||
|
||||
Creative Commons Corporation ("Creative Commons") is not a law firm and
|
||||
does not provide legal services or legal advice. Distribution of
|
||||
Creative Commons public licenses does not create a lawyer-client or
|
||||
other relationship. Creative Commons makes its licenses and related
|
||||
information available on an "as-is" basis. Creative Commons gives no
|
||||
warranties regarding its licenses, any material licensed under their
|
||||
terms and conditions, or any related information. Creative Commons
|
||||
disclaims all liability for damages resulting from their use to the
|
||||
fullest extent possible.
|
||||
|
||||
Using Creative Commons Public Licenses
|
||||
|
||||
Creative Commons public licenses provide a standard set of terms and
|
||||
conditions that creators and other rights holders may use to share
|
||||
original works of authorship and other material subject to copyright
|
||||
and certain other rights specified in the public license below. The
|
||||
following considerations are for informational purposes only, are not
|
||||
exhaustive, and do not form part of our licenses.
|
||||
|
||||
Considerations for licensors: Our public licenses are
|
||||
intended for use by those authorized to give the public
|
||||
permission to use material in ways otherwise restricted by
|
||||
copyright and certain other rights. Our licenses are
|
||||
irrevocable. Licensors should read and understand the terms
|
||||
and conditions of the license they choose before applying it.
|
||||
Licensors should also secure all rights necessary before
|
||||
applying our licenses so that the public can reuse the
|
||||
material as expected. Licensors should clearly mark any
|
||||
material not subject to the license. This includes other CC-
|
||||
licensed material, or material used under an exception or
|
||||
limitation to copyright. More considerations for licensors:
|
||||
wiki.creativecommons.org/Considerations_for_licensors
|
||||
|
||||
Considerations for the public: By using one of our public
|
||||
licenses, a licensor grants the public permission to use the
|
||||
licensed material under specified terms and conditions. If
|
||||
the licensor's permission is not necessary for any reason--for
|
||||
example, because of any applicable exception or limitation to
|
||||
copyright--then that use is not regulated by the license. Our
|
||||
licenses grant only permissions under copyright and certain
|
||||
other rights that a licensor has authority to grant. Use of
|
||||
the licensed material may still be restricted for other
|
||||
reasons, including because others have copyright or other
|
||||
rights in the material. A licensor may make special requests,
|
||||
such as asking that all changes be marked or described.
|
||||
Although not required by our licenses, you are encouraged to
|
||||
respect those requests where reasonable. More considerations
|
||||
for the public:
|
||||
wiki.creativecommons.org/Considerations_for_licensees
|
||||
|
||||
=======================================================================
|
||||
|
||||
Creative Commons Attribution 4.0 International Public License
|
||||
|
||||
By exercising the Licensed Rights (defined below), You accept and agree
|
||||
to be bound by the terms and conditions of this Creative Commons
|
||||
Attribution 4.0 International Public License ("Public License"). To the
|
||||
extent this Public License may be interpreted as a contract, You are
|
||||
granted the Licensed Rights in consideration of Your acceptance of
|
||||
these terms and conditions, and the Licensor grants You such rights in
|
||||
consideration of benefits the Licensor receives from making the
|
||||
Licensed Material available under these terms and conditions.
|
||||
|
||||
|
||||
Section 1 -- Definitions.
|
||||
|
||||
a. Adapted Material means material subject to Copyright and Similar
|
||||
Rights that is derived from or based upon the Licensed Material
|
||||
and in which the Licensed Material is translated, altered,
|
||||
arranged, transformed, or otherwise modified in a manner requiring
|
||||
permission under the Copyright and Similar Rights held by the
|
||||
Licensor. For purposes of this Public License, where the Licensed
|
||||
Material is a musical work, performance, or sound recording,
|
||||
Adapted Material is always produced where the Licensed Material is
|
||||
synched in timed relation with a moving image.
|
||||
|
||||
b. Adapter's License means the license You apply to Your Copyright
|
||||
and Similar Rights in Your contributions to Adapted Material in
|
||||
accordance with the terms and conditions of this Public License.
|
||||
|
||||
c. Copyright and Similar Rights means copyright and/or similar rights
|
||||
closely related to copyright including, without limitation,
|
||||
performance, broadcast, sound recording, and Sui Generis Database
|
||||
Rights, without regard to how the rights are labeled or
|
||||
categorized. For purposes of this Public License, the rights
|
||||
specified in Section 2(b)(1)-(2) are not Copyright and Similar
|
||||
Rights.
|
||||
|
||||
d. Effective Technological Measures means those measures that, in the
|
||||
absence of proper authority, may not be circumvented under laws
|
||||
fulfilling obligations under Article 11 of the WIPO Copyright
|
||||
Treaty adopted on December 20, 1996, and/or similar international
|
||||
agreements.
|
||||
|
||||
e. Exceptions and Limitations means fair use, fair dealing, and/or
|
||||
any other exception or limitation to Copyright and Similar Rights
|
||||
that applies to Your use of the Licensed Material.
|
||||
|
||||
f. Licensed Material means the artistic or literary work, database,
|
||||
or other material to which the Licensor applied this Public
|
||||
License.
|
||||
|
||||
g. Licensed Rights means the rights granted to You subject to the
|
||||
terms and conditions of this Public License, which are limited to
|
||||
all Copyright and Similar Rights that apply to Your use of the
|
||||
Licensed Material and that the Licensor has authority to license.
|
||||
|
||||
h. Licensor means the individual(s) or entity(ies) granting rights
|
||||
under this Public License.
|
||||
|
||||
i. Share means to provide material to the public by any means or
|
||||
process that requires permission under the Licensed Rights, such
|
||||
as reproduction, public display, public performance, distribution,
|
||||
dissemination, communication, or importation, and to make material
|
||||
available to the public including in ways that members of the
|
||||
public may access the material from a place and at a time
|
||||
individually chosen by them.
|
||||
|
||||
j. Sui Generis Database Rights means rights other than copyright
|
||||
resulting from Directive 96/9/EC of the European Parliament and of
|
||||
the Council of 11 March 1996 on the legal protection of databases,
|
||||
as amended and/or succeeded, as well as other essentially
|
||||
equivalent rights anywhere in the world.
|
||||
|
||||
k. You means the individual or entity exercising the Licensed Rights
|
||||
under this Public License. Your has a corresponding meaning.
|
||||
|
||||
|
||||
Section 2 -- Scope.
|
||||
|
||||
a. License grant.
|
||||
|
||||
1. Subject to the terms and conditions of this Public License,
|
||||
the Licensor hereby grants You a worldwide, royalty-free,
|
||||
non-sublicensable, non-exclusive, irrevocable license to
|
||||
exercise the Licensed Rights in the Licensed Material to:
|
||||
|
||||
a. reproduce and Share the Licensed Material, in whole or
|
||||
in part; and
|
||||
|
||||
b. produce, reproduce, and Share Adapted Material.
|
||||
|
||||
2. Exceptions and Limitations. For the avoidance of doubt, where
|
||||
Exceptions and Limitations apply to Your use, this Public
|
||||
License does not apply, and You do not need to comply with
|
||||
its terms and conditions.
|
||||
|
||||
3. Term. The term of this Public License is specified in Section
|
||||
6(a).
|
||||
|
||||
4. Media and formats; technical modifications allowed. The
|
||||
Licensor authorizes You to exercise the Licensed Rights in
|
||||
all media and formats whether now known or hereafter created,
|
||||
and to make technical modifications necessary to do so. The
|
||||
Licensor waives and/or agrees not to assert any right or
|
||||
authority to forbid You from making technical modifications
|
||||
necessary to exercise the Licensed Rights, including
|
||||
technical modifications necessary to circumvent Effective
|
||||
Technological Measures. For purposes of this Public License,
|
||||
simply making modifications authorized by this Section 2(a)
|
||||
(4) never produces Adapted Material.
|
||||
|
||||
5. Downstream recipients.
|
||||
|
||||
a. Offer from the Licensor -- Licensed Material. Every
|
||||
recipient of the Licensed Material automatically
|
||||
receives an offer from the Licensor to exercise the
|
||||
Licensed Rights under the terms and conditions of this
|
||||
Public License.
|
||||
|
||||
b. No downstream restrictions. You may not offer or impose
|
||||
any additional or different terms or conditions on, or
|
||||
apply any Effective Technological Measures to, the
|
||||
Licensed Material if doing so restricts exercise of the
|
||||
Licensed Rights by any recipient of the Licensed
|
||||
Material.
|
||||
|
||||
6. No endorsement. Nothing in this Public License constitutes or
|
||||
may be construed as permission to assert or imply that You
|
||||
are, or that Your use of the Licensed Material is, connected
|
||||
with, or sponsored, endorsed, or granted official status by,
|
||||
the Licensor or others designated to receive attribution as
|
||||
provided in Section 3(a)(1)(A)(i).
|
||||
|
||||
b. Other rights.
|
||||
|
||||
1. Moral rights, such as the right of integrity, are not
|
||||
licensed under this Public License, nor are publicity,
|
||||
privacy, and/or other similar personality rights; however, to
|
||||
the extent possible, the Licensor waives and/or agrees not to
|
||||
assert any such rights held by the Licensor to the limited
|
||||
extent necessary to allow You to exercise the Licensed
|
||||
Rights, but not otherwise.
|
||||
|
||||
2. Patent and trademark rights are not licensed under this
|
||||
Public License.
|
||||
|
||||
3. To the extent possible, the Licensor waives any right to
|
||||
collect royalties from You for the exercise of the Licensed
|
||||
Rights, whether directly or through a collecting society
|
||||
under any voluntary or waivable statutory or compulsory
|
||||
licensing scheme. In all other cases the Licensor expressly
|
||||
reserves any right to collect such royalties.
|
||||
|
||||
|
||||
Section 3 -- License Conditions.
|
||||
|
||||
Your exercise of the Licensed Rights is expressly made subject to the
|
||||
following conditions.
|
||||
|
||||
a. Attribution.
|
||||
|
||||
1. If You Share the Licensed Material (including in modified
|
||||
form), You must:
|
||||
|
||||
a. retain the following if it is supplied by the Licensor
|
||||
with the Licensed Material:
|
||||
|
||||
i. identification of the creator(s) of the Licensed
|
||||
Material and any others designated to receive
|
||||
attribution, in any reasonable manner requested by
|
||||
the Licensor (including by pseudonym if
|
||||
designated);
|
||||
|
||||
ii. a copyright notice;
|
||||
|
||||
iii. a notice that refers to this Public License;
|
||||
|
||||
iv. a notice that refers to the disclaimer of
|
||||
warranties;
|
||||
|
||||
v. a URI or hyperlink to the Licensed Material to the
|
||||
extent reasonably practicable;
|
||||
|
||||
b. indicate if You modified the Licensed Material and
|
||||
retain an indication of any previous modifications; and
|
||||
|
||||
c. indicate the Licensed Material is licensed under this
|
||||
Public License, and include the text of, or the URI or
|
||||
hyperlink to, this Public License.
|
||||
|
||||
2. You may satisfy the conditions in Section 3(a)(1) in any
|
||||
reasonable manner based on the medium, means, and context in
|
||||
which You Share the Licensed Material. For example, it may be
|
||||
reasonable to satisfy the conditions by providing a URI or
|
||||
hyperlink to a resource that includes the required
|
||||
information.
|
||||
|
||||
3. If requested by the Licensor, You must remove any of the
|
||||
information required by Section 3(a)(1)(A) to the extent
|
||||
reasonably practicable.
|
||||
|
||||
4. If You Share Adapted Material You produce, the Adapter's
|
||||
License You apply must not prevent recipients of the Adapted
|
||||
Material from complying with this Public License.
|
||||
|
||||
|
||||
Section 4 -- Sui Generis Database Rights.
|
||||
|
||||
Where the Licensed Rights include Sui Generis Database Rights that
|
||||
apply to Your use of the Licensed Material:
|
||||
|
||||
a. for the avoidance of doubt, Section 2(a)(1) grants You the right
|
||||
to extract, reuse, reproduce, and Share all or a substantial
|
||||
portion of the contents of the database;
|
||||
|
||||
b. if You include all or a substantial portion of the database
|
||||
contents in a database in which You have Sui Generis Database
|
||||
Rights, then the database in which You have Sui Generis Database
|
||||
Rights (but not its individual contents) is Adapted Material; and
|
||||
|
||||
c. You must comply with the conditions in Section 3(a) if You Share
|
||||
all or a substantial portion of the contents of the database.
|
||||
|
||||
For the avoidance of doubt, this Section 4 supplements and does not
|
||||
replace Your obligations under this Public License where the Licensed
|
||||
Rights include other Copyright and Similar Rights.
|
||||
|
||||
|
||||
Section 5 -- Disclaimer of Warranties and Limitation of Liability.
|
||||
|
||||
a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
|
||||
EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
|
||||
AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
|
||||
ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
|
||||
IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
|
||||
WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
|
||||
PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
|
||||
ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
|
||||
KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
|
||||
ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
|
||||
|
||||
b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
|
||||
TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
|
||||
NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
|
||||
INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
|
||||
COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
|
||||
USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
|
||||
ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
|
||||
DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
|
||||
IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
|
||||
|
||||
c. The disclaimer of warranties and limitation of liability provided
|
||||
above shall be interpreted in a manner that, to the extent
|
||||
possible, most closely approximates an absolute disclaimer and
|
||||
waiver of all liability.
|
||||
|
||||
|
||||
Section 6 -- Term and Termination.
|
||||
|
||||
a. This Public License applies for the term of the Copyright and
|
||||
Similar Rights licensed here. However, if You fail to comply with
|
||||
this Public License, then Your rights under this Public License
|
||||
terminate automatically.
|
||||
|
||||
b. Where Your right to use the Licensed Material has terminated under
|
||||
Section 6(a), it reinstates:
|
||||
|
||||
1. automatically as of the date the violation is cured, provided
|
||||
it is cured within 30 days of Your discovery of the
|
||||
violation; or
|
||||
|
||||
2. upon express reinstatement by the Licensor.
|
||||
|
||||
For the avoidance of doubt, this Section 6(b) does not affect any
|
||||
right the Licensor may have to seek remedies for Your violations
|
||||
of this Public License.
|
||||
|
||||
c. For the avoidance of doubt, the Licensor may also offer the
|
||||
Licensed Material under separate terms or conditions or stop
|
||||
distributing the Licensed Material at any time; however, doing so
|
||||
will not terminate this Public License.
|
||||
|
||||
d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
|
||||
License.
|
||||
|
||||
|
||||
Section 7 -- Other Terms and Conditions.
|
||||
|
||||
a. The Licensor shall not be bound by any additional or different
|
||||
terms or conditions communicated by You unless expressly agreed.
|
||||
|
||||
b. Any arrangements, understandings, or agreements regarding the
|
||||
Licensed Material not stated herein are separate from and
|
||||
independent of the terms and conditions of this Public License.
|
||||
|
||||
|
||||
Section 8 -- Interpretation.
|
||||
|
||||
a. For the avoidance of doubt, this Public License does not, and
|
||||
shall not be interpreted to, reduce, limit, restrict, or impose
|
||||
conditions on any use of the Licensed Material that could lawfully
|
||||
be made without permission under this Public License.
|
||||
|
||||
b. To the extent possible, if any provision of this Public License is
|
||||
deemed unenforceable, it shall be automatically reformed to the
|
||||
minimum extent necessary to make it enforceable. If the provision
|
||||
cannot be reformed, it shall be severed from this Public License
|
||||
without affecting the enforceability of the remaining terms and
|
||||
conditions.
|
||||
|
||||
c. No term or condition of this Public License will be waived and no
|
||||
failure to comply consented to unless expressly agreed to by the
|
||||
Licensor.
|
||||
|
||||
d. Nothing in this Public License constitutes or may be interpreted
|
||||
as a limitation upon, or waiver of, any privileges and immunities
|
||||
that apply to the Licensor or You, including from the legal
|
||||
processes of any jurisdiction or authority.
|
||||
|
||||
|
||||
=======================================================================
|
||||
|
||||
Creative Commons is not a party to its public
|
||||
licenses. Notwithstanding, Creative Commons may elect to apply one of
|
||||
its public licenses to material it publishes and in those instances
|
||||
will be considered the “Licensor.” The text of the Creative Commons
|
||||
public licenses is dedicated to the public domain under the CC0 Public
|
||||
Domain Dedication. Except for the limited purpose of indicating that
|
||||
material is shared under a Creative Commons public license or as
|
||||
otherwise permitted by the Creative Commons policies published at
|
||||
creativecommons.org/policies, Creative Commons does not authorize the
|
||||
use of the trademark "Creative Commons" or any other trademark or logo
|
||||
of Creative Commons without its prior written consent including,
|
||||
without limitation, in connection with any unauthorized modifications
|
||||
to any of its public licenses or any other arrangements,
|
||||
understandings, or agreements concerning use of licensed material. For
|
||||
the avoidance of doubt, this paragraph does not form part of the
|
||||
public licenses.
|
||||
|
||||
Creative Commons may be contacted at creativecommons.org.
|
||||
|
||||
1
live2d/tha3/models/placeholder.txt
Normal file
@@ -0,0 +1 @@
This is the folder to extract the models to.
BIN
live2d/tha3/models/separable_float/editor.pt
Normal file
Binary file not shown.
BIN
live2d/tha3/models/separable_float/eyebrow_decomposer.pt
Normal file
Binary file not shown.
BIN
live2d/tha3/models/separable_float/eyebrow_morphing_combiner.pt
Normal file
Binary file not shown.
BIN
live2d/tha3/models/separable_float/face_morpher.pt
Normal file
Binary file not shown.
BIN
live2d/tha3/models/separable_float/two_algo_face_body_rotator.pt
Normal file
Binary file not shown.
0
live2d/tha3/module/__init__.py
Normal file
9
live2d/tha3/module/module_factory.py
Normal file
@@ -0,0 +1,9 @@
from abc import ABC, abstractmethod

from torch.nn import Module


class ModuleFactory(ABC):
    @abstractmethod
    def create(self) -> Module:
        pass
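ModuleFactory is just an abstract constructor hook. A minimal hypothetical subclass (not part of this commit) to show the contract:

from torch.nn import Linear, Module

class LinearFactory(ModuleFactory):
    # Illustrative example: capture constructor arguments, build the module lazily in create().
    def __init__(self, in_features: int, out_features: int):
        self.in_features = in_features
        self.out_features = out_features

    def create(self) -> Module:
        return Linear(self.in_features, self.out_features)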
0
live2d/tha3/nn/__init__.py
Normal file
0
live2d/tha3/nn/common/__init__.py
Normal file
55
live2d/tha3/nn/common/conv_block_factory.py
Normal file
@@ -0,0 +1,55 @@
from typing import Optional

from tha3.nn.conv import create_conv7_block_from_block_args, create_conv3_block_from_block_args, \
    create_downsample_block_from_block_args, create_conv3
from tha3.nn.resnet_block import ResnetBlock
from tha3.nn.resnet_block_seperable import ResnetBlockSeparable
from tha3.nn.separable_conv import create_separable_conv7_block, create_separable_conv3_block, \
    create_separable_downsample_block, create_separable_conv3
from tha3.nn.util import BlockArgs


class ConvBlockFactory:
    def __init__(self,
                 block_args: BlockArgs,
                 use_separable_convolution: bool = False):
        self.use_separable_convolution = use_separable_convolution
        self.block_args = block_args

    def create_conv3(self,
                     in_channels: int,
                     out_channels: int,
                     bias: bool,
                     initialization_method: Optional[str] = None):
        if initialization_method is None:
            initialization_method = self.block_args.initialization_method
        if self.use_separable_convolution:
            return create_separable_conv3(
                in_channels, out_channels, bias, initialization_method, self.block_args.use_spectral_norm)
        else:
            return create_conv3(
                in_channels, out_channels, bias, initialization_method, self.block_args.use_spectral_norm)

    def create_conv7_block(self, in_channels: int, out_channels: int):
        if self.use_separable_convolution:
            return create_separable_conv7_block(in_channels, out_channels, self.block_args)
        else:
            return create_conv7_block_from_block_args(in_channels, out_channels, self.block_args)

    def create_conv3_block(self, in_channels: int, out_channels: int):
        if self.use_separable_convolution:
            return create_separable_conv3_block(in_channels, out_channels, self.block_args)
        else:
            return create_conv3_block_from_block_args(in_channels, out_channels, self.block_args)

    def create_downsample_block(self, in_channels: int, out_channels: int, is_output_1x1: bool):
        if self.use_separable_convolution:
            return create_separable_downsample_block(in_channels, out_channels, is_output_1x1, self.block_args)
        else:
            return create_downsample_block_from_block_args(in_channels, out_channels, is_output_1x1)

    def create_resnet_block(self, num_channels: int, is_1x1: bool):
        if self.use_separable_convolution:
            return ResnetBlockSeparable.create(num_channels, is_1x1, block_args=self.block_args)
        else:
            return ResnetBlock.create(num_channels, is_1x1, block_args=self.block_args)
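A hedged usage sketch of the factory above. The BlockArgs settings are illustrative (they mirror the ones used elsewhere in this commit); the point is that the same call sites yield either regular or depthwise-separable blocks depending on a single flag.

from tha3.nn.nonlinearity_factory import ReLUFactory
from tha3.nn.normalization import InstanceNorm2dFactory
from tha3.nn.util import BlockArgs

block_args = BlockArgs(
    initialization_method='he',
    use_spectral_norm=False,
    normalization_layer_factory=InstanceNorm2dFactory(),
    nonlinearity_factory=ReLUFactory(inplace=False))

# Same construction code, two convolution flavours.
dense_factory = ConvBlockFactory(block_args, use_separable_convolution=False)
light_factory = ConvBlockFactory(block_args, use_separable_convolution=True)

downsample = dense_factory.create_downsample_block(32, 64, is_output_1x1=False)
separable_downsample = light_factory.create_downsample_block(32, 64, is_output_1x1=False)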
68
live2d/tha3/nn/common/poser_args.py
Normal file
@@ -0,0 +1,68 @@
|
||||
from typing import Optional
|
||||
|
||||
from torch.nn import Sigmoid, Sequential, Tanh
|
||||
|
||||
from tha3.nn.conv import create_conv3, create_conv3_from_block_args
|
||||
from tha3.nn.nonlinearity_factory import ReLUFactory
|
||||
from tha3.nn.normalization import InstanceNorm2dFactory
|
||||
from tha3.nn.util import BlockArgs
|
||||
|
||||
|
||||
class PoserArgs00:
|
||||
def __init__(self,
|
||||
image_size: int,
|
||||
input_image_channels: int,
|
||||
output_image_channels: int,
|
||||
start_channels: int,
|
||||
num_pose_params: int,
|
||||
block_args: Optional[BlockArgs] = None):
|
||||
self.num_pose_params = num_pose_params
|
||||
self.start_channels = start_channels
|
||||
self.output_image_channels = output_image_channels
|
||||
self.input_image_channels = input_image_channels
|
||||
self.image_size = image_size
|
||||
if block_args is None:
|
||||
self.block_args = BlockArgs(
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=ReLUFactory(inplace=True))
|
||||
else:
|
||||
self.block_args = block_args
|
||||
|
||||
def create_alpha_block(self):
|
||||
from torch.nn import Sequential
|
||||
return Sequential(
|
||||
create_conv3(
|
||||
in_channels=self.start_channels,
|
||||
out_channels=1,
|
||||
bias=True,
|
||||
initialization_method=self.block_args.initialization_method,
|
||||
use_spectral_norm=False),
|
||||
Sigmoid())
|
||||
|
||||
def create_all_channel_alpha_block(self):
|
||||
from torch.nn import Sequential
|
||||
return Sequential(
|
||||
create_conv3(
|
||||
in_channels=self.start_channels,
|
||||
out_channels=self.output_image_channels,
|
||||
bias=True,
|
||||
initialization_method=self.block_args.initialization_method,
|
||||
use_spectral_norm=False),
|
||||
Sigmoid())
|
||||
|
||||
def create_color_change_block(self):
|
||||
return Sequential(
|
||||
create_conv3_from_block_args(
|
||||
in_channels=self.start_channels,
|
||||
out_channels=self.output_image_channels,
|
||||
bias=True,
|
||||
block_args=self.block_args),
|
||||
Tanh())
|
||||
|
||||
def create_grid_change_block(self):
|
||||
return create_conv3(
|
||||
in_channels=self.start_channels,
|
||||
out_channels=2,
|
||||
bias=False,
|
||||
initialization_method='zero',
|
||||
use_spectral_norm=False)
|
||||
121
live2d/tha3/nn/common/poser_encoder_decoder_00.py
Normal file
@@ -0,0 +1,121 @@
|
||||
import math
|
||||
from typing import Optional, List
|
||||
|
||||
import torch
|
||||
from torch import Tensor
|
||||
from torch.nn import ModuleList, Module
|
||||
|
||||
from tha3.nn.common.poser_args import PoserArgs00
|
||||
from tha3.nn.conv import create_conv3_block_from_block_args, create_downsample_block_from_block_args, \
|
||||
create_upsample_block_from_block_args
|
||||
from tha3.nn.nonlinearity_factory import ReLUFactory
|
||||
from tha3.nn.normalization import InstanceNorm2dFactory
|
||||
from tha3.nn.resnet_block import ResnetBlock
|
||||
from tha3.nn.util import BlockArgs
|
||||
|
||||
|
||||
class PoserEncoderDecoder00Args(PoserArgs00):
|
||||
def __init__(self,
|
||||
image_size: int,
|
||||
input_image_channels: int,
|
||||
output_image_channels: int,
|
||||
num_pose_params: int ,
|
||||
start_channels: int,
|
||||
bottleneck_image_size,
|
||||
num_bottleneck_blocks,
|
||||
max_channels: int,
|
||||
block_args: Optional[BlockArgs] = None):
|
||||
super().__init__(
|
||||
image_size, input_image_channels, output_image_channels, start_channels, num_pose_params, block_args)
|
||||
self.max_channels = max_channels
|
||||
self.num_bottleneck_blocks = num_bottleneck_blocks
|
||||
self.bottleneck_image_size = bottleneck_image_size
|
||||
assert bottleneck_image_size > 1
|
||||
|
||||
if block_args is None:
|
||||
self.block_args = BlockArgs(
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=ReLUFactory(inplace=True))
|
||||
else:
|
||||
self.block_args = block_args
|
||||
|
||||
|
||||
class PoserEncoderDecoder00(Module):
|
||||
def __init__(self, args: PoserEncoderDecoder00Args):
|
||||
super().__init__()
|
||||
self.args = args
|
||||
|
||||
self.num_levels = int(math.log2(args.image_size // args.bottleneck_image_size)) + 1
|
||||
|
||||
self.downsample_blocks = ModuleList()
|
||||
self.downsample_blocks.append(
|
||||
create_conv3_block_from_block_args(
|
||||
args.input_image_channels,
|
||||
args.start_channels,
|
||||
args.block_args))
|
||||
current_image_size = args.image_size
|
||||
current_num_channels = args.start_channels
|
||||
while current_image_size > args.bottleneck_image_size:
|
||||
next_image_size = current_image_size // 2
|
||||
next_num_channels = self.get_num_output_channels_from_image_size(next_image_size)
|
||||
self.downsample_blocks.append(create_downsample_block_from_block_args(
|
||||
in_channels=current_num_channels,
|
||||
out_channels=next_num_channels,
|
||||
is_output_1x1=False,
|
||||
block_args=args.block_args))
|
||||
current_image_size = next_image_size
|
||||
current_num_channels = next_num_channels
|
||||
assert len(self.downsample_blocks) == self.num_levels
|
||||
|
||||
self.bottleneck_blocks = ModuleList()
|
||||
self.bottleneck_blocks.append(create_conv3_block_from_block_args(
|
||||
in_channels=current_num_channels + args.num_pose_params,
|
||||
out_channels=current_num_channels,
|
||||
block_args=args.block_args))
|
||||
for i in range(1, args.num_bottleneck_blocks):
|
||||
self.bottleneck_blocks.append(
|
||||
ResnetBlock.create(
|
||||
num_channels=current_num_channels,
|
||||
is1x1=False,
|
||||
block_args=args.block_args))
|
||||
|
||||
self.upsample_blocks = ModuleList()
|
||||
while current_image_size < args.image_size:
|
||||
next_image_size = current_image_size * 2
|
||||
next_num_channels = self.get_num_output_channels_from_image_size(next_image_size)
|
||||
self.upsample_blocks.append(create_upsample_block_from_block_args(
|
||||
in_channels=current_num_channels,
|
||||
out_channels=next_num_channels,
|
||||
block_args=args.block_args))
|
||||
current_image_size = next_image_size
|
||||
current_num_channels = next_num_channels
|
||||
|
||||
def get_num_output_channels_from_level(self, level: int):
|
||||
return self.get_num_output_channels_from_image_size(self.args.image_size // (2 ** level))
|
||||
|
||||
def get_num_output_channels_from_image_size(self, image_size: int):
|
||||
return min(self.args.start_channels * (self.args.image_size // image_size), self.args.max_channels)
|
||||
|
||||
def forward(self, image: Tensor, pose: Optional[Tensor] = None) -> List[Tensor]:
|
||||
if self.args.num_pose_params != 0:
|
||||
assert pose is not None
|
||||
else:
|
||||
assert pose is None
|
||||
outputs = []
|
||||
feature = image
|
||||
outputs.append(feature)
|
||||
for block in self.downsample_blocks:
|
||||
feature = block(feature)
|
||||
outputs.append(feature)
|
||||
if pose is not None:
|
||||
n, c = pose.shape
|
||||
pose = pose.view(n, c, 1, 1).repeat(1, 1, self.args.bottleneck_image_size, self.args.bottleneck_image_size)
|
||||
feature = torch.cat([feature, pose], dim=1)
|
||||
for block in self.bottleneck_blocks:
|
||||
feature = block(feature)
|
||||
outputs.append(feature)
|
||||
for block in self.upsample_blocks:
|
||||
feature = block(feature)
|
||||
outputs.append(feature)
|
||||
outputs.reverse()
|
||||
return outputs
|
||||
92
live2d/tha3/nn/common/poser_encoder_decoder_00_separable.py
Normal file
@@ -0,0 +1,92 @@
|
||||
import math
|
||||
from typing import Optional, List
|
||||
|
||||
import torch
|
||||
from torch import Tensor
|
||||
from torch.nn import ModuleList, Module
|
||||
|
||||
from tha3.nn.common.poser_encoder_decoder_00 import PoserEncoderDecoder00Args
|
||||
from tha3.nn.resnet_block_seperable import ResnetBlockSeparable
|
||||
from tha3.nn.separable_conv import create_separable_conv3_block, create_separable_downsample_block, \
|
||||
create_separable_upsample_block
|
||||
|
||||
|
||||
class PoserEncoderDecoder00Separable(Module):
|
||||
def __init__(self, args: PoserEncoderDecoder00Args):
|
||||
super().__init__()
|
||||
self.args = args
|
||||
|
||||
self.num_levels = int(math.log2(args.image_size // args.bottleneck_image_size)) + 1
|
||||
|
||||
self.downsample_blocks = ModuleList()
|
||||
self.downsample_blocks.append(
|
||||
create_separable_conv3_block(
|
||||
args.input_image_channels,
|
||||
args.start_channels,
|
||||
args.block_args))
|
||||
current_image_size = args.image_size
|
||||
current_num_channels = args.start_channels
|
||||
while current_image_size > args.bottleneck_image_size:
|
||||
next_image_size = current_image_size // 2
|
||||
next_num_channels = self.get_num_output_channels_from_image_size(next_image_size)
|
||||
self.downsample_blocks.append(create_separable_downsample_block(
|
||||
in_channels=current_num_channels,
|
||||
out_channels=next_num_channels,
|
||||
is_output_1x1=False,
|
||||
block_args=args.block_args))
|
||||
current_image_size = next_image_size
|
||||
current_num_channels = next_num_channels
|
||||
assert len(self.downsample_blocks) == self.num_levels
|
||||
|
||||
self.bottleneck_blocks = ModuleList()
|
||||
self.bottleneck_blocks.append(create_separable_conv3_block(
|
||||
in_channels=current_num_channels + args.num_pose_params,
|
||||
out_channels=current_num_channels,
|
||||
block_args=args.block_args))
|
||||
for i in range(1, args.num_bottleneck_blocks):
|
||||
self.bottleneck_blocks.append(
|
||||
ResnetBlockSeparable.create(
|
||||
num_channels=current_num_channels,
|
||||
is1x1=False,
|
||||
block_args=args.block_args))
|
||||
|
||||
self.upsample_blocks = ModuleList()
|
||||
while current_image_size < args.image_size:
|
||||
next_image_size = current_image_size * 2
|
||||
next_num_channels = self.get_num_output_channels_from_image_size(next_image_size)
|
||||
self.upsample_blocks.append(create_separable_upsample_block(
|
||||
in_channels=current_num_channels,
|
||||
out_channels=next_num_channels,
|
||||
block_args=args.block_args))
|
||||
current_image_size = next_image_size
|
||||
current_num_channels = next_num_channels
|
||||
|
||||
def get_num_output_channels_from_level(self, level: int):
|
||||
return self.get_num_output_channels_from_image_size(self.args.image_size // (2 ** level))
|
||||
|
||||
def get_num_output_channels_from_image_size(self, image_size: int):
|
||||
return min(self.args.start_channels * (self.args.image_size // image_size), self.args.max_channels)
|
||||
|
||||
def forward(self, image: Tensor, pose: Optional[Tensor] = None) -> List[Tensor]:
|
||||
if self.args.num_pose_params != 0:
|
||||
assert pose is not None
|
||||
else:
|
||||
assert pose is None
|
||||
outputs = []
|
||||
feature = image
|
||||
outputs.append(feature)
|
||||
for block in self.downsample_blocks:
|
||||
feature = block(feature)
|
||||
outputs.append(feature)
|
||||
if pose is not None:
|
||||
n, c = pose.shape
|
||||
pose = pose.view(n, c, 1, 1).repeat(1, 1, self.args.bottleneck_image_size, self.args.bottleneck_image_size)
|
||||
feature = torch.cat([feature, pose], dim=1)
|
||||
for block in self.bottleneck_blocks:
|
||||
feature = block(feature)
|
||||
outputs.append(feature)
|
||||
for block in self.upsample_blocks:
|
||||
feature = block(feature)
|
||||
outputs.append(feature)
|
||||
outputs.reverse()
|
||||
return outputs
|
||||
125
live2d/tha3/nn/common/resize_conv_encoder_decoder.py
Normal file
@@ -0,0 +1,125 @@
|
||||
import math
|
||||
from typing import Optional, List
|
||||
|
||||
import torch
|
||||
from torch import Tensor
|
||||
from torch.nn import Module, ModuleList, Sequential, Upsample
|
||||
|
||||
from tha3.nn.common.conv_block_factory import ConvBlockFactory
|
||||
from tha3.nn.nonlinearity_factory import LeakyReLUFactory
|
||||
from tha3.nn.normalization import InstanceNorm2dFactory
|
||||
from tha3.nn.util import BlockArgs
|
||||
|
||||
|
||||
class ResizeConvEncoderDecoderArgs:
|
||||
def __init__(self,
|
||||
image_size: int,
|
||||
input_channels: int,
|
||||
start_channels: int,
|
||||
bottleneck_image_size,
|
||||
num_bottleneck_blocks,
|
||||
max_channels: int,
|
||||
block_args: Optional[BlockArgs] = None,
|
||||
upsample_mode: str = 'bilinear',
|
||||
use_separable_convolution=False):
|
||||
self.use_separable_convolution = use_separable_convolution
|
||||
self.upsample_mode = upsample_mode
|
||||
self.block_args = block_args
|
||||
self.max_channels = max_channels
|
||||
self.num_bottleneck_blocks = num_bottleneck_blocks
|
||||
self.bottleneck_image_size = bottleneck_image_size
|
||||
self.start_channels = start_channels
|
||||
self.image_size = image_size
|
||||
self.input_channels = input_channels
|
||||
|
||||
|
||||
class ResizeConvEncoderDecoder(Module):
|
||||
def __init__(self, args: ResizeConvEncoderDecoderArgs):
|
||||
super().__init__()
|
||||
self.args = args
|
||||
|
||||
self.num_levels = int(math.log2(args.image_size // args.bottleneck_image_size)) + 1
|
||||
|
||||
conv_block_factory = ConvBlockFactory(args.block_args, args.use_separable_convolution)
|
||||
|
||||
self.downsample_blocks = ModuleList()
|
||||
self.downsample_blocks.append(conv_block_factory.create_conv7_block(args.input_channels, args.start_channels))
|
||||
current_image_size = args.image_size
|
||||
current_num_channels = args.start_channels
|
||||
while current_image_size > args.bottleneck_image_size:
|
||||
next_image_size = current_image_size // 2
|
||||
next_num_channels = self.get_num_output_channels_from_image_size(next_image_size)
|
||||
self.downsample_blocks.append(conv_block_factory.create_downsample_block(
|
||||
in_channels=current_num_channels,
|
||||
out_channels=next_num_channels,
|
||||
is_output_1x1=False))
|
||||
current_image_size = next_image_size
|
||||
current_num_channels = next_num_channels
|
||||
assert len(self.downsample_blocks) == self.num_levels
|
||||
|
||||
self.bottleneck_blocks = ModuleList()
|
||||
for i in range(args.num_bottleneck_blocks):
|
||||
self.bottleneck_blocks.append(conv_block_factory.create_resnet_block(current_num_channels, is_1x1=False))
|
||||
|
||||
self.output_image_sizes = [current_image_size]
|
||||
self.output_num_channels = [current_num_channels]
|
||||
self.upsample_blocks = ModuleList()
|
||||
if args.upsample_mode == 'nearest':
|
||||
align_corners = None
|
||||
else:
|
||||
align_corners = False
|
||||
while current_image_size < args.image_size:
|
||||
next_image_size = current_image_size * 2
|
||||
next_num_channels = self.get_num_output_channels_from_image_size(next_image_size)
|
||||
self.upsample_blocks.append(
|
||||
Sequential(
|
||||
Upsample(scale_factor=2, mode=args.upsample_mode, align_corners=align_corners),
|
||||
conv_block_factory.create_conv3_block(
|
||||
in_channels=current_num_channels, out_channels=next_num_channels)))
|
||||
current_image_size = next_image_size
|
||||
current_num_channels = next_num_channels
|
||||
self.output_image_sizes.append(current_image_size)
|
||||
self.output_num_channels.append(current_num_channels)
|
||||
|
||||
def get_num_output_channels_from_level(self, level: int):
|
||||
return self.get_num_output_channels_from_image_size(self.args.image_size // (2 ** level))
|
||||
|
||||
def get_num_output_channels_from_image_size(self, image_size: int):
|
||||
return min(self.args.start_channels * (self.args.image_size // image_size), self.args.max_channels)
|
||||
|
||||
def forward(self, feature: Tensor) -> List[Tensor]:
|
||||
outputs = []
|
||||
for block in self.downsample_blocks:
|
||||
feature = block(feature)
|
||||
for block in self.bottleneck_blocks:
|
||||
feature = block(feature)
|
||||
outputs.append(feature)
|
||||
for block in self.upsample_blocks:
|
||||
feature = block(feature)
|
||||
outputs.append(feature)
|
||||
return outputs
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
device = torch.device('cuda')
|
||||
args = ResizeConvEncoderDecoderArgs(
|
||||
image_size=512,
|
||||
input_channels=4 + 6,
|
||||
start_channels=32,
|
||||
bottleneck_image_size=32,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
use_separable_convolution=True,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=LeakyReLUFactory(inplace=False, negative_slope=0.1)))
|
||||
module = ResizeConvEncoderDecoder(args).to(device)
|
||||
print(module.output_image_sizes)
|
||||
print(module.output_num_channels)
|
||||
|
||||
input = torch.zeros(8, 4 + 6, 512, 512, device=device)
|
||||
outputs = module(input)
|
||||
for output in outputs:
|
||||
print(output.shape)
|
||||
155
live2d/tha3/nn/common/resize_conv_unet.py
Normal file
@@ -0,0 +1,155 @@
|
||||
from typing import Optional, List
|
||||
|
||||
import torch
|
||||
from torch import Tensor
|
||||
from torch.nn import ModuleList, Module, Upsample
|
||||
|
||||
from tha3.nn.common.conv_block_factory import ConvBlockFactory
|
||||
from tha3.nn.nonlinearity_factory import ReLUFactory
|
||||
from tha3.nn.normalization import InstanceNorm2dFactory
|
||||
from tha3.nn.util import BlockArgs
|
||||
|
||||
|
||||
class ResizeConvUNetArgs:
|
||||
def __init__(self,
|
||||
image_size: int,
|
||||
input_channels: int,
|
||||
start_channels: int,
|
||||
bottleneck_image_size: int,
|
||||
num_bottleneck_blocks: int,
|
||||
max_channels: int,
|
||||
upsample_mode: str = 'bilinear',
|
||||
block_args: Optional[BlockArgs] = None,
|
||||
use_separable_convolution: bool = False):
|
||||
if block_args is None:
|
||||
block_args = BlockArgs(
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=ReLUFactory(inplace=False))
|
||||
|
||||
self.use_separable_convolution = use_separable_convolution
|
||||
self.block_args = block_args
|
||||
self.upsample_mode = upsample_mode
|
||||
self.max_channels = max_channels
|
||||
self.num_bottleneck_blocks = num_bottleneck_blocks
|
||||
self.bottleneck_image_size = bottleneck_image_size
|
||||
self.input_channels = input_channels
|
||||
self.start_channels = start_channels
|
||||
self.image_size = image_size
|
||||
|
||||
|
||||
class ResizeConvUNet(Module):
|
||||
def __init__(self, args: ResizeConvUNetArgs):
|
||||
super().__init__()
|
||||
self.args = args
|
||||
conv_block_factory = ConvBlockFactory(args.block_args, args.use_separable_convolution)
|
||||
|
||||
self.downsample_blocks = ModuleList()
|
||||
self.downsample_blocks.append(conv_block_factory.create_conv3_block(
|
||||
self.args.input_channels,
|
||||
self.args.start_channels))
|
||||
current_channels = self.args.start_channels
|
||||
current_size = self.args.image_size
|
||||
|
||||
size_to_channel = {
|
||||
current_size: current_channels
|
||||
}
|
||||
while current_size > self.args.bottleneck_image_size:
|
||||
next_size = current_size // 2
|
||||
next_channels = min(self.args.max_channels, current_channels * 2)
|
||||
self.downsample_blocks.append(conv_block_factory.create_downsample_block(
|
||||
current_channels,
|
||||
next_channels,
|
||||
is_output_1x1=False))
|
||||
current_size = next_size
|
||||
current_channels = next_channels
|
||||
size_to_channel[current_size] = current_channels
|
||||
|
||||
self.bottleneck_blocks = ModuleList()
|
||||
for i in range(self.args.num_bottleneck_blocks):
|
||||
self.bottleneck_blocks.append(conv_block_factory.create_resnet_block(current_channels, is_1x1=False))
|
||||
|
||||
self.output_image_sizes = [current_size]
|
||||
self.output_num_channels = [current_channels]
|
||||
self.upsample_blocks = ModuleList()
|
||||
while current_size < self.args.image_size:
|
||||
next_size = current_size * 2
|
||||
next_channels = size_to_channel[next_size]
|
||||
self.upsample_blocks.append(conv_block_factory.create_conv3_block(
|
||||
current_channels + next_channels,
|
||||
next_channels))
|
||||
current_size = next_size
|
||||
current_channels = next_channels
|
||||
self.output_image_sizes.append(current_size)
|
||||
self.output_num_channels.append(current_channels)
|
||||
|
||||
if args.upsample_mode == 'nearest':
|
||||
align_corners = None
|
||||
else:
|
||||
align_corners = False
|
||||
self.double_resolution = Upsample(scale_factor=2, mode=args.upsample_mode, align_corners=align_corners)
|
||||
|
||||
def forward(self, feature: Tensor) -> List[Tensor]:
|
||||
downsampled_features = []
|
||||
for block in self.downsample_blocks:
|
||||
feature = block(feature)
|
||||
downsampled_features.append(feature)
|
||||
|
||||
for block in self.bottleneck_blocks:
|
||||
feature = block(feature)
|
||||
|
||||
outputs = [feature]
|
||||
for i in range(0, len(self.upsample_blocks)):
|
||||
feature = self.double_resolution(feature)
|
||||
feature = torch.cat([feature, downsampled_features[-i - 2]], dim=1)
|
||||
feature = self.upsample_blocks[i](feature)
|
||||
outputs.append(feature)
|
||||
|
||||
return outputs
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
device = torch.device('cuda')
|
||||
|
||||
image_size = 512
|
||||
image_channels = 4
|
||||
num_pose_params = 6
|
||||
args = ResizeConvUNetArgs(
|
||||
image_size=512,
|
||||
input_channels=10,
|
||||
start_channels=32,
|
||||
bottleneck_image_size=32,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
upsample_mode='nearest',
|
||||
use_separable_convolution=False,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=ReLUFactory(inplace=False)))
|
||||
module = ResizeConvUNet(args).to(device)
|
||||
|
||||
image_count = 8
|
||||
input = torch.zeros(image_count, 10, 512, 512, device=device)
|
||||
outputs = module.forward(input)
|
||||
for output in outputs:
|
||||
print(output.shape)
|
||||
|
||||
|
||||
if True:
|
||||
repeat = 100
|
||||
acc = 0.0
|
||||
for i in range(repeat + 2):
|
||||
start = torch.cuda.Event(enable_timing=True)
|
||||
end = torch.cuda.Event(enable_timing=True)
|
||||
|
||||
start.record()
|
||||
module.forward(input)
|
||||
end.record()
|
||||
torch.cuda.synchronize()
|
||||
if i >= 2:
|
||||
elapsed_time = start.elapsed_time(end)
|
||||
print("%d:" % i, elapsed_time)
|
||||
acc = acc + elapsed_time
|
||||
|
||||
print("average:", acc / repeat)
|
||||
189
live2d/tha3/nn/conv.py
Normal file
@@ -0,0 +1,189 @@
from typing import Optional, Union, Callable

from torch.nn import Conv2d, Module, Sequential, ConvTranspose2d

from tha3.module.module_factory import ModuleFactory
from tha3.nn.nonlinearity_factory import resolve_nonlinearity_factory
from tha3.nn.normalization import NormalizationLayerFactory
from tha3.nn.util import wrap_conv_or_linear_module, BlockArgs


def create_conv7(in_channels: int, out_channels: int,
                 bias: bool = False,
                 initialization_method: Union[str, Callable[[Module], Module]] = 'he',
                 use_spectral_norm: bool = False) -> Module:
    return wrap_conv_or_linear_module(
        Conv2d(in_channels, out_channels, kernel_size=7, stride=1, padding=3, bias=bias),
        initialization_method,
        use_spectral_norm)


def create_conv7_from_block_args(in_channels: int,
                                 out_channels: int,
                                 bias: bool = False,
                                 block_args: Optional[BlockArgs] = None) -> Module:
    if block_args is None:
        block_args = BlockArgs()
    return create_conv7(
        in_channels, out_channels, bias,
        block_args.initialization_method,
        block_args.use_spectral_norm)


def create_conv3(in_channels: int,
                 out_channels: int,
                 bias: bool = False,
                 initialization_method: Union[str, Callable[[Module], Module]] = 'he',
                 use_spectral_norm: bool = False) -> Module:
    return wrap_conv_or_linear_module(
        Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=bias),
        initialization_method,
        use_spectral_norm)


def create_conv3_from_block_args(in_channels: int, out_channels: int,
                                 bias: bool = False,
                                 block_args: Optional[BlockArgs] = None):
    if block_args is None:
        block_args = BlockArgs()
    return create_conv3(in_channels, out_channels, bias,
                        block_args.initialization_method,
                        block_args.use_spectral_norm)


def create_conv1(in_channels: int, out_channels: int,
                 initialization_method: Union[str, Callable[[Module], Module]] = 'he',
                 bias: bool = False,
                 use_spectral_norm: bool = False) -> Module:
    return wrap_conv_or_linear_module(
        Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=bias),
        initialization_method,
        use_spectral_norm)


def create_conv1_from_block_args(in_channels: int,
                                 out_channels: int,
                                 bias: bool = False,
                                 block_args: Optional[BlockArgs] = None) -> Module:
    if block_args is None:
        block_args = BlockArgs()
    return create_conv1(
        in_channels=in_channels,
        out_channels=out_channels,
        initialization_method=block_args.initialization_method,
        bias=bias,
        use_spectral_norm=block_args.use_spectral_norm)


def create_conv7_block(in_channels: int, out_channels: int,
                       initialization_method: Union[str, Callable[[Module], Module]] = 'he',
                       nonlinearity_factory: Optional[ModuleFactory] = None,
                       normalization_layer_factory: Optional[NormalizationLayerFactory] = None,
                       use_spectral_norm: bool = False) -> Module:
    nonlinearity_factory = resolve_nonlinearity_factory(nonlinearity_factory)
    return Sequential(
        create_conv7(in_channels, out_channels,
                     bias=False, initialization_method=initialization_method, use_spectral_norm=use_spectral_norm),
        NormalizationLayerFactory.resolve_2d(normalization_layer_factory).create(out_channels, affine=True),
        resolve_nonlinearity_factory(nonlinearity_factory).create())


def create_conv7_block_from_block_args(
        in_channels: int, out_channels: int,
        block_args: Optional[BlockArgs] = None) -> Module:
    if block_args is None:
        block_args = BlockArgs()
    return create_conv7_block(in_channels, out_channels,
                              block_args.initialization_method,
                              block_args.nonlinearity_factory,
                              block_args.normalization_layer_factory,
                              block_args.use_spectral_norm)


def create_conv3_block(in_channels: int, out_channels: int,
                       initialization_method: Union[str, Callable[[Module], Module]] = 'he',
                       nonlinearity_factory: Optional[ModuleFactory] = None,
                       normalization_layer_factory: Optional[NormalizationLayerFactory] = None,
                       use_spectral_norm: bool = False) -> Module:
    nonlinearity_factory = resolve_nonlinearity_factory(nonlinearity_factory)
    return Sequential(
        create_conv3(in_channels, out_channels,
                     bias=False, initialization_method=initialization_method, use_spectral_norm=use_spectral_norm),
        NormalizationLayerFactory.resolve_2d(normalization_layer_factory).create(out_channels, affine=True),
        resolve_nonlinearity_factory(nonlinearity_factory).create())


def create_conv3_block_from_block_args(
        in_channels: int, out_channels: int, block_args: Optional[BlockArgs] = None):
    if block_args is None:
        block_args = BlockArgs()
    return create_conv3_block(in_channels, out_channels,
                              block_args.initialization_method,
                              block_args.nonlinearity_factory,
                              block_args.normalization_layer_factory,
                              block_args.use_spectral_norm)


def create_downsample_block(in_channels: int, out_channels: int,
                            is_output_1x1: bool = False,
                            initialization_method: Union[str, Callable[[Module], Module]] = 'he',
                            nonlinearity_factory: Optional[ModuleFactory] = None,
                            normalization_layer_factory: Optional[NormalizationLayerFactory] = None,
                            use_spectral_norm: bool = False) -> Module:
    if is_output_1x1:
        return Sequential(
            wrap_conv_or_linear_module(
                Conv2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1, bias=False),
                initialization_method,
                use_spectral_norm),
            resolve_nonlinearity_factory(nonlinearity_factory).create())
    else:
        return Sequential(
            wrap_conv_or_linear_module(
                Conv2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1, bias=False),
                initialization_method,
                use_spectral_norm),
            NormalizationLayerFactory.resolve_2d(normalization_layer_factory).create(out_channels, affine=True),
            resolve_nonlinearity_factory(nonlinearity_factory).create())


def create_downsample_block_from_block_args(in_channels: int, out_channels: int,
                                            is_output_1x1: bool = False,
                                            block_args: Optional[BlockArgs] = None):
    if block_args is None:
        block_args = BlockArgs()
    return create_downsample_block(
        in_channels, out_channels,
        is_output_1x1,
        block_args.initialization_method,
        block_args.nonlinearity_factory,
        block_args.normalization_layer_factory,
        block_args.use_spectral_norm)


def create_upsample_block(in_channels: int,
                          out_channels: int,
                          initialization_method: Union[str, Callable[[Module], Module]] = 'he',
                          nonlinearity_factory: Optional[ModuleFactory] = None,
                          normalization_layer_factory: Optional[NormalizationLayerFactory] = None,
                          use_spectral_norm: bool = False) -> Module:
    nonlinearity_factory = resolve_nonlinearity_factory(nonlinearity_factory)
    return Sequential(
        wrap_conv_or_linear_module(
            ConvTranspose2d(in_channels, out_channels, kernel_size=4, stride=2, padding=1, bias=False),
            initialization_method,
            use_spectral_norm),
        NormalizationLayerFactory.resolve_2d(normalization_layer_factory).create(out_channels, affine=True),
        resolve_nonlinearity_factory(nonlinearity_factory).create())


def create_upsample_block_from_block_args(in_channels: int,
                                          out_channels: int,
                                          block_args: Optional[BlockArgs] = None) -> Module:
    if block_args is None:
        block_args = BlockArgs()
    return create_upsample_block(in_channels, out_channels,
                                 block_args.initialization_method,
                                 block_args.nonlinearity_factory,
                                 block_args.normalization_layer_factory,
                                 block_args.use_spectral_norm)
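Editor's note, not part of the committed file: the helpers in conv.py are meant to be chained into conv-norm-nonlinearity stacks. Below is a minimal sketch of that pattern, assuming the tha3 package above is importable; the BlockArgs values mirror the defaults used elsewhere in this commit, and the name "encoder" is illustrative only.

import torch
from torch.nn import Sequential

from tha3.nn.conv import create_conv3_block_from_block_args, create_downsample_block_from_block_args
from tha3.nn.nonlinearity_factory import ReLUFactory
from tha3.nn.normalization import InstanceNorm2dFactory
from tha3.nn.util import BlockArgs

block_args = BlockArgs(
    normalization_layer_factory=InstanceNorm2dFactory(),
    nonlinearity_factory=ReLUFactory(inplace=False))

# 4-channel RGBA input -> 32 channels at full resolution -> 64 channels at half resolution.
encoder = Sequential(
    create_conv3_block_from_block_args(4, 32, block_args),
    create_downsample_block_from_block_args(32, 64, is_output_1x1=False, block_args=block_args))

x = torch.zeros(1, 4, 256, 256)
print(encoder(x).shape)  # expected: torch.Size([1, 64, 128, 128])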
0
live2d/tha3/nn/editor/__init__.py
Normal file
180
live2d/tha3/nn/editor/editor_07.py
Normal file
@@ -0,0 +1,180 @@
from typing import Optional, List

import torch
from matplotlib import pyplot
from torch import Tensor
from torch.nn import Module, Sequential, Tanh, Sigmoid

from tha3.nn.image_processing_util import GridChangeApplier, apply_color_change
from tha3.nn.common.resize_conv_unet import ResizeConvUNet, ResizeConvUNetArgs
from tha3.util import numpy_linear_to_srgb
from tha3.module.module_factory import ModuleFactory
from tha3.nn.conv import create_conv3_from_block_args, create_conv3
from tha3.nn.nonlinearity_factory import ReLUFactory
from tha3.nn.normalization import InstanceNorm2dFactory
from tha3.nn.util import BlockArgs


class Editor07Args:
    def __init__(self,
                 image_size: int = 512,
                 image_channels: int = 4,
                 num_pose_params: int = 6,
                 start_channels: int = 32,
                 bottleneck_image_size=32,
                 num_bottleneck_blocks=6,
                 max_channels: int = 512,
                 upsampling_mode: str = 'nearest',
                 block_args: Optional[BlockArgs] = None,
                 use_separable_convolution: bool = False):
        if block_args is None:
            block_args = BlockArgs(
                normalization_layer_factory=InstanceNorm2dFactory(),
                nonlinearity_factory=ReLUFactory(inplace=False))

        self.block_args = block_args
        self.upsampling_mode = upsampling_mode
        self.max_channels = max_channels
        self.num_bottleneck_blocks = num_bottleneck_blocks
        self.bottleneck_image_size = bottleneck_image_size
        self.start_channels = start_channels
        self.num_pose_params = num_pose_params
        self.image_channels = image_channels
        self.image_size = image_size
        self.use_separable_convolution = use_separable_convolution


class Editor07(Module):
    def __init__(self, args: Editor07Args):
        super().__init__()
        self.args = args

        self.body = ResizeConvUNet(ResizeConvUNetArgs(
            image_size=args.image_size,
            input_channels=2 * args.image_channels + args.num_pose_params + 2,
            start_channels=args.start_channels,
            bottleneck_image_size=args.bottleneck_image_size,
            num_bottleneck_blocks=args.num_bottleneck_blocks,
            max_channels=args.max_channels,
            upsample_mode=args.upsampling_mode,
            block_args=args.block_args,
            use_separable_convolution=args.use_separable_convolution))
        self.color_change_creator = Sequential(
            create_conv3_from_block_args(
                in_channels=self.args.start_channels,
                out_channels=self.args.image_channels,
                bias=True,
                block_args=self.args.block_args),
            Tanh())
        self.alpha_creator = Sequential(
            create_conv3_from_block_args(
                in_channels=self.args.start_channels,
                out_channels=self.args.image_channels,
                bias=True,
                block_args=self.args.block_args),
            Sigmoid())
        self.grid_change_creator = create_conv3(
            in_channels=self.args.start_channels,
            out_channels=2,
            bias=False,
            initialization_method='zero',
            use_spectral_norm=False)
        self.grid_change_applier = GridChangeApplier()

    def forward(self,
                input_original_image: Tensor,
                input_warped_image: Tensor,
                input_grid_change: Tensor,
                pose: Tensor,
                *args) -> List[Tensor]:
        n, c = pose.shape
        pose = pose.view(n, c, 1, 1).repeat(1, 1, self.args.image_size, self.args.image_size)
        feature = torch.cat([input_original_image, input_warped_image, input_grid_change, pose], dim=1)

        feature = self.body.forward(feature)[-1]
        output_grid_change = input_grid_change + self.grid_change_creator(feature)

        output_color_change = self.color_change_creator(feature)
        output_color_change_alpha = self.alpha_creator(feature)
        output_warped_image = self.grid_change_applier.apply(output_grid_change, input_original_image)
        output_color_changed = apply_color_change(output_color_change_alpha, output_color_change, output_warped_image)

        return [
            output_color_changed,
            output_color_change_alpha,
            output_color_change,
            output_warped_image,
            output_grid_change,
        ]


COLOR_CHANGED_IMAGE_INDEX = 0
COLOR_CHANGE_ALPHA_INDEX = 1
COLOR_CHANGE_IMAGE_INDEX = 2
WARPED_IMAGE_INDEX = 3
GRID_CHANGE_INDEX = 4
OUTPUT_LENGTH = 5


class Editor07Factory(ModuleFactory):
    def __init__(self, args: Editor07Args):
        super().__init__()
        self.args = args

    def create(self) -> Module:
        return Editor07(self.args)


def show_image(pytorch_image):
    numpy_image = ((pytorch_image + 1.0) / 2.0).squeeze(0).numpy()
    numpy_image[0:3, :, :] = numpy_linear_to_srgb(numpy_image[0:3, :, :])
    c, h, w = numpy_image.shape
    numpy_image = numpy_image.reshape((c, h * w)).transpose().reshape((h, w, c))
    pyplot.imshow(numpy_image)
    pyplot.show()


if __name__ == "__main__":
    cuda = torch.device('cuda')

    image_size = 512
    image_channels = 4
    num_pose_params = 6
    args = Editor07Args(
        image_size=512,
        image_channels=4,
        start_channels=32,
        num_pose_params=6,
        bottleneck_image_size=32,
        num_bottleneck_blocks=6,
        max_channels=512,
        upsampling_mode='nearest',
        block_args=BlockArgs(
            initialization_method='he',
            use_spectral_norm=False,
            normalization_layer_factory=InstanceNorm2dFactory(),
            nonlinearity_factory=ReLUFactory(inplace=False)))
    module = Editor07(args).to(cuda)

    image_count = 1
    input_image = torch.zeros(image_count, 4, image_size, image_size, device=cuda)
    direct_image = torch.zeros(image_count, 4, image_size, image_size, device=cuda)
    warped_image = torch.zeros(image_count, 4, image_size, image_size, device=cuda)
    grid_change = torch.zeros(image_count, 2, image_size, image_size, device=cuda)
    pose = torch.zeros(image_count, num_pose_params, device=cuda)

    repeat = 100
    acc = 0.0
    for i in range(repeat + 2):
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)

        start.record()
        module.forward(input_image, warped_image, grid_change, pose)
        end.record()
        torch.cuda.synchronize()
        if i >= 2:
            elapsed_time = start.elapsed_time(end)
            print("%d:" % i, elapsed_time)
            acc = acc + elapsed_time

    print("average:", acc / repeat)
0
live2d/tha3/nn/eyebrow_decomposer/__init__.py
Normal file
102
live2d/tha3/nn/eyebrow_decomposer/eyebrow_decomposer_00.py
Normal file
@@ -0,0 +1,102 @@
from typing import List, Optional

import torch
from torch import Tensor
from torch.nn import Module

from tha3.nn.common.poser_encoder_decoder_00 import PoserEncoderDecoder00Args, PoserEncoderDecoder00
from tha3.nn.image_processing_util import apply_color_change
from tha3.module.module_factory import ModuleFactory
from tha3.nn.nonlinearity_factory import ReLUFactory
from tha3.nn.normalization import InstanceNorm2dFactory
from tha3.nn.util import BlockArgs


class EyebrowDecomposer00Args(PoserEncoderDecoder00Args):
    def __init__(self,
                 image_size: int = 128,
                 image_channels: int = 4,
                 start_channels: int = 64,
                 bottleneck_image_size=16,
                 num_bottleneck_blocks=6,
                 max_channels: int = 512,
                 block_args: Optional[BlockArgs] = None):
        super().__init__(
            image_size,
            image_channels,
            image_channels,
            0,
            start_channels,
            bottleneck_image_size,
            num_bottleneck_blocks,
            max_channels,
            block_args)


class EyebrowDecomposer00(Module):
    def __init__(self, args: EyebrowDecomposer00Args):
        super().__init__()
        self.args = args
        self.body = PoserEncoderDecoder00(args)
        self.background_layer_alpha = self.args.create_alpha_block()
        self.background_layer_color_change = self.args.create_color_change_block()
        self.eyebrow_layer_alpha = self.args.create_alpha_block()
        self.eyebrow_layer_color_change = self.args.create_color_change_block()

    def forward(self, image: Tensor, *args) -> List[Tensor]:
        feature = self.body(image)[0]

        background_layer_alpha = self.background_layer_alpha(feature)
        background_layer_color_change = self.background_layer_color_change(feature)
        background_layer_1 = apply_color_change(background_layer_alpha, background_layer_color_change, image)

        eyebrow_layer_alpha = self.eyebrow_layer_alpha(feature)
        eyebrow_layer_color_change = self.eyebrow_layer_color_change(feature)
        eyebrow_layer = apply_color_change(eyebrow_layer_alpha, image, eyebrow_layer_color_change)

        return [
            eyebrow_layer,  # 0
            eyebrow_layer_alpha,  # 1
            eyebrow_layer_color_change,  # 2
            background_layer_1,  # 3
            background_layer_alpha,  # 4
            background_layer_color_change,  # 5
        ]


EYEBROW_LAYER_INDEX = 0
EYEBROW_LAYER_ALPHA_INDEX = 1
EYEBROW_LAYER_COLOR_CHANGE_INDEX = 2
BACKGROUND_LAYER_INDEX = 3
BACKGROUND_LAYER_ALPHA_INDEX = 4
BACKGROUND_LAYER_COLOR_CHANGE_INDEX = 5
OUTPUT_LENGTH = 6


class EyebrowDecomposer00Factory(ModuleFactory):
    def __init__(self, args: EyebrowDecomposer00Args):
        super().__init__()
        self.args = args

    def create(self) -> Module:
        return EyebrowDecomposer00(self.args)


if __name__ == "__main__":
    cuda = torch.device('cuda')
    args = EyebrowDecomposer00Args(
        image_size=128,
        image_channels=4,
        start_channels=64,
        bottleneck_image_size=16,
        num_bottleneck_blocks=3,
        block_args=BlockArgs(
            initialization_method='xavier',
            use_spectral_norm=False,
            normalization_layer_factory=InstanceNorm2dFactory(),
            nonlinearity_factory=ReLUFactory(inplace=True)))
    face_morpher = EyebrowDecomposer00(args).to(cuda)

    image = torch.randn(8, 4, 128, 128, device=cuda)
    outputs = face_morpher.forward(image)
    for i in range(len(outputs)):
        print(i, outputs[i].shape)
109
live2d/tha3/nn/eyebrow_decomposer/eyebrow_decomposer_03.py
Normal file
@@ -0,0 +1,109 @@
from typing import List, Optional

import torch
from torch import Tensor
from torch.nn import Module

from tha3.nn.common.poser_encoder_decoder_00 import PoserEncoderDecoder00Args
from tha3.nn.common.poser_encoder_decoder_00_separable import PoserEncoderDecoder00Separable
from tha3.nn.image_processing_util import apply_color_change
from tha3.module.module_factory import ModuleFactory
from tha3.nn.nonlinearity_factory import ReLUFactory
from tha3.nn.normalization import InstanceNorm2dFactory
from tha3.nn.util import BlockArgs


class EyebrowDecomposer03Args(PoserEncoderDecoder00Args):
    def __init__(self,
                 image_size: int = 128,
                 image_channels: int = 4,
                 start_channels: int = 64,
                 bottleneck_image_size=16,
                 num_bottleneck_blocks=6,
                 max_channels: int = 512,
                 block_args: Optional[BlockArgs] = None):
        super().__init__(
            image_size,
            image_channels,
            image_channels,
            0,
            start_channels,
            bottleneck_image_size,
            num_bottleneck_blocks,
            max_channels,
            block_args)


class EyebrowDecomposer03(Module):
    def __init__(self, args: EyebrowDecomposer03Args):
        super().__init__()
        self.args = args
        self.body = PoserEncoderDecoder00Separable(args)
        self.background_layer_alpha = self.args.create_alpha_block()
        self.background_layer_color_change = self.args.create_color_change_block()
        self.eyebrow_layer_alpha = self.args.create_alpha_block()
        self.eyebrow_layer_color_change = self.args.create_color_change_block()

    def forward(self, image: Tensor, *args) -> List[Tensor]:
        feature = self.body(image)[0]

        background_layer_alpha = self.background_layer_alpha(feature)
        background_layer_color_change = self.background_layer_color_change(feature)
        background_layer_1 = apply_color_change(background_layer_alpha, background_layer_color_change, image)

        eyebrow_layer_alpha = self.eyebrow_layer_alpha(feature)
        eyebrow_layer_color_change = self.eyebrow_layer_color_change(feature)
        eyebrow_layer = apply_color_change(eyebrow_layer_alpha, image, eyebrow_layer_color_change)

        return [
            eyebrow_layer,  # 0
            eyebrow_layer_alpha,  # 1
            eyebrow_layer_color_change,  # 2
            background_layer_1,  # 3
            background_layer_alpha,  # 4
            background_layer_color_change,  # 5
        ]


EYEBROW_LAYER_INDEX = 0
EYEBROW_LAYER_ALPHA_INDEX = 1
EYEBROW_LAYER_COLOR_CHANGE_INDEX = 2
BACKGROUND_LAYER_INDEX = 3
BACKGROUND_LAYER_ALPHA_INDEX = 4
BACKGROUND_LAYER_COLOR_CHANGE_INDEX = 5
OUTPUT_LENGTH = 6


class EyebrowDecomposer03Factory(ModuleFactory):
    def __init__(self, args: EyebrowDecomposer03Args):
        super().__init__()
        self.args = args

    def create(self) -> Module:
        return EyebrowDecomposer03(self.args)


if __name__ == "__main__":
    cuda = torch.device('cuda')
    args = EyebrowDecomposer03Args(
        image_size=128,
        image_channels=4,
        start_channels=64,
        bottleneck_image_size=16,
        num_bottleneck_blocks=6,
        block_args=BlockArgs(
            initialization_method='xavier',
            use_spectral_norm=False,
            normalization_layer_factory=InstanceNorm2dFactory(),
            nonlinearity_factory=ReLUFactory(inplace=True)))
    face_morpher = EyebrowDecomposer03(args).to(cuda)

    #image = torch.randn(8, 4, 128, 128, device=cuda)
    #outputs = face_morpher.forward(image)
    #for i in range(len(outputs)):
    #    print(i, outputs[i].shape)

    state_dict = face_morpher.state_dict()
    index = 0
    for key in state_dict:
        print(f"[{index}]", key, state_dict[key].shape)
        index += 1
@@ -0,0 +1,115 @@
from typing import List, Optional

import torch
from torch import Tensor
from torch.nn import Module

from tha3.nn.common.poser_encoder_decoder_00 import PoserEncoderDecoder00Args, PoserEncoderDecoder00
from tha3.nn.image_processing_util import apply_color_change, apply_grid_change, apply_rgb_change
from tha3.module.module_factory import ModuleFactory
from tha3.nn.nonlinearity_factory import ReLUFactory
from tha3.nn.normalization import InstanceNorm2dFactory
from tha3.nn.util import BlockArgs


class EyebrowMorphingCombiner00Args(PoserEncoderDecoder00Args):
    def __init__(self,
                 image_size: int = 128,
                 image_channels: int = 4,
                 num_pose_params: int = 12,
                 start_channels: int = 64,
                 bottleneck_image_size=16,
                 num_bottleneck_blocks=6,
                 max_channels: int = 512,
                 block_args: Optional[BlockArgs] = None):
        super().__init__(
            image_size,
            2 * image_channels,
            image_channels,
            num_pose_params,
            start_channels,
            bottleneck_image_size,
            num_bottleneck_blocks,
            max_channels,
            block_args)


class EyebrowMorphingCombiner00(Module):
    def __init__(self, args: EyebrowMorphingCombiner00Args):
        super().__init__()
        self.args = args
        self.body = PoserEncoderDecoder00(args)
        self.morphed_eyebrow_layer_grid_change = self.args.create_grid_change_block()
        self.morphed_eyebrow_layer_alpha = self.args.create_alpha_block()
        self.morphed_eyebrow_layer_color_change = self.args.create_color_change_block()
        self.combine_alpha = self.args.create_alpha_block()

    def forward(self, background_layer: Tensor, eyebrow_layer: Tensor, pose: Tensor, *args) -> List[Tensor]:
        combined_image = torch.cat([background_layer, eyebrow_layer], dim=1)
        feature = self.body(combined_image, pose)[0]

        morphed_eyebrow_layer_grid_change = self.morphed_eyebrow_layer_grid_change(feature)
        morphed_eyebrow_layer_alpha = self.morphed_eyebrow_layer_alpha(feature)
        morphed_eyebrow_layer_color_change = self.morphed_eyebrow_layer_color_change(feature)
        warped_eyebrow_layer = apply_grid_change(morphed_eyebrow_layer_grid_change, eyebrow_layer)
        morphed_eyebrow_layer = apply_color_change(
            morphed_eyebrow_layer_alpha, morphed_eyebrow_layer_color_change, warped_eyebrow_layer)

        combine_alpha = self.combine_alpha(feature)
        eyebrow_image = apply_rgb_change(combine_alpha, morphed_eyebrow_layer, background_layer)
        eyebrow_image_no_combine_alpha = apply_rgb_change(
            (morphed_eyebrow_layer[:, 3:4, :, :] + 1.0) / 2.0, morphed_eyebrow_layer, background_layer)

        return [
            eyebrow_image,  # 0
            combine_alpha,  # 1
            eyebrow_image_no_combine_alpha,  # 2
            morphed_eyebrow_layer,  # 3
            morphed_eyebrow_layer_alpha,  # 4
            morphed_eyebrow_layer_color_change,  # 5
            warped_eyebrow_layer,  # 6
            morphed_eyebrow_layer_grid_change,  # 7
        ]


EYEBROW_IMAGE_INDEX = 0
COMBINE_ALPHA_INDEX = 1
EYEBROW_IMAGE_NO_COMBINE_ALPHA_INDEX = 2
MORPHED_EYEBROW_LAYER_INDEX = 3
MORPHED_EYEBROW_LAYER_ALPHA_INDEX = 4
MORPHED_EYEBROW_LAYER_COLOR_CHANGE_INDEX = 5
WARPED_EYEBROW_LAYER_INDEX = 6
MORPHED_EYEBROW_LAYER_GRID_CHANGE_INDEX = 7
OUTPUT_LENGTH = 8


class EyebrowMorphingCombiner00Factory(ModuleFactory):
    def __init__(self, args: EyebrowMorphingCombiner00Args):
        super().__init__()
        self.args = args

    def create(self) -> Module:
        return EyebrowMorphingCombiner00(self.args)


if __name__ == "__main__":
    cuda = torch.device('cuda')
    args = EyebrowMorphingCombiner00Args(
        image_size=128,
        image_channels=4,
        num_pose_params=12,
        start_channels=64,
        bottleneck_image_size=16,
        num_bottleneck_blocks=3,
        block_args=BlockArgs(
            initialization_method='xavier',
            use_spectral_norm=False,
            normalization_layer_factory=InstanceNorm2dFactory(),
            nonlinearity_factory=ReLUFactory(inplace=True)))
    face_morpher = EyebrowMorphingCombiner00(args).to(cuda)

    background_layer = torch.randn(8, 4, 128, 128, device=cuda)
    eyebrow_layer = torch.randn(8, 4, 128, 128, device=cuda)
    pose = torch.randn(8, 12, device=cuda)
    outputs = face_morpher.forward(background_layer, eyebrow_layer, pose)
    for i in range(len(outputs)):
        print(i, outputs[i].shape)
@@ -0,0 +1,117 @@
from typing import List, Optional

import torch
from torch import Tensor
from torch.nn import Module

from tha3.nn.common.poser_encoder_decoder_00 import PoserEncoderDecoder00Args
from tha3.nn.common.poser_encoder_decoder_00_separable import PoserEncoderDecoder00Separable
from tha3.nn.image_processing_util import apply_color_change, apply_rgb_change, GridChangeApplier
from tha3.module.module_factory import ModuleFactory
from tha3.nn.nonlinearity_factory import ReLUFactory
from tha3.nn.normalization import InstanceNorm2dFactory
from tha3.nn.util import BlockArgs


class EyebrowMorphingCombiner03Args(PoserEncoderDecoder00Args):
    def __init__(self,
                 image_size: int = 128,
                 image_channels: int = 4,
                 num_pose_params: int = 12,
                 start_channels: int = 64,
                 bottleneck_image_size=16,
                 num_bottleneck_blocks=6,
                 max_channels: int = 512,
                 block_args: Optional[BlockArgs] = None):
        super().__init__(
            image_size,
            2 * image_channels,
            image_channels,
            num_pose_params,
            start_channels,
            bottleneck_image_size,
            num_bottleneck_blocks,
            max_channels,
            block_args)


class EyebrowMorphingCombiner03(Module):
    def __init__(self, args: EyebrowMorphingCombiner03Args):
        super().__init__()
        self.args = args
        self.body = PoserEncoderDecoder00Separable(args)
        self.morphed_eyebrow_layer_grid_change = self.args.create_grid_change_block()
        self.morphed_eyebrow_layer_alpha = self.args.create_alpha_block()
        self.morphed_eyebrow_layer_color_change = self.args.create_color_change_block()
        self.combine_alpha = self.args.create_alpha_block()
        self.grid_change_applier = GridChangeApplier()

    def forward(self, background_layer: Tensor, eyebrow_layer: Tensor, pose: Tensor, *args) -> List[Tensor]:
        combined_image = torch.cat([background_layer, eyebrow_layer], dim=1)
        feature = self.body(combined_image, pose)[0]

        morphed_eyebrow_layer_grid_change = self.morphed_eyebrow_layer_grid_change(feature)
        morphed_eyebrow_layer_alpha = self.morphed_eyebrow_layer_alpha(feature)
        morphed_eyebrow_layer_color_change = self.morphed_eyebrow_layer_color_change(feature)
        warped_eyebrow_layer = self.grid_change_applier.apply(morphed_eyebrow_layer_grid_change, eyebrow_layer)
        morphed_eyebrow_layer = apply_color_change(
            morphed_eyebrow_layer_alpha, morphed_eyebrow_layer_color_change, warped_eyebrow_layer)

        combine_alpha = self.combine_alpha(feature)
        eyebrow_image = apply_rgb_change(combine_alpha, morphed_eyebrow_layer, background_layer)
        eyebrow_image_no_combine_alpha = apply_rgb_change(
            (morphed_eyebrow_layer[:, 3:4, :, :] + 1.0) / 2.0, morphed_eyebrow_layer, background_layer)

        return [
            eyebrow_image,  # 0
            combine_alpha,  # 1
            eyebrow_image_no_combine_alpha,  # 2
            morphed_eyebrow_layer,  # 3
            morphed_eyebrow_layer_alpha,  # 4
            morphed_eyebrow_layer_color_change,  # 5
            warped_eyebrow_layer,  # 6
            morphed_eyebrow_layer_grid_change,  # 7
        ]


EYEBROW_IMAGE_INDEX = 0
COMBINE_ALPHA_INDEX = 1
EYEBROW_IMAGE_NO_COMBINE_ALPHA_INDEX = 2
MORPHED_EYEBROW_LAYER_INDEX = 3
MORPHED_EYEBROW_LAYER_ALPHA_INDEX = 4
MORPHED_EYEBROW_LAYER_COLOR_CHANGE_INDEX = 5
WARPED_EYEBROW_LAYER_INDEX = 6
MORPHED_EYEBROW_LAYER_GRID_CHANGE_INDEX = 7
OUTPUT_LENGTH = 8


class EyebrowMorphingCombiner03Factory(ModuleFactory):
    def __init__(self, args: EyebrowMorphingCombiner03Args):
        super().__init__()
        self.args = args

    def create(self) -> Module:
        return EyebrowMorphingCombiner03(self.args)


if __name__ == "__main__":
    cuda = torch.device('cuda')
    args = EyebrowMorphingCombiner03Args(
        image_size=128,
        image_channels=4,
        num_pose_params=12,
        start_channels=64,
        bottleneck_image_size=16,
        num_bottleneck_blocks=3,
        block_args=BlockArgs(
            initialization_method='xavier',
            use_spectral_norm=False,
            normalization_layer_factory=InstanceNorm2dFactory(),
            nonlinearity_factory=ReLUFactory(inplace=True)))
    face_morpher = EyebrowMorphingCombiner03(args).to(cuda)

    background_layer = torch.randn(8, 4, 128, 128, device=cuda)
    eyebrow_layer = torch.randn(8, 4, 128, 128, device=cuda)
    pose = torch.randn(8, 12, device=cuda)
    outputs = face_morpher.forward(background_layer, eyebrow_layer, pose)
    for i in range(len(outputs)):
        print(i, outputs[i].shape)
0
live2d/tha3/nn/face_morpher/__init__.py
Normal file
241
live2d/tha3/nn/face_morpher/face_morpher_08.py
Normal file
@@ -0,0 +1,241 @@
import math
from typing import List, Optional

import torch
from torch import Tensor
from torch.nn import ModuleList, Sequential, Sigmoid, Tanh, Module
from torch.nn.functional import affine_grid, grid_sample

from tha3.module.module_factory import ModuleFactory
from tha3.nn.conv import create_conv3_block_from_block_args, \
    create_downsample_block_from_block_args, create_upsample_block_from_block_args, create_conv3_from_block_args, \
    create_conv3
from tha3.nn.nonlinearity_factory import LeakyReLUFactory
from tha3.nn.normalization import InstanceNorm2dFactory
from tha3.nn.resnet_block import ResnetBlock
from tha3.nn.util import BlockArgs


class FaceMorpher08Args:
    def __init__(self,
                 image_size: int = 256,
                 image_channels: int = 4,
                 num_expression_params: int = 67,
                 start_channels: int = 16,
                 bottleneck_image_size=4,
                 num_bottleneck_blocks=3,
                 max_channels: int = 512,
                 block_args: Optional[BlockArgs] = None):
        self.max_channels = max_channels
        self.num_bottleneck_blocks = num_bottleneck_blocks
        assert bottleneck_image_size > 1
        self.bottleneck_image_size = bottleneck_image_size
        self.start_channels = start_channels
        self.image_channels = image_channels
        self.num_expression_params = num_expression_params
        self.image_size = image_size

        if block_args is None:
            self.block_args = BlockArgs(
                normalization_layer_factory=InstanceNorm2dFactory(),
                nonlinearity_factory=LeakyReLUFactory(negative_slope=0.2, inplace=True))
        else:
            self.block_args = block_args


class FaceMorpher08(Module):
    def __init__(self, args: FaceMorpher08Args):
        super().__init__()
        self.args = args
        self.num_levels = int(math.log2(args.image_size // args.bottleneck_image_size)) + 1

        self.downsample_blocks = ModuleList()
        self.downsample_blocks.append(
            create_conv3_block_from_block_args(
                args.image_channels,
                args.start_channels,
                args.block_args))
        current_image_size = args.image_size
        current_num_channels = args.start_channels
        while current_image_size > args.bottleneck_image_size:
            next_image_size = current_image_size // 2
            next_num_channels = self.get_num_output_channels_from_image_size(next_image_size)
            self.downsample_blocks.append(create_downsample_block_from_block_args(
                in_channels=current_num_channels,
                out_channels=next_num_channels,
                is_output_1x1=False,
                block_args=args.block_args))
            current_image_size = next_image_size
            current_num_channels = next_num_channels
        assert len(self.downsample_blocks) == self.num_levels

        self.bottleneck_blocks = ModuleList()
        self.bottleneck_blocks.append(create_conv3_block_from_block_args(
            in_channels=current_num_channels + args.num_expression_params,
            out_channels=current_num_channels,
            block_args=args.block_args))
        for i in range(1, args.num_bottleneck_blocks):
            self.bottleneck_blocks.append(
                ResnetBlock.create(
                    num_channels=current_num_channels,
                    is1x1=False,
                    block_args=args.block_args))

        self.upsample_blocks = ModuleList()
        while current_image_size < args.image_size:
            next_image_size = current_image_size * 2
            next_num_channels = self.get_num_output_channels_from_image_size(next_image_size)
            self.upsample_blocks.append(create_upsample_block_from_block_args(
                in_channels=current_num_channels,
                out_channels=next_num_channels,
                block_args=args.block_args))
            current_image_size = next_image_size
            current_num_channels = next_num_channels

        self.iris_mouth_grid_change = self.create_grid_change_block()
        self.iris_mouth_color_change = self.create_color_change_block()
        self.iris_mouth_alpha = self.create_alpha_block()

        self.eye_color_change = self.create_color_change_block()
        self.eye_alpha = self.create_alpha_block()

    def create_alpha_block(self):
        return Sequential(
            create_conv3(
                in_channels=self.args.start_channels,
                out_channels=1,
                bias=True,
                initialization_method=self.args.block_args.initialization_method,
                use_spectral_norm=False),
            Sigmoid())

    def create_color_change_block(self):
        return Sequential(
            create_conv3_from_block_args(
                in_channels=self.args.start_channels,
                out_channels=self.args.image_channels,
                bias=True,
                block_args=self.args.block_args),
            Tanh())

    def create_grid_change_block(self):
        return create_conv3(
            in_channels=self.args.start_channels,
            out_channels=2,
            bias=False,
            initialization_method='zero',
            use_spectral_norm=False)

    def get_num_output_channels_from_level(self, level: int):
        return self.get_num_output_channels_from_image_size(self.args.image_size // (2 ** level))

    def get_num_output_channels_from_image_size(self, image_size: int):
        return min(self.args.start_channels * (self.args.image_size // image_size), self.args.max_channels)

    def merge_down(self, top_layer: Tensor, bottom_layer: Tensor):
        top_layer_rgb = top_layer[:, 0:3, :, :]
        top_layer_a = top_layer[:, 3:4, :, :]
        return bottom_layer * (1 - top_layer_a) + torch.cat([top_layer_rgb * top_layer_a, top_layer_a], dim=1)

    def apply_grid_change(self, grid_change, image: Tensor) -> Tensor:
        n, c, h, w = image.shape
        device = grid_change.device
        grid_change = torch.transpose(grid_change.view(n, 2, h * w), 1, 2).view(n, h, w, 2)
        identity = torch.tensor(
            [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]],
            device=device,
            dtype=grid_change.dtype).unsqueeze(0).repeat(n, 1, 1)
        base_grid = affine_grid(identity, [n, c, h, w], align_corners=False)
        grid = base_grid + grid_change
        resampled_image = grid_sample(image, grid, mode='bilinear', padding_mode='border', align_corners=False)
        return resampled_image

    def apply_color_change(self, alpha, color_change, image: Tensor) -> Tensor:
        return color_change * alpha + image * (1 - alpha)

    def forward(self, image: Tensor, pose: Tensor, *args) -> List[Tensor]:
        feature = image
        for block in self.downsample_blocks:
            feature = block(feature)
        n, c = pose.shape
        pose = pose.view(n, c, 1, 1).repeat(1, 1, self.args.bottleneck_image_size, self.args.bottleneck_image_size)
        feature = torch.cat([feature, pose], dim=1)
        for block in self.bottleneck_blocks:
            feature = block(feature)
        for block in self.upsample_blocks:
            feature = block(feature)

        iris_mouth_grid_change = self.iris_mouth_grid_change(feature)
        iris_mouth_image_0 = self.apply_grid_change(iris_mouth_grid_change, image)
        iris_mouth_color_change = self.iris_mouth_color_change(feature)
        iris_mouth_alpha = self.iris_mouth_alpha(feature)
        iris_mouth_image_1 = self.apply_color_change(iris_mouth_alpha, iris_mouth_color_change, iris_mouth_image_0)

        eye_color_change = self.eye_color_change(feature)
        eye_alpha = self.eye_alpha(feature)
        output_image = self.apply_color_change(eye_alpha, eye_color_change, iris_mouth_image_1.detach())

        return [
            output_image,  # 0
            eye_alpha,  # 1
            eye_color_change,  # 2
            iris_mouth_image_1,  # 3
            iris_mouth_alpha,  # 4
            iris_mouth_color_change,  # 5
            iris_mouth_image_0,  # 6
        ]


OUTPUT_IMAGE_INDEX = 0
EYE_ALPHA_INDEX = 1
EYE_COLOR_CHANGE_INDEX = 2
IRIS_MOUTH_IMAGE_1_INDEX = 3
IRIS_MOUTH_ALPHA_INDEX = 4
IRIS_MOUTH_COLOR_CHANGE_INDEX = 5
IRIS_MOUTh_IMAGE_0_INDEX = 6


class FaceMorpher08Factory(ModuleFactory):
    def __init__(self, args: FaceMorpher08Args):
        super().__init__()
        self.args = args

    def create(self) -> Module:
        return FaceMorpher08(self.args)


if __name__ == "__main__":
    cuda = torch.device('cuda')
    args = FaceMorpher08Args(
        image_size=256,
        image_channels=4,
        num_expression_params=12,
        start_channels=64,
        bottleneck_image_size=32,
        num_bottleneck_blocks=6,
        block_args=BlockArgs(
            initialization_method='he',
            use_spectral_norm=False,
            normalization_layer_factory=InstanceNorm2dFactory(),
            nonlinearity_factory=LeakyReLUFactory(inplace=True, negative_slope=0.2)))
    module = FaceMorpher08(args).to(cuda)

    image = torch.zeros(16, 4, 256, 256, device=cuda)
    pose = torch.zeros(16, 12, device=cuda)

    repeat = 100
    acc = 0.0
    for i in range(repeat + 2):
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)

        start.record()
        module.forward(image, pose)
        end.record()
        torch.cuda.synchronize()

        if i >= 2:
            elapsed_time = start.elapsed_time(end)
            print("%d:" % i, elapsed_time)
            acc += elapsed_time

    print("average:", acc / repeat)
187
live2d/tha3/nn/face_morpher/face_morpher_09.py
Normal file
@@ -0,0 +1,187 @@
from typing import List, Optional

import torch
from torch import Tensor
from torch.nn import Sequential, Sigmoid, Tanh, Module
from torch.nn.functional import affine_grid, grid_sample

from tha3.nn.common.poser_encoder_decoder_00 import PoserEncoderDecoder00Args
from tha3.nn.common.poser_encoder_decoder_00_separable import PoserEncoderDecoder00Separable
from tha3.nn.image_processing_util import GridChangeApplier
from tha3.module.module_factory import ModuleFactory
from tha3.nn.conv import create_conv3_from_block_args, create_conv3
from tha3.nn.nonlinearity_factory import LeakyReLUFactory
from tha3.nn.normalization import InstanceNorm2dFactory
from tha3.nn.util import BlockArgs


class FaceMorpher09Args(PoserEncoderDecoder00Args):
    def __init__(self,
                 image_size: int = 256,
                 image_channels: int = 4,
                 num_pose_params: int = 67,
                 start_channels: int = 16,
                 bottleneck_image_size=4,
                 num_bottleneck_blocks=3,
                 max_channels: int = 512,
                 block_args: Optional[BlockArgs] = None):
        super().__init__(
            image_size,
            image_channels,
            image_channels,
            num_pose_params,
            start_channels,
            bottleneck_image_size,
            num_bottleneck_blocks,
            max_channels,
            block_args)


class FaceMorpher09(Module):
    def __init__(self, args: FaceMorpher09Args):
        super().__init__()
        self.args = args
        self.body = PoserEncoderDecoder00Separable(args)

        self.iris_mouth_grid_change = self.create_grid_change_block()
        self.iris_mouth_color_change = self.create_color_change_block()
        self.iris_mouth_alpha = self.create_alpha_block()

        self.eye_color_change = self.create_color_change_block()
        self.eye_alpha = self.create_alpha_block()

        self.grid_change_applier = GridChangeApplier()

    def create_alpha_block(self):
        return Sequential(
            create_conv3(
                in_channels=self.args.start_channels,
                out_channels=1,
                bias=True,
                initialization_method=self.args.block_args.initialization_method,
                use_spectral_norm=False),
            Sigmoid())

    def create_color_change_block(self):
        return Sequential(
            create_conv3_from_block_args(
                in_channels=self.args.start_channels,
                out_channels=self.args.input_image_channels,
                bias=True,
                block_args=self.args.block_args),
            Tanh())

    def create_grid_change_block(self):
        return create_conv3(
            in_channels=self.args.start_channels,
            out_channels=2,
            bias=False,
            initialization_method='zero',
            use_spectral_norm=False)

    def get_num_output_channels_from_level(self, level: int):
        return self.get_num_output_channels_from_image_size(self.args.image_size // (2 ** level))

    def get_num_output_channels_from_image_size(self, image_size: int):
        return min(self.args.start_channels * (self.args.image_size // image_size), self.args.max_channels)

    def forward(self, image: Tensor, pose: Tensor, *args) -> List[Tensor]:
        feature = self.body(image, pose)[0]

        iris_mouth_grid_change = self.iris_mouth_grid_change(feature)
        iris_mouth_image_0 = self.grid_change_applier.apply(iris_mouth_grid_change, image)
        iris_mouth_color_change = self.iris_mouth_color_change(feature)
        iris_mouth_alpha = self.iris_mouth_alpha(feature)
        iris_mouth_image_1 = self.apply_color_change(iris_mouth_alpha, iris_mouth_color_change, iris_mouth_image_0)

        eye_color_change = self.eye_color_change(feature)
        eye_alpha = self.eye_alpha(feature)
        output_image = self.apply_color_change(eye_alpha, eye_color_change, iris_mouth_image_1.detach())

        return [
            output_image,  # 0
            eye_alpha,  # 1
            eye_color_change,  # 2
            iris_mouth_image_1,  # 3
            iris_mouth_alpha,  # 4
            iris_mouth_color_change,  # 5
            iris_mouth_image_0,  # 6
        ]

    def merge_down(self, top_layer: Tensor, bottom_layer: Tensor):
        top_layer_rgb = top_layer[:, 0:3, :, :]
        top_layer_a = top_layer[:, 3:4, :, :]
        return bottom_layer * (1 - top_layer_a) + torch.cat([top_layer_rgb * top_layer_a, top_layer_a], dim=1)

    def apply_grid_change(self, grid_change, image: Tensor) -> Tensor:
        n, c, h, w = image.shape
        device = grid_change.device
        grid_change = torch.transpose(grid_change.view(n, 2, h * w), 1, 2).view(n, h, w, 2)
        identity = torch.tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], device=device).unsqueeze(0).repeat(n, 1, 1)
        base_grid = affine_grid(identity, [n, c, h, w], align_corners=False)
        grid = base_grid + grid_change
        resampled_image = grid_sample(image, grid, mode='bilinear', padding_mode='border', align_corners=False)
        return resampled_image

    def apply_color_change(self, alpha, color_change, image: Tensor) -> Tensor:
        return color_change * alpha + image * (1 - alpha)


OUTPUT_IMAGE_INDEX = 0
EYE_ALPHA_INDEX = 1
EYE_COLOR_CHANGE_INDEX = 2
IRIS_MOUTH_IMAGE_1_INDEX = 3
IRIS_MOUTH_ALPHA_INDEX = 4
IRIS_MOUTH_COLOR_CHANGE_INDEX = 5
IRIS_MOUTh_IMAGE_0_INDEX = 6


class FaceMorpher09Factory(ModuleFactory):
    def __init__(self, args: FaceMorpher09Args):
        super().__init__()
        self.args = args

    def create(self) -> Module:
        return FaceMorpher09(self.args)


if __name__ == "__main__":
    cuda = torch.device('cuda')
    args = FaceMorpher09Args(
        image_size=256,
        image_channels=4,
        num_pose_params=12,
        start_channels=64,
        bottleneck_image_size=32,
        num_bottleneck_blocks=6,
        block_args=BlockArgs(
            initialization_method='xavier',
            use_spectral_norm=False,
            normalization_layer_factory=InstanceNorm2dFactory(),
            nonlinearity_factory=LeakyReLUFactory(inplace=True, negative_slope=0.2)))
    module = FaceMorpher09(args).to(cuda)

    image = torch.zeros(16, 4, 256, 256, device=cuda)
    pose = torch.zeros(16, 12, device=cuda)

    state_dict = module.state_dict()
    for key in state_dict:
        print(key, state_dict[key].shape)

    if False:
        repeat = 100
        acc = 0.0
        for i in range(repeat + 2):
            start = torch.cuda.Event(enable_timing=True)
            end = torch.cuda.Event(enable_timing=True)

            start.record()
            module.forward(image, pose)
            end.record()
            torch.cuda.synchronize()

            if i >= 2:
                elapsed_time = start.elapsed_time(end)
                print("%d:" % i, elapsed_time)
                acc += elapsed_time

        print("average:", acc / repeat)
58
live2d/tha3/nn/image_processing_util.py
Normal file
@@ -0,0 +1,58 @@
import torch
from torch import Tensor
from torch.nn.functional import affine_grid, grid_sample


def apply_rgb_change(alpha: Tensor, color_change: Tensor, image: Tensor):
    image_rgb = image[:, 0:3, :, :]
    color_change_rgb = color_change[:, 0:3, :, :]
    output_rgb = color_change_rgb * alpha + image_rgb * (1 - alpha)
    return torch.cat([output_rgb, image[:, 3:4, :, :]], dim=1)


def apply_grid_change(grid_change, image: Tensor) -> Tensor:
    n, c, h, w = image.shape
    device = grid_change.device
    grid_change = torch.transpose(grid_change.view(n, 2, h * w), 1, 2).view(n, h, w, 2)
    identity = torch.tensor(
        [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]],
        dtype=grid_change.dtype,
        device=device).unsqueeze(0).repeat(n, 1, 1)
    base_grid = affine_grid(identity, [n, c, h, w], align_corners=False)
    grid = base_grid + grid_change
    resampled_image = grid_sample(image, grid, mode='bilinear', padding_mode='border', align_corners=False)
    return resampled_image


class GridChangeApplier:
    def __init__(self):
        self.last_n = None
        self.last_device = None
        self.last_identity = None

    def apply(self, grid_change: Tensor, image: Tensor, align_corners: bool = False) -> Tensor:
        n, c, h, w = image.shape
        device = grid_change.device
        grid_change = torch.transpose(grid_change.view(n, 2, h * w), 1, 2).view(n, h, w, 2)

        if n == self.last_n and device == self.last_device:
            identity = self.last_identity
        else:
            identity = torch.tensor(
                [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]],
                dtype=grid_change.dtype,
                device=device,
                requires_grad=False) \
                .unsqueeze(0).repeat(n, 1, 1)
            self.last_identity = identity
            self.last_n = n
            self.last_device = device
        base_grid = affine_grid(identity, [n, c, h, w], align_corners=align_corners)

        grid = base_grid + grid_change
        resampled_image = grid_sample(image, grid, mode='bilinear', padding_mode='border', align_corners=align_corners)
        return resampled_image


def apply_color_change(alpha, color_change, image: Tensor) -> Tensor:
    return color_change * alpha + image * (1 - alpha)
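Editor's note, not part of the committed file: a minimal sketch of GridChangeApplier, assuming the tha3 package above is importable. Because the cached base grid is the identity affine grid, a zero grid change should reproduce the input image up to floating-point error; the tensor shapes follow the NCHW RGBA convention used throughout this commit.

import torch
from tha3.nn.image_processing_util import GridChangeApplier

applier = GridChangeApplier()
image = torch.rand(2, 4, 128, 128)          # NCHW, RGBA
grid_change = torch.zeros(2, 2, 128, 128)   # per-pixel (x, y) offsets added to the identity grid
resampled = applier.apply(grid_change, image)
print(torch.allclose(resampled, image, atol=1e-5))  # expected: True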
76
live2d/tha3/nn/init_function.py
Normal file
@@ -0,0 +1,76 @@
from typing import Callable

import torch
from torch import zero_
from torch.nn import Module
from torch.nn.init import kaiming_normal_, xavier_normal_, normal_


def create_init_function(method: str = 'none') -> Callable[[Module], Module]:
    def init(module: Module):
        if method == 'none':
            return module
        elif method == 'he':
            kaiming_normal_(module.weight)
            return module
        elif method == 'xavier':
            xavier_normal_(module.weight)
            return module
        elif method == 'dcgan':
            normal_(module.weight, 0.0, 0.02)
            return module
        elif method == 'dcgan_001':
            normal_(module.weight, 0.0, 0.01)
            return module
        elif method == "zero":
            with torch.no_grad():
                zero_(module.weight)
            return module
        else:
            raise ValueError("Invalid initialization method %s" % method)

    return init


class HeInitialization:
    def __init__(self, a: int = 0, mode: str = 'fan_in', nonlinearity: str = 'leaky_relu'):
        self.nonlinearity = nonlinearity
        self.mode = mode
        self.a = a

    def __call__(self, module: Module) -> Module:
        with torch.no_grad():
            kaiming_normal_(module.weight, a=self.a, mode=self.mode, nonlinearity=self.nonlinearity)
            return module


class NormalInitialization:
    def __init__(self, mean: float = 0.0, std: float = 1.0):
        self.std = std
        self.mean = mean

    def __call__(self, module: Module) -> Module:
        with torch.no_grad():
            normal_(module.weight, self.mean, self.std)
            return module


class XavierInitialization:
    def __init__(self, gain: float = 1.0):
        self.gain = gain

    def __call__(self, module: Module) -> Module:
        with torch.no_grad():
            xavier_normal_(module.weight, self.gain)
            return module


class ZeroInitialization:
    def __call__(self, module: Module) -> Module:
        with torch.no_grad():
            zero_(module.weight)
        return module


class NoInitialization:
    def __call__(self, module: Module) -> Module:
        return module
72
live2d/tha3/nn/nonlinearity_factory.py
Normal file
72
live2d/tha3/nn/nonlinearity_factory.py
Normal file
@@ -0,0 +1,72 @@
|
||||
from typing import Optional
|
||||
|
||||
from torch.nn import Module, ReLU, LeakyReLU, ELU, ReLU6, Hardswish, SiLU, Tanh, Sigmoid
|
||||
|
||||
from tha3.module.module_factory import ModuleFactory
|
||||
|
||||
|
||||
class ReLUFactory(ModuleFactory):
|
||||
def __init__(self, inplace: bool = False):
|
||||
self.inplace = inplace
|
||||
|
||||
def create(self) -> Module:
|
||||
return ReLU(self.inplace)
|
||||
|
||||
|
||||
class LeakyReLUFactory(ModuleFactory):
|
||||
def __init__(self, inplace: bool = False, negative_slope: float = 1e-2):
|
||||
self.negative_slope = negative_slope
|
||||
self.inplace = inplace
|
||||
|
||||
def create(self) -> Module:
|
||||
return LeakyReLU(inplace=self.inplace, negative_slope=self.negative_slope)
|
||||
|
||||
|
||||
class ELUFactory(ModuleFactory):
|
||||
def __init__(self, inplace: bool = False, alpha: float = 1.0):
|
||||
self.alpha = alpha
|
||||
self.inplace = inplace
|
||||
|
||||
def create(self) -> Module:
|
||||
return ELU(inplace=self.inplace, alpha=self.alpha)
|
||||
|
||||
|
||||
class ReLU6Factory(ModuleFactory):
|
||||
def __init__(self, inplace: bool = False):
|
||||
self.inplace = inplace
|
||||
|
||||
def create(self) -> Module:
|
||||
return ReLU6(inplace=self.inplace)
|
||||
|
||||
|
||||
class SiLUFactory(ModuleFactory):
|
||||
def __init__(self, inplace: bool = False):
|
||||
self.inplace = inplace
|
||||
|
||||
def create(self) -> Module:
|
||||
return SiLU(inplace=self.inplace)
|
||||
|
||||
|
||||
class HardswishFactory(ModuleFactory):
|
||||
def __init__(self, inplace: bool = False):
|
||||
self.inplace = inplace
|
||||
|
||||
def create(self) -> Module:
|
||||
return Hardswish(inplace=self.inplace)
|
||||
|
||||
|
||||
class TanhFactory(ModuleFactory):
|
||||
def create(self) -> Module:
|
||||
return Tanh()
|
||||
|
||||
|
||||
class SigmoidFactory(ModuleFactory):
|
||||
def create(self) -> Module:
|
||||
return Sigmoid()
|
||||
|
||||
|
||||
def resolve_nonlinearity_factory(nonlinearity_fatory: Optional[ModuleFactory]) -> ModuleFactory:
|
||||
if nonlinearity_fatory is None:
|
||||
return ReLUFactory(inplace=False)
|
||||
else:
|
||||
return nonlinearity_fatory
|
||||
126
live2d/tha3/nn/normalization.py
Normal file
126
live2d/tha3/nn/normalization.py
Normal file
@@ -0,0 +1,126 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
from torch import layer_norm
|
||||
from torch.nn import Module, BatchNorm2d, InstanceNorm2d, Parameter
|
||||
from torch.nn.init import normal_, constant_
|
||||
|
||||
from tha3.nn.pass_through import PassThrough
|
||||
|
||||
|
||||
class PixelNormalization(Module):
|
||||
def __init__(self, epsilon=1e-8):
|
||||
super().__init__()
|
||||
self.epsilon = epsilon
|
||||
|
||||
def forward(self, x):
|
||||
return x / torch.sqrt((x ** 2).mean(dim=1, keepdim=True) + self.epsilon)
|
||||
|
||||
|
||||
class NormalizationLayerFactory(ABC):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
@abstractmethod
|
||||
def create(self, num_features: int, affine: bool = True) -> Module:
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
def resolve_2d(factory: Optional['NormalizationLayerFactory']) -> 'NormalizationLayerFactory':
|
||||
if factory is None:
|
||||
return InstanceNorm2dFactory()
|
||||
else:
|
||||
return factory
|
||||
|
||||
|
||||
class Bias2d(Module):
|
||||
def __init__(self, num_features: int):
|
||||
super().__init__()
|
||||
self.num_features = num_features
|
||||
self.bias = Parameter(torch.zeros(1, num_features, 1, 1))
|
||||
|
||||
def forward(self, x):
|
||||
return x + self.bias
|
||||
|
||||
|
||||
class NoNorm2dFactory(NormalizationLayerFactory):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
def create(self, num_features: int, affine: bool = True) -> Module:
|
||||
if affine:
|
||||
return Bias2d(num_features)
|
||||
else:
|
||||
return PassThrough()
|
||||
|
||||
|
||||
class BatchNorm2dFactory(NormalizationLayerFactory):
|
||||
def __init__(self,
|
||||
weight_mean: Optional[float] = None,
|
||||
weight_std: Optional[float] = None,
|
||||
bias: Optional[float] = None):
|
||||
super().__init__()
|
||||
self.bias = bias
|
||||
self.weight_std = weight_std
|
||||
self.weight_mean = weight_mean
|
||||
|
||||
def get_weight_mean(self):
|
||||
if self.weight_mean is None:
|
||||
return 1.0
|
||||
else:
|
||||
return self.weight_mean
|
||||
|
||||
def get_weight_std(self):
|
||||
if self.weight_std is None:
|
||||
return 0.02
|
||||
else:
|
||||
return self.weight_std
|
||||
|
||||
def create(self, num_features: int, affine: bool = True) -> Module:
|
||||
module = BatchNorm2d(num_features=num_features, affine=affine)
|
||||
if affine:
|
||||
if self.weight_mean is not None or self.weight_std is not None:
|
||||
normal_(module.weight, self.get_weight_mean(), self.get_weight_std())
|
||||
if self.bias is not None:
|
||||
constant_(module.bias, self.bias)
|
||||
return module
|
||||
|
||||
|
||||
class InstanceNorm2dFactory(NormalizationLayerFactory):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
def create(self, num_features: int, affine: bool = True) -> Module:
|
||||
return InstanceNorm2d(num_features=num_features, affine=affine)
|
||||
|
||||
|
||||
class PixelNormFactory(NormalizationLayerFactory):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
def create(self, num_features: int, affine: bool = True) -> Module:
|
||||
return PixelNormalization()
|
||||
|
||||
|
||||
class LayerNorm2d(Module):
|
||||
def __init__(self, channels: int, affine: bool = True):
|
||||
super(LayerNorm2d, self).__init__()
|
||||
self.channels = channels
|
||||
self.affine = affine
|
||||
|
||||
if self.affine:
|
||||
self.weight = Parameter(torch.ones(1, channels, 1, 1))
|
||||
self.bias = Parameter(torch.zeros(1, channels, 1, 1))
|
||||
|
||||
def forward(self, x):
|
||||
shape = x.size()[1:]
|
||||
y = layer_norm(x, shape) * self.weight + self.bias
|
||||
return y
|
||||
|
||||
class LayerNorm2dFactory(NormalizationLayerFactory):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
def create(self, num_features: int, affine: bool = True) -> Module:
|
||||
return LayerNorm2d(channels=num_features, affine=affine)
|
||||
9
live2d/tha3/nn/pass_through.py
Normal file
9
live2d/tha3/nn/pass_through.py
Normal file
@@ -0,0 +1,9 @@
|
||||
from torch.nn import Module
|
||||
|
||||
|
||||
class PassThrough(Module):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
def forward(self, x):
|
||||
return x
|
||||
67
live2d/tha3/nn/resnet_block.py
Normal file
67
live2d/tha3/nn/resnet_block.py
Normal file
@@ -0,0 +1,67 @@
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
from torch.nn import Module, Sequential, Parameter
|
||||
|
||||
from tha3.module.module_factory import ModuleFactory
|
||||
from tha3.nn.conv import create_conv1, create_conv3
|
||||
from tha3.nn.nonlinearity_factory import resolve_nonlinearity_factory
|
||||
from tha3.nn.normalization import NormalizationLayerFactory
|
||||
from tha3.nn.util import BlockArgs
|
||||
|
||||
|
||||
class ResnetBlock(Module):
|
||||
@staticmethod
|
||||
def create(num_channels: int,
|
||||
is1x1: bool = False,
|
||||
use_scale_parameters: bool = False,
|
||||
block_args: Optional[BlockArgs] = None):
|
||||
if block_args is None:
|
||||
block_args = BlockArgs()
|
||||
return ResnetBlock(num_channels,
|
||||
is1x1,
|
||||
block_args.initialization_method,
|
||||
block_args.nonlinearity_factory,
|
||||
block_args.normalization_layer_factory,
|
||||
block_args.use_spectral_norm,
|
||||
use_scale_parameters)
|
||||
|
||||
def __init__(self,
|
||||
num_channels: int,
|
||||
is1x1: bool = False,
|
||||
initialization_method: str = 'he',
|
||||
nonlinearity_factory: ModuleFactory = None,
|
||||
normalization_layer_factory: Optional[NormalizationLayerFactory] = None,
|
||||
use_spectral_norm: bool = False,
|
||||
use_scale_parameter: bool = False):
|
||||
super().__init__()
|
||||
self.use_scale_parameter = use_scale_parameter
|
||||
if self.use_scale_parameter:
|
||||
self.scale = Parameter(torch.zeros(1))
|
||||
nonlinearity_factory = resolve_nonlinearity_factory(nonlinearity_factory)
|
||||
if is1x1:
|
||||
self.resnet_path = Sequential(
|
||||
create_conv1(num_channels, num_channels, initialization_method,
|
||||
bias=True,
|
||||
use_spectral_norm=use_spectral_norm),
|
||||
nonlinearity_factory.create(),
|
||||
create_conv1(num_channels, num_channels, initialization_method,
|
||||
bias=True,
|
||||
use_spectral_norm=use_spectral_norm))
|
||||
else:
|
||||
self.resnet_path = Sequential(
|
||||
create_conv3(num_channels, num_channels,
|
||||
bias=False, initialization_method=initialization_method,
|
||||
use_spectral_norm=use_spectral_norm),
|
||||
NormalizationLayerFactory.resolve_2d(normalization_layer_factory).create(num_channels, affine=True),
|
||||
nonlinearity_factory.create(),
|
||||
create_conv3(num_channels, num_channels,
|
||||
bias=False, initialization_method=initialization_method,
|
||||
use_spectral_norm=use_spectral_norm),
|
||||
NormalizationLayerFactory.resolve_2d(normalization_layer_factory).create(num_channels, affine=True))
|
||||
|
||||
def forward(self, x):
|
||||
if self.use_scale_parameter:
|
||||
return x + self.scale * self.resnet_path(x)
|
||||
else:
|
||||
return x + self.resnet_path(x)
|
||||
71
live2d/tha3/nn/resnet_block_seperable.py
Normal file
71
live2d/tha3/nn/resnet_block_seperable.py
Normal file
@@ -0,0 +1,71 @@
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
from torch.nn import Module, Sequential, Parameter
|
||||
|
||||
from tha3.module.module_factory import ModuleFactory
|
||||
from tha3.nn.conv import create_conv1
|
||||
from tha3.nn.nonlinearity_factory import resolve_nonlinearity_factory
|
||||
from tha3.nn.normalization import NormalizationLayerFactory
|
||||
from tha3.nn.separable_conv import create_separable_conv3
|
||||
from tha3.nn.util import BlockArgs
|
||||
|
||||
|
||||
class ResnetBlockSeparable(Module):
|
||||
@staticmethod
|
||||
def create(num_channels: int,
|
||||
is1x1: bool = False,
|
||||
use_scale_parameters: bool = False,
|
||||
block_args: Optional[BlockArgs] = None):
|
||||
if block_args is None:
|
||||
block_args = BlockArgs()
|
||||
return ResnetBlockSeparable(
|
||||
num_channels,
|
||||
is1x1,
|
||||
block_args.initialization_method,
|
||||
block_args.nonlinearity_factory,
|
||||
block_args.normalization_layer_factory,
|
||||
block_args.use_spectral_norm,
|
||||
use_scale_parameters)
|
||||
|
||||
def __init__(self,
|
||||
num_channels: int,
|
||||
is1x1: bool = False,
|
||||
initialization_method: str = 'he',
|
||||
nonlinearity_factory: ModuleFactory = None,
|
||||
normalization_layer_factory: Optional[NormalizationLayerFactory] = None,
|
||||
use_spectral_norm: bool = False,
|
||||
use_scale_parameter: bool = False):
|
||||
super().__init__()
|
||||
self.use_scale_parameter = use_scale_parameter
|
||||
if self.use_scale_parameter:
|
||||
self.scale = Parameter(torch.zeros(1))
|
||||
nonlinearity_factory = resolve_nonlinearity_factory(nonlinearity_factory)
|
||||
if is1x1:
|
||||
self.resnet_path = Sequential(
|
||||
create_conv1(num_channels, num_channels, initialization_method,
|
||||
bias=True,
|
||||
use_spectral_norm=use_spectral_norm),
|
||||
nonlinearity_factory.create(),
|
||||
create_conv1(num_channels, num_channels, initialization_method,
|
||||
bias=True,
|
||||
use_spectral_norm=use_spectral_norm))
|
||||
else:
|
||||
self.resnet_path = Sequential(
|
||||
create_separable_conv3(
|
||||
num_channels, num_channels,
|
||||
bias=False, initialization_method=initialization_method,
|
||||
use_spectral_norm=use_spectral_norm),
|
||||
NormalizationLayerFactory.resolve_2d(normalization_layer_factory).create(num_channels, affine=True),
|
||||
nonlinearity_factory.create(),
|
||||
create_separable_conv3(
|
||||
num_channels, num_channels,
|
||||
bias=False, initialization_method=initialization_method,
|
||||
use_spectral_norm=use_spectral_norm),
|
||||
NormalizationLayerFactory.resolve_2d(normalization_layer_factory).create(num_channels, affine=True))
|
||||
|
||||
def forward(self, x):
|
||||
if self.use_scale_parameter:
|
||||
return x + self.scale * self.resnet_path(x)
|
||||
else:
|
||||
return x + self.resnet_path(x)
|
||||
119
live2d/tha3/nn/separable_conv.py
Normal file
119
live2d/tha3/nn/separable_conv.py
Normal file
@@ -0,0 +1,119 @@
|
||||
from typing import Optional
|
||||
|
||||
from torch.nn import Sequential, Conv2d, ConvTranspose2d, Module
|
||||
|
||||
from tha3.nn.normalization import NormalizationLayerFactory
|
||||
from tha3.nn.util import BlockArgs, wrap_conv_or_linear_module
|
||||
|
||||
|
||||
def create_separable_conv3(in_channels: int, out_channels: int,
|
||||
bias: bool = False,
|
||||
initialization_method='he',
|
||||
use_spectral_norm: bool = False) -> Module:
|
||||
return Sequential(
|
||||
wrap_conv_or_linear_module(
|
||||
Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1, bias=False, groups=in_channels),
|
||||
initialization_method,
|
||||
use_spectral_norm),
|
||||
wrap_conv_or_linear_module(
|
||||
Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=bias),
|
||||
initialization_method,
|
||||
use_spectral_norm))
|
||||
|
||||
|
||||
def create_separable_conv7(in_channels: int, out_channels: int,
|
||||
bias: bool = False,
|
||||
initialization_method='he',
|
||||
use_spectral_norm: bool = False) -> Module:
|
||||
return Sequential(
|
||||
wrap_conv_or_linear_module(
|
||||
Conv2d(in_channels, in_channels, kernel_size=7, stride=1, padding=3, bias=False, groups=in_channels),
|
||||
initialization_method,
|
||||
use_spectral_norm),
|
||||
wrap_conv_or_linear_module(
|
||||
Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=bias),
|
||||
initialization_method,
|
||||
use_spectral_norm))
|
||||
|
||||
|
||||
def create_separable_conv3_block(
|
||||
in_channels: int, out_channels: int, block_args: Optional[BlockArgs] = None):
|
||||
if block_args is None:
|
||||
block_args = BlockArgs()
|
||||
return Sequential(
|
||||
wrap_conv_or_linear_module(
|
||||
Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1, bias=False, groups=in_channels),
|
||||
block_args.initialization_method,
|
||||
block_args.use_spectral_norm),
|
||||
wrap_conv_or_linear_module(
|
||||
Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False),
|
||||
block_args.initialization_method,
|
||||
block_args.use_spectral_norm),
|
||||
NormalizationLayerFactory.resolve_2d(block_args.normalization_layer_factory).create(out_channels, affine=True),
|
||||
block_args.nonlinearity_factory.create())
|
||||
|
||||
|
||||
def create_separable_conv7_block(
|
||||
in_channels: int, out_channels: int, block_args: Optional[BlockArgs] = None):
|
||||
if block_args is None:
|
||||
block_args = BlockArgs()
|
||||
return Sequential(
|
||||
wrap_conv_or_linear_module(
|
||||
Conv2d(in_channels, in_channels, kernel_size=7, stride=1, padding=3, bias=False, groups=in_channels),
|
||||
block_args.initialization_method,
|
||||
block_args.use_spectral_norm),
|
||||
wrap_conv_or_linear_module(
|
||||
Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False),
|
||||
block_args.initialization_method,
|
||||
block_args.use_spectral_norm),
|
||||
NormalizationLayerFactory.resolve_2d(block_args.normalization_layer_factory).create(out_channels, affine=True),
|
||||
block_args.nonlinearity_factory.create())
|
||||
|
||||
|
||||
def create_separable_downsample_block(
|
||||
in_channels: int, out_channels: int, is_output_1x1: bool, block_args: Optional[BlockArgs] = None):
|
||||
if block_args is None:
|
||||
block_args = BlockArgs()
|
||||
if is_output_1x1:
|
||||
return Sequential(
|
||||
wrap_conv_or_linear_module(
|
||||
Conv2d(in_channels, in_channels, kernel_size=4, stride=2, padding=1, bias=False, groups=in_channels),
|
||||
block_args.initialization_method,
|
||||
block_args.use_spectral_norm),
|
||||
wrap_conv_or_linear_module(
|
||||
Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False),
|
||||
block_args.initialization_method,
|
||||
block_args.use_spectral_norm),
|
||||
block_args.nonlinearity_factory.create())
|
||||
else:
|
||||
return Sequential(
|
||||
wrap_conv_or_linear_module(
|
||||
Conv2d(in_channels, in_channels, kernel_size=4, stride=2, padding=1, bias=False, groups=in_channels),
|
||||
block_args.initialization_method,
|
||||
block_args.use_spectral_norm),
|
||||
wrap_conv_or_linear_module(
|
||||
Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False),
|
||||
block_args.initialization_method,
|
||||
block_args.use_spectral_norm),
|
||||
NormalizationLayerFactory.resolve_2d(block_args.normalization_layer_factory)
|
||||
.create(out_channels, affine=True),
|
||||
block_args.nonlinearity_factory.create())
|
||||
|
||||
|
||||
def create_separable_upsample_block(
|
||||
in_channels: int, out_channels: int, block_args: Optional[BlockArgs] = None):
|
||||
if block_args is None:
|
||||
block_args = BlockArgs()
|
||||
return Sequential(
|
||||
wrap_conv_or_linear_module(
|
||||
ConvTranspose2d(
|
||||
in_channels, in_channels, kernel_size=4, stride=2, padding=1, bias=False, groups=in_channels),
|
||||
block_args.initialization_method,
|
||||
block_args.use_spectral_norm),
|
||||
wrap_conv_or_linear_module(
|
||||
Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False),
|
||||
block_args.initialization_method,
|
||||
block_args.use_spectral_norm),
|
||||
NormalizationLayerFactory.resolve_2d(block_args.normalization_layer_factory)
|
||||
.create(out_channels, affine=True),
|
||||
block_args.nonlinearity_factory.create())
|
||||
9
live2d/tha3/nn/spectral_norm.py
Normal file
9
live2d/tha3/nn/spectral_norm.py
Normal file
@@ -0,0 +1,9 @@
|
||||
from torch.nn import Module
|
||||
from torch.nn.utils import spectral_norm
|
||||
|
||||
|
||||
def apply_spectral_norm(module: Module, use_spectrial_norm: bool = False) -> Module:
|
||||
if use_spectrial_norm:
|
||||
return spectral_norm(module)
|
||||
else:
|
||||
return module
|
||||
0
live2d/tha3/nn/two_algo_body_rotator/__init__.py
Normal file
0
live2d/tha3/nn/two_algo_body_rotator/__init__.py
Normal file
@@ -0,0 +1,149 @@
|
||||
from typing import Optional, List
|
||||
|
||||
import torch
|
||||
from torch import Tensor
|
||||
from torch.nn import Module, Sequential, Tanh
|
||||
|
||||
from tha3.nn.image_processing_util import GridChangeApplier
|
||||
from tha3.nn.common.resize_conv_encoder_decoder import ResizeConvEncoderDecoder, ResizeConvEncoderDecoderArgs
|
||||
from tha3.module.module_factory import ModuleFactory
|
||||
from tha3.nn.conv import create_conv3_from_block_args, create_conv3
|
||||
from tha3.nn.nonlinearity_factory import ReLUFactory, LeakyReLUFactory
|
||||
from tha3.nn.normalization import InstanceNorm2dFactory
|
||||
from tha3.nn.util import BlockArgs
|
||||
|
||||
|
||||
class TwoAlgoFaceBodyRotator05Args:
|
||||
def __init__(self,
|
||||
image_size: int = 512,
|
||||
image_channels: int = 4,
|
||||
num_pose_params: int = 6,
|
||||
start_channels: int = 32,
|
||||
bottleneck_image_size=32,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels: int = 512,
|
||||
upsample_mode: str = 'bilinear',
|
||||
block_args: Optional[BlockArgs] = None,
|
||||
use_separable_convolution=False):
|
||||
if block_args is None:
|
||||
block_args = BlockArgs(
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=ReLUFactory(inplace=False))
|
||||
|
||||
self.use_separable_convolution = use_separable_convolution
|
||||
self.upsample_mode = upsample_mode
|
||||
self.max_channels = max_channels
|
||||
self.num_bottleneck_blocks = num_bottleneck_blocks
|
||||
self.bottleneck_image_size = bottleneck_image_size
|
||||
self.start_channels = start_channels
|
||||
self.num_pose_params = num_pose_params
|
||||
self.image_channels = image_channels
|
||||
self.image_size = image_size
|
||||
self.block_args = block_args
|
||||
|
||||
|
||||
class TwoAlgoFaceBodyRotator05(Module):
|
||||
def __init__(self, args: TwoAlgoFaceBodyRotator05Args):
|
||||
super().__init__()
|
||||
self.args = args
|
||||
|
||||
self.encoder_decoder = ResizeConvEncoderDecoder(
|
||||
ResizeConvEncoderDecoderArgs(
|
||||
image_size=args.image_size,
|
||||
input_channels=args.image_channels + args.num_pose_params,
|
||||
start_channels=args.start_channels,
|
||||
bottleneck_image_size=args.bottleneck_image_size,
|
||||
num_bottleneck_blocks=args.num_bottleneck_blocks,
|
||||
max_channels=args.max_channels,
|
||||
block_args=args.block_args,
|
||||
upsample_mode=args.upsample_mode,
|
||||
use_separable_convolution=args.use_separable_convolution))
|
||||
|
||||
self.direct_creator = Sequential(
|
||||
create_conv3_from_block_args(
|
||||
in_channels=self.args.start_channels,
|
||||
out_channels=self.args.image_channels,
|
||||
bias=True,
|
||||
block_args=self.args.block_args),
|
||||
Tanh())
|
||||
self.grid_change_creator = create_conv3(
|
||||
in_channels=self.args.start_channels,
|
||||
out_channels=2,
|
||||
bias=False,
|
||||
initialization_method='zero',
|
||||
use_spectral_norm=False)
|
||||
self.grid_change_applier = GridChangeApplier()
|
||||
|
||||
def forward(self, image: Tensor, pose: Tensor, *args) -> List[Tensor]:
|
||||
n, c = pose.shape
|
||||
pose = pose.view(n, c, 1, 1).repeat(1, 1, self.args.image_size, self.args.image_size)
|
||||
feature = torch.cat([image, pose], dim=1)
|
||||
|
||||
feature = self.encoder_decoder.forward(feature)[-1]
|
||||
grid_change = self.grid_change_creator(feature)
|
||||
direct_image = self.direct_creator(feature)
|
||||
warped_image = self.grid_change_applier.apply(grid_change, image)
|
||||
|
||||
return [
|
||||
direct_image,
|
||||
warped_image,
|
||||
grid_change]
|
||||
|
||||
DIRECT_IMAGE_INDEX = 0
|
||||
WARPED_IMAGE_INDEX = 1
|
||||
GRID_CHANGE_INDEX = 2
|
||||
OUTPUT_LENGTH = 3
|
||||
|
||||
|
||||
class TwoAlgoFaceBodyRotator05Factory(ModuleFactory):
|
||||
def __init__(self, args: TwoAlgoFaceBodyRotator05Args):
|
||||
super().__init__()
|
||||
self.args = args
|
||||
|
||||
def create(self) -> Module:
|
||||
return TwoAlgoFaceBodyRotator05(self.args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
cuda = torch.device('cuda')
|
||||
|
||||
image_size = 256
|
||||
image_channels = 4
|
||||
num_pose_params = 6
|
||||
args = TwoAlgoFaceBodyRotator05Args(
|
||||
image_size=256,
|
||||
image_channels=4,
|
||||
start_channels=64,
|
||||
num_pose_params=6,
|
||||
bottleneck_image_size=32,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
upsample_mode='nearest',
|
||||
use_separable_convolution=True,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=LeakyReLUFactory(inplace=False, negative_slope=0.1)))
|
||||
module = TwoAlgoFaceBodyRotator05(args).to(cuda)
|
||||
|
||||
image_count = 1
|
||||
image = torch.zeros(image_count, 4, image_size, image_size, device=cuda)
|
||||
pose = torch.zeros(image_count, num_pose_params, device=cuda)
|
||||
|
||||
repeat = 100
|
||||
acc = 0.0
|
||||
for i in range(repeat + 2):
|
||||
start = torch.cuda.Event(enable_timing=True)
|
||||
end = torch.cuda.Event(enable_timing=True)
|
||||
|
||||
start.record()
|
||||
module.forward(image, pose)
|
||||
end.record()
|
||||
torch.cuda.synchronize()
|
||||
if i >= 2:
|
||||
elapsed_time = start.elapsed_time(end)
|
||||
print("%d:" % i, elapsed_time)
|
||||
acc = acc + elapsed_time
|
||||
|
||||
print("average:", acc / repeat)
|
||||
40
live2d/tha3/nn/util.py
Normal file
40
live2d/tha3/nn/util.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from typing import Optional, Callable, Union
|
||||
|
||||
from torch.nn import Module
|
||||
|
||||
from tha3.module.module_factory import ModuleFactory
|
||||
from tha3.nn.init_function import create_init_function
|
||||
from tha3.nn.nonlinearity_factory import resolve_nonlinearity_factory
|
||||
from tha3.nn.normalization import NormalizationLayerFactory
|
||||
from tha3.nn.spectral_norm import apply_spectral_norm
|
||||
|
||||
|
||||
def wrap_conv_or_linear_module(module: Module,
|
||||
initialization_method: Union[str, Callable[[Module], Module]],
|
||||
use_spectral_norm: bool):
|
||||
if isinstance(initialization_method, str):
|
||||
init = create_init_function(initialization_method)
|
||||
else:
|
||||
init = initialization_method
|
||||
return apply_spectral_norm(init(module), use_spectral_norm)
|
||||
|
||||
|
||||
class BlockArgs:
|
||||
def __init__(self,
|
||||
initialization_method: Union[str, Callable[[Module], Module]] = 'he',
|
||||
use_spectral_norm: bool = False,
|
||||
normalization_layer_factory: Optional[NormalizationLayerFactory] = None,
|
||||
nonlinearity_factory: Optional[ModuleFactory] = None):
|
||||
self.nonlinearity_factory = resolve_nonlinearity_factory(nonlinearity_factory)
|
||||
self.normalization_layer_factory = normalization_layer_factory
|
||||
self.use_spectral_norm = use_spectral_norm
|
||||
self.initialization_method = initialization_method
|
||||
|
||||
def wrap_module(self, module: Module) -> Module:
|
||||
return wrap_conv_or_linear_module(module, self.get_init_func(), self.use_spectral_norm)
|
||||
|
||||
def get_init_func(self) -> Callable[[Module], Module]:
|
||||
if isinstance(self.initialization_method, str):
|
||||
return create_init_function(self.initialization_method)
|
||||
else:
|
||||
return self.initialization_method
|
||||
0
live2d/tha3/poser/__init__.py
Normal file
0
live2d/tha3/poser/__init__.py
Normal file
85
live2d/tha3/poser/general_poser_02.py
Normal file
85
live2d/tha3/poser/general_poser_02.py
Normal file
@@ -0,0 +1,85 @@
|
||||
from typing import List, Optional, Tuple, Dict, Callable
|
||||
|
||||
import torch
|
||||
from torch import Tensor
|
||||
from torch.nn import Module
|
||||
|
||||
from tha3.poser.poser import PoseParameterGroup, Poser
|
||||
from tha3.compute.cached_computation_func import TensorListCachedComputationFunc
|
||||
|
||||
|
||||
class GeneralPoser02(Poser):
|
||||
def __init__(self,
|
||||
module_loaders: Dict[str, Callable[[], Module]],
|
||||
device: torch.device,
|
||||
output_length: int,
|
||||
pose_parameters: List[PoseParameterGroup],
|
||||
output_list_func: TensorListCachedComputationFunc,
|
||||
subrect: Optional[Tuple[Tuple[int, int], Tuple[int, int]]] = None,
|
||||
default_output_index: int = 0,
|
||||
image_size: int = 256,
|
||||
dtype: torch.dtype = torch.float):
|
||||
self.dtype = dtype
|
||||
self.image_size = image_size
|
||||
self.default_output_index = default_output_index
|
||||
self.output_list_func = output_list_func
|
||||
self.subrect = subrect
|
||||
self.pose_parameters = pose_parameters
|
||||
self.device = device
|
||||
self.module_loaders = module_loaders
|
||||
|
||||
self.modules = None
|
||||
|
||||
self.num_parameters = 0
|
||||
for pose_parameter in self.pose_parameters:
|
||||
self.num_parameters += pose_parameter.get_arity()
|
||||
|
||||
self.output_length = output_length
|
||||
|
||||
def get_image_size(self) -> int:
|
||||
return self.image_size
|
||||
|
||||
def get_modules(self):
|
||||
if self.modules is None:
|
||||
self.modules = {}
|
||||
for key in self.module_loaders:
|
||||
module = self.module_loaders[key]()
|
||||
self.modules[key] = module
|
||||
module.to(self.device)
|
||||
module.train(False)
|
||||
return self.modules
|
||||
|
||||
def get_pose_parameter_groups(self) -> List[PoseParameterGroup]:
|
||||
return self.pose_parameters
|
||||
|
||||
def get_num_parameters(self) -> int:
|
||||
return self.num_parameters
|
||||
|
||||
def pose(self, image: Tensor, pose: Tensor, output_index: Optional[int] = None) -> Tensor:
|
||||
if output_index is None:
|
||||
output_index = self.default_output_index
|
||||
output_list = self.get_posing_outputs(image, pose)
|
||||
return output_list[output_index]
|
||||
|
||||
def get_posing_outputs(self, image: Tensor, pose: Tensor) -> List[Tensor]:
|
||||
modules = self.get_modules()
|
||||
|
||||
if len(image.shape) == 3:
|
||||
image = image.unsqueeze(0)
|
||||
if len(pose.shape) == 1:
|
||||
pose = pose.unsqueeze(0)
|
||||
if self.subrect is not None:
|
||||
image = image[:, :, self.subrect[0][0]:self.subrect[0][1], self.subrect[1][0]:self.subrect[1][1]]
|
||||
batch = [image, pose]
|
||||
|
||||
outputs = {}
|
||||
return self.output_list_func(modules, batch, outputs)
|
||||
|
||||
def get_output_length(self) -> int:
|
||||
return self.output_length
|
||||
|
||||
def free(self):
|
||||
self.modules = None
|
||||
|
||||
def get_dtype(self) -> torch.dtype:
|
||||
return self.dtype
|
||||
0
live2d/tha3/poser/modes/__init__.py
Normal file
0
live2d/tha3/poser/modes/__init__.py
Normal file
19
live2d/tha3/poser/modes/load_poser.py
Normal file
19
live2d/tha3/poser/modes/load_poser.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import torch
|
||||
|
||||
|
||||
def load_poser(model: str, device: torch.device):
|
||||
print("Using the %s model." % model)
|
||||
if model == "standard_float":
|
||||
from tha3.poser.modes.standard_float import create_poser
|
||||
return create_poser(device)
|
||||
elif model == "standard_half":
|
||||
from tha3.poser.modes.standard_half import create_poser
|
||||
return create_poser(device)
|
||||
elif model == "separable_float":
|
||||
from tha3.poser.modes.separable_float import create_poser
|
||||
return create_poser(device)
|
||||
elif model == "separable_half":
|
||||
from tha3.poser.modes.separable_half import create_poser
|
||||
return create_poser(device)
|
||||
else:
|
||||
raise RuntimeError("Invalid model: '%s'" % model)
|
||||
36
live2d/tha3/poser/modes/pose_parameters.py
Normal file
36
live2d/tha3/poser/modes/pose_parameters.py
Normal file
@@ -0,0 +1,36 @@
|
||||
from tha3.poser.poser import PoseParameters, PoseParameterCategory
|
||||
|
||||
|
||||
def get_pose_parameters():
|
||||
return PoseParameters.Builder() \
|
||||
.add_parameter_group("eyebrow_troubled", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_angry", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_lowered", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_raised", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_happy", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_serious", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eye_wink", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_happy_wink", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_surprised", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_relaxed", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_unimpressed", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_raised_lower_eyelid", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("iris_small", PoseParameterCategory.IRIS_MORPH, arity=2) \
|
||||
.add_parameter_group("mouth_aaa", PoseParameterCategory.MOUTH, arity=1, default_value=1.0) \
|
||||
.add_parameter_group("mouth_iii", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_uuu", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_eee", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_ooo", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_delta", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_lowered_corner", PoseParameterCategory.MOUTH, arity=2) \
|
||||
.add_parameter_group("mouth_raised_corner", PoseParameterCategory.MOUTH, arity=2) \
|
||||
.add_parameter_group("mouth_smirk", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("iris_rotation_x", PoseParameterCategory.IRIS_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("iris_rotation_y", PoseParameterCategory.IRIS_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("head_x", PoseParameterCategory.FACE_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("head_y", PoseParameterCategory.FACE_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("neck_z", PoseParameterCategory.FACE_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("body_y", PoseParameterCategory.BODY_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("body_z", PoseParameterCategory.BODY_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("breathing", PoseParameterCategory.BREATHING, arity=1, range=(0.0, 1.0)) \
|
||||
.build()
|
||||
331
live2d/tha3/poser/modes/separable_float.py
Normal file
331
live2d/tha3/poser/modes/separable_float.py
Normal file
@@ -0,0 +1,331 @@
|
||||
from enum import Enum
|
||||
from typing import Dict, Optional, List
|
||||
|
||||
import torch
|
||||
from torch import Tensor
|
||||
from torch.nn import Module
|
||||
from torch.nn.functional import interpolate
|
||||
|
||||
from tha3.nn.eyebrow_morphing_combiner.eyebrow_morphing_combiner_00 import EyebrowMorphingCombiner00
|
||||
from tha3.nn.eyebrow_decomposer.eyebrow_decomposer_03 import EyebrowDecomposer03Factory, \
|
||||
EyebrowDecomposer03Args, EyebrowDecomposer03
|
||||
from tha3.nn.eyebrow_morphing_combiner.eyebrow_morphing_combiner_03 import \
|
||||
EyebrowMorphingCombiner03Factory, EyebrowMorphingCombiner03Args
|
||||
from tha3.nn.face_morpher.face_morpher_09 import FaceMorpher09Factory, FaceMorpher09Args
|
||||
from tha3.poser.general_poser_02 import GeneralPoser02
|
||||
from tha3.nn.editor.editor_07 import Editor07, Editor07Args
|
||||
from tha3.nn.two_algo_body_rotator.two_algo_face_body_rotator_05 import TwoAlgoFaceBodyRotator05, \
|
||||
TwoAlgoFaceBodyRotator05Args
|
||||
from tha3.poser.modes.pose_parameters import get_pose_parameters
|
||||
from tha3.util import torch_load
|
||||
from tha3.compute.cached_computation_func import TensorListCachedComputationFunc
|
||||
from tha3.compute.cached_computation_protocol import CachedComputationProtocol
|
||||
from tha3.nn.nonlinearity_factory import ReLUFactory, LeakyReLUFactory
|
||||
from tha3.nn.normalization import InstanceNorm2dFactory
|
||||
from tha3.nn.util import BlockArgs
|
||||
|
||||
|
||||
class Network(Enum):
|
||||
eyebrow_decomposer = 1
|
||||
eyebrow_morphing_combiner = 2
|
||||
face_morpher = 3
|
||||
two_algo_face_body_rotator = 4
|
||||
editor = 5
|
||||
|
||||
@property
|
||||
def outputs_key(self):
|
||||
return f"{self.name}_outputs"
|
||||
|
||||
|
||||
class Branch(Enum):
|
||||
face_morphed_half = 1
|
||||
face_morphed_full = 2
|
||||
all_outputs = 3
|
||||
|
||||
|
||||
NUM_EYEBROW_PARAMS = 12
|
||||
NUM_FACE_PARAMS = 27
|
||||
NUM_ROTATION_PARAMS = 6
|
||||
|
||||
|
||||
class FiveStepPoserComputationProtocol(CachedComputationProtocol):
|
||||
def __init__(self, eyebrow_morphed_image_index: int):
|
||||
super().__init__()
|
||||
self.eyebrow_morphed_image_index = eyebrow_morphed_image_index
|
||||
self.cached_batch_0 = None
|
||||
self.cached_eyebrow_decomposer_output = None
|
||||
|
||||
def compute_func(self) -> TensorListCachedComputationFunc:
|
||||
def func(modules: Dict[str, Module],
|
||||
batch: List[Tensor],
|
||||
outputs: Dict[str, List[Tensor]]):
|
||||
if self.cached_batch_0 is None:
|
||||
new_batch_0 = True
|
||||
elif batch[0].shape[0] != self.cached_batch_0.shape[0]:
|
||||
new_batch_0 = True
|
||||
else:
|
||||
new_batch_0 = torch.max((batch[0] - self.cached_batch_0).abs()).item() > 0
|
||||
if not new_batch_0:
|
||||
outputs[Network.eyebrow_decomposer.outputs_key] = self.cached_eyebrow_decomposer_output
|
||||
output = self.get_output(Branch.all_outputs.name, modules, batch, outputs)
|
||||
if new_batch_0:
|
||||
self.cached_batch_0 = batch[0]
|
||||
self.cached_eyebrow_decomposer_output = outputs[Network.eyebrow_decomposer.outputs_key]
|
||||
return output
|
||||
|
||||
return func
|
||||
|
||||
def compute_output(self, key: str, modules: Dict[str, Module], batch: List[Tensor],
|
||||
outputs: Dict[str, List[Tensor]]) -> List[Tensor]:
|
||||
if key == Network.eyebrow_decomposer.outputs_key:
|
||||
input_image = batch[0][:, :, 64:192, 64 + 128:192 + 128]
|
||||
return modules[Network.eyebrow_decomposer.name].forward(input_image)
|
||||
elif key == Network.eyebrow_morphing_combiner.outputs_key:
|
||||
eyebrow_decomposer_output = self.get_output(Network.eyebrow_decomposer.outputs_key, modules, batch, outputs)
|
||||
background_layer = eyebrow_decomposer_output[EyebrowDecomposer03.BACKGROUND_LAYER_INDEX]
|
||||
eyebrow_layer = eyebrow_decomposer_output[EyebrowDecomposer03.EYEBROW_LAYER_INDEX]
|
||||
eyebrow_pose = batch[1][:, :NUM_EYEBROW_PARAMS]
|
||||
return modules[Network.eyebrow_morphing_combiner.name].forward(
|
||||
background_layer,
|
||||
eyebrow_layer,
|
||||
eyebrow_pose)
|
||||
elif key == Network.face_morpher.outputs_key:
|
||||
eyebrow_morphing_combiner_output = self.get_output(
|
||||
Network.eyebrow_morphing_combiner.outputs_key, modules, batch, outputs)
|
||||
eyebrow_morphed_image = eyebrow_morphing_combiner_output[self.eyebrow_morphed_image_index]
|
||||
input_image = batch[0][:, :, 32:32 + 192, (32 + 128):(32 + 192 + 128)].clone()
|
||||
input_image[:, :, 32:32 + 128, 32:32 + 128] = eyebrow_morphed_image
|
||||
face_pose = batch[1][:, NUM_EYEBROW_PARAMS:NUM_EYEBROW_PARAMS + NUM_FACE_PARAMS]
|
||||
return modules[Network.face_morpher.name].forward(input_image, face_pose)
|
||||
elif key == Branch.face_morphed_full.name:
|
||||
face_morpher_output = self.get_output(Network.face_morpher.outputs_key, modules, batch, outputs)
|
||||
face_morphed_image = face_morpher_output[0]
|
||||
input_image = batch[0].clone()
|
||||
input_image[:, :, 32:32 + 192, 32 + 128:32 + 192 + 128] = face_morphed_image
|
||||
return [input_image]
|
||||
elif key == Branch.face_morphed_half.name:
|
||||
face_morphed_full = self.get_output(Branch.face_morphed_full.name, modules, batch, outputs)[0]
|
||||
return [
|
||||
interpolate(face_morphed_full, size=(256, 256), mode='bilinear', align_corners=False)
|
||||
]
|
||||
elif key == Network.two_algo_face_body_rotator.outputs_key:
|
||||
face_morphed_half = self.get_output(Branch.face_morphed_half.name, modules, batch, outputs)[0]
|
||||
rotation_pose = batch[1][:, NUM_EYEBROW_PARAMS + NUM_FACE_PARAMS:]
|
||||
output = modules[Network.two_algo_face_body_rotator.name].forward(face_morphed_half, rotation_pose)
|
||||
return output
|
||||
elif key == Network.editor.outputs_key:
|
||||
input_original_image = self.get_output(Branch.face_morphed_full.name, modules, batch, outputs)[0]
|
||||
rotator_outputs = self.get_output(
|
||||
Network.two_algo_face_body_rotator.outputs_key, modules, batch, outputs)
|
||||
half_warped_image = rotator_outputs[TwoAlgoFaceBodyRotator05.WARPED_IMAGE_INDEX]
|
||||
full_warped_image = interpolate(
|
||||
half_warped_image, size=(512, 512), mode='bilinear', align_corners=False)
|
||||
half_grid_change = rotator_outputs[TwoAlgoFaceBodyRotator05.GRID_CHANGE_INDEX]
|
||||
full_grid_change = interpolate(
|
||||
half_grid_change, size=(512, 512), mode='bilinear', align_corners=False)
|
||||
rotation_pose = batch[1][:, NUM_EYEBROW_PARAMS + NUM_FACE_PARAMS:]
|
||||
return modules[Network.editor.name].forward(
|
||||
input_original_image, full_warped_image, full_grid_change, rotation_pose)
|
||||
elif key == Branch.all_outputs.name:
|
||||
editor_output = self.get_output(Network.editor.outputs_key, modules, batch, outputs)
|
||||
rotater_output = self.get_output(Network.two_algo_face_body_rotator.outputs_key, modules, batch, outputs)
|
||||
face_morpher_output = self.get_output(Network.face_morpher.outputs_key, modules, batch, outputs)
|
||||
eyebrow_morphing_combiner_output = self.get_output(
|
||||
Network.eyebrow_morphing_combiner.outputs_key, modules, batch, outputs)
|
||||
eyebrow_decomposer_output = self.get_output(
|
||||
Network.eyebrow_decomposer.outputs_key, modules, batch, outputs)
|
||||
output = editor_output \
|
||||
+ rotater_output \
|
||||
+ face_morpher_output \
|
||||
+ eyebrow_morphing_combiner_output \
|
||||
+ eyebrow_decomposer_output
|
||||
return output
|
||||
else:
|
||||
raise RuntimeError("Unsupported key: " + key)
|
||||
|
||||
|
||||
def load_eyebrow_decomposer(file_name: str):
|
||||
factory = EyebrowDecomposer03Factory(
|
||||
EyebrowDecomposer03Args(
|
||||
image_size=128,
|
||||
image_channels=4,
|
||||
start_channels=64,
|
||||
bottleneck_image_size=16,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=ReLUFactory(inplace=True))))
|
||||
print("Loading the eyebrow decomposer ... ", end="")
|
||||
module = factory.create()
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def load_eyebrow_morphing_combiner(file_name: str):
|
||||
factory = EyebrowMorphingCombiner03Factory(
|
||||
EyebrowMorphingCombiner03Args(
|
||||
image_size=128,
|
||||
image_channels=4,
|
||||
start_channels=64,
|
||||
num_pose_params=12,
|
||||
bottleneck_image_size=16,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=ReLUFactory(inplace=True))))
|
||||
print("Loading the eyebrow morphing conbiner ... ", end="")
|
||||
module = factory.create()
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def load_face_morpher(file_name: str):
|
||||
factory = FaceMorpher09Factory(
|
||||
FaceMorpher09Args(
|
||||
image_size=192,
|
||||
image_channels=4,
|
||||
num_pose_params=27,
|
||||
start_channels=64,
|
||||
bottleneck_image_size=24,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=ReLUFactory(inplace=False))))
|
||||
print("Loading the face morpher ... ", end="")
|
||||
module = factory.create()
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def load_two_algo_generator(file_name) -> Module:
|
||||
module = TwoAlgoFaceBodyRotator05(
|
||||
TwoAlgoFaceBodyRotator05Args(
|
||||
image_size=256,
|
||||
image_channels=4,
|
||||
start_channels=64,
|
||||
num_pose_params=6,
|
||||
bottleneck_image_size=32,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
upsample_mode='nearest',
|
||||
use_separable_convolution=True,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=LeakyReLUFactory(inplace=False, negative_slope=0.1))))
|
||||
print("Loading the face-body rotator ... ", end="")
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def load_editor(file_name) -> Module:
|
||||
module = Editor07(
|
||||
Editor07Args(
|
||||
image_size=512,
|
||||
image_channels=4,
|
||||
num_pose_params=6,
|
||||
start_channels=32,
|
||||
bottleneck_image_size=64,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
upsampling_mode='nearest',
|
||||
use_separable_convolution=True,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=LeakyReLUFactory(inplace=False, negative_slope=0.1))))
|
||||
print("Loading the combiner ... ", end="")
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def create_poser(
|
||||
device: torch.device,
|
||||
module_file_names: Optional[Dict[str, str]] = None,
|
||||
eyebrow_morphed_image_index: int = EyebrowMorphingCombiner00.EYEBROW_IMAGE_NO_COMBINE_ALPHA_INDEX,
|
||||
default_output_index: int = 0) -> GeneralPoser02:
|
||||
if module_file_names is None:
|
||||
module_file_names = {}
|
||||
if Network.eyebrow_decomposer.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/separable_float"
|
||||
file_name = dir + "/eyebrow_decomposer.pt"
|
||||
module_file_names[Network.eyebrow_decomposer.name] = file_name
|
||||
if Network.eyebrow_morphing_combiner.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/separable_float"
|
||||
file_name = dir + "/eyebrow_morphing_combiner.pt"
|
||||
module_file_names[Network.eyebrow_morphing_combiner.name] = file_name
|
||||
if Network.face_morpher.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/separable_float"
|
||||
file_name = dir + "/face_morpher.pt"
|
||||
module_file_names[Network.face_morpher.name] = file_name
|
||||
if Network.two_algo_face_body_rotator.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/separable_float"
|
||||
file_name = dir + "/two_algo_face_body_rotator.pt"
|
||||
module_file_names[Network.two_algo_face_body_rotator.name] = file_name
|
||||
if Network.editor.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/separable_float"
|
||||
file_name = dir + "/editor.pt"
|
||||
module_file_names[Network.editor.name] = file_name
|
||||
|
||||
loaders = {
|
||||
Network.eyebrow_decomposer.name:
|
||||
lambda: load_eyebrow_decomposer(module_file_names[Network.eyebrow_decomposer.name]),
|
||||
Network.eyebrow_morphing_combiner.name:
|
||||
lambda: load_eyebrow_morphing_combiner(module_file_names[Network.eyebrow_morphing_combiner.name]),
|
||||
Network.face_morpher.name:
|
||||
lambda: load_face_morpher(module_file_names[Network.face_morpher.name]),
|
||||
Network.two_algo_face_body_rotator.name:
|
||||
lambda: load_two_algo_generator(module_file_names[Network.two_algo_face_body_rotator.name]),
|
||||
Network.editor.name:
|
||||
lambda: load_editor(module_file_names[Network.editor.name]),
|
||||
}
|
||||
return GeneralPoser02(
|
||||
image_size=512,
|
||||
module_loaders=loaders,
|
||||
pose_parameters=get_pose_parameters().get_pose_parameter_groups(),
|
||||
output_list_func=FiveStepPoserComputationProtocol(eyebrow_morphed_image_index).compute_func(),
|
||||
subrect=None,
|
||||
device=device,
|
||||
output_length=29,
|
||||
default_output_index=default_output_index)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
device = torch.device('cuda')
|
||||
poser = create_poser(device)
|
||||
|
||||
image = torch.zeros(1, 4, 512, 512, device=device)
|
||||
pose = torch.zeros(1, 45, device=device)
|
||||
|
||||
repeat = 100
|
||||
acc = 0.0
|
||||
for i in range(repeat + 2):
|
||||
start = torch.cuda.Event(enable_timing=True)
|
||||
end = torch.cuda.Event(enable_timing=True)
|
||||
|
||||
start.record()
|
||||
poser.pose(image, pose)
|
||||
end.record()
|
||||
torch.cuda.synchronize()
|
||||
if i >= 2:
|
||||
elapsed_time = start.elapsed_time(end)
|
||||
print("%d:" % i, elapsed_time)
|
||||
acc = acc + elapsed_time
|
||||
|
||||
print("average:", acc / repeat)
|
||||
365
live2d/tha3/poser/modes/separable_half.py
Normal file
365
live2d/tha3/poser/modes/separable_half.py
Normal file
@@ -0,0 +1,365 @@
|
||||
from enum import Enum
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
import torch
|
||||
from torch import Tensor
|
||||
from torch.nn import Module
|
||||
from torch.nn.functional import interpolate
|
||||
|
||||
from tha3.nn.eyebrow_decomposer.eyebrow_decomposer_03 import EyebrowDecomposer03Factory, \
|
||||
EyebrowDecomposer03Args, EyebrowDecomposer03
|
||||
from tha3.nn.eyebrow_morphing_combiner.eyebrow_morphing_combiner_03 import \
|
||||
EyebrowMorphingCombiner03Factory, EyebrowMorphingCombiner03Args, EyebrowMorphingCombiner03
|
||||
from tha3.nn.face_morpher.face_morpher_09 import FaceMorpher09Factory, FaceMorpher09Args
|
||||
from tha3.poser.general_poser_02 import GeneralPoser02
|
||||
from tha3.poser.poser import PoseParameterCategory, PoseParameters
|
||||
from tha3.nn.editor.editor_07 import Editor07, Editor07Args
|
||||
from tha3.nn.two_algo_body_rotator.two_algo_face_body_rotator_05 import TwoAlgoFaceBodyRotator05, \
|
||||
TwoAlgoFaceBodyRotator05Args
|
||||
from tha3.util import torch_load
|
||||
from tha3.compute.cached_computation_func import TensorListCachedComputationFunc
|
||||
from tha3.compute.cached_computation_protocol import CachedComputationProtocol
|
||||
from tha3.nn.nonlinearity_factory import ReLUFactory, LeakyReLUFactory
|
||||
from tha3.nn.normalization import InstanceNorm2dFactory
|
||||
from tha3.nn.util import BlockArgs
|
||||
|
||||
|
||||
class Network(Enum):
|
||||
eyebrow_decomposer = 1
|
||||
eyebrow_morphing_combiner = 2
|
||||
face_morpher = 3
|
||||
two_algo_face_body_rotator = 4
|
||||
editor = 5
|
||||
|
||||
@property
|
||||
def outputs_key(self):
|
||||
return f"{self.name}_outputs"
|
||||
|
||||
|
||||
class Branch(Enum):
|
||||
face_morphed_half = 1
|
||||
face_morphed_full = 2
|
||||
all_outputs = 3
|
||||
|
||||
|
||||
NUM_EYEBROW_PARAMS = 12
|
||||
NUM_FACE_PARAMS = 27
|
||||
NUM_ROTATION_PARAMS = 6
|
||||
|
||||
|
||||
class FiveStepPoserComputationProtocol(CachedComputationProtocol):
|
||||
def __init__(self, eyebrow_morphed_image_index: int):
|
||||
super().__init__()
|
||||
self.eyebrow_morphed_image_index = eyebrow_morphed_image_index
|
||||
self.cached_batch_0 = None
|
||||
self.cached_eyebrow_decomposer_output = None
|
||||
|
||||
def compute_func(self) -> TensorListCachedComputationFunc:
|
||||
def func(modules: Dict[str, Module],
|
||||
batch: List[Tensor],
|
||||
outputs: Dict[str, List[Tensor]]):
|
||||
if self.cached_batch_0 is None:
|
||||
new_batch_0 = True
|
||||
elif batch[0].shape[0] != self.cached_batch_0.shape[0]:
|
||||
new_batch_0 = True
|
||||
else:
|
||||
new_batch_0 = torch.max((batch[0] - self.cached_batch_0).abs()).item() > 0
|
||||
if not new_batch_0:
|
||||
outputs[Network.eyebrow_decomposer.outputs_key] = self.cached_eyebrow_decomposer_output
|
||||
output = self.get_output(Branch.all_outputs.name, modules, batch, outputs)
|
||||
if new_batch_0:
|
||||
self.cached_batch_0 = batch[0]
|
||||
self.cached_eyebrow_decomposer_output = outputs[Network.eyebrow_decomposer.outputs_key]
|
||||
return output
|
||||
|
||||
return func
|
||||
|
||||
def compute_output(self, key: str, modules: Dict[str, Module], batch: List[Tensor],
|
||||
outputs: Dict[str, List[Tensor]]) -> List[Tensor]:
|
||||
if key == Network.eyebrow_decomposer.outputs_key:
|
||||
input_image = batch[0][:, :, 64:192, 64 + 128:192 + 128]
|
||||
return modules[Network.eyebrow_decomposer.name].forward(input_image)
|
||||
elif key == Network.eyebrow_morphing_combiner.outputs_key:
|
||||
eyebrow_decomposer_output = self.get_output(Network.eyebrow_decomposer.outputs_key, modules, batch, outputs)
|
||||
background_layer = eyebrow_decomposer_output[EyebrowDecomposer03.BACKGROUND_LAYER_INDEX]
|
||||
eyebrow_layer = eyebrow_decomposer_output[EyebrowDecomposer03.EYEBROW_LAYER_INDEX]
|
||||
eyebrow_pose = batch[1][:, :NUM_EYEBROW_PARAMS]
|
||||
return modules[Network.eyebrow_morphing_combiner.name].forward(
|
||||
background_layer,
|
||||
eyebrow_layer,
|
||||
eyebrow_pose)
|
||||
elif key == Network.face_morpher.outputs_key:
|
||||
eyebrow_morphing_combiner_output = self.get_output(
|
||||
Network.eyebrow_morphing_combiner.outputs_key, modules, batch, outputs)
|
||||
eyebrow_morphed_image = eyebrow_morphing_combiner_output[self.eyebrow_morphed_image_index]
|
||||
input_image = batch[0][:, :, 32:32 + 192, (32 + 128):(32 + 192 + 128)].clone()
|
||||
input_image[:, :, 32:32 + 128, 32:32 + 128] = eyebrow_morphed_image
|
||||
face_pose = batch[1][:, NUM_EYEBROW_PARAMS:NUM_EYEBROW_PARAMS + NUM_FACE_PARAMS]
|
||||
return modules[Network.face_morpher.name].forward(input_image, face_pose)
|
||||
elif key == Branch.face_morphed_full.name:
|
||||
face_morpher_output = self.get_output(Network.face_morpher.outputs_key, modules, batch, outputs)
|
||||
face_morphed_image = face_morpher_output[0]
|
||||
input_image = batch[0].clone()
|
||||
input_image[:, :, 32:32 + 192, 32 + 128:32 + 192 + 128] = face_morphed_image
|
||||
return [input_image]
|
||||
elif key == Branch.face_morphed_half.name:
|
||||
face_morphed_full = self.get_output(Branch.face_morphed_full.name, modules, batch, outputs)[0]
|
||||
return [
|
||||
interpolate(face_morphed_full, size=(256, 256), mode='bilinear', align_corners=False)
|
||||
]
|
||||
elif key == Network.two_algo_face_body_rotator.outputs_key:
|
||||
face_morphed_half = self.get_output(Branch.face_morphed_half.name, modules, batch, outputs)[0]
|
||||
rotation_pose = batch[1][:, NUM_EYEBROW_PARAMS + NUM_FACE_PARAMS:]
|
||||
return modules[Network.two_algo_face_body_rotator.name].forward(face_morphed_half, rotation_pose)
|
||||
elif key == Network.editor.outputs_key:
|
||||
input_original_image = self.get_output(Branch.face_morphed_full.name, modules, batch, outputs)[0]
|
||||
rotator_outputs = self.get_output(
|
||||
Network.two_algo_face_body_rotator.outputs_key, modules, batch, outputs)
|
||||
half_warped_image = rotator_outputs[TwoAlgoFaceBodyRotator05.WARPED_IMAGE_INDEX]
|
||||
full_warped_image = interpolate(
|
||||
half_warped_image, size=(512, 512), mode='bilinear', align_corners=False)
|
||||
half_grid_change = rotator_outputs[TwoAlgoFaceBodyRotator05.GRID_CHANGE_INDEX]
|
||||
full_grid_change = interpolate(
|
||||
half_grid_change, size=(512, 512), mode='bilinear', align_corners=False)
|
||||
rotation_pose = batch[1][:, NUM_EYEBROW_PARAMS + NUM_FACE_PARAMS:]
|
||||
return modules[Network.editor.name].forward(
|
||||
input_original_image, full_warped_image, full_grid_change, rotation_pose)
|
||||
elif key == Branch.all_outputs.name:
|
||||
editor_output = self.get_output(Network.editor.outputs_key, modules, batch, outputs)
|
||||
rotater_output = self.get_output(Network.two_algo_face_body_rotator.outputs_key, modules, batch, outputs)
|
||||
face_morpher_output = self.get_output(Network.face_morpher.outputs_key, modules, batch, outputs)
|
||||
eyebrow_morphing_combiner_output = self.get_output(
|
||||
Network.eyebrow_morphing_combiner.outputs_key, modules, batch, outputs)
|
||||
eyebrow_decomposer_output = self.get_output(
|
||||
Network.eyebrow_decomposer.outputs_key, modules, batch, outputs)
|
||||
output = editor_output \
|
||||
+ rotater_output \
|
||||
+ face_morpher_output \
|
||||
+ eyebrow_morphing_combiner_output \
|
||||
+ eyebrow_decomposer_output
|
||||
return output
|
||||
else:
|
||||
raise RuntimeError("Unsupported key: " + key)
|
||||
|
||||
|
||||
def load_eyebrow_decomposer(file_name: str):
|
||||
factory = EyebrowDecomposer03Factory(
|
||||
EyebrowDecomposer03Args(
|
||||
image_size=128,
|
||||
image_channels=4,
|
||||
start_channels=64,
|
||||
bottleneck_image_size=16,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=ReLUFactory(inplace=True))))
|
||||
print("Loading the eyebrow decomposer ... ", end="")
|
||||
module = factory.create().half()
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def load_eyebrow_morphing_combiner(file_name: str):
|
||||
factory = EyebrowMorphingCombiner03Factory(
|
||||
EyebrowMorphingCombiner03Args(
|
||||
image_size=128,
|
||||
image_channels=4,
|
||||
start_channels=64,
|
||||
num_pose_params=12,
|
||||
bottleneck_image_size=16,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=ReLUFactory(inplace=True))))
|
||||
print("Loading the eyebrow morphing conbiner ... ", end="")
|
||||
module = factory.create().half()
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def load_face_morpher(file_name: str):
|
||||
factory = FaceMorpher09Factory(
|
||||
FaceMorpher09Args(
|
||||
image_size=192,
|
||||
image_channels=4,
|
||||
num_pose_params=27,
|
||||
start_channels=64,
|
||||
bottleneck_image_size=24,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=ReLUFactory(inplace=False))))
|
||||
print("Loading the face morpher ... ", end="")
|
||||
module = factory.create().half()
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def load_two_algo_generator(file_name) -> Module:
|
||||
module = TwoAlgoFaceBodyRotator05(
|
||||
TwoAlgoFaceBodyRotator05Args(
|
||||
image_size=256,
|
||||
image_channels=4,
|
||||
start_channels=64,
|
||||
num_pose_params=6,
|
||||
bottleneck_image_size=32,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
upsample_mode='nearest',
|
||||
use_separable_convolution=True,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=LeakyReLUFactory(inplace=False, negative_slope=0.1)))).half()
|
||||
print("Loading the face-body rotator ... ", end="")
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def load_editor(file_name) -> Module:
|
||||
module = Editor07(
|
||||
Editor07Args(
|
||||
image_size=512,
|
||||
image_channels=4,
|
||||
num_pose_params=6,
|
||||
start_channels=32,
|
||||
bottleneck_image_size=64,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
upsampling_mode='nearest',
|
||||
use_separable_convolution=True,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=LeakyReLUFactory(inplace=False, negative_slope=0.1)))).half()
|
||||
print("Loading the combiner ... ", end="")
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def get_pose_parameters():
|
||||
return PoseParameters.Builder() \
|
||||
.add_parameter_group("eyebrow_troubled", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_angry", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_lowered", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_raised", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_happy", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_serious", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eye_wink", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_happy_wink", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_surprised", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_relaxed", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_unimpressed", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_raised_lower_eyelid", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("iris_small", PoseParameterCategory.IRIS_MORPH, arity=2) \
|
||||
.add_parameter_group("mouth_aaa", PoseParameterCategory.MOUTH, arity=1, default_value=1.0) \
|
||||
.add_parameter_group("mouth_iii", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_uuu", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_eee", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_ooo", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_delta", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_lowered_corner", PoseParameterCategory.MOUTH, arity=2) \
|
||||
.add_parameter_group("mouth_raised_corner", PoseParameterCategory.MOUTH, arity=2) \
|
||||
.add_parameter_group("mouth_smirk", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("iris_rotation_x", PoseParameterCategory.IRIS_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("iris_rotation_y", PoseParameterCategory.IRIS_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("head_x", PoseParameterCategory.FACE_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("head_y", PoseParameterCategory.FACE_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("neck_z", PoseParameterCategory.FACE_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("body_y", PoseParameterCategory.BODY_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("body_z", PoseParameterCategory.BODY_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("breathing", PoseParameterCategory.BREATHING, arity=1, range=(0.0, 1.0)) \
|
||||
.build()
|
||||
|
||||
|
||||
def create_poser(
|
||||
device: torch.device,
|
||||
module_file_names: Optional[Dict[str, str]] = None,
|
||||
eyebrow_morphed_image_index: int = EyebrowMorphingCombiner03.EYEBROW_IMAGE_NO_COMBINE_ALPHA_INDEX,
|
||||
default_output_index: int = 0) -> GeneralPoser02:
|
||||
if module_file_names is None:
|
||||
module_file_names = {}
|
||||
if Network.eyebrow_decomposer.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/separable_half"
|
||||
file_name = dir + "/eyebrow_decomposer.pt"
|
||||
module_file_names[Network.eyebrow_decomposer.name] = file_name
|
||||
if Network.eyebrow_morphing_combiner.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/separable_half"
|
||||
file_name = dir + "/eyebrow_morphing_combiner.pt"
|
||||
module_file_names[Network.eyebrow_morphing_combiner.name] = file_name
|
||||
if Network.face_morpher.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/separable_half"
|
||||
file_name = dir + "/face_morpher.pt"
|
||||
module_file_names[Network.face_morpher.name] = file_name
|
||||
if Network.two_algo_face_body_rotator.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/separable_half"
|
||||
file_name = dir + "/two_algo_face_body_rotator.pt"
|
||||
module_file_names[Network.two_algo_face_body_rotator.name] = file_name
|
||||
if Network.editor.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/separable_half"
|
||||
file_name = dir + "/editor.pt"
|
||||
module_file_names[Network.editor.name] = file_name
|
||||
|
||||
loaders = {
|
||||
Network.eyebrow_decomposer.name:
|
||||
lambda: load_eyebrow_decomposer(module_file_names[Network.eyebrow_decomposer.name]),
|
||||
Network.eyebrow_morphing_combiner.name:
|
||||
lambda: load_eyebrow_morphing_combiner(module_file_names[Network.eyebrow_morphing_combiner.name]),
|
||||
Network.face_morpher.name:
|
||||
lambda: load_face_morpher(module_file_names[Network.face_morpher.name]),
|
||||
Network.two_algo_face_body_rotator.name:
|
||||
lambda: load_two_algo_generator(module_file_names[Network.two_algo_face_body_rotator.name]),
|
||||
Network.editor.name:
|
||||
lambda: load_editor(module_file_names[Network.editor.name]),
|
||||
}
|
||||
return GeneralPoser02(
|
||||
image_size=512,
|
||||
module_loaders=loaders,
|
||||
pose_parameters=get_pose_parameters().get_pose_parameter_groups(),
|
||||
output_list_func=FiveStepPoserComputationProtocol(eyebrow_morphed_image_index).compute_func(),
|
||||
subrect=None,
|
||||
device=device,
|
||||
output_length=29,
|
||||
dtype=torch.half,
|
||||
default_output_index=default_output_index)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
device = torch.device('cuda')
|
||||
poser = create_poser(device)
|
||||
|
||||
image = torch.zeros(1, 4, 512, 512, device=device, dtype=torch.half)
|
||||
pose = torch.zeros(1, 45, device=device, dtype=torch.half)
|
||||
|
||||
repeat = 100
|
||||
acc = 0.0
|
||||
for i in range(repeat + 2):
|
||||
start = torch.cuda.Event(enable_timing=True)
|
||||
end = torch.cuda.Event(enable_timing=True)
|
||||
|
||||
start.record()
|
||||
poser.pose(image, pose)
|
||||
end.record()
|
||||
torch.cuda.synchronize()
|
||||
if i >= 2:
|
||||
elapsed_time = start.elapsed_time(end)
|
||||
print("%d:" % i, elapsed_time)
|
||||
acc = acc + elapsed_time
|
||||
|
||||
print("average:", acc / repeat)
|
||||
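For reference, a minimal usage sketch of the poser assembled above, assuming this module is importable as tha3.poser.modes.separable_half (matching the separable_half model directory used in create_poser) and that CUDA is available:

import torch
from tha3.poser.modes.separable_half import create_poser  # assumed module path for this file

device = torch.device('cuda')
poser = create_poser(device)  # loads the five half-precision modules listed above

# A 1x4x512x512 RGBA image in [-1, 1] and a 45-dim pose vector (12 eyebrow + 27 face + 6 rotation params).
image = torch.zeros(1, 4, 512, 512, device=device, dtype=torch.half)
pose = torch.zeros(1, 45, device=device, dtype=torch.half)

# pose() returns the output at default_output_index, i.e. entry 0 of the all_outputs list (the editor's first output).
result = poser.pose(image, pose)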
362  live2d/tha3/poser/modes/standard_float.py  Normal file
@@ -0,0 +1,362 @@
|
||||
from enum import Enum
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
import torch
|
||||
from torch import Tensor
|
||||
from torch.nn import Module
|
||||
from torch.nn.functional import interpolate
|
||||
|
||||
from tha3.nn.eyebrow_decomposer.eyebrow_decomposer_00 import EyebrowDecomposer00, \
|
||||
EyebrowDecomposer00Factory, EyebrowDecomposer00Args
|
||||
from tha3.nn.eyebrow_morphing_combiner.eyebrow_morphing_combiner_00 import \
|
||||
EyebrowMorphingCombiner00Factory, EyebrowMorphingCombiner00Args, EyebrowMorphingCombiner00
|
||||
from tha3.nn.face_morpher.face_morpher_08 import FaceMorpher08Args, FaceMorpher08Factory
|
||||
from tha3.poser.general_poser_02 import GeneralPoser02
|
||||
from tha3.poser.poser import PoseParameterCategory, PoseParameters
|
||||
from tha3.nn.editor.editor_07 import Editor07, Editor07Args
|
||||
from tha3.nn.two_algo_body_rotator.two_algo_face_body_rotator_05 import TwoAlgoFaceBodyRotator05, \
|
||||
TwoAlgoFaceBodyRotator05Args
|
||||
from tha3.util import torch_load
|
||||
from tha3.compute.cached_computation_func import TensorListCachedComputationFunc
|
||||
from tha3.compute.cached_computation_protocol import CachedComputationProtocol
|
||||
from tha3.nn.nonlinearity_factory import ReLUFactory, LeakyReLUFactory
|
||||
from tha3.nn.normalization import InstanceNorm2dFactory
|
||||
from tha3.nn.util import BlockArgs
|
||||
|
||||
|
||||
class Network(Enum):
|
||||
eyebrow_decomposer = 1
|
||||
eyebrow_morphing_combiner = 2
|
||||
face_morpher = 3
|
||||
two_algo_face_body_rotator = 4
|
||||
editor = 5
|
||||
|
||||
@property
|
||||
def outputs_key(self):
|
||||
return f"{self.name}_outputs"
|
||||
|
||||
|
||||
class Branch(Enum):
|
||||
face_morphed_half = 1
|
||||
face_morphed_full = 2
|
||||
all_outputs = 3
|
||||
|
||||
|
||||
NUM_EYEBROW_PARAMS = 12
|
||||
NUM_FACE_PARAMS = 27
|
||||
NUM_ROTATION_PARAMS = 6
|
||||
|
||||
|
||||
class FiveStepPoserComputationProtocol(CachedComputationProtocol):
|
||||
def __init__(self, eyebrow_morphed_image_index: int):
|
||||
super().__init__()
|
||||
self.eyebrow_morphed_image_index = eyebrow_morphed_image_index
|
||||
self.cached_batch_0 = None
|
||||
self.cached_eyebrow_decomposer_output = None
|
||||
|
||||
def compute_func(self) -> TensorListCachedComputationFunc:
|
||||
def func(modules: Dict[str, Module],
|
||||
batch: List[Tensor],
|
||||
outputs: Dict[str, List[Tensor]]):
|
||||
if self.cached_batch_0 is None:
|
||||
new_batch_0 = True
|
||||
elif batch[0].shape[0] != self.cached_batch_0.shape[0]:
|
||||
new_batch_0 = True
|
||||
else:
|
||||
new_batch_0 = torch.max((batch[0] - self.cached_batch_0).abs()).item() > 0
|
||||
if not new_batch_0:
|
||||
outputs[Network.eyebrow_decomposer.outputs_key] = self.cached_eyebrow_decomposer_output
|
||||
output = self.get_output(Branch.all_outputs.name, modules, batch, outputs)
|
||||
if new_batch_0:
|
||||
self.cached_batch_0 = batch[0]
|
||||
self.cached_eyebrow_decomposer_output = outputs[Network.eyebrow_decomposer.outputs_key]
|
||||
return output
|
||||
|
||||
return func
|
||||
|
||||
def compute_output(self, key: str, modules: Dict[str, Module], batch: List[Tensor],
|
||||
outputs: Dict[str, List[Tensor]]) -> List[Tensor]:
|
||||
if key == Network.eyebrow_decomposer.outputs_key:
|
||||
input_image = batch[0][:, :, 64:192, 64 + 128:192 + 128]
|
||||
return modules[Network.eyebrow_decomposer.name].forward(input_image)
|
||||
elif key == Network.eyebrow_morphing_combiner.outputs_key:
|
||||
eyebrow_decomposer_output = self.get_output(Network.eyebrow_decomposer.outputs_key, modules, batch, outputs)
|
||||
background_layer = eyebrow_decomposer_output[EyebrowDecomposer00.BACKGROUND_LAYER_INDEX]
|
||||
eyebrow_layer = eyebrow_decomposer_output[EyebrowDecomposer00.EYEBROW_LAYER_INDEX]
|
||||
eyebrow_pose = batch[1][:, :NUM_EYEBROW_PARAMS]
|
||||
return modules[Network.eyebrow_morphing_combiner.name].forward(
|
||||
background_layer,
|
||||
eyebrow_layer,
|
||||
eyebrow_pose)
|
||||
elif key == Network.face_morpher.outputs_key:
|
||||
eyebrow_morphing_combiner_output = self.get_output(
|
||||
Network.eyebrow_morphing_combiner.outputs_key, modules, batch, outputs)
|
||||
eyebrow_morphed_image = eyebrow_morphing_combiner_output[self.eyebrow_morphed_image_index]
|
||||
input_image = batch[0][:, :, 32:32 + 192, (32 + 128):(32 + 192 + 128)].clone()
|
||||
input_image[:, :, 32:32 + 128, 32:32 + 128] = eyebrow_morphed_image
|
||||
face_pose = batch[1][:, NUM_EYEBROW_PARAMS:NUM_EYEBROW_PARAMS + NUM_FACE_PARAMS]
|
||||
return modules[Network.face_morpher.name].forward(input_image, face_pose)
|
||||
elif key == Branch.face_morphed_full.name:
|
||||
face_morpher_output = self.get_output(Network.face_morpher.outputs_key, modules, batch, outputs)
|
||||
face_morphed_image = face_morpher_output[0]
|
||||
input_image = batch[0].clone()
|
||||
input_image[:, :, 32:32 + 192, 32 + 128:32 + 192 + 128] = face_morphed_image
|
||||
return [input_image]
|
||||
elif key == Branch.face_morphed_half.name:
|
||||
face_morphed_full = self.get_output(Branch.face_morphed_full.name, modules, batch, outputs)[0]
|
||||
return [
|
||||
interpolate(face_morphed_full, size=(256, 256), mode='bilinear', align_corners=False)
|
||||
]
|
||||
elif key == Network.two_algo_face_body_rotator.outputs_key:
|
||||
face_morphed_half = self.get_output(Branch.face_morphed_half.name, modules, batch, outputs)[0]
|
||||
rotation_pose = batch[1][:, NUM_EYEBROW_PARAMS + NUM_FACE_PARAMS:]
|
||||
return modules[Network.two_algo_face_body_rotator.name].forward(face_morphed_half, rotation_pose)
|
||||
elif key == Network.editor.outputs_key:
|
||||
input_original_image = self.get_output(Branch.face_morphed_full.name, modules, batch, outputs)[0]
|
||||
rotator_outputs = self.get_output(
|
||||
Network.two_algo_face_body_rotator.outputs_key, modules, batch, outputs)
|
||||
half_warped_image = rotator_outputs[TwoAlgoFaceBodyRotator05.WARPED_IMAGE_INDEX]
|
||||
full_warped_image = interpolate(
|
||||
half_warped_image, size=(512, 512), mode='bilinear', align_corners=False)
|
||||
half_grid_change = rotator_outputs[TwoAlgoFaceBodyRotator05.GRID_CHANGE_INDEX]
|
||||
full_grid_change = interpolate(
|
||||
half_grid_change, size=(512, 512), mode='bilinear', align_corners=False)
|
||||
rotation_pose = batch[1][:, NUM_EYEBROW_PARAMS + NUM_FACE_PARAMS:]
|
||||
return modules[Network.editor.name].forward(
|
||||
input_original_image, full_warped_image, full_grid_change, rotation_pose)
|
||||
elif key == Branch.all_outputs.name:
|
||||
editor_output = self.get_output(Network.editor.outputs_key, modules, batch, outputs)
|
||||
rotater_output = self.get_output(Network.two_algo_face_body_rotator.outputs_key, modules, batch, outputs)
|
||||
face_morpher_output = self.get_output(Network.face_morpher.outputs_key, modules, batch, outputs)
|
||||
eyebrow_morphing_combiner_output = self.get_output(
|
||||
Network.eyebrow_morphing_combiner.outputs_key, modules, batch, outputs)
|
||||
eyebrow_decomposer_output = self.get_output(
|
||||
Network.eyebrow_decomposer.outputs_key, modules, batch, outputs)
|
||||
output = editor_output \
|
||||
+ rotater_output \
|
||||
+ face_morpher_output \
|
||||
+ eyebrow_morphing_combiner_output \
|
||||
+ eyebrow_decomposer_output
|
||||
return output
|
||||
else:
|
||||
raise RuntimeError("Unsupported key: " + key)
|
||||
|
||||
|
||||
def load_eyebrow_decomposer(file_name: str):
|
||||
factory = EyebrowDecomposer00Factory(
|
||||
EyebrowDecomposer00Args(
|
||||
image_size=128,
|
||||
image_channels=4,
|
||||
start_channels=64,
|
||||
bottleneck_image_size=16,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=ReLUFactory(inplace=True))))
|
||||
print("Loading the eyebrow decomposer ... ", end="")
|
||||
module = factory.create()
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def load_eyebrow_morphing_combiner(file_name: str):
|
||||
factory = EyebrowMorphingCombiner00Factory(
|
||||
EyebrowMorphingCombiner00Args(
|
||||
image_size=128,
|
||||
image_channels=4,
|
||||
start_channels=64,
|
||||
num_pose_params=12,
|
||||
bottleneck_image_size=16,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=ReLUFactory(inplace=True))))
|
||||
print("Loading the eyebrow morphing conbiner ... ", end="")
|
||||
module = factory.create()
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def load_face_morpher(file_name: str):
|
||||
factory = FaceMorpher08Factory(
|
||||
FaceMorpher08Args(
|
||||
image_size=192,
|
||||
image_channels=4,
|
||||
num_expression_params=27,
|
||||
start_channels=64,
|
||||
bottleneck_image_size=24,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=ReLUFactory(inplace=False))))
|
||||
print("Loading the face morpher ... ", end="")
|
||||
module = factory.create()
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def load_two_algo_generator(file_name) -> Module:
|
||||
module = TwoAlgoFaceBodyRotator05(
|
||||
TwoAlgoFaceBodyRotator05Args(
|
||||
image_size=256,
|
||||
image_channels=4,
|
||||
start_channels=64,
|
||||
num_pose_params=6,
|
||||
bottleneck_image_size=32,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
upsample_mode='nearest',
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=LeakyReLUFactory(inplace=False, negative_slope=0.1))))
|
||||
print("Loading the face-body rotator ... ", end="")
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def load_editor(file_name) -> Module:
|
||||
module = Editor07(
|
||||
Editor07Args(
|
||||
image_size=512,
|
||||
image_channels=4,
|
||||
num_pose_params=6,
|
||||
start_channels=32,
|
||||
bottleneck_image_size=64,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
upsampling_mode='nearest',
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=LeakyReLUFactory(inplace=False, negative_slope=0.1))))
|
||||
print("Loading the combiner ... ", end="")
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def get_pose_parameters():
|
||||
return PoseParameters.Builder() \
|
||||
.add_parameter_group("eyebrow_troubled", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_angry", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_lowered", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_raised", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_happy", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_serious", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eye_wink", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_happy_wink", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_surprised", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_relaxed", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_unimpressed", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_raised_lower_eyelid", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("iris_small", PoseParameterCategory.IRIS_MORPH, arity=2) \
|
||||
.add_parameter_group("mouth_aaa", PoseParameterCategory.MOUTH, arity=1, default_value=1.0) \
|
||||
.add_parameter_group("mouth_iii", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_uuu", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_eee", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_ooo", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_delta", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_lowered_corner", PoseParameterCategory.MOUTH, arity=2) \
|
||||
.add_parameter_group("mouth_raised_corner", PoseParameterCategory.MOUTH, arity=2) \
|
||||
.add_parameter_group("mouth_smirk", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("iris_rotation_x", PoseParameterCategory.IRIS_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("iris_rotation_y", PoseParameterCategory.IRIS_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("head_x", PoseParameterCategory.FACE_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("head_y", PoseParameterCategory.FACE_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("neck_z", PoseParameterCategory.FACE_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("body_y", PoseParameterCategory.BODY_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("body_z", PoseParameterCategory.BODY_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("breathing", PoseParameterCategory.BREATHING, arity=1, range=(0.0, 1.0)) \
|
||||
.build()
|
||||
|
||||
|
||||
def create_poser(
|
||||
device: torch.device,
|
||||
module_file_names: Optional[Dict[str, str]] = None,
|
||||
eyebrow_morphed_image_index: int = EyebrowMorphingCombiner00.EYEBROW_IMAGE_NO_COMBINE_ALPHA_INDEX,
|
||||
default_output_index: int = 0) -> GeneralPoser02:
|
||||
if module_file_names is None:
|
||||
module_file_names = {}
|
||||
if Network.eyebrow_decomposer.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/standard_float"
|
||||
file_name = dir + "/eyebrow_decomposer.pt"
|
||||
module_file_names[Network.eyebrow_decomposer.name] = file_name
|
||||
if Network.eyebrow_morphing_combiner.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/standard_float"
|
||||
file_name = dir + "/eyebrow_morphing_combiner.pt"
|
||||
module_file_names[Network.eyebrow_morphing_combiner.name] = file_name
|
||||
if Network.face_morpher.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/standard_float"
|
||||
file_name = dir + "/face_morpher.pt"
|
||||
module_file_names[Network.face_morpher.name] = file_name
|
||||
if Network.two_algo_face_body_rotator.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/standard_float"
|
||||
file_name = dir + "/two_algo_face_body_rotator.pt"
|
||||
module_file_names[Network.two_algo_face_body_rotator.name] = file_name
|
||||
if Network.editor.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/standard_float"
|
||||
file_name = dir + "/editor.pt"
|
||||
module_file_names[Network.editor.name] = file_name
|
||||
|
||||
loaders = {
|
||||
Network.eyebrow_decomposer.name:
|
||||
lambda: load_eyebrow_decomposer(module_file_names[Network.eyebrow_decomposer.name]),
|
||||
Network.eyebrow_morphing_combiner.name:
|
||||
lambda: load_eyebrow_morphing_combiner(module_file_names[Network.eyebrow_morphing_combiner.name]),
|
||||
Network.face_morpher.name:
|
||||
lambda: load_face_morpher(module_file_names[Network.face_morpher.name]),
|
||||
Network.two_algo_face_body_rotator.name:
|
||||
lambda: load_two_algo_generator(module_file_names[Network.two_algo_face_body_rotator.name]),
|
||||
Network.editor.name:
|
||||
lambda: load_editor(module_file_names[Network.editor.name]),
|
||||
}
|
||||
return GeneralPoser02(
|
||||
image_size=512,
|
||||
module_loaders=loaders,
|
||||
pose_parameters=get_pose_parameters().get_pose_parameter_groups(),
|
||||
output_list_func=FiveStepPoserComputationProtocol(eyebrow_morphed_image_index).compute_func(),
|
||||
subrect=None,
|
||||
device=device,
|
||||
output_length=29,
|
||||
default_output_index=default_output_index)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
device = torch.device('cuda')
|
||||
poser = create_poser(device)
|
||||
|
||||
image = torch.zeros(1, 4, 512, 512, device=device)
|
||||
pose = torch.zeros(1, 45, device=device)
|
||||
|
||||
repeat = 100
|
||||
acc = 0.0
|
||||
for i in range(repeat + 2):
|
||||
start = torch.cuda.Event(enable_timing=True)
|
||||
end = torch.cuda.Event(enable_timing=True)
|
||||
|
||||
start.record()
|
||||
poser.pose(image, pose)
|
||||
end.record()
|
||||
torch.cuda.synchronize()
|
||||
if i >= 2:
|
||||
elapsed_time = start.elapsed_time(end)
|
||||
print("%d:" % i, elapsed_time)
|
||||
acc = acc + elapsed_time
|
||||
|
||||
print("average:", acc / repeat)
|
||||
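The loaders above default to checkpoints under live2d/tha3/models/standard_float. A sketch of pointing create_poser at a different checkpoint directory instead (the directory path is illustrative; the dictionary keys are the Network member names used above):

import torch
from tha3.poser.modes.standard_float import create_poser  # assumed module path for this file

custom_dir = "/data/tha3/standard_float"  # hypothetical location of the five .pt files
module_file_names = {
    "eyebrow_decomposer": custom_dir + "/eyebrow_decomposer.pt",
    "eyebrow_morphing_combiner": custom_dir + "/eyebrow_morphing_combiner.pt",
    "face_morpher": custom_dir + "/face_morpher.pt",
    "two_algo_face_body_rotator": custom_dir + "/two_algo_face_body_rotator.pt",
    "editor": custom_dir + "/editor.pt",
}
# The standard_float variant keeps full precision, so it can also run on CPU.
poser = create_poser(torch.device('cpu'), module_file_names=module_file_names)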
363  live2d/tha3/poser/modes/standard_half.py  Normal file
@@ -0,0 +1,363 @@
|
||||
from enum import Enum
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
import torch
|
||||
from torch import Tensor
|
||||
from torch.nn import Module
|
||||
from torch.nn.functional import interpolate
|
||||
|
||||
from tha3.nn.eyebrow_decomposer.eyebrow_decomposer_00 import EyebrowDecomposer00, \
|
||||
EyebrowDecomposer00Factory, EyebrowDecomposer00Args
|
||||
from tha3.nn.eyebrow_morphing_combiner.eyebrow_morphing_combiner_00 import \
|
||||
EyebrowMorphingCombiner00Factory, EyebrowMorphingCombiner00Args, EyebrowMorphingCombiner00
|
||||
from tha3.nn.face_morpher.face_morpher_08 import FaceMorpher08Args, FaceMorpher08Factory
|
||||
from tha3.poser.general_poser_02 import GeneralPoser02
|
||||
from tha3.poser.poser import PoseParameterCategory, PoseParameters
|
||||
from tha3.nn.editor.editor_07 import Editor07, Editor07Args
|
||||
from tha3.nn.two_algo_body_rotator.two_algo_face_body_rotator_05 import TwoAlgoFaceBodyRotator05, \
|
||||
TwoAlgoFaceBodyRotator05Args
|
||||
from tha3.util import torch_load
|
||||
from tha3.compute.cached_computation_func import TensorListCachedComputationFunc
|
||||
from tha3.compute.cached_computation_protocol import CachedComputationProtocol
|
||||
from tha3.nn.nonlinearity_factory import ReLUFactory, LeakyReLUFactory
|
||||
from tha3.nn.normalization import InstanceNorm2dFactory
|
||||
from tha3.nn.util import BlockArgs
|
||||
|
||||
|
||||
class Network(Enum):
|
||||
eyebrow_decomposer = 1
|
||||
eyebrow_morphing_combiner = 2
|
||||
face_morpher = 3
|
||||
two_algo_face_body_rotator = 4
|
||||
editor = 5
|
||||
|
||||
@property
|
||||
def outputs_key(self):
|
||||
return f"{self.name}_outputs"
|
||||
|
||||
|
||||
class Branch(Enum):
|
||||
face_morphed_half = 1
|
||||
face_morphed_full = 2
|
||||
all_outputs = 3
|
||||
|
||||
|
||||
NUM_EYEBROW_PARAMS = 12
|
||||
NUM_FACE_PARAMS = 27
|
||||
NUM_ROTATION_PARAMS = 6
|
||||
|
||||
|
||||
class FiveStepPoserComputationProtocol(CachedComputationProtocol):
|
||||
def __init__(self, eyebrow_morphed_image_index: int):
|
||||
super().__init__()
|
||||
self.eyebrow_morphed_image_index = eyebrow_morphed_image_index
|
||||
self.cached_batch_0 = None
|
||||
self.cached_eyebrow_decomposer_output = None
|
||||
|
||||
def compute_func(self) -> TensorListCachedComputationFunc:
|
||||
def func(modules: Dict[str, Module],
|
||||
batch: List[Tensor],
|
||||
outputs: Dict[str, List[Tensor]]):
|
||||
if self.cached_batch_0 is None:
|
||||
new_batch_0 = True
|
||||
elif batch[0].shape[0] != self.cached_batch_0.shape[0]:
|
||||
new_batch_0 = True
|
||||
else:
|
||||
new_batch_0 = torch.max((batch[0] - self.cached_batch_0).abs()).item() > 0
|
||||
if not new_batch_0:
|
||||
outputs[Network.eyebrow_decomposer.outputs_key] = self.cached_eyebrow_decomposer_output
|
||||
output = self.get_output(Branch.all_outputs.name, modules, batch, outputs)
|
||||
if new_batch_0:
|
||||
self.cached_batch_0 = batch[0]
|
||||
self.cached_eyebrow_decomposer_output = outputs[Network.eyebrow_decomposer.outputs_key]
|
||||
return output
|
||||
|
||||
return func
|
||||
|
||||
def compute_output(self, key: str, modules: Dict[str, Module], batch: List[Tensor],
|
||||
outputs: Dict[str, List[Tensor]]) -> List[Tensor]:
|
||||
if key == Network.eyebrow_decomposer.outputs_key:
|
||||
input_image = batch[0][:, :, 64:192, 64 + 128:192 + 128]
|
||||
return modules[Network.eyebrow_decomposer.name].forward(input_image)
|
||||
elif key == Network.eyebrow_morphing_combiner.outputs_key:
|
||||
eyebrow_decomposer_output = self.get_output(Network.eyebrow_decomposer.outputs_key, modules, batch, outputs)
|
||||
background_layer = eyebrow_decomposer_output[EyebrowDecomposer00.BACKGROUND_LAYER_INDEX]
|
||||
eyebrow_layer = eyebrow_decomposer_output[EyebrowDecomposer00.EYEBROW_LAYER_INDEX]
|
||||
eyebrow_pose = batch[1][:, :NUM_EYEBROW_PARAMS]
|
||||
return modules[Network.eyebrow_morphing_combiner.name].forward(
|
||||
background_layer,
|
||||
eyebrow_layer,
|
||||
eyebrow_pose)
|
||||
elif key == Network.face_morpher.outputs_key:
|
||||
eyebrow_morphing_combiner_output = self.get_output(
|
||||
Network.eyebrow_morphing_combiner.outputs_key, modules, batch, outputs)
|
||||
eyebrow_morphed_image = eyebrow_morphing_combiner_output[self.eyebrow_morphed_image_index]
|
||||
input_image = batch[0][:, :, 32:32 + 192, (32 + 128):(32 + 192 + 128)].clone()
|
||||
input_image[:, :, 32:32 + 128, 32:32 + 128] = eyebrow_morphed_image
|
||||
face_pose = batch[1][:, NUM_EYEBROW_PARAMS:NUM_EYEBROW_PARAMS + NUM_FACE_PARAMS]
|
||||
return modules[Network.face_morpher.name].forward(input_image, face_pose)
|
||||
elif key == Branch.face_morphed_full.name:
|
||||
face_morpher_output = self.get_output(Network.face_morpher.outputs_key, modules, batch, outputs)
|
||||
face_morphed_image = face_morpher_output[0]
|
||||
input_image = batch[0].clone()
|
||||
input_image[:, :, 32:32 + 192, 32 + 128:32 + 192 + 128] = face_morphed_image
|
||||
return [input_image]
|
||||
elif key == Branch.face_morphed_half.name:
|
||||
face_morphed_full = self.get_output(Branch.face_morphed_full.name, modules, batch, outputs)[0]
|
||||
return [
|
||||
interpolate(face_morphed_full, size=(256, 256), mode='bilinear', align_corners=False)
|
||||
]
|
||||
elif key == Network.two_algo_face_body_rotator.outputs_key:
|
||||
face_morphed_half = self.get_output(Branch.face_morphed_half.name, modules, batch, outputs)[0]
|
||||
rotation_pose = batch[1][:, NUM_EYEBROW_PARAMS + NUM_FACE_PARAMS:]
|
||||
return modules[Network.two_algo_face_body_rotator.name].forward(face_morphed_half, rotation_pose)
|
||||
elif key == Network.editor.outputs_key:
|
||||
input_original_image = self.get_output(Branch.face_morphed_full.name, modules, batch, outputs)[0]
|
||||
rotator_outputs = self.get_output(
|
||||
Network.two_algo_face_body_rotator.outputs_key, modules, batch, outputs)
|
||||
half_warped_image = rotator_outputs[TwoAlgoFaceBodyRotator05.WARPED_IMAGE_INDEX]
|
||||
full_warped_image = interpolate(
|
||||
half_warped_image, size=(512, 512), mode='bilinear', align_corners=False)
|
||||
half_grid_change = rotator_outputs[TwoAlgoFaceBodyRotator05.GRID_CHANGE_INDEX]
|
||||
full_grid_change = interpolate(
|
||||
half_grid_change, size=(512, 512), mode='bilinear', align_corners=False)
|
||||
rotation_pose = batch[1][:, NUM_EYEBROW_PARAMS + NUM_FACE_PARAMS:]
|
||||
return modules[Network.editor.name].forward(
|
||||
input_original_image, full_warped_image, full_grid_change, rotation_pose)
|
||||
elif key == Branch.all_outputs.name:
|
||||
editor_output = self.get_output(Network.editor.outputs_key, modules, batch, outputs)
|
||||
rotater_output = self.get_output(Network.two_algo_face_body_rotator.outputs_key, modules, batch, outputs)
|
||||
face_morpher_output = self.get_output(Network.face_morpher.outputs_key, modules, batch, outputs)
|
||||
eyebrow_morphing_combiner_output = self.get_output(
|
||||
Network.eyebrow_morphing_combiner.outputs_key, modules, batch, outputs)
|
||||
eyebrow_decomposer_output = self.get_output(
|
||||
Network.eyebrow_decomposer.outputs_key, modules, batch, outputs)
|
||||
output = editor_output \
|
||||
+ rotater_output \
|
||||
+ face_morpher_output \
|
||||
+ eyebrow_morphing_combiner_output \
|
||||
+ eyebrow_decomposer_output
|
||||
return output
|
||||
else:
|
||||
raise RuntimeError("Unsupported key: " + key)
|
||||
|
||||
|
||||
def load_eyebrow_decomposer(file_name: str):
|
||||
factory = EyebrowDecomposer00Factory(
|
||||
EyebrowDecomposer00Args(
|
||||
image_size=128,
|
||||
image_channels=4,
|
||||
start_channels=64,
|
||||
bottleneck_image_size=16,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=ReLUFactory(inplace=True))))
|
||||
print("Loading the eyebrow decomposer ... ", end="")
|
||||
module = factory.create().half()
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def load_eyebrow_morphing_combiner(file_name: str):
|
||||
factory = EyebrowMorphingCombiner00Factory(
|
||||
EyebrowMorphingCombiner00Args(
|
||||
image_size=128,
|
||||
image_channels=4,
|
||||
start_channels=64,
|
||||
num_pose_params=12,
|
||||
bottleneck_image_size=16,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=ReLUFactory(inplace=True))))
|
||||
print("Loading the eyebrow morphing conbiner ... ", end="")
|
||||
module = factory.create().half()
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def load_face_morpher(file_name: str):
|
||||
factory = FaceMorpher08Factory(
|
||||
FaceMorpher08Args(
|
||||
image_size=192,
|
||||
image_channels=4,
|
||||
num_expression_params=27,
|
||||
start_channels=64,
|
||||
bottleneck_image_size=24,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=ReLUFactory(inplace=False))))
|
||||
print("Loading the face morpher ... ", end="")
|
||||
module = factory.create().half()
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def load_two_algo_generator(file_name) -> Module:
|
||||
module = TwoAlgoFaceBodyRotator05(
|
||||
TwoAlgoFaceBodyRotator05Args(
|
||||
image_size=256,
|
||||
image_channels=4,
|
||||
start_channels=64,
|
||||
num_pose_params=6,
|
||||
bottleneck_image_size=32,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
upsample_mode='nearest',
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=LeakyReLUFactory(inplace=False, negative_slope=0.1)))).half()
|
||||
print("Loading the face-body rotator ... ", end="")
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def load_editor(file_name) -> Module:
|
||||
module = Editor07(
|
||||
Editor07Args(
|
||||
image_size=512,
|
||||
image_channels=4,
|
||||
num_pose_params=6,
|
||||
start_channels=32,
|
||||
bottleneck_image_size=64,
|
||||
num_bottleneck_blocks=6,
|
||||
max_channels=512,
|
||||
upsampling_mode='nearest',
|
||||
block_args=BlockArgs(
|
||||
initialization_method='he',
|
||||
use_spectral_norm=False,
|
||||
normalization_layer_factory=InstanceNorm2dFactory(),
|
||||
nonlinearity_factory=LeakyReLUFactory(inplace=False, negative_slope=0.1)))).half()
|
||||
print("Loading the combiner ... ", end="")
|
||||
module.load_state_dict(torch_load(file_name))
|
||||
print("DONE!!!")
|
||||
return module
|
||||
|
||||
|
||||
def get_pose_parameters():
|
||||
return PoseParameters.Builder() \
|
||||
.add_parameter_group("eyebrow_troubled", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_angry", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_lowered", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_raised", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_happy", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eyebrow_serious", PoseParameterCategory.EYEBROW, arity=2) \
|
||||
.add_parameter_group("eye_wink", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_happy_wink", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_surprised", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_relaxed", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_unimpressed", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("eye_raised_lower_eyelid", PoseParameterCategory.EYE, arity=2) \
|
||||
.add_parameter_group("iris_small", PoseParameterCategory.IRIS_MORPH, arity=2) \
|
||||
.add_parameter_group("mouth_aaa", PoseParameterCategory.MOUTH, arity=1, default_value=1.0) \
|
||||
.add_parameter_group("mouth_iii", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_uuu", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_eee", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_ooo", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_delta", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("mouth_lowered_corner", PoseParameterCategory.MOUTH, arity=2) \
|
||||
.add_parameter_group("mouth_raised_corner", PoseParameterCategory.MOUTH, arity=2) \
|
||||
.add_parameter_group("mouth_smirk", PoseParameterCategory.MOUTH, arity=1) \
|
||||
.add_parameter_group("iris_rotation_x", PoseParameterCategory.IRIS_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("iris_rotation_y", PoseParameterCategory.IRIS_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("head_x", PoseParameterCategory.FACE_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("head_y", PoseParameterCategory.FACE_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("neck_z", PoseParameterCategory.FACE_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("body_y", PoseParameterCategory.BODY_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("body_z", PoseParameterCategory.BODY_ROTATION, arity=1, range=(-1.0, 1.0)) \
|
||||
.add_parameter_group("breathing", PoseParameterCategory.BREATHING, arity=1, range=(0.0, 1.0)) \
|
||||
.build()
|
||||
|
||||
|
||||
def create_poser(
|
||||
device: torch.device,
|
||||
module_file_names: Optional[Dict[str, str]] = None,
|
||||
eyebrow_morphed_image_index: int = EyebrowMorphingCombiner00.EYEBROW_IMAGE_NO_COMBINE_ALPHA_INDEX,
|
||||
default_output_index: int = 0) -> GeneralPoser02:
|
||||
if module_file_names is None:
|
||||
module_file_names = {}
|
||||
if Network.eyebrow_decomposer.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/standard_half"
|
||||
file_name = dir + "/eyebrow_decomposer.pt"
|
||||
module_file_names[Network.eyebrow_decomposer.name] = file_name
|
||||
if Network.eyebrow_morphing_combiner.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/standard_half"
|
||||
file_name = dir + "/eyebrow_morphing_combiner.pt"
|
||||
module_file_names[Network.eyebrow_morphing_combiner.name] = file_name
|
||||
if Network.face_morpher.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/standard_half"
|
||||
file_name = dir + "/face_morpher.pt"
|
||||
module_file_names[Network.face_morpher.name] = file_name
|
||||
if Network.two_algo_face_body_rotator.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/standard_half"
|
||||
file_name = dir + "/two_algo_face_body_rotator.pt"
|
||||
module_file_names[Network.two_algo_face_body_rotator.name] = file_name
|
||||
if Network.editor.name not in module_file_names:
|
||||
dir = "live2d/tha3/models/standard_half"
|
||||
file_name = dir + "/editor.pt"
|
||||
module_file_names[Network.editor.name] = file_name
|
||||
|
||||
loaders = {
|
||||
Network.eyebrow_decomposer.name:
|
||||
lambda: load_eyebrow_decomposer(module_file_names[Network.eyebrow_decomposer.name]),
|
||||
Network.eyebrow_morphing_combiner.name:
|
||||
lambda: load_eyebrow_morphing_combiner(module_file_names[Network.eyebrow_morphing_combiner.name]),
|
||||
Network.face_morpher.name:
|
||||
lambda: load_face_morpher(module_file_names[Network.face_morpher.name]),
|
||||
Network.two_algo_face_body_rotator.name:
|
||||
lambda: load_two_algo_generator(module_file_names[Network.two_algo_face_body_rotator.name]),
|
||||
Network.editor.name:
|
||||
lambda: load_editor(module_file_names[Network.editor.name]),
|
||||
}
|
||||
return GeneralPoser02(
|
||||
image_size=512,
|
||||
module_loaders=loaders,
|
||||
pose_parameters=get_pose_parameters().get_pose_parameter_groups(),
|
||||
output_list_func=FiveStepPoserComputationProtocol(eyebrow_morphed_image_index).compute_func(),
|
||||
subrect=None,
|
||||
device=device,
|
||||
output_length=29,
|
||||
dtype=torch.half,
|
||||
default_output_index=default_output_index)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
device = torch.device('cuda')
|
||||
poser = create_poser(device)
|
||||
|
||||
image = torch.zeros(1, 4, 512, 512, device=device, dtype=torch.half)
|
||||
pose = torch.zeros(1, 45, device=device, dtype=torch.half)
|
||||
|
||||
repeat = 100
|
||||
acc = 0.0
|
||||
for i in range(repeat + 2):
|
||||
start = torch.cuda.Event(enable_timing=True)
|
||||
end = torch.cuda.Event(enable_timing=True)
|
||||
|
||||
start.record()
|
||||
poser.pose(image, pose)
|
||||
end.record()
|
||||
torch.cuda.synchronize()
|
||||
if i >= 2:
|
||||
elapsed_time = start.elapsed_time(end)
|
||||
print("%d:" % i, elapsed_time)
|
||||
acc = acc + elapsed_time
|
||||
|
||||
print("average:", acc / repeat)
|
||||
158  live2d/tha3/poser/poser.py  Normal file
@@ -0,0 +1,158 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import Enum
|
||||
from typing import Tuple, List, Optional
|
||||
|
||||
import torch
|
||||
from torch import Tensor
|
||||
|
||||
|
||||
class PoseParameterCategory(Enum):
|
||||
EYEBROW = 1
|
||||
EYE = 2
|
||||
IRIS_MORPH = 3
|
||||
IRIS_ROTATION = 4
|
||||
MOUTH = 5
|
||||
FACE_ROTATION = 6
|
||||
BODY_ROTATION = 7
|
||||
BREATHING = 8
|
||||
|
||||
|
||||
class PoseParameterGroup:
|
||||
def __init__(self,
|
||||
group_name: str,
|
||||
parameter_index: int,
|
||||
category: PoseParameterCategory,
|
||||
arity: int = 1,
|
||||
discrete: bool = False,
|
||||
default_value: float = 0.0,
|
||||
range: Optional[Tuple[float, float]] = None):
|
||||
assert arity == 1 or arity == 2
|
||||
if range is None:
|
||||
range = (0.0, 1.0)
|
||||
if arity == 1:
|
||||
parameter_names = [group_name]
|
||||
else:
|
||||
parameter_names = [group_name + "_left", group_name + "_right"]
|
||||
assert len(parameter_names) == arity
|
||||
|
||||
self.parameter_names = parameter_names
|
||||
self.range = range
|
||||
self.default_value = default_value
|
||||
self.discrete = discrete
|
||||
self.arity = arity
|
||||
self.category = category
|
||||
self.parameter_index = parameter_index
|
||||
self.group_name = group_name
|
||||
|
||||
def get_arity(self) -> int:
|
||||
return self.arity
|
||||
|
||||
def get_group_name(self) -> str:
|
||||
return self.group_name
|
||||
|
||||
def get_parameter_names(self) -> List[str]:
|
||||
return self.parameter_names
|
||||
|
||||
def is_discrete(self) -> bool:
|
||||
return self.discrete
|
||||
|
||||
def get_range(self) -> Tuple[float, float]:
|
||||
return self.range
|
||||
|
||||
def get_default_value(self):
|
||||
return self.default_value
|
||||
|
||||
def get_parameter_index(self):
|
||||
return self.parameter_index
|
||||
|
||||
def get_category(self) -> PoseParameterCategory:
|
||||
return self.category
|
||||
|
||||
|
||||
class PoseParameters:
|
||||
def __init__(self, pose_parameter_groups: List[PoseParameterGroup]):
|
||||
self.pose_parameter_groups = pose_parameter_groups
|
||||
|
||||
def get_parameter_index(self, name: str) -> int:
|
||||
index = 0
|
||||
for parameter_group in self.pose_parameter_groups:
|
||||
for param_name in parameter_group.parameter_names:
|
||||
if name == param_name:
|
||||
return index
|
||||
index += 1
|
||||
raise RuntimeError("Cannot find parameter with name %s" % name)
|
||||
|
||||
def get_parameter_name(self, index: int) -> str:
|
||||
assert index >= 0 and index < self.get_parameter_count()
|
||||
|
||||
for group in self.pose_parameter_groups:
|
||||
if index < group.get_arity():
|
||||
return group.get_parameter_names()[index]
|
||||
index -= group.arity
|
||||
|
||||
raise RuntimeError("Something is wrong here!!!")
|
||||
|
||||
def get_pose_parameter_groups(self):
|
||||
return self.pose_parameter_groups
|
||||
|
||||
def get_parameter_count(self):
|
||||
count = 0
|
||||
for group in self.pose_parameter_groups:
|
||||
count += group.arity
|
||||
return count
|
||||
|
||||
class Builder:
|
||||
def __init__(self):
|
||||
self.index = 0
|
||||
self.pose_parameter_groups = []
|
||||
|
||||
def add_parameter_group(self,
|
||||
group_name: str,
|
||||
category: PoseParameterCategory,
|
||||
arity: int = 1,
|
||||
discrete: bool = False,
|
||||
default_value: float = 0.0,
|
||||
range: Optional[Tuple[float, float]] = None):
|
||||
self.pose_parameter_groups.append(
|
||||
PoseParameterGroup(
|
||||
group_name,
|
||||
self.index,
|
||||
category,
|
||||
arity,
|
||||
discrete,
|
||||
default_value,
|
||||
range))
|
||||
self.index += arity
|
||||
return self
|
||||
|
||||
def build(self) -> 'PoseParameters':
|
||||
return PoseParameters(self.pose_parameter_groups)
|
||||
|
||||
|
||||
class Poser(ABC):
|
||||
@abstractmethod
|
||||
def get_image_size(self) -> int:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_output_length(self) -> int:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_pose_parameter_groups(self) -> List[PoseParameterGroup]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_num_parameters(self) -> int:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def pose(self, image: Tensor, pose: Tensor, output_index: int = 0) -> Tensor:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_posing_outputs(self, image: Tensor, pose: Tensor) -> List[Tensor]:
|
||||
pass
|
||||
|
||||
def get_dtype(self) -> torch.dtype:
|
||||
return torch.float
|
||||
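A short sketch of how this parameter bookkeeping is typically used: build a parameter set, look up a named slot, and write values into a flat pose vector. It uses only classes defined in this file.

import torch
from tha3.poser.poser import PoseParameterCategory, PoseParameters

params = PoseParameters.Builder() \
    .add_parameter_group("eye_wink", PoseParameterCategory.EYE, arity=2) \
    .add_parameter_group("mouth_aaa", PoseParameterCategory.MOUTH, arity=1, default_value=1.0) \
    .add_parameter_group("head_x", PoseParameterCategory.FACE_ROTATION, arity=1, range=(-1.0, 1.0)) \
    .build()

pose = torch.zeros(1, params.get_parameter_count())  # 2 + 1 + 1 = 4 slots
pose[0, params.get_parameter_index("mouth_aaa")] = 1.0        # arity-1 groups keep their group name
pose[0, params.get_parameter_index("eye_wink_right")] = 0.5   # arity-2 groups expand to _left/_right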
281  live2d/tha3/util.py  Normal file
@@ -0,0 +1,281 @@
|
||||
import math
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
import PIL.Image
|
||||
import numpy
|
||||
import torch
|
||||
from matplotlib import cm
|
||||
from torch import Tensor
|
||||
|
||||
|
||||
def is_power2(x):
|
||||
return x != 0 and ((x & (x - 1)) == 0)
|
||||
|
||||
|
||||
def numpy_srgb_to_linear(x):
|
||||
x = numpy.clip(x, 0.0, 1.0)
|
||||
return numpy.where(x <= 0.04045, x / 12.92, ((x + 0.055) / 1.055) ** 2.4)
|
||||
|
||||
|
||||
def numpy_linear_to_srgb(x):
|
||||
x = numpy.clip(x, 0.0, 1.0)
|
||||
return numpy.where(x <= 0.003130804953560372, x * 12.92, 1.055 * (x ** (1.0 / 2.4)) - 0.055)
|
||||
|
||||
|
||||
def torch_srgb_to_linear(x: torch.Tensor):
|
||||
x = torch.clip(x, 0.0, 1.0)
|
||||
return torch.where(torch.le(x, 0.04045), x / 12.92, ((x + 0.055) / 1.055) ** 2.4)
|
||||
|
||||
|
||||
def torch_linear_to_srgb(x):
|
||||
x = torch.clip(x, 0.0, 1.0)
|
||||
return torch.where(torch.le(x, 0.003130804953560372), x * 12.92, 1.055 * (x ** (1.0 / 2.4)) - 0.055)
|
||||
|
||||
|
||||
def image_linear_to_srgb(image):
|
||||
assert image.shape[2] == 3 or image.shape[2] == 4
|
||||
if image.shape[2] == 3:
|
||||
return numpy_linear_to_srgb(image)
|
||||
else:
|
||||
height, width, _ = image.shape
|
||||
rgb_image = numpy_linear_to_srgb(image[:, :, 0:3])
|
||||
a_image = image[:, :, 3:4]
|
||||
return numpy.concatenate((rgb_image, a_image), axis=2)
|
||||
|
||||
|
||||
def image_srgb_to_linear(image):
|
||||
assert image.shape[2] == 3 or image.shape[2] == 4
|
||||
if image.shape[2] == 3:
|
||||
return numpy_srgb_to_linear(image)
|
||||
else:
|
||||
height, width, _ = image.shape
|
||||
rgb_image = numpy_srgb_to_linear(image[:, :, 0:3])
|
||||
a_image = image[:, :, 3:4]
|
||||
return numpy.concatenate((rgb_image, a_image), axis=2)
|
||||
|
||||
|
||||
def save_rng_state(file_name):
|
||||
rng_state = torch.get_rng_state()
|
||||
torch_save(rng_state, file_name)
|
||||
|
||||
|
||||
def load_rng_state(file_name):
|
||||
rng_state = torch_load(file_name)
|
||||
torch.set_rng_state(rng_state)
|
||||
|
||||
|
||||
def grid_change_to_numpy_image(torch_image, num_channels=3):
|
||||
height = torch_image.shape[1]
|
||||
width = torch_image.shape[2]
|
||||
size_image = (torch_image[0, :, :] ** 2 + torch_image[1, :, :] ** 2).sqrt().view(height, width, 1).numpy()
|
||||
hsv = cm.get_cmap('hsv')
|
||||
angle_image = hsv(((torch.atan2(
|
||||
torch_image[0, :, :].view(height * width),
|
||||
torch_image[1, :, :].view(height * width)).view(height, width) + math.pi) / (2 * math.pi)).numpy()) * 3
|
||||
numpy_image = size_image * angle_image[:, :, 0:3]
|
||||
rgb_image = numpy_linear_to_srgb(numpy_image)
|
||||
if num_channels == 3:
|
||||
return rgb_image
|
||||
elif num_channels == 4:
|
||||
return numpy.concatenate([rgb_image, numpy.ones_like(size_image)], axis=2)
|
||||
else:
|
||||
raise RuntimeError("Unsupported num_channels: " + str(num_channels))
|
||||
|
||||
|
||||
def rgb_to_numpy_image(torch_image: Tensor, min_pixel_value=-1.0, max_pixel_value=1.0):
|
||||
assert torch_image.dim() == 3
|
||||
assert torch_image.shape[0] == 3
|
||||
height = torch_image.shape[1]
|
||||
width = torch_image.shape[2]
|
||||
|
||||
reshaped_image = torch_image.numpy().reshape(3, height * width).transpose().reshape(height, width, 3)
|
||||
numpy_image = (reshaped_image - min_pixel_value) / (max_pixel_value - min_pixel_value)
|
||||
return numpy_linear_to_srgb(numpy_image)
|
||||
|
||||
|
||||
def rgba_to_numpy_image_greenscreen(torch_image: Tensor,
|
||||
min_pixel_value=-1.0,
|
||||
max_pixel_value=1.0,
|
||||
include_alpha=False):
|
||||
height = torch_image.shape[1]
|
||||
width = torch_image.shape[2]
|
||||
|
||||
numpy_image = (torch_image.numpy().reshape(4, height * width).transpose().reshape(height, width,
|
||||
4) - min_pixel_value) \
|
||||
/ (max_pixel_value - min_pixel_value)
|
||||
rgb_image = numpy_linear_to_srgb(numpy_image[:, :, 0:3])
|
||||
a_image = numpy_image[:, :, 3]
|
||||
rgb_image[:, :, 0:3] = rgb_image[:, :, 0:3] * a_image.reshape(a_image.shape[0], a_image.shape[1], 1)
|
||||
rgb_image[:, :, 1] = rgb_image[:, :, 1] + (1 - a_image)
|
||||
|
||||
if not include_alpha:
|
||||
return rgb_image
|
||||
else:
|
||||
return numpy.concatenate((rgb_image, numpy.ones_like(numpy_image[:, :, 3:4])), axis=2)
|
||||
|
||||
|
||||
def rgba_to_numpy_image(torch_image: Tensor, min_pixel_value=-1.0, max_pixel_value=1.0):
|
||||
assert torch_image.dim() == 3
|
||||
assert torch_image.shape[0] == 4
|
||||
height = torch_image.shape[1]
|
||||
width = torch_image.shape[2]
|
||||
|
||||
reshaped_image = torch_image.numpy().reshape(4, height * width).transpose().reshape(height, width, 4)
|
||||
numpy_image = (reshaped_image - min_pixel_value) / (max_pixel_value - min_pixel_value)
|
||||
rgb_image = numpy_linear_to_srgb(numpy_image[:, :, 0:3])
|
||||
a_image = numpy.clip(numpy_image[:, :, 3], 0.0, 1.0)
|
||||
rgba_image = numpy.concatenate((rgb_image, a_image.reshape(height, width, 1)), axis=2)
|
||||
return rgba_image
|
||||
|
||||
|
||||
def extract_numpy_image_from_filelike_with_pytorch_layout(file, has_alpha=True, scale=2.0, offset=-1.0):
|
||||
try:
|
||||
pil_image = PIL.Image.open(file)
|
||||
except Exception as e:
|
||||
raise RuntimeError(file)
|
||||
return extract_numpy_image_from_PIL_image_with_pytorch_layout(pil_image, has_alpha, scale, offset)
|
||||
|
||||
|
||||
def extract_numpy_image_from_PIL_image_with_pytorch_layout(pil_image, has_alpha=True, scale=2.0, offset=-1.0):
|
||||
if has_alpha:
|
||||
num_channel = 4
|
||||
else:
|
||||
num_channel = 3
|
||||
image_size = pil_image.width
|
||||
|
||||
# Replace fully transparent pixels (alpha == 0) with [0, 0, 0, 0] so their RGB values do not influence the model.
|
||||
for i, px in enumerate(pil_image.getdata()):
|
||||
if px[3] <= 0:
|
||||
y = i // image_size
|
||||
x = i % image_size
|
||||
pil_image.putpixel((x, y), (0, 0, 0, 0))
|
||||
|
||||
raw_image = numpy.asarray(pil_image)
|
||||
image = (raw_image / 255.0).reshape(image_size, image_size, num_channel)
|
||||
image[:, :, 0:3] = numpy_srgb_to_linear(image[:, :, 0:3])
|
||||
image = image \
|
||||
.reshape(image_size * image_size, num_channel) \
|
||||
.transpose() \
|
||||
.reshape(num_channel, image_size, image_size) * scale + offset
|
||||
return image
|
||||
|
||||
|
||||
def extract_pytorch_image_from_filelike(file, has_alpha=True, scale=2.0, offset=-1.0):
|
||||
try:
|
||||
pil_image = PIL.Image.open(file)
|
||||
except Exception as e:
|
||||
raise RuntimeError(file)
|
||||
image = extract_numpy_image_from_PIL_image_with_pytorch_layout(pil_image, has_alpha, scale, offset)
|
||||
return torch.from_numpy(image).float()
|
||||
|
||||
|
||||
def extract_pytorch_image_from_PIL_image(pil_image, has_alpha=True, scale=2.0, offset=-1.0):
|
||||
image = extract_numpy_image_from_PIL_image_with_pytorch_layout(pil_image, has_alpha, scale, offset)
|
||||
return torch.from_numpy(image).float()
|
||||
|
||||
|
||||
def extract_numpy_image_from_filelike(file):
|
||||
pil_image = PIL.Image.open(file)
|
||||
image_width = pil_image.width
|
||||
image_height = pil_image.height
|
||||
if pil_image.mode == "RGBA":
|
||||
image = (numpy.asarray(pil_image) / 255.0).reshape(image_height, image_width, 4)
|
||||
else:
|
||||
image = (numpy.asarray(pil_image) / 255.0).reshape(image_height, image_width, 3)
|
||||
image[:, :, 0:3] = numpy_srgb_to_linear(image[:, :, 0:3])
|
||||
return image
|
||||
|
||||
|
||||
def convert_avs_to_avi(avs_file, avi_file):
|
||||
os.makedirs(os.path.dirname(avi_file), exist_ok=True)
|
||||
|
||||
file = open("temp.vdub", "w")
|
||||
file.write("VirtualDub.Open(\"%s\");" % avs_file)
|
||||
file.write("VirtualDub.video.SetCompression(\"cvid\", 0, 10000, 0);")
|
||||
file.write("VirtualDub.SaveAVI(\"%s\");" % avi_file)
|
||||
file.write("VirtualDub.Close();")
|
||||
file.close()
|
||||
|
||||
os.system("C:\\ProgramData\\chocolatey\\lib\\virtualdub\\tools\\vdub64.exe /i temp.vdub")
|
||||
|
||||
os.remove("temp.vdub")
|
||||
|
||||
|
||||
def convert_avi_to_mp4(avi_file, mp4_file):
|
||||
os.makedirs(os.path.dirname(mp4_file), exist_ok=True)
|
||||
os.system("ffmpeg -y -i %s -c:v libx264 -preset slow -crf 22 -c:a libfaac -b:a 128k %s" % \
|
||||
(avi_file, mp4_file))
|
||||
|
||||
|
||||
def convert_avi_to_webm(avi_file, webm_file):
|
||||
os.makedirs(os.path.dirname(webm_file), exist_ok=True)
|
||||
os.system("ffmpeg -y -i %s -vcodec libvpx -qmin 0 -qmax 50 -crf 10 -b:v 1M -acodec libvorbis %s" % \
|
||||
(avi_file, webm_file))
|
||||
|
||||
|
||||
def convert_mp4_to_webm(mp4_file, webm_file):
|
||||
os.makedirs(os.path.dirname(webm_file), exist_ok=True)
|
||||
os.system("ffmpeg -y -i %s -vcodec libvpx -qmin 0 -qmax 50 -crf 10 -b:v 1M -acodec libvorbis %s" % \
|
||||
(mp4_file, webm_file))
|
||||
|
||||
|
||||
def create_parent_dir(file_name):
|
||||
os.makedirs(os.path.dirname(file_name), exist_ok=True)
|
||||
|
||||
|
||||
def run_command(command_parts: List[str]):
|
||||
command = " ".join(command_parts)
|
||||
os.system(command)
|
||||
|
||||
|
||||
def save_pytorch_image(image, file_name):
|
||||
if image.shape[0] == 1:
|
||||
image = image.squeeze()
|
||||
if image.shape[0] == 4:
|
||||
numpy_image = rgba_to_numpy_image(image.detach().cpu())
|
||||
pil_image = PIL.Image.fromarray(numpy.uint8(numpy.rint(numpy_image * 255.0)), mode='RGBA')
|
||||
else:
|
||||
numpy_image = rgb_to_numpy_image(image.detach().cpu())
|
||||
pil_image = PIL.Image.fromarray(numpy.uint8(numpy.rint(numpy_image * 255.0)), mode='RGB')
|
||||
os.makedirs(os.path.dirname(file_name), exist_ok=True)
|
||||
pil_image.save(file_name)
|
||||
|
||||
|
||||
def torch_load(file_name):
|
||||
with open(file_name, 'rb') as f:
|
||||
return torch.load(f)
|
||||
|
||||
|
||||
def torch_save(content, file_name):
|
||||
os.makedirs(os.path.dirname(file_name), exist_ok=True)
|
||||
with open(file_name, 'wb') as f:
|
||||
torch.save(content, f)
|
||||
|
||||
|
||||
def resize_PIL_image(pil_image, size=(256, 256)):
|
||||
w, h = pil_image.size
|
||||
d = min(w, h)
|
||||
r = ((w - d) // 2, (h - d) // 2, (w + d) // 2, (h + d) // 2)
|
||||
return pil_image.resize(size, resample=PIL.Image.LANCZOS, box=r)
|
||||
|
||||
|
||||
def extract_PIL_image_from_filelike(file):
|
||||
return PIL.Image.open(file)
|
||||
|
||||
|
||||
def convert_output_image_from_torch_to_numpy(output_image):
|
||||
if output_image.shape[2] == 2:
|
||||
h, w, c = output_image.shape
|
||||
output_image = torch.transpose(output_image.reshape(h * w, c), 0, 1).reshape(c, h, w)
|
||||
if output_image.shape[0] == 4:
|
||||
numpy_image = rgba_to_numpy_image(output_image)
|
||||
elif output_image.shape[0] == 1:
|
||||
c, h, w = output_image.shape
|
||||
alpha_image = torch.cat([output_image.repeat(3, 1, 1) * 2.0 - 1.0, torch.ones(1, h, w)], dim=0)
|
||||
numpy_image = rgba_to_numpy_image(alpha_image)
|
||||
elif output_image.shape[0] == 2:
|
||||
numpy_image = grid_change_to_numpy_image(output_image, num_channels=4)
|
||||
else:
|
||||
raise RuntimeError("Unsupported # image channels: %d" % output_image.shape[0])
|
||||
return numpy_image
|
||||
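A minimal sketch of how the helpers above fit together to prepare a poser input: load an RGBA character image, center-crop and resize it to the posers' 512x512 resolution, and convert it to the linear-RGB, [-1, 1], channel-first tensor the networks expect (the file name is illustrative):

import torch
from tha3.util import extract_PIL_image_from_filelike, resize_PIL_image, extract_pytorch_image_from_PIL_image

pil_image = resize_PIL_image(extract_PIL_image_from_filelike("character.png"), size=(512, 512))
torch_image = extract_pytorch_image_from_PIL_image(pil_image)  # shape (4, 512, 512), values in [-1, 1]
batch = torch_image.unsqueeze(0).half().cuda()  # add a batch dimension; .half() for the *_half posers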