diff --git a/.gitignore b/.gitignore index 2b43880..7b51604 100644 --- a/.gitignore +++ b/.gitignore @@ -139,3 +139,4 @@ model.pt api_key.txt .vscode stt_test.wav +talkinghead/tha3/models diff --git a/requirements-rocm.txt b/requirements-rocm.txt index aae3be3..fee0a01 100644 --- a/requirements-rocm.txt +++ b/requirements-rocm.txt @@ -24,3 +24,4 @@ vosk sounddevice openai-whisper selenium +huggingface-hub diff --git a/requirements-silicon.txt b/requirements-silicon.txt index 6e6bd23..7cb406f 100644 --- a/requirements-silicon.txt +++ b/requirements-silicon.txt @@ -23,3 +23,4 @@ vosk sounddevice openai-whisper selenium +huggingface-hub diff --git a/requirements.txt b/requirements.txt index a4cbe23..10ba0ca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,6 +23,6 @@ vosk sounddevice openai-whisper selenium - +huggingface-hub fastapi wxpython; sys_platform == 'win32' or sys_platform == 'darwin' diff --git a/server.py b/server.py index 097652a..b55ff12 100644 --- a/server.py +++ b/server.py @@ -86,6 +86,15 @@ parser.add_argument( "--secure", action="store_true", help="Enforces the use of an API key" ) parser.add_argument("--talkinghead-gpu", action="store_true", help="Run the talkinghead animation on the GPU (CPU is default)") +parser.add_argument( + "--talkinghead-model", type=str, help="The THA3 model to use. 'float' models are fp32, 'half' are fp16. 'auto' (default) picks fp16 for GPU and fp32 for CPU.", + required=False, default="auto", + choices=["auto", "standard_float", "separable_float", "standard_half", "separable_half"], +) +parser.add_argument( + "--talkinghead-models", type=str, help="If THA3 models are not yet installed, use the given HuggingFace repository to install them.", + default="OktayAlpk/talking-head-anime-3" +) parser.add_argument("--coqui-gpu", action="store_true", help="Run the voice models on the GPU (CPU is default)") parser.add_argument("--coqui-models", help="Install given Coqui-api TTS model at launch (comma separated list, last one will be loaded at start)") @@ -180,21 +189,41 @@ if not torch.cuda.is_available() and not args.cpu: print(f"{Fore.GREEN}{Style.BRIGHT}Using torch device: {device_string}{Style.RESET_ALL}") if "talkinghead" in modules: + # Install the THA3 models if needed + talkinghead_models_dir = os.path.join(os.getcwd(), "talkinghead", "tha3", "models") + if not os.path.exists(talkinghead_models_dir): + # API: + # https://huggingface.co/docs/huggingface_hub/en/guides/download + try: + from huggingface_hub import snapshot_download + except ImportError: + raise ImportError( + "You need to install huggingface_hub to install talkinghead models automatically. " + "See https://pypi.org/project/huggingface-hub/ for installation." + ) + os.makedirs(talkinghead_models_dir, exist_ok=True) + print(f"THA3 models not yet installed. Installing from {args.talkinghead_models} into talkinghead/tha3/models.") + # TODO: I'd prefer to install with symlinks, but how about Windows users? + snapshot_download(repo_id=args.talkinghead_models, local_dir=talkinghead_models_dir, local_dir_use_symlinks=False) + import sys import threading mode = "cuda" if args.talkinghead_gpu else "cpu" - print("Initializing talkinghead pipeline in " + mode + " mode....") + model = args.talkinghead_model + if model == "auto": # default + # FP16 boosts the rendering performance by ~1.5x, but is only supported on GPU. 
+ model = "separable_half" if args.talkinghead_gpu else "separable_float" + print(f"Initializing talkinghead pipeline in {mode} mode with model {model}....") talkinghead_path = os.path.abspath(os.path.join(os.getcwd(), "talkinghead")) sys.path.append(talkinghead_path) # Add the path to the 'tha3' module to the sys.path list try: import talkinghead.tha3.app.app as talkinghead - from talkinghead import * - def launch_talkinghead_gui(): - talkinghead.launch_gui(mode, "separable_float") - #choices=['standard_float', 'separable_float', 'standard_half', 'separable_half'], - #choices='The device to use for PyTorch ("cuda" for GPU, "cpu" for CPU).' - talkinghead_thread = threading.Thread(target=launch_talkinghead_gui) + def launch_talkinghead(): + # mode: choices='The device to use for PyTorch ("cuda" for GPU, "cpu" for CPU).' + # model: choices=['standard_float', 'separable_float', 'standard_half', 'separable_half'], + talkinghead.launch(mode, model) + talkinghead_thread = threading.Thread(target=launch_talkinghead) talkinghead_thread.daemon = True # Set the thread as a daemon thread talkinghead_thread.start() diff --git a/talkinghead/start_standalone_app.sh b/talkinghead/start_standalone_app.sh deleted file mode 100755 index fbaeb48..0000000 --- a/talkinghead/start_standalone_app.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -# -# Launch THA3 in standalone app mode. -# -# This standalone app mode does not interact with SillyTavern. -# -# The usual way to run this fork of THA3 is as a SillyTavern-extras plugin. -# The standalone app mode comes from the original THA3 code, and is included -# for testing and debugging. -# -# If you want to manually pose a character (to generate static expression images), -# use `start_manual_poser.sh` instead. -# -# This must run in the "extras" conda venv! -# Do this first: -# conda activate extras -# -# The `--char=...` flag can be used to specify which image to load under "tha3/images". -# -python -m tha3.app.app --char=example.png $@ diff --git a/talkinghead/tha3/app/app.py b/talkinghead/tha3/app/app.py index 0d81e65..4c17398 100644 --- a/talkinghead/tha3/app/app.py +++ b/talkinghead/tha3/app/app.py @@ -1,933 +1,347 @@ -# TODO: Standalone app mode does not work yet. The SillyTavern-extras plugin mode works. +"""THA3 live mode for SillyTavern-extras. -import argparse -import ast +This is the animation engine, running on top of the THA3 posing engine. +This module implements the live animation backend and serves the API. For usage, see `server.py`. + +If you want to play around with THA3 expressions in a standalone app, see `manual_poser.py`. +""" + +# TODO: talkinghead live mode: +# - talking animation is broken, seems the client isn't sending us a request to start/stop talking? +# - improve idle animations +# - cosine schedule? +# - or perhaps the current ODE approach is better (define instant rate only, based on target state; then integrate) +# - add option to server.py to load with float32 or float16, as desired +# - PNG sending efficiency? 
+ +import atexit +import io +import logging import os import random import sys -import threading import time -import torch -import io -import wx import numpy as np -import json -import typing +import threading +from typing import Dict, List, NoReturn, Union + +import PIL + +import torch -from PIL import Image from flask import Flask, Response from flask_cors import CORS -from io import BytesIO -sys.path.append(os.getcwd()) -from tha3.mocap import ifacialmocap_constants as mocap_constants -from tha3.mocap.ifacialmocap_pose import create_default_ifacialmocap_pose -from tha3.mocap.ifacialmocap_pose_converter import IFacialMocapPoseConverter -from tha3.mocap.ifacialmocap_poser_converter_25 import create_ifacialmocap_pose_converter from tha3.poser.modes.load_poser import load_poser from tha3.poser.poser import Poser -from tha3.util import ( - torch_linear_to_srgb, resize_PIL_image, extract_PIL_image_from_filelike, - extract_pytorch_image_from_PIL_image -) -from typing import Optional +from tha3.util import (torch_linear_to_srgb, resize_PIL_image, + extract_PIL_image_from_filelike, extract_pytorch_image_from_PIL_image) +from tha3.app.util import posedict_keys, posedict_key_to_index, load_emotion_presets, posedict_to_pose, to_talkinghead_image, FpsStatistics + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) # Global variables +# TODO: we could move many of these into TalkingheadLive, and just keep a reference to that as global. +global_instance = None +global_basedir = "talkinghead" global_source_image = None global_result_image = None -global_reload = None -is_talking_override = False +global_reload_image = None +animation_running = False is_talking = False -global_timer_paused = False -emotion = "neutral" -lasttranisitiondPose = "NotInit" -inMotion = False -fps = 0 +current_emotion = "neutral" current_pose = None -global_basedir = "talkinghead" # for SillyTavern-extras live mode; if running standalone, we override this later +fps = 0 # Flask setup app = Flask(__name__) CORS(app) -def setEmotion(_emotion): - global emotion +# -------------------------------------------------------------------------------- +# API - highest_score = float('-inf') +def setEmotion(_emotion: Dict[str, float]) -> None: + """Set the current emotion of the character based on sentiment analysis results. + + Currently, we pick the emotion with the highest confidence score. 
+ + _emotion: result of sentiment analysis: {emotion0: confidence0, ...} + """ + global current_emotion + + highest_score = float("-inf") highest_label = None for item in _emotion: - if item['score'] > highest_score: - highest_score = item['score'] - highest_label = item['label'] + if item["score"] > highest_score: + highest_score = item["score"] + highest_label = item["label"] - # print("Applying ", emotion) - emotion = highest_label + logger.debug(f"setEmotion: applying emotion {highest_label}") + current_emotion = highest_label -def unload(): - global global_timer_paused - global_timer_paused = True +def unload() -> str: + """Stop animation.""" + global animation_running + animation_running = False + logger.debug("unload: animation paused") return "Animation Paused" -def start_talking(): - global is_talking_override - is_talking_override = True +def start_talking() -> str: + """Start talking animation.""" + global is_talking + is_talking = True + logger.debug("start_talking called") return "started" -def stop_talking(): - global is_talking_override - is_talking_override = False +def stop_talking() -> str: + """Stop talking animation.""" + global is_talking + is_talking = False + logger.debug("stop_talking called") return "stopped" -def result_feed(): +def result_feed() -> Response: + """Return a Flask `Response` that repeatedly yields the current image as 'image/png'.""" def generate(): while True: if global_result_image is not None: try: rgb_image = global_result_image[:, :, [2, 1, 0]] # Swap B and R channels - pil_image = Image.fromarray(np.uint8(rgb_image)) # Convert to PIL Image + pil_image = PIL.Image.fromarray(np.uint8(rgb_image)) # Convert to PIL Image if global_result_image.shape[2] == 4: # Check if there is an alpha channel present alpha_channel = global_result_image[:, :, 3] # Extract alpha channel - pil_image.putalpha(Image.fromarray(np.uint8(alpha_channel))) # Set alpha channel in the PIL Image + pil_image.putalpha(PIL.Image.fromarray(np.uint8(alpha_channel))) # Set alpha channel in the PIL Image buffer = io.BytesIO() # Save as PNG with RGBA mode - pil_image.save(buffer, format='PNG') + pil_image.save(buffer, format="PNG") image_bytes = buffer.getvalue() - except Exception as e: - print(f"Error when trying to write image: {e}") - yield (b'--frame\r\n' # Send the PNG image - b'Content-Type: image/png\r\n\r\n' + image_bytes + b'\r\n') + except Exception as exc: + logger.error(f"Error when trying to write image: {exc}") + yield (b"--frame\r\n" # Send the PNG image (last available in case of error) + b"Content-Type: image/png\r\n\r\n" + image_bytes + b"\r\n") else: time.sleep(0.1) - return Response(generate(), mimetype='multipart/x-mixed-replace; boundary=frame') + return Response(generate(), mimetype="multipart/x-mixed-replace; boundary=frame") -def talkinghead_load_file(stream): - global global_basedir - global global_source_image - global global_reload - global global_timer_paused - global_timer_paused = False +# TODO: the input is a flask.request.file.stream; what's the type of that? 
+def talkinghead_load_file(stream) -> str: + """Load image from stream and start animation.""" + global global_reload_image + global animation_running + logger.debug("talkinghead_load_file: loading new input image from stream") try: - pil_image = Image.open(stream) # Load the image using PIL.Image.open - img_data = BytesIO() # Create a copy of the image data in memory using BytesIO - pil_image.save(img_data, format='PNG') - global_reload = Image.open(BytesIO(img_data.getvalue())) # Set the global_reload to the copy of the image data - except Image.UnidentifiedImageError: - print("Could not load image from file, loading blank") + animation_running = False # pause animation while loading a new image + pil_image = PIL.Image.open(stream) # Load the image using PIL.Image.open + img_data = io.BytesIO() # Create a copy of the image data in memory using BytesIO + pil_image.save(img_data, format="PNG") + global_reload_image = PIL.Image.open(io.BytesIO(img_data.getvalue())) # Set the global_reload_image to a copy of the image data + except PIL.Image.UnidentifiedImageError: + logger.warning("Could not load input image from stream, loading blank") full_path = os.path.join(os.getcwd(), os.path.normpath(os.path.join(global_basedir, "tha3", "images", "inital.png"))) - MainFrame.load_image(None, full_path) - global_timer_paused = True - return 'OK' + global_instance.load_image(full_path) + finally: + animation_running = True + return "OK" -def convert_linear_to_srgb(image: torch.Tensor) -> torch.Tensor: - rgb_image = torch_linear_to_srgb(image[0:3, :, :]) - return torch.cat([rgb_image, image[3:4, :, :]], dim=0) +def launch(device: str, model: str) -> Union[None, NoReturn]: + """Launch the talking head plugin (live mode). + + If the plugin fails to load, the process exits. -def launch_gui(device: str, model: str, char: typing.Optional[str] = None, standalone: bool = False): - """ device: "cpu" or "cuda" model: one of the folder names inside "talkinghead/tha3/models/" - char: name of png file inside "talkinghead/tha3/images/"; if not given, defaults to "inital.png". """ - global global_basedir - global initAMI + global global_instance + global initAMI # TODO: initAREYOU? See if we still need this - the idea seems to be to stop animation until the first image is loaded. initAMI = True - # TODO: We could use this to parse the arguments that were provided to `server.py`, but we don't currently use the parser output. 
- parser = argparse.ArgumentParser(description='uWu Waifu') - # Add other parser arguments here - args, unknown = parser.parse_known_args() - - if char is None: - char = "inital.png" - try: poser = load_poser(model, device, modelsdir=os.path.join(global_basedir, "tha3", "models")) - pose_converter = create_ifacialmocap_pose_converter() # creates a list of 45 + global_instance = TalkingheadLive(poser, device) - app = wx.App() - main_frame = MainFrame(poser, pose_converter, device) - main_frame.SetSize((750, 600)) + # Load initial blank character image + full_path = os.path.join(os.getcwd(), os.path.normpath(os.path.join(global_basedir, "tha3", "images", "inital.png"))) + global_instance.load_image(full_path) - # Load character image - full_path = os.path.join(os.getcwd(), os.path.normpath(os.path.join(global_basedir, "tha3", "images", char))) - main_frame.load_image(None, full_path) + global_instance.start() - if standalone: - main_frame.Show(True) - main_frame.capture_timer.Start(100) - main_frame.animation_timer.Start(100) - wx.DisableAsserts() # prevent popup about debug alert closed from other threads - app.MainLoop() - - except RuntimeError as e: - print(e) + except RuntimeError as exc: + logger.error(exc) sys.exit() -class FpsStatistics: - def __init__(self): - self.count = 100 - self.fps = [] +# -------------------------------------------------------------------------------- +# Internal stuff - def add_fps(self, fps): - self.fps.append(fps) - while len(self.fps) > self.count: - del self.fps[0] +def convert_linear_to_srgb(image: torch.Tensor) -> torch.Tensor: + """RGBA (linear) -> RGBA (SRGB), preserving the alpha channel.""" + rgb_image = torch_linear_to_srgb(image[0:3, :, :]) + return torch.cat([rgb_image, image[3:4, :, :]], dim=0) - def get_average_fps(self): - if len(self.fps) == 0: - return 0.0 - else: - return sum(self.fps) / len(self.fps) +class TalkingheadLive: + """uWu Waifu""" -class MainFrame(wx.Frame): - def __init__(self, poser: Poser, pose_converter: IFacialMocapPoseConverter, device: torch.device): - super().__init__(None, wx.ID_ANY, "uWu Waifu") - self.pose_converter = pose_converter + def __init__(self, poser: Poser, device: torch.device): self.poser = poser self.device = device - self.last_blink_timestamp = 0 - self.is_blinked = False + self.last_blink_timestamp = 0 # TODO: Great idea! We should actually use this. + self.is_blinked = False # TODO: Maybe we might need this, too, now that the FPS is acceptable enough that we may need to blink over several frames. self.targets = {"head_y_index": 0} self.progress = {"head_y_index": 0} self.direction = {"head_y_index": 1} - self.originals = {"head_y_index": 0} + self.originals = {"head_y_index": 0} # TODO: what was this for; probably for recording the values from the current emotion, before sway animation? 
self.forward = {"head_y_index": True} # Direction of interpolation self.start_values = {"head_y_index": 0} self.fps_statistics = FpsStatistics() - self.image_load_counter = 0 - self.custom_background_image = None # Add this line - self.sliders = {} - self.ifacialmocap_pose = create_default_ifacialmocap_pose() - self.source_image_bitmap = wx.Bitmap(self.poser.get_image_size(), self.poser.get_image_size()) - self.result_image_bitmap = wx.Bitmap(self.poser.get_image_size(), self.poser.get_image_size()) - self.wx_source_image = None self.torch_source_image = None self.last_update_time = None - - self.create_ui() - - self.create_timers() - self.Bind(wx.EVT_CLOSE, self.on_close) - - self.update_source_image_bitmap() - self.update_result_image_bitmap() - - def create_timers(self): - self.capture_timer = wx.Timer(self, wx.ID_ANY) - self.Bind(wx.EVT_TIMER, self.update_capture_panel, id=self.capture_timer.GetId()) - self.animation_timer = wx.Timer(self, wx.ID_ANY) - self.Bind(wx.EVT_TIMER, self.update_result_image_bitmap, id=self.animation_timer.GetId()) - - def on_close(self, event: wx.Event): - # Stop the timers - self.animation_timer.Stop() - self.capture_timer.Stop() - - # Destroy the windows - self.Destroy() - event.Skip() - sys.exit(0) - - def random_generate_value(self, min, max, origin_value): - random_value = random.choice(list(range(min, max, 1))) / 2500.0 - randomized = origin_value + random_value - if randomized > 1.0: - randomized = 1.0 - if randomized < 0: - randomized = 0 - return randomized - - def animationTalking(self): - global is_talking - current_pose = self.ifacialmocap_pose - - # NOTE: randomize mouth - for blendshape_name in mocap_constants.BLENDSHAPE_NAMES: - if "jawOpen" in blendshape_name: - if is_talking or is_talking_override: - current_pose[blendshape_name] = self.random_generate_value(-5000, 5000, abs(1 - current_pose[blendshape_name])) - else: - current_pose[blendshape_name] = 0 - - return current_pose - - def animationHeadMove(self): - current_pose = self.ifacialmocap_pose - - for key in [mocap_constants.HEAD_BONE_Y]: # can add more to this list if needed - current_pose[key] = self.random_generate_value(-20, 20, current_pose[key]) - - return current_pose - - def animationBlink(self): - current_pose = self.ifacialmocap_pose - - if random.random() <= 0.03: - current_pose["eyeBlinkRight"] = 1 - current_pose["eyeBlinkLeft"] = 1 - else: - current_pose["eyeBlinkRight"] = 0 - current_pose["eyeBlinkLeft"] = 0 - - return current_pose - - def addNamestoConvert(pose): - # TODO: What are the unknown keys? 
- index_to_name = { - 0: 'eyebrow_troubled_left_index', - 1: 'eyebrow_troubled_right_index', - 2: 'eyebrow_angry_left_index', - 3: 'eyebrow_angry_right_index', - 4: 'unknown1', # COMBACK TO UNK - 5: 'unknown2', # COMBACK TO UNK - 6: 'eyebrow_raised_left_index', - 7: 'eyebrow_raised_right_index', - 8: 'eyebrow_happy_left_index', - 9: 'eyebrow_happy_right_index', - 10: 'unknown3', # COMBACK TO UNK - 11: 'unknown4', # COMBACK TO UNK - 12: 'wink_left_index', - 13: 'wink_right_index', - 14: 'eye_happy_wink_left_index', - 15: 'eye_happy_wink_right_index', - 16: 'eye_surprised_left_index', - 17: 'eye_surprised_right_index', - 18: 'unknown5', # COMBACK TO UNK - 19: 'unknown6', # COMBACK TO UNK - 20: 'unknown7', # COMBACK TO UNK - 21: 'unknown8', # COMBACK TO UNK - 22: 'eye_raised_lower_eyelid_left_index', - 23: 'eye_raised_lower_eyelid_right_index', - 24: 'iris_small_left_index', - 25: 'iris_small_right_index', - 26: 'mouth_aaa_index', - 27: 'mouth_iii_index', - 28: 'mouth_ooo_index', - 29: 'unknown9a', # COMBACK TO UNK - 30: 'mouth_ooo_index2', - 31: 'unknown9', # COMBACK TO UNK - 32: 'unknown10', # COMBACK TO UNK - 33: 'unknown11', # COMBACK TO UNK - 34: 'mouth_raised_corner_left_index', - 35: 'mouth_raised_corner_right_index', - 36: 'unknown12', # COMBACK TO UNK - 37: 'iris_rotation_x_index', - 38: 'iris_rotation_y_index', - 39: 'head_x_index', - 40: 'head_y_index', - 41: 'neck_z_index', - 42: 'body_y_index', - 43: 'body_z_index', - 44: 'breathing_index' - } - - output = [] - - for index, value in enumerate(pose): - name = index_to_name.get(index, "Unknown") - output.append(f"{name}: {value}") - - return output - - def get_emotion_values(self, emotion): # Place to define emotion presets - global global_basedir - - # print(emotion) - file_path = os.path.join(global_basedir, "emotions", emotion + ".json") - # print("trying: ", file_path) - - if not os.path.exists(file_path): - print("using backup for: ", file_path) - file_path = os.path.join(global_basedir, "emotions", "_defaults.json") - - with open(file_path, 'r') as json_file: - emotions = json.load(json_file) - - targetpose = emotions.get(emotion, {}) - targetpose_values = targetpose - - # targetpose_values = list(targetpose.values()) - # print("targetpose: ", targetpose, "for ", emotion) - return targetpose_values - - def animateToEmotion(self, current_pose_list, target_pose_dict): - transitionPose = [] - - # Loop through the current_pose_list - for item in current_pose_list: - index, value = item.split(': ') - - # Always take the value from target_pose_dict if the key exists - if index in target_pose_dict and index != "breathing_index": - transitionPose.append(f"{index}: {target_pose_dict[index]}") - else: - transitionPose.append(item) - - # Ensure that the number of elements in transitionPose matches with current_pose_list - assert len(transitionPose) == len(current_pose_list) - - return transitionPose - - def animationMain(self): - self.ifacialmocap_pose = self.animationBlink() - self.ifacialmocap_pose = self.animationHeadMove() - self.ifacialmocap_pose = self.animationTalking() - return self.ifacialmocap_pose - - def filter_by_index(self, current_pose_list, index): - # Create an empty list to store the filtered dictionaries - filtered_list = [] - - # Iterate through each dictionary in the current_pose_list - for pose_dict in current_pose_list: - # Check if the 'breathing_index' key exists in the dictionary - if index in pose_dict: - # If the key exists, append the dictionary to the filtered list - filtered_list.append(pose_dict) - - 
return filtered_list - - def on_erase_background(self, event: wx.Event): - pass - - def create_animation_panel(self, parent): - self.animation_panel = wx.Panel(parent, style=wx.RAISED_BORDER) - self.animation_panel_sizer = wx.BoxSizer(wx.HORIZONTAL) - self.animation_panel.SetSizer(self.animation_panel_sizer) - self.animation_panel.SetAutoLayout(1) - - image_size = self.poser.get_image_size() - - # Left Column (Image) - self.animation_left_panel = wx.Panel(self.animation_panel, style=wx.SIMPLE_BORDER) - self.animation_left_panel_sizer = wx.BoxSizer(wx.VERTICAL) - self.animation_left_panel.SetSizer(self.animation_left_panel_sizer) - self.animation_left_panel.SetAutoLayout(1) - self.animation_panel_sizer.Add(self.animation_left_panel, 1, wx.EXPAND) - - self.result_image_panel = wx.Panel(self.animation_left_panel, size=(image_size, image_size), - style=wx.SIMPLE_BORDER) - self.result_image_panel.Bind(wx.EVT_PAINT, self.paint_result_image_panel) - self.result_image_panel.Bind(wx.EVT_ERASE_BACKGROUND, self.on_erase_background) - self.result_image_panel.Bind(wx.EVT_LEFT_DOWN, self.load_image) - self.animation_left_panel_sizer.Add(self.result_image_panel, 1, wx.EXPAND) - - separator = wx.StaticLine(self.animation_left_panel, -1, size=(256, 1)) - self.animation_left_panel_sizer.Add(separator, 0, wx.EXPAND) - - self.fps_text = wx.StaticText(self.animation_left_panel, label="") - self.animation_left_panel_sizer.Add(self.fps_text, wx.SizerFlags().Border()) - - self.animation_left_panel_sizer.Fit(self.animation_left_panel) - - # Right Column (Sliders) - self.animation_right_panel = wx.Panel(self.animation_panel, style=wx.SIMPLE_BORDER) - self.animation_right_panel_sizer = wx.BoxSizer(wx.VERTICAL) - self.animation_right_panel.SetSizer(self.animation_right_panel_sizer) - self.animation_right_panel.SetAutoLayout(1) - self.animation_panel_sizer.Add(self.animation_right_panel, 1, wx.EXPAND) - - separator = wx.StaticLine(self.animation_right_panel, -1, size=(256, 5)) - self.animation_right_panel_sizer.Add(separator, 0, wx.EXPAND) - - background_text = wx.StaticText(self.animation_right_panel, label="--- Background ---", style=wx.ALIGN_CENTER) - self.animation_right_panel_sizer.Add(background_text, 0, wx.EXPAND) - - self.output_background_choice = wx.Choice( - self.animation_right_panel, - choices=[ - "TRANSPARENT", - "GREEN", - "BLUE", - "BLACK", - "WHITE", - "LOADED", - "CUSTOM" - ] - ) - self.output_background_choice.SetSelection(0) - self.animation_right_panel_sizer.Add(self.output_background_choice, 0, wx.EXPAND) - - # These are applied to `ifacialmocap_pose`, so we can only use names that are defined there (see `update_ifacialmocap_pose`). 
- blendshape_groups = { - 'Eyes': ['eyeLookOutLeft', 'eyeLookOutRight', 'eyeLookDownLeft', 'eyeLookUpLeft', 'eyeWideLeft', 'eyeWideRight'], - 'Mouth': ['mouthSmileLeft', 'mouthFrownLeft'], - 'Cheek': ['cheekSquintLeft', 'cheekSquintRight', 'cheekPuff'], - 'Brow': ['browDownLeft', 'browOuterUpLeft', 'browDownRight', 'browOuterUpRight', 'browInnerUp'], - # 'Eyelash': [], - 'Nose': ['noseSneerLeft', 'noseSneerRight'], - 'Misc': ['tongueOut'] - } - - for group_name, variables in blendshape_groups.items(): - collapsible_pane = wx.CollapsiblePane(self.animation_right_panel, label=group_name, style=wx.CP_DEFAULT_STYLE | wx.CP_NO_TLW_RESIZE) - collapsible_pane.Bind(wx.EVT_COLLAPSIBLEPANE_CHANGED, self.on_pane_changed) - self.animation_right_panel_sizer.Add(collapsible_pane, 0, wx.EXPAND) - pane_sizer = wx.BoxSizer(wx.VERTICAL) - collapsible_pane.GetPane().SetSizer(pane_sizer) - - for variable in variables: - variable_label = wx.StaticText(collapsible_pane.GetPane(), label=variable) - - # Multiply min and max values by 100 for the slider - slider = wx.Slider( - collapsible_pane.GetPane(), - value=0, - minValue=0, - maxValue=100, - size=(150, -1), # Set the width to 150 and height to default - style=wx.SL_HORIZONTAL | wx.SL_LABELS - ) - - slider.SetName(variable) - slider.Bind(wx.EVT_SLIDER, self.on_slider_change) - self.sliders[slider.GetId()] = slider - - pane_sizer.Add(variable_label, 0, wx.ALIGN_CENTER | wx.ALL, 5) - pane_sizer.Add(slider, 0, wx.EXPAND) - - self.animation_right_panel_sizer.Fit(self.animation_right_panel) - self.animation_panel_sizer.Fit(self.animation_panel) - - def on_pane_changed(self, event): - # Update the layout when a collapsible pane is expanded or collapsed - self.animation_right_panel.Layout() - - def on_slider_change(self, event): - slider = event.GetEventObject() - value = slider.GetValue() / 100.0 # Divide by 100 to get the actual float value - # print(value) - slider_name = slider.GetName() - self.ifacialmocap_pose[slider_name] = value - - def create_ui(self): - # Make the UI Elements - self.main_sizer = wx.BoxSizer(wx.VERTICAL) - self.SetSizer(self.main_sizer) - self.SetAutoLayout(1) - - self.capture_pose_lock = threading.Lock() - - # Main panel with JPS - self.create_animation_panel(self) - self.main_sizer.Add(self.animation_panel, wx.SizerFlags(0).Expand().Border(wx.ALL, 5)) - - def update_capture_panel(self, event: wx.Event): - data = self.ifacialmocap_pose - for rotation_name in mocap_constants.ROTATION_NAMES: - value = data[rotation_name] # TODO/FIXME: updating unused variable; what was this supposed to do? 
- - @staticmethod - def convert_to_100(x): - return int(max(0.0, min(1.0, x)) * 100) - - def paint_source_image_panel(self, event: wx.Event): - wx.BufferedPaintDC(self.source_image_panel, self.source_image_bitmap) - - def update_source_image_bitmap(self): - dc = wx.MemoryDC() - dc.SelectObject(self.source_image_bitmap) - if self.wx_source_image is None: - self.draw_nothing_yet_string(dc) - else: - dc.Clear() - dc.DrawBitmap(self.wx_source_image, 0, 0, True) - del dc - - def draw_nothing_yet_string(self, dc): - dc.Clear() - font = wx.Font(wx.FontInfo(14).Family(wx.FONTFAMILY_SWISS)) - dc.SetFont(font) - w, h = dc.GetTextExtent("Nothing yet!") - dc.DrawText("Nothing yet!", (self.poser.get_image_size() - w) // 2, (self.poser.get_image_size() - h) // 2) - - def paint_result_image_panel(self, event: wx.Event): - wx.BufferedPaintDC(self.result_image_panel, self.result_image_bitmap) - - def combine_pose_with_names(combine_pose): - pose_names = [ - 'eyeLookInLeft', 'eyeLookOutLeft', 'eyeLookDownLeft', 'eyeLookUpLeft', - 'eyeBlinkLeft', 'eyeSquintLeft', 'eyeWideLeft', 'eyeLookInRight', - 'eyeLookOutRight', 'eyeLookDownRight', 'eyeLookUpRight', 'eyeBlinkRight', - 'eyeSquintRight', 'eyeWideRight', 'browDownLeft', 'browOuterUpLeft', - 'browDownRight', 'browOuterUpRight', 'browInnerUp', 'noseSneerLeft', - 'noseSneerRight', 'cheekSquintLeft', 'cheekSquintRight', 'cheekPuff', - 'mouthLeft', 'mouthDimpleLeft', 'mouthFrownLeft', 'mouthLowerDownLeft', - 'mouthPressLeft', 'mouthSmileLeft', 'mouthStretchLeft', 'mouthUpperUpLeft', - 'mouthRight', 'mouthDimpleRight', 'mouthFrownRight', 'mouthLowerDownRight', - 'mouthPressRight', 'mouthSmileRight', 'mouthStretchRight', 'mouthUpperUpRight', - 'mouthClose', 'mouthFunnel', 'mouthPucker', 'mouthRollLower', 'mouthRollUpper', - 'mouthShrugLower', 'mouthShrugUpper', 'jawLeft', 'jawRight', 'jawForward', - 'jawOpen', 'tongueOut', 'headBoneX', 'headBoneY', 'headBoneZ', 'headBoneQuat', - 'leftEyeBoneX', 'leftEyeBoneY', 'leftEyeBoneZ', 'leftEyeBoneQuat', - 'rightEyeBoneX', 'rightEyeBoneY', 'rightEyeBoneZ', 'rightEyeBoneQuat' - ] - pose_dict = dict(zip(pose_names, combine_pose)) - return pose_dict - - def determine_data_type(self, data): # TODO: is this needed, nothing in the project seems to call it; and why not just use `isinstance` directly? 
- if isinstance(data, list): - print("It's a list.") - elif isinstance(data, dict): - print("It's a dictionary.") - elif isinstance(data, str): - print("It's a string.") - else: - print("Unknown data type.") - - def count_elements(self, input_data): - if isinstance(input_data, list) or isinstance(input_data, dict): - return len(input_data) - else: - raise TypeError("Input must be a list or dictionary.") - - def convert_list_to_dict(self, list_str): - # Evaluate the string to get the actual list - list_data = ast.literal_eval(list_str) - - # Initialize an empty dictionary - result_dict = {} - - # Convert the list to a dictionary - for item in list_data: - key, value_str = item.split(': ') - value = float(value_str) - result_dict[key] = value - - return result_dict - - def dict_to_tensor(self, d): - if isinstance(d, dict): - return torch.tensor(list(d.values())) - elif isinstance(d, list): - return torch.tensor(d) - else: - raise ValueError("Unsupported data type passed to dict_to_tensor.") - - def update_ifacialmocap_pose(self, ifacialmocap_pose, emotion_pose): - # Update Values - The following values are in emotion_pose but not defined in ifacialmocap_pose - # eye_happy_wink_left_index, eye_happy_wink_right_index - # eye_surprised_left_index, eye_surprised_right_index - # eye_relaxed_left_index, eye_relaxed_right_index - # eye_unimpressed - # eye_raised_lower_eyelid_left_index, eye_raised_lower_eyelid_right_index - # mouth_uuu_index - # mouth_eee_index - # mouth_ooo_index - # mouth_delta - # mouth_smirk - # body_y_index - # body_z_index - # breathing_index - - ifacialmocap_pose['browDownLeft'] = emotion_pose['eyebrow_troubled_left_index'] - ifacialmocap_pose['browDownRight'] = emotion_pose['eyebrow_troubled_right_index'] - ifacialmocap_pose['browOuterUpLeft'] = emotion_pose['eyebrow_angry_left_index'] - ifacialmocap_pose['browOuterUpRight'] = emotion_pose['eyebrow_angry_right_index'] - ifacialmocap_pose['browInnerUp'] = emotion_pose['eyebrow_happy_left_index'] - ifacialmocap_pose['browInnerUp'] += emotion_pose['eyebrow_happy_right_index'] - ifacialmocap_pose['browDownLeft'] = emotion_pose['eyebrow_raised_left_index'] - ifacialmocap_pose['browDownRight'] = emotion_pose['eyebrow_raised_right_index'] - ifacialmocap_pose['browDownLeft'] += emotion_pose['eyebrow_lowered_left_index'] - ifacialmocap_pose['browDownRight'] += emotion_pose['eyebrow_lowered_right_index'] - ifacialmocap_pose['browDownLeft'] += emotion_pose['eyebrow_serious_left_index'] - ifacialmocap_pose['browDownRight'] += emotion_pose['eyebrow_serious_right_index'] - - # Update eye values - ifacialmocap_pose['eyeWideLeft'] = emotion_pose['eye_surprised_left_index'] - ifacialmocap_pose['eyeWideRight'] = emotion_pose['eye_surprised_right_index'] - - # Update eye blink (though we will overwrite it later) - ifacialmocap_pose['eyeBlinkLeft'] = emotion_pose['eye_wink_left_index'] - ifacialmocap_pose['eyeBlinkRight'] = emotion_pose['eye_wink_right_index'] - - # Update iris rotation values - ifacialmocap_pose['eyeLookInLeft'] = -emotion_pose['iris_rotation_y_index'] - ifacialmocap_pose['eyeLookOutLeft'] = emotion_pose['iris_rotation_y_index'] - ifacialmocap_pose['eyeLookInRight'] = emotion_pose['iris_rotation_y_index'] - ifacialmocap_pose['eyeLookOutRight'] = -emotion_pose['iris_rotation_y_index'] - ifacialmocap_pose['eyeLookUpLeft'] = emotion_pose['iris_rotation_x_index'] - ifacialmocap_pose['eyeLookDownLeft'] = -emotion_pose['iris_rotation_x_index'] - ifacialmocap_pose['eyeLookUpRight'] = emotion_pose['iris_rotation_x_index'] - 
ifacialmocap_pose['eyeLookDownRight'] = -emotion_pose['iris_rotation_x_index'] - - # Update iris size values - ifacialmocap_pose['irisWideLeft'] = emotion_pose['iris_small_left_index'] - ifacialmocap_pose['irisWideRight'] = emotion_pose['iris_small_right_index'] - - # Update head rotation values - ifacialmocap_pose['headBoneX'] = -emotion_pose['head_x_index'] * 15.0 - ifacialmocap_pose['headBoneY'] = -emotion_pose['head_y_index'] * 10.0 - ifacialmocap_pose['headBoneZ'] = emotion_pose['neck_z_index'] * 15.0 - - # Update mouth values - ifacialmocap_pose['mouthSmileLeft'] = emotion_pose['mouth_aaa_index'] - ifacialmocap_pose['mouthSmileRight'] = emotion_pose['mouth_aaa_index'] - ifacialmocap_pose['mouthFrownLeft'] = emotion_pose['mouth_lowered_corner_left_index'] - ifacialmocap_pose['mouthFrownRight'] = emotion_pose['mouth_lowered_corner_right_index'] - ifacialmocap_pose['mouthPressLeft'] = emotion_pose['mouth_raised_corner_left_index'] - ifacialmocap_pose['mouthPressRight'] = emotion_pose['mouth_raised_corner_right_index'] - - return ifacialmocap_pose - - def update_blinking_pose(self, tranisitiondPose): - PARTS = ['wink_left_index', 'wink_right_index'] - updated_list = [] - - should_blink = random.random() <= 0.03 # Determine if there should be a blink - - for item in tranisitiondPose: - key, value = item.split(': ') - if key in PARTS: - # If there should be a blink, set value to 1; otherwise, use the provided value - new_value = 1 if should_blink else float(value) - updated_list.append(f"{key}: {new_value}") - else: - updated_list.append(item) - - return updated_list - - def update_talking_pose(self, tranisitiondPose): - global is_talking, is_talking_override - - MOUTHPARTS = ['mouth_aaa_index'] - - updated_list = [] - - for item in tranisitiondPose: - key, value = item.split(': ') - - if key in MOUTHPARTS and is_talking_override: - new_value = self.random_generate_value(-5000, 5000, abs(1 - float(value))) - updated_list.append(f"{key}: {new_value}") - else: - updated_list.append(item) - - return updated_list - - def update_sway_pose_good(self, tranisitiondPose): # TODO: good? why is there a bad one, too? keep only one! 
- MOVEPARTS = ['head_y_index'] - updated_list = [] - - print(self.start_values, self.targets, self.progress, self.direction) - - for item in tranisitiondPose: - key, value = item.split(': ') - - if key in MOVEPARTS: - current_value = float(value) - - # If progress reaches 1 or 0 - if self.progress[key] >= 1 or self.progress[key] <= 0: - # Reverse direction - self.direction[key] *= -1 - - # If direction is now forward, set a new target and store starting value - if self.direction[key] == 1: - self.start_values[key] = current_value - self.targets[key] = current_value + random.uniform(-1, 1) - self.progress[key] = 0 # Reset progress when setting a new target - - # Linearly interpolate between start and target values - new_value = self.start_values[key] + self.progress[key] * (self.targets[key] - self.start_values[key]) - new_value = min(max(new_value, -1), 1) # clip to bounds (just in case) - - # Update progress based on direction - self.progress[key] += 0.02 * self.direction[key] - - updated_list.append(f"{key}: {new_value}") - else: - updated_list.append(item) - - return updated_list - - def update_sway_pose(self, tranisitiondPose): - MOVEPARTS = ['head_y_index'] - updated_list = [] - - # print( self.start_values, self.targets, self.progress, self.direction ) - - for item in tranisitiondPose: - key, value = item.split(': ') - - if key in MOVEPARTS: - current_value = float(value) - - # Linearly interpolate between start and target values - new_value = self.start_values[key] + self.progress[key] * (self.targets[key] - self.start_values[key]) - new_value = min(max(new_value, -1), 1) # clip to bounds (just in case) - - # Check if we've reached the target or start value - is_close_to_target = abs(new_value - self.targets[key]) < 0.04 - is_close_to_start = abs(new_value - self.start_values[key]) < 0.04 - - if (self.direction[key] == 1 and is_close_to_target) or (self.direction[key] == -1 and is_close_to_start): - # Reverse direction - self.direction[key] *= -1 - - # If direction is now forward, set a new target and store starting value - if self.direction[key] == 1: - self.start_values[key] = new_value - self.targets[key] = current_value + random.uniform(-0.6, 0.6) - self.progress[key] = 0 # Reset progress when setting a new target - - # Update progress based on direction - self.progress[key] += 0.04 * self.direction[key] - - updated_list.append(f"{key}: {new_value}") - else: - updated_list.append(item) - - return updated_list - - def update_transition_pose(self, last_transition_pose_s, transition_pose_s): - inMotion = True - - # Create dictionaries from the lists for easier comparison - last_transition_dict = {} - for item in last_transition_pose_s: - key = item.split(': ')[0] - value = float(item.split(': ')[1]) - if key == 'unknown': - key += f"_{list(last_transition_dict.values()).count(value)}" - last_transition_dict[key] = value - - transition_dict = {} - for item in transition_pose_s: - key = item.split(': ')[0] - value = float(item.split(': ')[1]) - if key == 'unknown': - key += f"_{list(transition_dict.values()).count(value)}" - transition_dict[key] = value - - updated_last_transition_pose = [] - - for key, last_value in last_transition_dict.items(): - # If the key exists in transition_dict, increment its value by 0.4 and clip it to the target - if key in transition_dict: - - # If the key is 'wink_left_index' or 'wink_right_index', set the value directly dont animate blinks - if key in ['wink_left_index', 'wink_right_index']: # BLINK FIX - last_value = transition_dict[key] - - # For all 
other keys, increment its value by 0.1 of the delta and clip it to the target - else: - delta = transition_dict[key] - last_value - last_value += delta * 0.1 - - # Reconstruct the string and append it to the updated list - updated_last_transition_pose.append(f"{key}: {last_value}") - - # If any value is less than the target, set inMotion to True - # TODO/FIXME: inMotion is not actually used; what was this supposed to do? - if any(last_transition_dict[k] < transition_dict[k] for k in last_transition_dict if k in transition_dict): - inMotion = True - else: - inMotion = False - - return updated_last_transition_pose - - def update_result_image_bitmap(self, event: Optional[wx.Event] = None): - global global_timer_paused + self.last_report_time = None + + self.emotions, self.emotion_names = load_emotion_presets(os.path.join("talkinghead", "emotions")) + + def start(self) -> None: + """Start the animation thread.""" + self._terminated = False + def manage_animation_update(): + while not self._terminated: + # TODO: add a configurable FPS limiter (take a parameter in `__init__`; populate it from cli args in `server.py`) + # - should sleep for `max(eps, frame_target_ms - render_average_ms)`, where `eps = 0.01`, so that the next frame is ready in time + # (get render_average_ms from FPS counter; sanity check for nonsense value) + self.update_result_image_bitmap() + time.sleep(0.01) + self.animation_thread = threading.Thread(target=manage_animation_update, daemon=True) + self.animation_thread.start() + atexit.register(self.exit) + + def exit(self) -> None: + """Terminate the animation thread. + + Called automatically when the process exits. + """ + self._terminated = True + + def apply_emotion_to_pose(self, emotion_posedict: Dict[str, float], pose: List[float]) -> List[float]: + """Copy all morphs except breathing from `emotion_posedict` to `pose`. + + If a morph does not exist in `emotion_posedict`, its value is copied from `pose`. + + Return the modified pose. + """ + new_pose = list(pose) # copy + for idx, key in enumerate(posedict_keys): + if key in emotion_posedict and key != "breathing_index": + new_pose[idx] = emotion_posedict[key] + return new_pose + + def animate_blinking(self, pose: List[float]) -> List[float]: + # TODO: add smoothly animated blink? + + # If there should be a blink, set the wink morphs to 1; otherwise, use the provided value. 
+ should_blink = (random.random() <= 0.03) + if not should_blink: + return pose + + new_pose = list(pose) # copy + for morph_name in ["eye_wink_left_index", "eye_wink_right_index"]: + idx = posedict_key_to_index[morph_name] + new_pose[idx] = 1.0 + return new_pose + + def animate_talking(self, pose: List[float]) -> List[float]: + if not is_talking: + return pose + + new_pose = list(pose) # copy + idx = posedict_key_to_index["mouth_aaa_index"] + x = pose[idx] + x = abs(1.0 - x) + random.uniform(-2.0, 2.0) + x = max(0.0, min(x, 1.0)) # clamp (not the manga studio) + new_pose[idx] = x + return new_pose + + def animate_sway(self, pose: List[float]) -> List[float]: + # TODO: add sway for other axes and body + + new_pose = list(pose) # copy + MOVEPARTS = ["head_y_index"] + for key in MOVEPARTS: + idx = posedict_key_to_index[key] + current_value = pose[idx] + + # Linearly interpolate between start and target values + new_value = self.start_values[key] + self.progress[key] * (self.targets[key] - self.start_values[key]) + new_value = min(max(new_value, -1), 1) # clip to bounds (just in case) + + # Check if we've reached the target or start value + is_close_to_target = abs(new_value - self.targets[key]) < 0.04 + is_close_to_start = abs(new_value - self.start_values[key]) < 0.04 + + if (self.direction[key] == 1 and is_close_to_target) or (self.direction[key] == -1 and is_close_to_start): + # Reverse direction + self.direction[key] *= -1 + + # If direction is now forward, set a new target and store starting value + if self.direction[key] == 1: + self.start_values[key] = new_value + self.targets[key] = current_value + random.uniform(-0.6, 0.6) + self.progress[key] = 0 # Reset progress when setting a new target + + # Update progress based on direction + self.progress[key] += 0.04 * self.direction[key] + + new_pose[idx] = new_value + return new_pose + + def interpolate_pose(self, pose: List[float], target_pose: List[float], step=0.1) -> List[float]: + # TODO: ignore sway? + # TODO: ignore breathing? + new_pose = list(pose) # copy + for idx, key in enumerate(posedict_keys): + # # We animate blinking *after* interpolating the pose, so when blinking, the eyes close instantly. + # # This part makes the blink also end instantly. + # if key in ["eye_wink_left_index", "eye_wink_right_index"]: + # new_pose[idx] = new_pose[idx] + + # Note this leads to an exponentially saturating behavior (1 - exp(-x)), because the delta is from the current pose to the final pose. + delta = target_pose[idx] - pose[idx] + new_pose[idx] = pose[idx] + step * delta + return new_pose + + def update_result_image_bitmap(self) -> None: + """Render an animation frame.""" + + global animation_running global initAMI global global_result_image - global global_reload - global emotion global fps global current_pose - global is_talking - global is_talking_override - global lasttranisitiondPose - if global_timer_paused: + if not animation_running: return try: - if global_reload is not None: - MainFrame.load_image(self, event=None, file_path=None) # call load_image function here - return - - # # OLD METHOD - # ifacialmocap_pose = self.animationMain() # GET ANIMATION CHANGES - # current_posesaved = self.pose_converter.convert(ifacialmocap_pose) - # combined_posesaved = current_posesaved - - # NEW METHOD - # CREATES THE DEFAULT POSE AND STORES OBJ IN STRING - # ifacialmocap_pose = self.animationMain() # DISABLE FOR TESTING!!!!!!!!!!!!!!!!!!!!!!!! 
- ifacialmocap_pose = self.ifacialmocap_pose - # print("ifacialmocap_pose", ifacialmocap_pose) - - # GET EMOTION SETTING - emotion_pose = self.get_emotion_values(emotion) - # print("emotion_pose ", emotion_pose) - - # MERGE EMOTION SETTING WITH CURRENT OUTPUT - # NOTE: This is a mutating method that overwrites the original `ifacialmocap_pose`. - updated_pose = self.update_ifacialmocap_pose(ifacialmocap_pose, emotion_pose) - # print("updated_pose ", updated_pose) - - # CONVERT RESULT TO FORMAT NN CAN USE - current_pose = self.pose_converter.convert(updated_pose) - # print("current_pose ", current_pose) - - # SEND THROUGH CONVERT - current_pose = self.pose_converter.convert(ifacialmocap_pose) - # print("current_pose2 ", current_pose) - - # ADD LABELS/NAMES TO THE POSE - names_current_pose = MainFrame.addNamestoConvert(current_pose) - # print("current pose :", names_current_pose) - - # GET THE EMOTION VALUES again for some reason - emotion_pose2 = self.get_emotion_values(emotion) - # print("target pose :", emotion_pose2) - - # APPLY VALUES TO THE POSE AGAIN?? This needs to overwrite the values - tranisitiondPose = self.animateToEmotion(names_current_pose, emotion_pose2) - # print("combine pose :", tranisitiondPose) - - # smooth animate - # print("LAST VALUES: ", lasttranisitiondPose) - # print("TARGER VALUES: ", tranisitiondPose) - - if lasttranisitiondPose != "NotInit": - tranisitiondPose = self.update_transition_pose(lasttranisitiondPose, tranisitiondPose) - # print("smoothed: ", tranisitiondPose) - - # Animate blinking - tranisitiondPose = self.update_blinking_pose(tranisitiondPose) - - # Animate Head Sway - tranisitiondPose = self.update_sway_pose(tranisitiondPose) - - # Animate Talking - tranisitiondPose = self.update_talking_pose(tranisitiondPose) - - # reformat the data correctly - parsed_data = [] - for item in tranisitiondPose: - key, value_str = item.split(': ') - value = float(value_str) - parsed_data.append((key, value)) - tranisitiondPosenew = [value for _, value in parsed_data] - - # not sure what this is for TBH - ifacialmocap_pose = tranisitiondPosenew - + if global_reload_image is not None: + self.load_image() + return # TODO: do we really need to return here, we could just proceed? 
if self.torch_source_image is None: - dc = wx.MemoryDC() - dc.SelectObject(self.result_image_bitmap) - self.draw_nothing_yet_string(dc) - del dc return + if current_pose is None: # initialize character pose at plugin startup + current_pose = posedict_to_pose(self.emotions[current_emotion]) - # pose = torch.tensor(tranisitiondPosenew, device=self.device, dtype=self.poser.get_dtype()) - pose = self.dict_to_tensor(tranisitiondPosenew).to(device=self.device, dtype=self.poser.get_dtype()) + emotion_posedict = self.emotions[current_emotion] + target_pose = self.apply_emotion_to_pose(emotion_posedict, current_pose) + + current_pose = self.interpolate_pose(current_pose, target_pose) + current_pose = self.animate_blinking(current_pose) + current_pose = self.animate_sway(current_pose) + current_pose = self.animate_talking(current_pose) + # TODO: animate breathing + + pose = torch.tensor(current_pose, device=self.device, dtype=self.poser.get_dtype()) with torch.no_grad(): output_image = self.poser.pose(self.torch_source_image, pose)[0].float() @@ -937,24 +351,10 @@ class MainFrame(wx.Frame): output_image = (255.0 * torch.transpose(output_image.reshape(c, h * w), 0, 1)).reshape(h, w, c).byte() numpy_image = output_image.detach().cpu().numpy() - wx_image = wx.ImageFromBuffer(numpy_image.shape[0], - numpy_image.shape[1], - numpy_image[:, :, 0:3].tobytes(), - numpy_image[:, :, 3].tobytes()) - wx_bitmap = wx_image.ConvertToBitmap() - - dc = wx.MemoryDC() - dc.SelectObject(self.result_image_bitmap) - dc.Clear() - dc.DrawBitmap(wx_bitmap, - (self.poser.get_image_size() - numpy_image.shape[0]) // 2, - (self.poser.get_image_size() - numpy_image.shape[1]) // 2, True) - numpy_image_bgra = numpy_image[:, :, [2, 1, 0, 3]] # Convert color channels from RGB to BGR and keep alpha channel global_result_image = numpy_image_bgra - del dc - + # Update FPS counter time_now = time.time_ns() if self.last_update_time is not None: elapsed_time = time_now - self.last_update_time @@ -962,45 +362,34 @@ class MainFrame(wx.Frame): if self.torch_source_image is not None: self.fps_statistics.add_fps(fps) - self.fps_text.SetLabelText("FPS = %0.2f" % self.fps_statistics.get_average_fps()) - self.last_update_time = time_now if initAMI: # If the models are just now initalized stop animation to save - global_timer_paused = True + animation_running = False initAMI = False - if random.random() <= 0.01: - trimmed_fps = round(fps, 1) - print("talkinghead FPS: {:.1f}".format(trimmed_fps)) - - # Store current pose to use as last pose on next loop - lasttranisitiondPose = tranisitiondPose - - self.Refresh() + if self.last_report_time is None or time_now - self.last_report_time > 5e9: + trimmed_fps = round(self.fps_statistics.get_average_fps(), 1) + logger.info("update_result_image_bitmap: FPS: {:.1f}".format(trimmed_fps)) + self.last_report_time = time_now except KeyboardInterrupt: - print("Update process was interrupted by the user.") - wx.Exit() + pass - def resize_image(image, size=(512, 512)): - image.thumbnail(size, Image.LANCZOS) # Step 1: Resize the image to maintain the aspect ratio with the larger dimension being 512 pixels - new_image = Image.new("RGBA", size) # Step 2: Create a new image of size 512x512 with transparency - new_image.paste(image, ((size[0] - image.size[0]) // 2, - (size[1] - image.size[1]) // 2)) # Step 3: Paste the resized image into the new image, centered - return new_image + def load_image(self, file_path=None) -> None: + """Load the image file at `file_path`. 
- def load_image(self, event: wx.Event, file_path=None): + Except, if `global_reload_image is not None`, use the global reload image data instead. + """ + global global_source_image + global global_reload_image - global global_source_image # Declare global_source_image as a global variable - global global_reload - - if global_reload is not None: - file_path = "global_reload" + if global_reload_image is not None: + file_path = "global_reload_image" try: - if file_path == "global_reload": - pil_image = global_reload + if file_path == "global_reload_image": + pil_image = global_reload_image else: pil_image = resize_PIL_image( extract_PIL_image_from_filelike(file_path), @@ -1009,53 +398,22 @@ class MainFrame(wx.Frame): w, h = pil_image.size if pil_image.size != (512, 512): - print("Resizing Char Card to work") - pil_image = MainFrame.resize_image(pil_image) + logger.info("Resizing Char Card to work") + pil_image = to_talkinghead_image(pil_image) w, h = pil_image.size - if pil_image.mode != 'RGBA': - self.source_image_string = "Image must have alpha channel!" - self.wx_source_image = None + if pil_image.mode != "RGBA": + logger.error("load_image: image must have alpha channel") self.torch_source_image = None else: - self.wx_source_image = wx.Bitmap.FromBufferRGBA(w, h, pil_image.convert("RGBA").tobytes()) self.torch_source_image = extract_pytorch_image_from_PIL_image(pil_image) \ .to(self.device).to(self.poser.get_dtype()) - global_source_image = self.torch_source_image # Set global_source_image as a global variable + global_source_image = self.torch_source_image - self.update_source_image_bitmap() + except Exception as exc: + logger.error(f"load_image: {exc}") - except Exception as error: - print("Error: ", error) - - global_reload = None # Reset the globe load - self.Refresh() - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='uWu Waifu') - parser.add_argument( - '--model', - type=str, - required=False, - default='separable_float', - choices=['standard_float', 'separable_float', 'standard_half', 'separable_half'], - help='The model to use.' - ) - parser.add_argument('--char', - type=str, - required=False, - help='The filename of the character image under "tha3/images/".') - parser.add_argument( - '--device', - type=str, - required=False, - default='cuda', - choices=['cpu', 'cuda'], - help='The device to use for PyTorch ("cuda" for GPU, "cpu" for CPU).' 
- ) - - args = parser.parse_args() - global_basedir = "" # in standalone mode, cwd is the "talkinghead" directory - launch_gui(device=args.device, model=args.model, char=args.char, standalone=True) + finally: + global_reload_image = None diff --git a/talkinghead/tha3/app/manual_poser.py b/talkinghead/tha3/app/manual_poser.py index f66b2f3..b2fded6 100644 --- a/talkinghead/tha3/app/manual_poser.py +++ b/talkinghead/tha3/app/manual_poser.py @@ -59,7 +59,7 @@ import os import pathlib import sys import time -from typing import Dict, List, Tuple +from typing import List import PIL.Image @@ -71,8 +71,8 @@ import wx from tha3.poser.modes.load_poser import load_poser from tha3.poser.poser import Poser, PoseParameterCategory, PoseParameterGroup -from tha3.util import rgba_to_numpy_image, grid_change_to_numpy_image, \ - rgb_to_numpy_image, resize_PIL_image, extract_PIL_image_from_filelike, extract_pytorch_image_from_PIL_image +from tha3.util import resize_PIL_image, extract_PIL_image_from_filelike, extract_pytorch_image_from_PIL_image +from tha3.app.util import load_emotion_presets, posedict_to_pose, pose_to_posedict, torch_image_to_numpy, FpsStatistics logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -95,97 +95,6 @@ logger = logging.getLogger(__name__) # input_exts_and_descs_str = "|".join(format_fileformat_list(PIL_supported_input_formats)) # filter-spec accepted by `wx.FileDialog` # output_exts_and_descs_str = "|".join(format_fileformat_list(PIL_supported_output_formats)) -# The keys for a pose in the emotion JSON files. -# -# TODO: "eye_unimpressed" is arity-2, but has only one entry in the JSON. The current implementation smashes both into one, -# letting the second one (right slider) win. Maybe the two values should be saved separately, but we have to avoid -# breaking the live mode served by `app.py`. -posedict_keys = ["eyebrow_troubled_left_index", "eyebrow_troubled_right_index", - "eyebrow_angry_left_index", "eyebrow_angry_right_index", - "eyebrow_lowered_left_index", "eyebrow_lowered_right_index", - "eyebrow_raised_left_index", "eyebrow_raised_right_index", - "eyebrow_happy_left_index", "eyebrow_happy_right_index", - "eyebrow_serious_left_index", "eyebrow_serious_right_index", - "eye_wink_left_index", "eye_wink_right_index", - "eye_happy_wink_left_index", "eye_happy_wink_right_index", - "eye_surprised_left_index", "eye_surprised_right_index", - "eye_relaxed_left_index", "eye_relaxed_right_index", - "eye_unimpressed", "eye_unimpressed", - "eye_raised_lower_eyelid_left_index", "eye_raised_lower_eyelid_right_index", - "iris_small_left_index", "iris_small_right_index", - "mouth_aaa_index", - "mouth_iii_index", - "mouth_uuu_index", - "mouth_eee_index", - "mouth_ooo_index", - "mouth_delta", - "mouth_lowered_corner_left_index", "mouth_lowered_corner_right_index", - "mouth_raised_corner_left_index", "mouth_raised_corner_right_index", - "mouth_smirk", - "iris_rotation_x_index", "iris_rotation_y_index", - "head_x_index", "head_y_index", - "neck_z_index", - "body_y_index", "body_z_index", - "breathing_index"] -assert len(posedict_keys) == 45 - - -def load_emotion_presets() -> Tuple[Dict[str, Dict[str, float]], List[str]]: - """Load emotion presets from disk. - - These are JSON files in "talkinghead/emotions". - - Returns the tuple `(emotions, emotion_names)`, where:: - - emotions = {emotion0_name: posedict0, ...} - emotion_names = [emotion0_name, emotion1_name, ...] - - The dict contains the actual pose data. 
The list is a sorted list of emotion names - that can be used to map a linear index (e.g. the choice index in a GUI dropdown) - to the corresponding key of `emotions`. - - The directory "talkinghead/emotions" must also contain a "_defaults.json" file, - containing factory defaults (as a fallback) for the 28 standard emotions - (as recognized by distilbert), as well as a hidden "zero" preset that represents - a neutral pose. (This is separate from the "neutral" emotion, which is allowed - to be "non-zero".) - """ - emotion_names = [] - for root, dirs, files in os.walk("emotions", topdown=True): - for filename in files: - if filename == "_defaults.json": # skip the repository containing the default fallbacks - continue - if filename.lower().endswith(".json"): - emotion_names.append(filename[:-5]) # drop the ".json" - emotion_names.sort() # the 28 actual emotions - - # TODO: Note that currently, we build the list of emotion names from JSON filenames, - # and then check whether each JSON implements the emotion matching its filename. - # On second thought, I'm not sure whether that makes much sense. Maybe rethink the design. - # - We *do* want custom JSON files to show up in the list, if those are placed in "tha3/emotions". So the list of emotions shouldn't be hardcoded. - # - *Having* a fallback repository with factory defaults (and a hidden "zero" preset) is useful. - # But we are currently missing a way to reset an emotion to its factory default. - def load_emotion_with_fallback(emotion_name: str) -> Dict[str, float]: - try: - with open(os.path.join("emotions", f"{emotion_name}.json"), "r") as json_file: - emotions_from_json = json.load(json_file) # A single json file may contain presets for multiple emotions. - posedict = emotions_from_json[emotion_name] - except (FileNotFoundError, KeyError): # If no separate json exists for the specified emotion, load the default (all 28 emotions have a default). - with open(os.path.join("emotions", "_defaults.json"), "r") as json_file: - emotions_from_json = json.load(json_file) - posedict = emotions_from_json[emotion_name] - # If still not found, it's an error, so fail-fast: let the app exit with an informative exception message. - return posedict - - # Dict keeps its keys in insertion order, so define some special states before inserting the actual emotions. - emotions = {"[custom]": {}, # custom = the user has changed at least one value manually after last loading a preset - "[reset]": load_emotion_with_fallback("zero")} # reset = a preset with all sliders in their default positions. Found in "_defaults.json". - for emotion_name in emotion_names: - emotions[emotion_name] = load_emotion_with_fallback(emotion_name) - - emotion_names = list(emotions.keys()) - return emotions, emotion_names - class SimpleParamGroupsControlPanel(wx.Panel): """A simple control panel for groups of arity-1 continuous parameters (i.e. float value, and no separate left/right controls). 
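For reference, a minimal sketch of the emotion preset format that `load_emotion_presets` consumes, and of a call to the relocated helper (it moves to `tha3.app.util` later in this diff, where it takes the preset directory as an argument). The file name "curiosity.json" and the slider values are hypothetical; the keys come from `posedict_keys`, and any key omitted from the JSON defaults to 0.0.

    # Hypothetical preset file: talkinghead/emotions/curiosity.json
    # A single JSON file maps one or more emotion names to posedicts:
    #
    #   {
    #       "curiosity": {
    #           "eyebrow_raised_left_index": 0.6,
    #           "eyebrow_raised_right_index": 0.4,
    #           "eye_surprised_left_index": 0.3,
    #           "eye_surprised_right_index": 0.3
    #       }
    #   }
    #
    # Assuming the working directory is the "talkinghead" folder:
    from tha3.app.util import load_emotion_presets, posedict_to_pose

    emotions, emotion_names = load_emotion_presets("emotions")  # also inserts "[custom]" and "[reset]"
    pose = posedict_to_pose(emotions["curiosity"])  # list of 45 floats; unspecified morphs are 0.0
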
@@ -404,43 +313,6 @@ class MorphCategoryControlPanel(wx.Panel): self.update_ui() -def convert_output_image_from_torch_to_numpy(output_image): - if output_image.shape[2] == 2: - h, w, c = output_image.shape - numpy_image = torch.transpose(output_image.reshape(h * w, c), 0, 1).reshape(c, h, w) - elif output_image.shape[0] == 4: - numpy_image = rgba_to_numpy_image(output_image) - elif output_image.shape[0] == 3: - numpy_image = rgb_to_numpy_image(output_image) - elif output_image.shape[0] == 1: - c, h, w = output_image.shape - alpha_image = torch.cat([output_image.repeat(3, 1, 1) * 2.0 - 1.0, torch.ones(1, h, w)], dim=0) - numpy_image = rgba_to_numpy_image(alpha_image) - elif output_image.shape[0] == 2: - numpy_image = grid_change_to_numpy_image(output_image, num_channels=4) - else: - raise RuntimeError(f"Unsupported # image channels: {output_image.shape[0]}") - numpy_image = numpy.uint8(numpy.rint(numpy_image * 255.0)) - return numpy_image - - -class FpsStatistics: - def __init__(self): - self.count = 100 - self.fps = [] - - def add_fps(self, fps: float) -> None: - self.fps.append(fps) - while len(self.fps) > self.count: - del self.fps[0] - - def get_average_fps(self) -> float: - if len(self.fps) == 0: - return 0.0 - else: - return sum(self.fps) / len(self.fps) - - class MyFileDropTarget(wx.FileDropTarget): def OnDropFiles(self, x, y, filenames): if len(filenames) > 1: @@ -571,7 +443,7 @@ class MainFrame(wx.Frame): self.left_panel_sizer.Add(self.source_image_panel, 0, wx.FIXED_MINSIZE) # Emotion picker. - self.emotions, self.emotion_names = load_emotion_presets() + self.emotions, self.emotion_names = load_emotion_presets("emotions") # # Horizontal emotion picker layout; looks bad, text label vertical alignment is wrong. # self.emotion_panel = wx.Panel(self.left_panel, style=wx.SIMPLE_BORDER, size=(-1, -1)) @@ -767,7 +639,7 @@ class MainFrame(wx.Frame): if len(emotions_from_json) > 1: logger.warning(f"File {json_file_name} contains multiple emotions, loading the first one '{first_emotion_name}'.") posedict = emotions_from_json[first_emotion_name] - pose = self.posedict_to_pose(posedict) + pose = posedict_to_pose(posedict) # Apply loaded emotion self.set_current_pose(pose) @@ -863,7 +735,7 @@ class MainFrame(wx.Frame): emotion_name = self.emotion_choice.GetString(current_emotion_index) logger.info(f"Loading emotion preset {emotion_name}") posedict = self.emotions[emotion_name] - pose = self.posedict_to_pose(posedict) + pose = posedict_to_pose(posedict) self.set_current_pose(pose) current_pose = pose else: @@ -913,7 +785,7 @@ class MainFrame(wx.Frame): with torch.no_grad(): output_image = self.poser.pose(self.torch_source_image, pose, output_index)[0].detach().cpu() - numpy_image = convert_output_image_from_torch_to_numpy(output_image) + numpy_image = torch_image_to_numpy(output_image) self.last_output_numpy_image = numpy_image wx_image = wx.ImageFromBuffer( numpy_image.shape[0], @@ -963,25 +835,6 @@ class MainFrame(wx.Frame): wx.CallAfter(update_images_cont2) wx.CallAfter(update_images_cont) - def current_pose_to_posedict(self) -> Dict[str, float]: - """Convert the character's current pose into a posedict for saving into an emotion JSON.""" - current_pose_values = self.get_current_pose() - current_pose_dict = dict(zip(posedict_keys, current_pose_values)) - return current_pose_dict - - def posedict_to_pose(self, posedict: Dict[str, float]) -> List[float]: - """Convert a posedict (from an emotion JSON) into a list of morph values (in the order the models expect them).""" - # sanity check - 
unrecognized_keys = set(posedict.keys()) - set(posedict_keys) - if unrecognized_keys: - logger.warning(f"Ignoring unrecognized keys in posedict: {unrecognized_keys}") - # Missing keys are fine - keys for zero values can simply be omitted. - - pose = [0.0 for i in range(self.poser.get_num_parameters())] - for idx, key in enumerate(posedict_keys): - pose[idx] = posedict.get(key, 0.0) - return pose - def on_save_image(self, event: wx.Event) -> None: """Ask the user for destination and save the output image. @@ -1036,7 +889,7 @@ class MainFrame(wx.Frame): current_emotion_old_index = self.emotion_choice.GetSelection() current_emotion_name = self.emotion_choice.GetString(current_emotion_old_index) - self.emotions, self.emotion_names = load_emotion_presets() + self.emotions, self.emotion_names = load_emotion_presets("emotions") self.emotion_choice.SetItems(self.emotion_names) current_emotion_new_index = self.emotion_choice.FindString(current_emotion_name) @@ -1076,13 +929,13 @@ class MainFrame(wx.Frame): if emotion_name.startswith("[") and emotion_name.endswith("]"): continue # skip "[custom]" and "[reset]" try: - pose = self.posedict_to_pose(posedict) + pose = posedict_to_pose(posedict) posetensor = torch.tensor(pose, device=self.device, dtype=self.dtype) output_index = self.output_index_choice.GetSelection() with torch.no_grad(): output_image = self.poser.pose(self.torch_source_image, posetensor, output_index)[0].detach().cpu() - numpy_image = convert_output_image_from_torch_to_numpy(output_image) + numpy_image = torch_image_to_numpy(output_image) image_file_name = os.path.join(dir_name, f"{emotion_name}.png") self.save_numpy_image(numpy_image, image_file_name) @@ -1107,11 +960,11 @@ class MainFrame(wx.Frame): os.makedirs(os.path.dirname(image_file_name), exist_ok=True) pil_image.save(image_file_name) - data_dict = self.current_pose_to_posedict() + pose_dict = pose_to_posedict(self.get_current_pose()) json_file_path = os.path.splitext(image_file_name)[0] + ".json" filename_without_extension = os.path.splitext(os.path.basename(image_file_name))[0] - data_dict_with_filename = {filename_without_extension: data_dict} # Create a new dict with the filename as the key + data_dict_with_filename = {filename_without_extension: pose_dict} # JSON structure: {emotion_name0: posedict0, ...} try: with open(json_file_path, "w") as file: diff --git a/talkinghead/tha3/app/util.py b/talkinghead/tha3/app/util.py new file mode 100644 index 0000000..2ccb806 --- /dev/null +++ b/talkinghead/tha3/app/util.py @@ -0,0 +1,194 @@ +"""App-level utilities.""" + +__all__ = ["posedict_keys", "posedict_key_to_index", + "load_emotion_presets", + "posedict_to_pose", "pose_to_posedict", + "torch_image_to_numpy", "to_talkinghead_image", + "FpsStatistics"] + +import logging +import json +import os +from typing import Dict, List, Tuple + +import PIL + +import numpy + +import torch + +from tha3.util import rgba_to_numpy_image, rgb_to_numpy_image, grid_change_to_numpy_image + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +# The keys for a pose in the emotion JSON files. +# +# TODO: "eye_unimpressed" is arity-2, but has only one entry in the JSON. The current implementation smashes both into one, +# letting the second one (right slider) win. Maybe the two values should be saved separately, but we have to avoid +# breaking the live mode served by `app.py`. 
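To make the arity-2 caveat above concrete, here is a small sketch (hypothetical values) of why the duplicated "eye_unimpressed" entry is lossy: building a dict from the key list keeps only the last occurrence, so the left-slider value is dropped on save, and the right-slider value is written back to both slots on load.

    # Hypothetical values; dict() keeps only the last occurrence of a duplicated key.
    keys = ["eye_unimpressed", "eye_unimpressed"]  # left slider, right slider
    left, right = 0.3, 0.7
    posedict = dict(zip(keys, [left, right]))
    print(posedict)                                # {'eye_unimpressed': 0.7} -- left value lost
    restored = [posedict.get(key, 0.0) for key in keys]
    print(restored)                                # [0.7, 0.7] -- both slots get the right-slider value
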
+posedict_keys = ["eyebrow_troubled_left_index", "eyebrow_troubled_right_index", + "eyebrow_angry_left_index", "eyebrow_angry_right_index", + "eyebrow_lowered_left_index", "eyebrow_lowered_right_index", + "eyebrow_raised_left_index", "eyebrow_raised_right_index", + "eyebrow_happy_left_index", "eyebrow_happy_right_index", + "eyebrow_serious_left_index", "eyebrow_serious_right_index", + "eye_wink_left_index", "eye_wink_right_index", + "eye_happy_wink_left_index", "eye_happy_wink_right_index", + "eye_surprised_left_index", "eye_surprised_right_index", + "eye_relaxed_left_index", "eye_relaxed_right_index", + "eye_unimpressed", "eye_unimpressed", + "eye_raised_lower_eyelid_left_index", "eye_raised_lower_eyelid_right_index", + "iris_small_left_index", "iris_small_right_index", + "mouth_aaa_index", + "mouth_iii_index", + "mouth_uuu_index", + "mouth_eee_index", + "mouth_ooo_index", + "mouth_delta", + "mouth_lowered_corner_left_index", "mouth_lowered_corner_right_index", + "mouth_raised_corner_left_index", "mouth_raised_corner_right_index", + "mouth_smirk", + "iris_rotation_x_index", "iris_rotation_y_index", + "head_x_index", "head_y_index", + "neck_z_index", + "body_y_index", "body_z_index", + "breathing_index"] +assert len(posedict_keys) == 45 + +# posedict_keys gives us index->key; make an inverse mapping. +# Note this doesn't work for "eye_unimpressed", because it's not unique. (All the more reason to fix that.) +posedict_key_to_index = {key: idx for idx, key in enumerate(posedict_keys)} + + +def load_emotion_presets(directory: str) -> Tuple[Dict[str, Dict[str, float]], List[str]]: + """Load emotion presets from disk. + + Returns the tuple `(emotions, emotion_names)`, where:: + + emotions = {emotion0_name: posedict0, ...} + emotion_names = [emotion0_name, emotion1_name, ...] + + The dict contains the actual pose data. The list is a sorted list of emotion names + that can be used to map a linear index (e.g. the choice index in a GUI dropdown) + to the corresponding key of `emotions`. + + The directory "talkinghead/emotions" must also contain a "_defaults.json" file, + containing factory defaults (as a fallback) for the 28 standard emotions + (as recognized by distilbert), as well as a hidden "zero" preset that represents + a neutral pose. (This is separate from the "neutral" emotion, which is allowed + to be "non-zero".) + """ + emotion_names = [] + for root, dirs, files in os.walk(directory, topdown=True): + for filename in files: + if filename == "_defaults.json": # skip the repository containing the default fallbacks + continue + if filename.lower().endswith(".json"): + emotion_names.append(filename[:-5]) # drop the ".json" + emotion_names.sort() # the 28 actual emotions + + # TODO: Note that currently, we build the list of emotion names from JSON filenames, + # and then check whether each JSON implements the emotion matching its filename. + # On second thought, I'm not sure whether that makes much sense. Maybe rethink the design. + # - We *do* want custom JSON files to show up in the list, if those are placed in "tha3/emotions". So the list of emotions shouldn't be hardcoded. + # - *Having* a fallback repository with factory defaults (and a hidden "zero" preset) is useful. + # But we are currently missing a way to reset an emotion to its factory default. 
+ def load_emotion_with_fallback(emotion_name: str) -> Dict[str, float]: + try: + with open(os.path.join(directory, f"{emotion_name}.json"), "r") as json_file: + emotions_from_json = json.load(json_file) # A single json file may contain presets for multiple emotions. + posedict = emotions_from_json[emotion_name] + except (FileNotFoundError, KeyError): # If no separate json exists for the specified emotion, load the default (all 28 emotions have a default). + with open(os.path.join(directory, "_defaults.json"), "r") as json_file: + emotions_from_json = json.load(json_file) + posedict = emotions_from_json[emotion_name] + # If still not found, it's an error, so fail-fast: let the app exit with an informative exception message. + return posedict + + # Dict keeps its keys in insertion order, so define some special states before inserting the actual emotions. + emotions = {"[custom]": {}, # custom = the user has changed at least one value manually after last loading a preset + "[reset]": load_emotion_with_fallback("zero")} # reset = a preset with all sliders in their default positions. Found in "_defaults.json". + for emotion_name in emotion_names: + emotions[emotion_name] = load_emotion_with_fallback(emotion_name) + + emotion_names = list(emotions.keys()) + return emotions, emotion_names + + +def posedict_to_pose(posedict: Dict[str, float]) -> List[float]: + """Convert a posedict (from an emotion JSON) into a list of morph values (in the order the models expect them).""" + # sanity check + unrecognized_keys = set(posedict.keys()) - set(posedict_keys) + if unrecognized_keys: + logger.warning(f"posedict_to_pose: ignoring unrecognized keys in posedict: {unrecognized_keys}") + # Missing keys are fine - keys for zero values can simply be omitted. + + pose = [0.0 for i in range(len(posedict_keys))] + for idx, key in enumerate(posedict_keys): + pose[idx] = posedict.get(key, 0.0) + return pose + + +def pose_to_posedict(pose: List[float]) -> Dict[str, float]: + """Convert `pose` into a posedict for saving into an emotion JSON.""" + return dict(zip(posedict_keys, pose)) + +# -------------------------------------------------------------------------------- +# TODO: move the image utils to the lower-level `tha3.util`? + +def torch_image_to_numpy(image: torch.tensor) -> numpy.array: + if image.shape[2] == 2: + h, w, c = image.shape + numpy_image = torch.transpose(image.reshape(h * w, c), 0, 1).reshape(c, h, w) + elif image.shape[0] == 4: + numpy_image = rgba_to_numpy_image(image) + elif image.shape[0] == 3: + numpy_image = rgb_to_numpy_image(image) + elif image.shape[0] == 1: + c, h, w = image.shape + alpha_image = torch.cat([image.repeat(3, 1, 1) * 2.0 - 1.0, torch.ones(1, h, w)], dim=0) + numpy_image = rgba_to_numpy_image(alpha_image) + elif image.shape[0] == 2: + numpy_image = grid_change_to_numpy_image(image, num_channels=4) + else: + msg = f"torch_image_to_numpy: unsupported # image channels: {image.shape[0]}" + logger.error(msg) + raise RuntimeError(msg) + numpy_image = numpy.uint8(numpy.rint(numpy_image * 255.0)) + return numpy_image + +def to_talkinghead_image(image: PIL.Image, new_size: Tuple[int] = (512, 512)) -> PIL.Image: + """Resize image to `new_size`, add alpha channel, and center. + + With default `new_size`: + + - Step 1: Resize (Lanczos) the image to maintain the aspect ratio with the larger dimension being 512 pixels. + - Step 2: Create a new image of size 512x512 with transparency. + - Step 3: Paste the resized image into the new image, centered. 
+ """ + image.thumbnail(new_size, PIL.Image.LANCZOS) + new_image = PIL.Image.new("RGBA", new_size) + new_image.paste(image, ((new_size[0] - image.size[0]) // 2, + (new_size[1] - image.size[1]) // 2)) + return new_image + +# -------------------------------------------------------------------------------- + +class FpsStatistics: + """A simple average FPS (frames per second) counter.""" + def __init__(self): + self.count = 100 + self.fps = [] + + def add_fps(self, fps: float) -> None: + self.fps.append(fps) + while len(self.fps) > self.count: + del self.fps[0] + + def get_average_fps(self) -> float: + if len(self.fps) == 0: + return 0.0 + else: + return sum(self.fps) / len(self.fps) diff --git a/talkinghead/tha3/mocap/__init__.py b/talkinghead/tha3/mocap/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/talkinghead/tha3/mocap/ifacialmocap_constants.py b/talkinghead/tha3/mocap/ifacialmocap_constants.py deleted file mode 100644 index 27031ac..0000000 --- a/talkinghead/tha3/mocap/ifacialmocap_constants.py +++ /dev/null @@ -1,239 +0,0 @@ -EYE_LOOK_IN_LEFT = "eyeLookInLeft" -EYE_LOOK_OUT_LEFT = "eyeLookOutLeft" -EYE_LOOK_DOWN_LEFT = "eyeLookDownLeft" -EYE_LOOK_UP_LEFT = "eyeLookUpLeft" -EYE_BLINK_LEFT = "eyeBlinkLeft" -EYE_SQUINT_LEFT = "eyeSquintLeft" -EYE_WIDE_LEFT = "eyeWideLeft" -EYE_LOOK_IN_RIGHT = "eyeLookInRight" -EYE_LOOK_OUT_RIGHT = "eyeLookOutRight" -EYE_LOOK_DOWN_RIGHT = "eyeLookDownRight" -EYE_LOOK_UP_RIGHT = "eyeLookUpRight" -EYE_BLINK_RIGHT = "eyeBlinkRight" -EYE_SQUINT_RIGHT = "eyeSquintRight" -EYE_WIDE_RIGHT = "eyeWideRight" -BROW_DOWN_LEFT = "browDownLeft" -BROW_OUTER_UP_LEFT = "browOuterUpLeft" -BROW_DOWN_RIGHT = "browDownRight" -BROW_OUTER_UP_RIGHT = "browOuterUpRight" -BROW_INNER_UP = "browInnerUp" -NOSE_SNEER_LEFT = "noseSneerLeft" -NOSE_SNEER_RIGHT = "noseSneerRight" -CHEEK_SQUINT_LEFT = "cheekSquintLeft" -CHEEK_SQUINT_RIGHT = "cheekSquintRight" -CHEEK_PUFF = "cheekPuff" -MOUTH_LEFT = "mouthLeft" -MOUTH_DIMPLE_LEFT = "mouthDimpleLeft" -MOUTH_FROWN_LEFT = "mouthFrownLeft" -MOUTH_LOWER_DOWN_LEFT = "mouthLowerDownLeft" -MOUTH_PRESS_LEFT = "mouthPressLeft" -MOUTH_SMILE_LEFT = "mouthSmileLeft" -MOUTH_STRETCH_LEFT = "mouthStretchLeft" -MOUTH_UPPER_UP_LEFT = "mouthUpperUpLeft" -MOUTH_RIGHT = "mouthRight" -MOUTH_DIMPLE_RIGHT = "mouthDimpleRight" -MOUTH_FROWN_RIGHT = "mouthFrownRight" -MOUTH_LOWER_DOWN_RIGHT = "mouthLowerDownRight" -MOUTH_PRESS_RIGHT = "mouthPressRight" -MOUTH_SMILE_RIGHT = "mouthSmileRight" -MOUTH_STRETCH_RIGHT = "mouthStretchRight" -MOUTH_UPPER_UP_RIGHT = "mouthUpperUpRight" -MOUTH_CLOSE = "mouthClose" -MOUTH_FUNNEL = "mouthFunnel" -MOUTH_PUCKER = "mouthPucker" -MOUTH_ROLL_LOWER = "mouthRollLower" -MOUTH_ROLL_UPPER = "mouthRollUpper" -MOUTH_SHRUG_LOWER = "mouthShrugLower" -MOUTH_SHRUG_UPPER = "mouthShrugUpper" -JAW_LEFT = "jawLeft" -JAW_RIGHT = "jawRight" -JAW_FORWARD = "jawForward" -JAW_OPEN = "jawOpen" -TONGUE_OUT = "tongueOut" - -BLENDSHAPE_NAMES = [ - EYE_LOOK_IN_LEFT, # 0 - EYE_LOOK_OUT_LEFT, # 1 - EYE_LOOK_DOWN_LEFT, # 2 - EYE_LOOK_UP_LEFT, # 3 - EYE_BLINK_LEFT, # 4 - EYE_SQUINT_LEFT, # 5 - EYE_WIDE_LEFT, # 6 - EYE_LOOK_IN_RIGHT, # 7 - EYE_LOOK_OUT_RIGHT, # 8 - EYE_LOOK_DOWN_RIGHT, # 9 - EYE_LOOK_UP_RIGHT, # 10 - EYE_BLINK_RIGHT, # 11 - EYE_SQUINT_RIGHT, # 12 - EYE_WIDE_RIGHT, # 13 - BROW_DOWN_LEFT, # 14 - BROW_OUTER_UP_LEFT, # 15 - BROW_DOWN_RIGHT, # 16 - BROW_OUTER_UP_RIGHT, # 17 - BROW_INNER_UP, # 18 - NOSE_SNEER_LEFT, # 19 - NOSE_SNEER_RIGHT, # 20 - CHEEK_SQUINT_LEFT, # 21 - CHEEK_SQUINT_RIGHT, # 22 - CHEEK_PUFF, # 23 - MOUTH_LEFT, # 
24 - MOUTH_DIMPLE_LEFT, # 25 - MOUTH_FROWN_LEFT, # 26 - MOUTH_LOWER_DOWN_LEFT, # 27 - MOUTH_PRESS_LEFT, # 28 - MOUTH_SMILE_LEFT, # 29 - MOUTH_STRETCH_LEFT, # 30 - MOUTH_UPPER_UP_LEFT, # 31 - MOUTH_RIGHT, # 32 - MOUTH_DIMPLE_RIGHT, # 33 - MOUTH_FROWN_RIGHT, # 34 - MOUTH_LOWER_DOWN_RIGHT, # 35 - MOUTH_PRESS_RIGHT, # 36 - MOUTH_SMILE_RIGHT, # 37 - MOUTH_STRETCH_RIGHT, # 38 - MOUTH_UPPER_UP_RIGHT, # 39 - MOUTH_CLOSE, # 40 - MOUTH_FUNNEL, # 41 - MOUTH_PUCKER, # 42 - MOUTH_ROLL_LOWER, # 43 - MOUTH_ROLL_UPPER, # 44 - MOUTH_SHRUG_LOWER, # 45 - MOUTH_SHRUG_UPPER, # 46 - JAW_LEFT, # 47 - JAW_RIGHT, # 48 - JAW_FORWARD, # 49 - JAW_OPEN, # 50 - TONGUE_OUT, # 51 -] - -EYE_LEFT_BLENDSHAPES = [ - EYE_LOOK_IN_LEFT, # 0 - EYE_LOOK_OUT_LEFT, # 1 - EYE_LOOK_DOWN_LEFT, # 2 - EYE_LOOK_UP_LEFT, # 3 - EYE_BLINK_LEFT, # 4 - EYE_SQUINT_LEFT, # 5 - EYE_WIDE_LEFT, # 6 -] - -EYE_RIGHT_BLENDSHAPES = [ - EYE_LOOK_IN_RIGHT, # 7 - EYE_LOOK_OUT_RIGHT, # 8 - EYE_LOOK_DOWN_RIGHT, # 9 - EYE_LOOK_UP_RIGHT, # 10 - EYE_BLINK_RIGHT, # 11 - EYE_SQUINT_RIGHT, # 12 - EYE_WIDE_RIGHT, # 13 -] - -BROW_LEFT_BLENDSHAPES = [ - BROW_DOWN_LEFT, # 14 - BROW_OUTER_UP_LEFT, # 15 - -] - -BROW_RIGHT_BLENDSHAPES = [ - BROW_DOWN_RIGHT, # 16 - BROW_OUTER_UP_RIGHT, # 17 - -] - -BROW_BOTH_BLENDSHAPES = [ - BROW_INNER_UP, # 18 -] - -NOSE_BLENDSHAPES = [ - NOSE_SNEER_LEFT, # 19 - NOSE_SNEER_RIGHT, # 20 -] - -CHECK_BLENDSHAPES = [ - CHEEK_SQUINT_LEFT, # 21 - CHEEK_SQUINT_RIGHT, # 22 - CHEEK_PUFF, # 23 -] - -MOUTH_LEFT_BLENDSHAPES = [ - MOUTH_LEFT, # 24 - MOUTH_DIMPLE_LEFT, # 25 - MOUTH_FROWN_LEFT, # 26 - MOUTH_LOWER_DOWN_LEFT, # 27 - MOUTH_PRESS_LEFT, # 28 - MOUTH_SMILE_LEFT, # 29 - MOUTH_STRETCH_LEFT, # 30 - MOUTH_UPPER_UP_LEFT, # 31 -] - -MOUTH_RIGHT_BLENDSHAPES = [ - MOUTH_RIGHT, # 32 - MOUTH_DIMPLE_RIGHT, # 33 - MOUTH_FROWN_RIGHT, # 34 - MOUTH_LOWER_DOWN_RIGHT, # 35 - MOUTH_PRESS_RIGHT, # 36 - MOUTH_SMILE_RIGHT, # 37 - MOUTH_STRETCH_RIGHT, # 38 - MOUTH_UPPER_UP_RIGHT, # 39 -] - -MOUTH_BOTH_BLENDSHAPES = [ - MOUTH_CLOSE, # 40 - MOUTH_FUNNEL, # 41 - MOUTH_PUCKER, # 42 - MOUTH_ROLL_LOWER, # 43 - MOUTH_ROLL_UPPER, # 44 - MOUTH_SHRUG_LOWER, # 45 - MOUTH_SHRUG_UPPER, # 46 -] - -JAW_BLENDSHAPES = [ - JAW_LEFT, # 47 - JAW_RIGHT, # 48 - JAW_FORWARD, # 49 - JAW_OPEN, # 50 -] - -TONGUE_BLENDSHAPES = [ - TONGUE_OUT, # 51 -] - -COLUMN_0_BLENDSHAPES = EYE_RIGHT_BLENDSHAPES + BROW_RIGHT_BLENDSHAPES + [NOSE_SNEER_RIGHT, CHEEK_SQUINT_RIGHT] -COLUMN_1_BLENDSHAPES = EYE_LEFT_BLENDSHAPES + BROW_LEFT_BLENDSHAPES + [NOSE_SNEER_LEFT, CHEEK_SQUINT_LEFT] -COLUMN_2_BLENDSHAPES = MOUTH_RIGHT_BLENDSHAPES + [JAW_RIGHT] -COLUMN_3_BLENDSHAPES = MOUTH_LEFT_BLENDSHAPES + [JAW_LEFT] -COLUMN_4_BLENDSHAPES = [BROW_INNER_UP, CHEEK_PUFF] + MOUTH_BOTH_BLENDSHAPES + [JAW_FORWARD, JAW_OPEN, TONGUE_OUT] - -BLENDSHAPE_COLUMNS = [ - COLUMN_0_BLENDSHAPES, - COLUMN_1_BLENDSHAPES, - COLUMN_2_BLENDSHAPES, - COLUMN_3_BLENDSHAPES, - COLUMN_4_BLENDSHAPES, -] - -RIGHT_EYE_BONE_X = "rightEyeBoneX" -RIGHT_EYE_BONE_Y = "rightEyeBoneY" -RIGHT_EYE_BONE_Z = "rightEyeBoneZ" -RIGHT_EYE_BONE_ROTATIONS = [RIGHT_EYE_BONE_X, RIGHT_EYE_BONE_Y, RIGHT_EYE_BONE_Z] - -LEFT_EYE_BONE_X = "leftEyeBoneX" -LEFT_EYE_BONE_Y = "leftEyeBoneY" -LEFT_EYE_BONE_Z = "leftEyeBoneZ" -LEFT_EYE_BONE_ROTATIONS = [LEFT_EYE_BONE_X, LEFT_EYE_BONE_Y, LEFT_EYE_BONE_Z] - -HEAD_BONE_X = "headBoneX" -HEAD_BONE_Y = "headBoneY" -HEAD_BONE_Z = "headBoneZ" -HEAD_BONE_ROTATIONS = [HEAD_BONE_X, HEAD_BONE_Y, HEAD_BONE_Z] - -ROTATION_NAMES = RIGHT_EYE_BONE_ROTATIONS + LEFT_EYE_BONE_ROTATIONS + HEAD_BONE_ROTATIONS - -RIGHT_EYE_BONE_QUAT = 
"rightEyeBoneQuat" -LEFT_EYE_BONE_QUAT = "leftEyeBoneQuat" -HEAD_BONE_QUAT = "headBoneQuat" -QUATERNION_NAMES = [ - RIGHT_EYE_BONE_QUAT, - LEFT_EYE_BONE_QUAT, - HEAD_BONE_QUAT -] - -IFACIALMOCAP_DATETIME_FORMAT = "%Y/%m/%d-%H:%M:%S.%f" diff --git a/talkinghead/tha3/mocap/ifacialmocap_pose.py b/talkinghead/tha3/mocap/ifacialmocap_pose.py deleted file mode 100644 index d90936e..0000000 --- a/talkinghead/tha3/mocap/ifacialmocap_pose.py +++ /dev/null @@ -1,27 +0,0 @@ -from tha3.mocap.ifacialmocap_constants import BLENDSHAPE_NAMES, HEAD_BONE_X, HEAD_BONE_Y, HEAD_BONE_Z, \ - HEAD_BONE_QUAT, LEFT_EYE_BONE_X, LEFT_EYE_BONE_Y, LEFT_EYE_BONE_Z, LEFT_EYE_BONE_QUAT, RIGHT_EYE_BONE_X, \ - RIGHT_EYE_BONE_Y, RIGHT_EYE_BONE_Z, RIGHT_EYE_BONE_QUAT - - -def create_default_ifacialmocap_pose(): - data = {} - - for blendshape_name in BLENDSHAPE_NAMES: - data[blendshape_name] = 0.0 - - data[HEAD_BONE_X] = 0.0 - data[HEAD_BONE_Y] = 0.0 - data[HEAD_BONE_Z] = 0.0 - data[HEAD_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0] - - data[LEFT_EYE_BONE_X] = 0.0 - data[LEFT_EYE_BONE_Y] = 0.0 - data[LEFT_EYE_BONE_Z] = 0.0 - data[LEFT_EYE_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0] - - data[RIGHT_EYE_BONE_X] = 0.0 - data[RIGHT_EYE_BONE_Y] = 0.0 - data[RIGHT_EYE_BONE_Z] = 0.0 - data[RIGHT_EYE_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0] - - return data \ No newline at end of file diff --git a/talkinghead/tha3/mocap/ifacialmocap_pose_converter.py b/talkinghead/tha3/mocap/ifacialmocap_pose_converter.py deleted file mode 100644 index 390460b..0000000 --- a/talkinghead/tha3/mocap/ifacialmocap_pose_converter.py +++ /dev/null @@ -1,12 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Dict, List - - -class IFacialMocapPoseConverter(ABC): - @abstractmethod - def convert(self, ifacialmocap_pose: Dict[str, float]) -> List[float]: - pass - - @abstractmethod - def init_pose_converter_panel(self, parent): - pass \ No newline at end of file diff --git a/talkinghead/tha3/mocap/ifacialmocap_poser_converter_25.py b/talkinghead/tha3/mocap/ifacialmocap_poser_converter_25.py deleted file mode 100644 index 82166ea..0000000 --- a/talkinghead/tha3/mocap/ifacialmocap_poser_converter_25.py +++ /dev/null @@ -1,491 +0,0 @@ -import math -import time -from enum import Enum -from typing import Optional, Dict, List - -import numpy -import scipy.optimize -import wx - -from tha3.mocap.ifacialmocap_constants import MOUTH_SMILE_LEFT, MOUTH_SHRUG_UPPER, MOUTH_SMILE_RIGHT, \ - BROW_INNER_UP, BROW_OUTER_UP_RIGHT, BROW_OUTER_UP_LEFT, BROW_DOWN_LEFT, BROW_DOWN_RIGHT, EYE_WIDE_LEFT, \ - EYE_WIDE_RIGHT, EYE_BLINK_LEFT, EYE_BLINK_RIGHT, CHEEK_SQUINT_LEFT, CHEEK_SQUINT_RIGHT, EYE_LOOK_IN_LEFT, \ - EYE_LOOK_OUT_LEFT, EYE_LOOK_IN_RIGHT, EYE_LOOK_OUT_RIGHT, EYE_LOOK_UP_LEFT, EYE_LOOK_UP_RIGHT, EYE_LOOK_DOWN_RIGHT, \ - EYE_LOOK_DOWN_LEFT, HEAD_BONE_X, HEAD_BONE_Y, HEAD_BONE_Z, JAW_OPEN, MOUTH_FROWN_LEFT, MOUTH_FROWN_RIGHT, \ - MOUTH_LOWER_DOWN_LEFT, MOUTH_LOWER_DOWN_RIGHT, MOUTH_FUNNEL, MOUTH_PUCKER -from tha3.mocap.ifacialmocap_pose_converter import IFacialMocapPoseConverter -from tha3.poser.modes.pose_parameters import get_pose_parameters - - -class EyebrowDownMode(Enum): - TROUBLED = 1 - ANGRY = 2 - LOWERED = 3 - SERIOUS = 4 - - -class WinkMode(Enum): - NORMAL = 1 - RELAXED = 2 - - -def rad_to_deg(rad): - return rad * 180.0 / math.pi - - -def deg_to_rad(deg): - return deg * math.pi / 180.0 - - -def clamp(x, min_value, max_value): - return max(min_value, min(max_value, x)) - - -class IFacialMocapPoseConverter25Args: - def __init__(self, - lower_smile_threshold: float = 0.4, - 
upper_smile_threshold: float = 0.6, - eyebrow_down_mode: EyebrowDownMode = EyebrowDownMode.ANGRY, - wink_mode: WinkMode = WinkMode.NORMAL, - eye_surprised_max_value: float = 0.5, - eye_wink_max_value: float = 0.8, - eyebrow_down_max_value: float = 0.4, - cheek_squint_min_value: float = 0.1, - cheek_squint_max_value: float = 0.7, - eye_rotation_factor: float = 1.0 / 0.75, - jaw_open_min_value: float = 0.1, - jaw_open_max_value: float = 0.4, - mouth_frown_max_value: float = 0.6, - mouth_funnel_min_value: float = 0.25, - mouth_funnel_max_value: float = 0.5, - iris_small_left=0.0, - iris_small_right=0.0): - self.iris_small_right = iris_small_left - self.iris_small_left = iris_small_right - self.wink_mode = wink_mode - self.mouth_funnel_max_value = mouth_funnel_max_value - self.mouth_funnel_min_value = mouth_funnel_min_value - self.mouth_frown_max_value = mouth_frown_max_value - self.jaw_open_max_value = jaw_open_max_value - self.jaw_open_min_value = jaw_open_min_value - self.eye_rotation_factor = eye_rotation_factor - self.cheek_squint_max_value = cheek_squint_max_value - self.cheek_squint_min_value = cheek_squint_min_value - self.eyebrow_down_max_value = eyebrow_down_max_value - self.eye_blink_max_value = eye_wink_max_value - self.eye_wide_max_value = eye_surprised_max_value - self.eyebrow_down_mode = eyebrow_down_mode - self.lower_smile_threshold = lower_smile_threshold - self.upper_smile_threshold = upper_smile_threshold - - -class IFacialMocapPoseConverter25(IFacialMocapPoseConverter): - def __init__(self, args: Optional[IFacialMocapPoseConverter25Args] = None): - super().__init__() - if args is None: - args = IFacialMocapPoseConverter25Args() - self.args = args - pose_parameters = get_pose_parameters() - self.pose_size = 45 - - self.eyebrow_troubled_left_index = pose_parameters.get_parameter_index("eyebrow_troubled_left") - self.eyebrow_troubled_right_index = pose_parameters.get_parameter_index("eyebrow_troubled_right") - self.eyebrow_angry_left_index = pose_parameters.get_parameter_index("eyebrow_angry_left") - self.eyebrow_angry_right_index = pose_parameters.get_parameter_index("eyebrow_angry_right") - self.eyebrow_happy_left_index = pose_parameters.get_parameter_index("eyebrow_happy_left") - self.eyebrow_happy_right_index = pose_parameters.get_parameter_index("eyebrow_happy_right") - self.eyebrow_raised_left_index = pose_parameters.get_parameter_index("eyebrow_raised_left") - self.eyebrow_raised_right_index = pose_parameters.get_parameter_index("eyebrow_raised_right") - self.eyebrow_lowered_left_index = pose_parameters.get_parameter_index("eyebrow_lowered_left") - self.eyebrow_lowered_right_index = pose_parameters.get_parameter_index("eyebrow_lowered_right") - self.eyebrow_serious_left_index = pose_parameters.get_parameter_index("eyebrow_serious_left") - self.eyebrow_serious_right_index = pose_parameters.get_parameter_index("eyebrow_serious_right") - - self.eye_surprised_left_index = pose_parameters.get_parameter_index("eye_surprised_left") - self.eye_surprised_right_index = pose_parameters.get_parameter_index("eye_surprised_right") - self.eye_wink_left_index = pose_parameters.get_parameter_index("eye_wink_left") - self.eye_wink_right_index = pose_parameters.get_parameter_index("eye_wink_right") - self.eye_happy_wink_left_index = pose_parameters.get_parameter_index("eye_happy_wink_left") - self.eye_happy_wink_right_index = pose_parameters.get_parameter_index("eye_happy_wink_right") - self.eye_relaxed_left_index = pose_parameters.get_parameter_index("eye_relaxed_left") - 
self.eye_relaxed_right_index = pose_parameters.get_parameter_index("eye_relaxed_right") - self.eye_raised_lower_eyelid_left_index = pose_parameters.get_parameter_index("eye_raised_lower_eyelid_left") - self.eye_raised_lower_eyelid_right_index = pose_parameters.get_parameter_index("eye_raised_lower_eyelid_right") - - self.iris_small_left_index = pose_parameters.get_parameter_index("iris_small_left") - self.iris_small_right_index = pose_parameters.get_parameter_index("iris_small_right") - - self.iris_rotation_x_index = pose_parameters.get_parameter_index("iris_rotation_x") - self.iris_rotation_y_index = pose_parameters.get_parameter_index("iris_rotation_y") - - self.head_x_index = pose_parameters.get_parameter_index("head_x") - self.head_y_index = pose_parameters.get_parameter_index("head_y") - self.neck_z_index = pose_parameters.get_parameter_index("neck_z") - - self.mouth_aaa_index = pose_parameters.get_parameter_index("mouth_aaa") - self.mouth_iii_index = pose_parameters.get_parameter_index("mouth_iii") - self.mouth_uuu_index = pose_parameters.get_parameter_index("mouth_uuu") - self.mouth_eee_index = pose_parameters.get_parameter_index("mouth_eee") - self.mouth_ooo_index = pose_parameters.get_parameter_index("mouth_ooo") - - self.mouth_lowered_corner_left_index = pose_parameters.get_parameter_index("mouth_lowered_corner_left") - self.mouth_lowered_corner_right_index = pose_parameters.get_parameter_index("mouth_lowered_corner_right") - self.mouth_raised_corner_left_index = pose_parameters.get_parameter_index("mouth_raised_corner_left") - self.mouth_raised_corner_right_index = pose_parameters.get_parameter_index("mouth_raised_corner_right") - - self.body_y_index = pose_parameters.get_parameter_index("body_y") - self.body_z_index = pose_parameters.get_parameter_index("body_z") - self.breathing_index = pose_parameters.get_parameter_index("breathing") - - self.breathing_start_time = time.time() - - self.panel = None - - def init_pose_converter_panel(self, parent): - self.panel = wx.Panel(parent, style=wx.SIMPLE_BORDER) - self.panel_sizer = wx.BoxSizer(wx.VERTICAL) - self.panel.SetSizer(self.panel_sizer) - self.panel.SetAutoLayout(1) - parent.GetSizer().Add(self.panel, 0, wx.EXPAND) - - if True: - eyebrow_down_mode_text = wx.StaticText(self.panel, label=" --- Eyebrow Down Mode --- ", - style=wx.ALIGN_CENTER) - self.panel_sizer.Add(eyebrow_down_mode_text, 0, wx.EXPAND) - - self.eyebrow_down_mode_choice = wx.Choice( - self.panel, - choices=[ - "ANGRY", - "TROUBLED", - "SERIOUS", - "LOWERED", - ]) - self.eyebrow_down_mode_choice.SetSelection(0) - self.panel_sizer.Add(self.eyebrow_down_mode_choice, 0, wx.EXPAND) - self.eyebrow_down_mode_choice.Bind(wx.EVT_CHOICE, self.change_eyebrow_down_mode) - - separator = wx.StaticLine(self.panel, -1, size=(256, 5)) - self.panel_sizer.Add(separator, 0, wx.EXPAND) - - if True: - wink_mode_text = wx.StaticText(self.panel, label=" --- Wink Mode --- ", style=wx.ALIGN_CENTER) - self.panel_sizer.Add(wink_mode_text, 0, wx.EXPAND) - - self.wink_mode_choice = wx.Choice( - self.panel, - choices=[ - "NORMAL", - "RELAXED", - ]) - self.wink_mode_choice.SetSelection(0) - self.panel_sizer.Add(self.wink_mode_choice, 0, wx.EXPAND) - self.wink_mode_choice.Bind(wx.EVT_CHOICE, self.change_wink_mode) - - separator = wx.StaticLine(self.panel, -1, size=(256, 5)) - self.panel_sizer.Add(separator, 0, wx.EXPAND) - - if True: - iris_size_text = wx.StaticText(self.panel, label=" --- Iris Size --- ", style=wx.ALIGN_CENTER) - self.panel_sizer.Add(iris_size_text, 0, wx.EXPAND) - - 
self.iris_left_slider = wx.Slider(self.panel, minValue=0, maxValue=1000, value=0, style=wx.HORIZONTAL) - self.panel_sizer.Add(self.iris_left_slider, 0, wx.EXPAND) - self.iris_left_slider.Bind(wx.EVT_SLIDER, self.change_iris_size) - - self.iris_right_slider = wx.Slider(self.panel, minValue=0, maxValue=1000, value=0, style=wx.HORIZONTAL) - self.panel_sizer.Add(self.iris_right_slider, 0, wx.EXPAND) - self.iris_right_slider.Bind(wx.EVT_SLIDER, self.change_iris_size) - self.iris_right_slider.Enable(False) - - self.link_left_right_irises = wx.CheckBox( - self.panel, label="Use same value for both sides") - self.link_left_right_irises.SetValue(True) - self.panel_sizer.Add(self.link_left_right_irises, wx.SizerFlags().CenterHorizontal().Border()) - self.link_left_right_irises.Bind(wx.EVT_CHECKBOX, self.link_left_right_irises_clicked) - - separator = wx.StaticLine(self.panel, -1, size=(256, 5)) - self.panel_sizer.Add(separator, 0, wx.EXPAND) - - if True: - iris_size_text = wx.StaticText(self.panel, label=" --- Iris Size --- ", style=wx.ALIGN_CENTER) - self.panel_sizer.Add(iris_size_text, 0, wx.EXPAND) - - self.iris_left_slider = wx.Slider(self.panel, minValue=0, maxValue=1000, value=0, style=wx.HORIZONTAL) - self.panel_sizer.Add(self.iris_left_slider, 0, wx.EXPAND) - self.iris_left_slider.Bind(wx.EVT_SLIDER, self.change_iris_size) - - self.iris_right_slider = wx.Slider(self.panel, minValue=0, maxValue=1000, value=0, style=wx.HORIZONTAL) - self.panel_sizer.Add(self.iris_right_slider, 0, wx.EXPAND) - self.iris_right_slider.Bind(wx.EVT_SLIDER, self.change_iris_size) - self.iris_right_slider.Enable(False) - - self.link_left_right_irises = wx.CheckBox( - self.panel, label="Use same value for both sides") - self.link_left_right_irises.SetValue(True) - self.panel_sizer.Add(self.link_left_right_irises, wx.SizerFlags().CenterHorizontal().Border()) - self.link_left_right_irises.Bind(wx.EVT_CHECKBOX, self.link_left_right_irises_clicked) - - separator = wx.StaticLine(self.panel, -1, size=(256, 5)) - self.panel_sizer.Add(separator, 0, wx.EXPAND) - - if True: - breathing_frequency_text = wx.StaticText( - self.panel, label=" --- Breathing --- ", style=wx.ALIGN_CENTER) - self.panel_sizer.Add(breathing_frequency_text, 0, wx.EXPAND) - - self.restart_breathing_cycle_button = wx.Button(self.panel, label="Restart Breathing Cycle") - self.restart_breathing_cycle_button.Bind(wx.EVT_BUTTON, self.restart_breathing_cycle_clicked) - self.panel_sizer.Add(self.restart_breathing_cycle_button, 0, wx.EXPAND) - - self.breathing_frequency_slider = wx.Slider( - self.panel, minValue=0, maxValue=60, value=20, style=wx.HORIZONTAL) - self.panel_sizer.Add(self.breathing_frequency_slider, 0, wx.EXPAND) - - self.breathing_gauge = wx.Gauge(self.panel, style=wx.GA_HORIZONTAL, range=1000) - self.panel_sizer.Add(self.breathing_gauge, 0, wx.EXPAND) - - self.panel_sizer.Fit(self.panel) - - def restart_breathing_cycle_clicked(self, event: wx.Event): - self.breathing_start_time = time.time() - - def change_eyebrow_down_mode(self, event: wx.Event): - selected_index = self.eyebrow_down_mode_choice.GetSelection() - if selected_index == 0: - self.args.eyebrow_down_mode = EyebrowDownMode.ANGRY - elif selected_index == 1: - self.args.eyebrow_down_mode = EyebrowDownMode.TROUBLED - elif selected_index == 2: - self.args.eyebrow_down_mode = EyebrowDownMode.SERIOUS - else: - self.args.eyebrow_down_mode = EyebrowDownMode.LOWERED - - def change_wink_mode(self, event: wx.Event): - selected_index = self.wink_mode_choice.GetSelection() - if selected_index == 0: - 
self.args.wink_mode = WinkMode.NORMAL - else: - self.args.wink_mode = WinkMode.RELAXED - - def change_iris_size(self, event: wx.Event): - if self.link_left_right_irises.GetValue(): - left_value = self.iris_left_slider.GetValue() - right_value = self.iris_right_slider.GetValue() - if left_value != right_value: - self.iris_right_slider.SetValue(left_value) - self.args.iris_small_left = left_value / 1000.0 - self.args.iris_small_right = left_value / 1000.0 - else: - self.args.iris_small_left = self.iris_left_slider.GetValue() / 1000.0 - self.args.iris_small_right = self.iris_right_slider.GetValue() / 1000.0 - - def link_left_right_irises_clicked(self, event: wx.Event): - if self.link_left_right_irises.GetValue(): - self.iris_right_slider.Enable(False) - else: - self.iris_right_slider.Enable(True) - self.change_iris_size(event) - - def decompose_head_body_param(self, param, threshold=2.0 / 3): - - if abs(param) < threshold: - return (param, 0.0) - else: - if param < 0: - sign = -1.0 - else: - sign = 1.0 - return (threshold * sign, (abs(param) - threshold) * sign) - - breathing_start_time = time.time() - def convert(self, ifacialmocap_pose: Dict[str, float]) -> List[float]: - pose = [0.0 for i in range(self.pose_size)] - - smile_value = \ - (ifacialmocap_pose[MOUTH_SMILE_LEFT] + ifacialmocap_pose[MOUTH_SMILE_RIGHT]) / 2.0 \ - + ifacialmocap_pose[MOUTH_SHRUG_UPPER] - if smile_value < self.args.lower_smile_threshold: - smile_degree = 0.0 - elif smile_value > self.args.upper_smile_threshold: - smile_degree = 1.0 - else: - smile_degree = (smile_value - self.args.lower_smile_threshold) / ( - self.args.upper_smile_threshold - self.args.lower_smile_threshold) - - # Eyebrow - if True: - brow_inner_up = ifacialmocap_pose[BROW_INNER_UP] - brow_outer_up_right = ifacialmocap_pose[BROW_OUTER_UP_RIGHT] - brow_outer_up_left = ifacialmocap_pose[BROW_OUTER_UP_LEFT] - - brow_up_left = clamp(brow_inner_up + brow_outer_up_left, 0.0, 1.0) - brow_up_right = clamp(brow_inner_up + brow_outer_up_right, 0.0, 1.0) - pose[self.eyebrow_raised_left_index] = brow_up_left - pose[self.eyebrow_raised_right_index] = brow_up_right - - brow_down_left = (1.0 - smile_degree) \ - * clamp(ifacialmocap_pose[BROW_DOWN_LEFT] / self.args.eyebrow_down_max_value, 0.0, 1.0) - brow_down_right = (1.0 - smile_degree) \ - * clamp(ifacialmocap_pose[BROW_DOWN_RIGHT] / self.args.eyebrow_down_max_value, 0.0, 1.0) - if self.args.eyebrow_down_mode == EyebrowDownMode.TROUBLED: - pose[self.eyebrow_troubled_left_index] = brow_down_left - pose[self.eyebrow_troubled_right_index] = brow_down_right - elif self.args.eyebrow_down_mode == EyebrowDownMode.ANGRY: - pose[self.eyebrow_angry_left_index] = brow_down_left - pose[self.eyebrow_angry_right_index] = brow_down_right - elif self.args.eyebrow_down_mode == EyebrowDownMode.LOWERED: - pose[self.eyebrow_lowered_left_index] = brow_down_left - pose[self.eyebrow_lowered_right_index] = brow_down_right - elif self.args.eyebrow_down_mode == EyebrowDownMode.SERIOUS: - pose[self.eyebrow_serious_left_index] = brow_down_left - pose[self.eyebrow_serious_right_index] = brow_down_right - - brow_happy_value = clamp(smile_value, 0.0, 1.0) * smile_degree - pose[self.eyebrow_happy_left_index] = brow_happy_value - pose[self.eyebrow_happy_right_index] = brow_happy_value - - # Eye - if True: - # Surprised - pose[self.eye_surprised_left_index] = clamp( - ifacialmocap_pose[EYE_WIDE_LEFT] / self.args.eye_wide_max_value, 0.0, 1.0) - pose[self.eye_surprised_right_index] = clamp( - ifacialmocap_pose[EYE_WIDE_RIGHT] / 
self.args.eye_wide_max_value, 0.0, 1.0) - - # Wink - if self.args.wink_mode == WinkMode.NORMAL: - wink_left_index = self.eye_wink_left_index - wink_right_index = self.eye_wink_right_index - else: - wink_left_index = self.eye_relaxed_left_index - wink_right_index = self.eye_relaxed_right_index - pose[wink_left_index] = (1.0 - smile_degree) * clamp( - ifacialmocap_pose[EYE_BLINK_LEFT] / self.args.eye_blink_max_value, 0.0, 1.0) - pose[wink_right_index] = (1.0 - smile_degree) * clamp( - ifacialmocap_pose[EYE_BLINK_RIGHT] / self.args.eye_blink_max_value, 0.0, 1.0) - pose[self.eye_happy_wink_left_index] = smile_degree * clamp( - ifacialmocap_pose[EYE_BLINK_LEFT] / self.args.eye_blink_max_value, 0.0, 1.0) - pose[self.eye_happy_wink_right_index] = smile_degree * clamp( - ifacialmocap_pose[EYE_BLINK_RIGHT] / self.args.eye_blink_max_value, 0.0, 1.0) - - # Lower eyelid - cheek_squint_denom = self.args.cheek_squint_max_value - self.args.cheek_squint_min_value - pose[self.eye_raised_lower_eyelid_left_index] = \ - clamp( - (ifacialmocap_pose[CHEEK_SQUINT_LEFT] - self.args.cheek_squint_min_value) / cheek_squint_denom, - 0.0, 1.0) - pose[self.eye_raised_lower_eyelid_right_index] = \ - clamp( - (ifacialmocap_pose[CHEEK_SQUINT_RIGHT] - self.args.cheek_squint_min_value) / cheek_squint_denom, - 0.0, 1.0) - - # Iris rotation - if True: - eye_rotation_y = (ifacialmocap_pose[EYE_LOOK_IN_LEFT] - - ifacialmocap_pose[EYE_LOOK_OUT_LEFT] - - ifacialmocap_pose[EYE_LOOK_IN_RIGHT] - + ifacialmocap_pose[EYE_LOOK_OUT_RIGHT]) / 2.0 * self.args.eye_rotation_factor - pose[self.iris_rotation_y_index] = clamp(eye_rotation_y, -1.0, 1.0) - - eye_rotation_x = (ifacialmocap_pose[EYE_LOOK_UP_LEFT] - + ifacialmocap_pose[EYE_LOOK_UP_RIGHT] - - ifacialmocap_pose[EYE_LOOK_DOWN_LEFT] - - ifacialmocap_pose[EYE_LOOK_DOWN_RIGHT]) / 2.0 * self.args.eye_rotation_factor - pose[self.iris_rotation_x_index] = clamp(eye_rotation_x, -1.0, 1.0) - - # Iris size - if True: - pose[self.iris_small_left_index] = self.args.iris_small_left - pose[self.iris_small_right_index] = self.args.iris_small_right - - # Head rotation - if True: - x_param = clamp(-ifacialmocap_pose[HEAD_BONE_X] * 180.0 / math.pi, -15.0, 15.0) / 15.0 - pose[self.head_x_index] = x_param - - y_param = clamp(-ifacialmocap_pose[HEAD_BONE_Y] * 180.0 / math.pi, -10.0, 10.0) / 10.0 - pose[self.head_y_index] = y_param - pose[self.body_y_index] = y_param - - z_param = clamp(ifacialmocap_pose[HEAD_BONE_Z] * 180.0 / math.pi, -15.0, 15.0) / 15.0 - pose[self.neck_z_index] = z_param - pose[self.body_z_index] = z_param - - # Mouth - if True: - jaw_open_denom = self.args.jaw_open_max_value - self.args.jaw_open_min_value - mouth_open = clamp((ifacialmocap_pose[JAW_OPEN] - self.args.jaw_open_min_value) / jaw_open_denom, 0.0, 1.0) - pose[self.mouth_aaa_index] = mouth_open - pose[self.mouth_raised_corner_left_index] = clamp(smile_value, 0.0, 1.0) - pose[self.mouth_raised_corner_right_index] = clamp(smile_value, 0.0, 1.0) - - is_mouth_open = mouth_open > 0.0 - if not is_mouth_open: - mouth_frown_value = clamp( - (ifacialmocap_pose[MOUTH_FROWN_LEFT] + ifacialmocap_pose[ - MOUTH_FROWN_RIGHT]) / self.args.mouth_frown_max_value, 0.0, 1.0) - pose[self.mouth_lowered_corner_left_index] = mouth_frown_value - pose[self.mouth_lowered_corner_right_index] = mouth_frown_value - else: - mouth_lower_down = clamp( - ifacialmocap_pose[MOUTH_LOWER_DOWN_LEFT] + ifacialmocap_pose[MOUTH_LOWER_DOWN_RIGHT], 0.0, 1.0) - mouth_funnel = ifacialmocap_pose[MOUTH_FUNNEL] - mouth_pucker = ifacialmocap_pose[MOUTH_PUCKER] - - 
mouth_point = [mouth_open, mouth_lower_down, mouth_funnel, mouth_pucker] - - aaa_point = [1.0, 1.0, 0.0, 0.0] - iii_point = [0.0, 1.0, 0.0, 0.0] - uuu_point = [0.5, 0.3, 0.25, 0.75] - ooo_point = [1.0, 0.5, 0.5, 0.4] - - decomp = numpy.array([0, 0, 0, 0]) - M = numpy.array([ - aaa_point, - iii_point, - uuu_point, - ooo_point - ]) - - def loss(decomp): - return numpy.linalg.norm(numpy.matmul(decomp, M) - mouth_point) \ - + 0.01 * numpy.linalg.norm(decomp, ord=1) - - opt_result = scipy.optimize.minimize( - loss, decomp, bounds=[(0.0, 1.0), (0.0, 1.0), (0.0, 1.0), (0.0, 1.0)]) - decomp = opt_result["x"] - restricted_decomp = [decomp.item(0), decomp.item(1), decomp.item(2), decomp.item(3)] - pose[self.mouth_aaa_index] = restricted_decomp[0] - pose[self.mouth_iii_index] = restricted_decomp[1] - mouth_funnel_denom = self.args.mouth_funnel_max_value - self.args.mouth_funnel_min_value - ooo_alpha = clamp((mouth_funnel - self.args.mouth_funnel_min_value) / mouth_funnel_denom, 0.0, 1.0) - uo_value = clamp(restricted_decomp[2] + restricted_decomp[3], 0.0, 1.0) - pose[self.mouth_uuu_index] = uo_value * (1.0 - ooo_alpha) - pose[self.mouth_ooo_index] = uo_value * ooo_alpha - - #if self.panel is not None: - #frequency = self.breathing_frequency_slider.GetValue() - frequency = 18 #breathing rate 10-50 - if frequency == 0: - #value = 0.0 - #pose[self.breathing_index] = value - self.breathing_start_time = time.time() - else: - period = 60.0 / frequency - now = time.time() - diff = now - self.breathing_start_time - frac = (diff % period) / period - value = (-math.cos(2 * math.pi * frac) + 1.0) / 2.0 - pose[self.breathing_index] = value - #print("pose", pose[self.breathing_index]) - #self.breathing_gauge.SetValue(int(1000 * value)) - - - - return pose - - -def create_ifacialmocap_pose_converter( - args: Optional[IFacialMocapPoseConverter25Args] = None) -> IFacialMocapPoseConverter: - return IFacialMocapPoseConverter25(args) diff --git a/talkinghead/tha3/mocap/ifacialmocap_v2.py b/talkinghead/tha3/mocap/ifacialmocap_v2.py deleted file mode 100644 index dae46ea..0000000 --- a/talkinghead/tha3/mocap/ifacialmocap_v2.py +++ /dev/null @@ -1,89 +0,0 @@ -import math - -from tha3.mocap.ifacialmocap_constants import BLENDSHAPE_NAMES, HEAD_BONE_X, HEAD_BONE_Y, HEAD_BONE_Z, \ - RIGHT_EYE_BONE_X, RIGHT_EYE_BONE_Y, RIGHT_EYE_BONE_Z, LEFT_EYE_BONE_X, LEFT_EYE_BONE_Y, LEFT_EYE_BONE_Z, \ - HEAD_BONE_QUAT, LEFT_EYE_BONE_QUAT, RIGHT_EYE_BONE_QUAT - -IFACIALMOCAP_PORT = 49983 -IFACIALMOCAP_START_STRING = "iFacialMocap_sahuasouryya9218sauhuiayeta91555dy3719|sendDataVersion=v2".encode('utf-8') - - -def parse_ifacialmocap_v2_pose(ifacialmocap_output): - output = {} - parts = ifacialmocap_output.split("|") - for part in parts: - part = part.strip() - if len(part) == 0: - continue - if "&" in part: - components = part.split("&") - assert len(components) == 2 - key = components[0] - value = float(components[1]) / 100.0 - if key.endswith("_L"): - key = key[:-2] + "Left" - elif key.endswith("_R"): - key = key[:-2] + "Right" - if key in BLENDSHAPE_NAMES: - output[key] = value - elif part.startswith("=head#"): - components = part[len("=head#"):].split(",") - assert len(components) == 6 - output[HEAD_BONE_X] = float(components[0]) * math.pi / 180 - output[HEAD_BONE_Y] = float(components[1]) * math.pi / 180 - output[HEAD_BONE_Z] = float(components[2]) * math.pi / 180 - elif part.startswith("rightEye#"): - components = part[len("rightEye#"):].split(",") - output[RIGHT_EYE_BONE_X] = float(components[0]) * math.pi / 180 - 
output[RIGHT_EYE_BONE_Y] = float(components[1]) * math.pi / 180 - output[RIGHT_EYE_BONE_Z] = float(components[2]) * math.pi / 180 - elif part.startswith("leftEye#"): - components = part[len("leftEye#"):].split(",") - output[LEFT_EYE_BONE_X] = float(components[0]) * math.pi / 180 - output[LEFT_EYE_BONE_Y] = float(components[1]) * math.pi / 180 - output[LEFT_EYE_BONE_Z] = float(components[2]) * math.pi / 180 - output[HEAD_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0] - output[LEFT_EYE_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0] - output[RIGHT_EYE_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0] - return output - - -def parse_ifacialmocap_v1_pose(ifacialmocap_output): - output = {} - parts = ifacialmocap_output.split("|") - for part in parts: - part = part.strip() - if len(part) == 0: - continue - if part.startswith("=head#"): - components = part[len("=head#"):].split(",") - assert len(components) == 6 - output[HEAD_BONE_X] = float(components[0]) * math.pi / 180 - output[HEAD_BONE_Y] = float(components[1]) * math.pi / 180 - output[HEAD_BONE_Z] = float(components[2]) * math.pi / 180 - elif part.startswith("rightEye#"): - components = part[len("rightEye#"):].split(",") - output[RIGHT_EYE_BONE_X] = float(components[0]) * math.pi / 180 - output[RIGHT_EYE_BONE_Y] = float(components[1]) * math.pi / 180 - output[RIGHT_EYE_BONE_Z] = float(components[2]) * math.pi / 180 - elif part.startswith("leftEye#"): - components = part[len("leftEye#"):].split(",") - output[LEFT_EYE_BONE_X] = float(components[0]) * math.pi / 180 - output[LEFT_EYE_BONE_Y] = float(components[1]) * math.pi / 180 - output[LEFT_EYE_BONE_Z] = float(components[2]) * math.pi / 180 - else: - components = part.split("-") - assert len(components) == 2 - key = components[0] - value = float(components[1]) / 100.0 - if key.endswith("_L"): - key = key[:-2] + "Left" - elif key.endswith("_R"): - key = key[:-2] + "Right" - if key in BLENDSHAPE_NAMES: - output[key] = value - output[HEAD_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0] - output[LEFT_EYE_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0] - output[RIGHT_EYE_BONE_QUAT] = [0.0, 0.0, 0.0, 1.0] - return output - diff --git a/talkinghead/tha3/models/LICENSE.txt b/talkinghead/tha3/models/LICENSE.txt deleted file mode 100644 index da6ab6c..0000000 --- a/talkinghead/tha3/models/LICENSE.txt +++ /dev/null @@ -1,396 +0,0 @@ -Attribution 4.0 International - -======================================================================= - -Creative Commons Corporation ("Creative Commons") is not a law firm and -does not provide legal services or legal advice. Distribution of -Creative Commons public licenses does not create a lawyer-client or -other relationship. Creative Commons makes its licenses and related -information available on an "as-is" basis. Creative Commons gives no -warranties regarding its licenses, any material licensed under their -terms and conditions, or any related information. Creative Commons -disclaims all liability for damages resulting from their use to the -fullest extent possible. - -Using Creative Commons Public Licenses - -Creative Commons public licenses provide a standard set of terms and -conditions that creators and other rights holders may use to share -original works of authorship and other material subject to copyright -and certain other rights specified in the public license below. The -following considerations are for informational purposes only, are not -exhaustive, and do not form part of our licenses. 
- - Considerations for licensors: Our public licenses are - intended for use by those authorized to give the public - permission to use material in ways otherwise restricted by - copyright and certain other rights. Our licenses are - irrevocable. Licensors should read and understand the terms - and conditions of the license they choose before applying it. - Licensors should also secure all rights necessary before - applying our licenses so that the public can reuse the - material as expected. Licensors should clearly mark any - material not subject to the license. This includes other CC- - licensed material, or material used under an exception or - limitation to copyright. More considerations for licensors: - wiki.creativecommons.org/Considerations_for_licensors - - Considerations for the public: By using one of our public - licenses, a licensor grants the public permission to use the - licensed material under specified terms and conditions. If - the licensor's permission is not necessary for any reason--for - example, because of any applicable exception or limitation to - copyright--then that use is not regulated by the license. Our - licenses grant only permissions under copyright and certain - other rights that a licensor has authority to grant. Use of - the licensed material may still be restricted for other - reasons, including because others have copyright or other - rights in the material. A licensor may make special requests, - such as asking that all changes be marked or described. - Although not required by our licenses, you are encouraged to - respect those requests where reasonable. More considerations - for the public: - wiki.creativecommons.org/Considerations_for_licensees - -======================================================================= - -Creative Commons Attribution 4.0 International Public License - -By exercising the Licensed Rights (defined below), You accept and agree -to be bound by the terms and conditions of this Creative Commons -Attribution 4.0 International Public License ("Public License"). To the -extent this Public License may be interpreted as a contract, You are -granted the Licensed Rights in consideration of Your acceptance of -these terms and conditions, and the Licensor grants You such rights in -consideration of benefits the Licensor receives from making the -Licensed Material available under these terms and conditions. - - -Section 1 -- Definitions. - - a. Adapted Material means material subject to Copyright and Similar - Rights that is derived from or based upon the Licensed Material - and in which the Licensed Material is translated, altered, - arranged, transformed, or otherwise modified in a manner requiring - permission under the Copyright and Similar Rights held by the - Licensor. For purposes of this Public License, where the Licensed - Material is a musical work, performance, or sound recording, - Adapted Material is always produced where the Licensed Material is - synched in timed relation with a moving image. - - b. Adapter's License means the license You apply to Your Copyright - and Similar Rights in Your contributions to Adapted Material in - accordance with the terms and conditions of this Public License. - - c. Copyright and Similar Rights means copyright and/or similar rights - closely related to copyright including, without limitation, - performance, broadcast, sound recording, and Sui Generis Database - Rights, without regard to how the rights are labeled or - categorized. 
For purposes of this Public License, the rights - specified in Section 2(b)(1)-(2) are not Copyright and Similar - Rights. - - d. Effective Technological Measures means those measures that, in the - absence of proper authority, may not be circumvented under laws - fulfilling obligations under Article 11 of the WIPO Copyright - Treaty adopted on December 20, 1996, and/or similar international - agreements. - - e. Exceptions and Limitations means fair use, fair dealing, and/or - any other exception or limitation to Copyright and Similar Rights - that applies to Your use of the Licensed Material. - - f. Licensed Material means the artistic or literary work, database, - or other material to which the Licensor applied this Public - License. - - g. Licensed Rights means the rights granted to You subject to the - terms and conditions of this Public License, which are limited to - all Copyright and Similar Rights that apply to Your use of the - Licensed Material and that the Licensor has authority to license. - - h. Licensor means the individual(s) or entity(ies) granting rights - under this Public License. - - i. Share means to provide material to the public by any means or - process that requires permission under the Licensed Rights, such - as reproduction, public display, public performance, distribution, - dissemination, communication, or importation, and to make material - available to the public including in ways that members of the - public may access the material from a place and at a time - individually chosen by them. - - j. Sui Generis Database Rights means rights other than copyright - resulting from Directive 96/9/EC of the European Parliament and of - the Council of 11 March 1996 on the legal protection of databases, - as amended and/or succeeded, as well as other essentially - equivalent rights anywhere in the world. - - k. You means the individual or entity exercising the Licensed Rights - under this Public License. Your has a corresponding meaning. - - -Section 2 -- Scope. - - a. License grant. - - 1. Subject to the terms and conditions of this Public License, - the Licensor hereby grants You a worldwide, royalty-free, - non-sublicensable, non-exclusive, irrevocable license to - exercise the Licensed Rights in the Licensed Material to: - - a. reproduce and Share the Licensed Material, in whole or - in part; and - - b. produce, reproduce, and Share Adapted Material. - - 2. Exceptions and Limitations. For the avoidance of doubt, where - Exceptions and Limitations apply to Your use, this Public - License does not apply, and You do not need to comply with - its terms and conditions. - - 3. Term. The term of this Public License is specified in Section - 6(a). - - 4. Media and formats; technical modifications allowed. The - Licensor authorizes You to exercise the Licensed Rights in - all media and formats whether now known or hereafter created, - and to make technical modifications necessary to do so. The - Licensor waives and/or agrees not to assert any right or - authority to forbid You from making technical modifications - necessary to exercise the Licensed Rights, including - technical modifications necessary to circumvent Effective - Technological Measures. For purposes of this Public License, - simply making modifications authorized by this Section 2(a) - (4) never produces Adapted Material. - - 5. Downstream recipients. - - a. Offer from the Licensor -- Licensed Material. 
Every - recipient of the Licensed Material automatically - receives an offer from the Licensor to exercise the - Licensed Rights under the terms and conditions of this - Public License. - - b. No downstream restrictions. You may not offer or impose - any additional or different terms or conditions on, or - apply any Effective Technological Measures to, the - Licensed Material if doing so restricts exercise of the - Licensed Rights by any recipient of the Licensed - Material. - - 6. No endorsement. Nothing in this Public License constitutes or - may be construed as permission to assert or imply that You - are, or that Your use of the Licensed Material is, connected - with, or sponsored, endorsed, or granted official status by, - the Licensor or others designated to receive attribution as - provided in Section 3(a)(1)(A)(i). - - b. Other rights. - - 1. Moral rights, such as the right of integrity, are not - licensed under this Public License, nor are publicity, - privacy, and/or other similar personality rights; however, to - the extent possible, the Licensor waives and/or agrees not to - assert any such rights held by the Licensor to the limited - extent necessary to allow You to exercise the Licensed - Rights, but not otherwise. - - 2. Patent and trademark rights are not licensed under this - Public License. - - 3. To the extent possible, the Licensor waives any right to - collect royalties from You for the exercise of the Licensed - Rights, whether directly or through a collecting society - under any voluntary or waivable statutory or compulsory - licensing scheme. In all other cases the Licensor expressly - reserves any right to collect such royalties. - - -Section 3 -- License Conditions. - -Your exercise of the Licensed Rights is expressly made subject to the -following conditions. - - a. Attribution. - - 1. If You Share the Licensed Material (including in modified - form), You must: - - a. retain the following if it is supplied by the Licensor - with the Licensed Material: - - i. identification of the creator(s) of the Licensed - Material and any others designated to receive - attribution, in any reasonable manner requested by - the Licensor (including by pseudonym if - designated); - - ii. a copyright notice; - - iii. a notice that refers to this Public License; - - iv. a notice that refers to the disclaimer of - warranties; - - v. a URI or hyperlink to the Licensed Material to the - extent reasonably practicable; - - b. indicate if You modified the Licensed Material and - retain an indication of any previous modifications; and - - c. indicate the Licensed Material is licensed under this - Public License, and include the text of, or the URI or - hyperlink to, this Public License. - - 2. You may satisfy the conditions in Section 3(a)(1) in any - reasonable manner based on the medium, means, and context in - which You Share the Licensed Material. For example, it may be - reasonable to satisfy the conditions by providing a URI or - hyperlink to a resource that includes the required - information. - - 3. If requested by the Licensor, You must remove any of the - information required by Section 3(a)(1)(A) to the extent - reasonably practicable. - - 4. If You Share Adapted Material You produce, the Adapter's - License You apply must not prevent recipients of the Adapted - Material from complying with this Public License. - - -Section 4 -- Sui Generis Database Rights. - -Where the Licensed Rights include Sui Generis Database Rights that -apply to Your use of the Licensed Material: - - a. 
for the avoidance of doubt, Section 2(a)(1) grants You the right - to extract, reuse, reproduce, and Share all or a substantial - portion of the contents of the database; - - b. if You include all or a substantial portion of the database - contents in a database in which You have Sui Generis Database - Rights, then the database in which You have Sui Generis Database - Rights (but not its individual contents) is Adapted Material; and - - c. You must comply with the conditions in Section 3(a) if You Share - all or a substantial portion of the contents of the database. - -For the avoidance of doubt, this Section 4 supplements and does not -replace Your obligations under this Public License where the Licensed -Rights include other Copyright and Similar Rights. - - -Section 5 -- Disclaimer of Warranties and Limitation of Liability. - - a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE - EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS - AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF - ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, - IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, - WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR - PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, - ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT - KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT - ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. - - b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE - TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, - NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, - INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, - COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR - USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN - ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR - DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR - IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. - - c. The disclaimer of warranties and limitation of liability provided - above shall be interpreted in a manner that, to the extent - possible, most closely approximates an absolute disclaimer and - waiver of all liability. - - -Section 6 -- Term and Termination. - - a. This Public License applies for the term of the Copyright and - Similar Rights licensed here. However, if You fail to comply with - this Public License, then Your rights under this Public License - terminate automatically. - - b. Where Your right to use the Licensed Material has terminated under - Section 6(a), it reinstates: - - 1. automatically as of the date the violation is cured, provided - it is cured within 30 days of Your discovery of the - violation; or - - 2. upon express reinstatement by the Licensor. - - For the avoidance of doubt, this Section 6(b) does not affect any - right the Licensor may have to seek remedies for Your violations - of this Public License. - - c. For the avoidance of doubt, the Licensor may also offer the - Licensed Material under separate terms or conditions or stop - distributing the Licensed Material at any time; however, doing so - will not terminate this Public License. - - d. Sections 1, 5, 6, 7, and 8 survive termination of this Public - License. - - -Section 7 -- Other Terms and Conditions. - - a. The Licensor shall not be bound by any additional or different - terms or conditions communicated by You unless expressly agreed. - - b. 
Any arrangements, understandings, or agreements regarding the - Licensed Material not stated herein are separate from and - independent of the terms and conditions of this Public License. - - -Section 8 -- Interpretation. - - a. For the avoidance of doubt, this Public License does not, and - shall not be interpreted to, reduce, limit, restrict, or impose - conditions on any use of the Licensed Material that could lawfully - be made without permission under this Public License. - - b. To the extent possible, if any provision of this Public License is - deemed unenforceable, it shall be automatically reformed to the - minimum extent necessary to make it enforceable. If the provision - cannot be reformed, it shall be severed from this Public License - without affecting the enforceability of the remaining terms and - conditions. - - c. No term or condition of this Public License will be waived and no - failure to comply consented to unless expressly agreed to by the - Licensor. - - d. Nothing in this Public License constitutes or may be interpreted - as a limitation upon, or waiver of, any privileges and immunities - that apply to the Licensor or You, including from the legal - processes of any jurisdiction or authority. - - -======================================================================= - -Creative Commons is not a party to its public -licenses. Notwithstanding, Creative Commons may elect to apply one of -its public licenses to material it publishes and in those instances -will be considered the “Licensor.” The text of the Creative Commons -public licenses is dedicated to the public domain under the CC0 Public -Domain Dedication. Except for the limited purpose of indicating that -material is shared under a Creative Commons public license or as -otherwise permitted by the Creative Commons policies published at -creativecommons.org/policies, Creative Commons does not authorize the -use of the trademark "Creative Commons" or any other trademark or logo -of Creative Commons without its prior written consent including, -without limitation, in connection with any unauthorized modifications -to any of its public licenses or any other arrangements, -understandings, or agreements concerning use of licensed material. For -the avoidance of doubt, this paragraph does not form part of the -public licenses. - -Creative Commons may be contacted at creativecommons.org. - diff --git a/talkinghead/tha3/models/placeholder.txt b/talkinghead/tha3/models/placeholder.txt deleted file mode 100644 index 1d4aa71..0000000 --- a/talkinghead/tha3/models/placeholder.txt +++ /dev/null @@ -1 +0,0 @@ -This is the folder to extract the models to. 
\ No newline at end of file diff --git a/talkinghead/tha3/models/separable_float/editor.pt b/talkinghead/tha3/models/separable_float/editor.pt deleted file mode 100644 index 048bb1e..0000000 Binary files a/talkinghead/tha3/models/separable_float/editor.pt and /dev/null differ diff --git a/talkinghead/tha3/models/separable_float/eyebrow_decomposer.pt b/talkinghead/tha3/models/separable_float/eyebrow_decomposer.pt deleted file mode 100644 index 85349c7..0000000 Binary files a/talkinghead/tha3/models/separable_float/eyebrow_decomposer.pt and /dev/null differ diff --git a/talkinghead/tha3/models/separable_float/eyebrow_morphing_combiner.pt b/talkinghead/tha3/models/separable_float/eyebrow_morphing_combiner.pt deleted file mode 100644 index c39e546..0000000 Binary files a/talkinghead/tha3/models/separable_float/eyebrow_morphing_combiner.pt and /dev/null differ diff --git a/talkinghead/tha3/models/separable_float/face_morpher.pt b/talkinghead/tha3/models/separable_float/face_morpher.pt deleted file mode 100644 index 2248a4e..0000000 Binary files a/talkinghead/tha3/models/separable_float/face_morpher.pt and /dev/null differ diff --git a/talkinghead/tha3/models/separable_float/two_algo_face_body_rotator.pt b/talkinghead/tha3/models/separable_float/two_algo_face_body_rotator.pt deleted file mode 100644 index 83e0dfc..0000000 Binary files a/talkinghead/tha3/models/separable_float/two_algo_face_body_rotator.pt and /dev/null differ