8000 Implemented ESRGAN and GFPGAN upscaling by fAIseh00d · Pull Request #50 · xaviviro/refacer · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content
This repository was archived by the owner on Aug 29, 2023. It is now read-only.

Implemented ESRGAN and GFPGAN upscaling #50

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from refacer import Refacer
import argparse
import ngrok
import os

parser = argparse.ArgumentParser(description='Refacer')
parser.add_argument("--max_num_faces", type=int, help="Max number of faces on UI", default=5)
Expand Down Expand Up @@ -51,6 +52,7 @@ def run(*vars):
origins=vars[1:(num_faces+1)]
destinations=vars[(num_faces+1):(num_faces*2)+1]
thresholds=vars[(num_faces*2)+1:]
upscaler=vars[-1]

faces = []
for k in range(0,num_faces):
Expand All @@ -61,11 +63,15 @@ def run(*vars):
'threshold':thresholds[k]
})

return refacer.reface(video_path,faces)
return refacer.reface(video_path,faces,upscaler)

origin = []
destination = []
thresholds = []
upscaler = []
upscaler_models = ['None']
upscaler_models += [file for file in os.listdir('upscaler_models') if file.endswith('.onnx')]
print(upscaler_models)

with gr.Blocks() as demo:
with gr.Row():
Expand All @@ -81,10 +87,12 @@ def run(*vars):
destination.append(gr.Image(label="Destination face"))
with gr.Row():
thresholds.append(gr.Slider(label="Threshold",minimum=0.0,maximum=1.0,value=0.2))
with gr.Row():
upscaler.append(gr.Radio(label="Face upscaler", choices=upscaler_models, value=upscaler_models[0], interactive=True))
with gr.Row():
button=gr.Button("Reface", variant="primary")

button.click(fn=run,inputs=[video]+origin+destination+thresholds,outputs=[video2])
button.click(fn=run,inputs=[video]+origin+destination+thresholds+upscaler,outputs=[video2])

if args.ngrok is not None:
connect(args.ngrok, args.server_port, {'region': args.ngrok_region, 'authtoken_from_env': False})
Expand Down
24 changes: 24 additions & 0 deletions esrgan_onnx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import numpy as np

class ESRGAN:
    """Run an ESRGAN-style ONNX super-resolution model on a single image.

    The wrapped session is fed one float32 tensor of shape 1xCxHxW with
    values in [0, 1] and its first output is read back in the same layout.
    No colour-channel reordering is performed by this class.
    """

    def __init__(self, session):
        """Store the inference session and cache the name of its first input.

        Args:
            session: object exposing ``get_inputs()`` and ``run()``
                (presumably an ``onnxruntime.InferenceSession`` -- the
                caller constructs it).
        """
        self.session = session
        self.model_input = self.session.get_inputs()[0].name

    def _pre_process(self, image_array):
        """Convert an HxWxC 8-bit image into a 1xCxHxW float32 tensor in [0, 1]."""
        chw = np.asarray(image_array).transpose(2, 0, 1).astype(np.float32) / 255.0
        return np.expand_dims(chw, axis=0)

    def _post_process(self, result):
        """Convert a CxHxW float tensor in [0, 1] into an HxWxC uint8 image."""
        hwc = np.clip(result.transpose(1, 2, 0), 0, 1) * 255.0
        # Round before casting: a bare astype(uint8) truncates toward zero, so
        # a value such as 254.7 (or 70.99999 from float error) would lose a level.
        return np.round(hwc).astype(np.uint8)

    def get(self, image_array):
        """Upscale ``image_array`` and report the achieved scale factor.

        Args:
            image_array: HxWxC image array (8-bit values expected).

        Returns:
            ``(upscaled, scale_factor)`` where ``upscaled`` is an HxWxC uint8
            array and ``scale_factor`` is the integer ratio of output width
            to input width.

        Raises:
            ValueError: if ``image_array`` is not a non-empty 3-D array.
        """
        if image_array.ndim != 3 or 0 in image_array.shape:
            raise ValueError(
                "expected a non-empty HxWxC image, got shape "
                f"{image_array.shape}"
            )
        input_width = image_array.shape[1]
        model_input = self._pre_process(image_array)
        # [0][0]: first model output, first (only) item of the batch.
        raw = self.session.run(None, {self.model_input: model_input})[0][0]
        upscaled = self._post_process(raw)
        scale_factor = upscaled.shape[1] // input_width
        return upscaled, scale_factor
31 changes: 31 additions & 0 deletions gfpgan_onnx.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import cv2
import numpy as np

class GFPGAN:
    """Run a GFPGAN face-restoration ONNX model on a single BGR face crop.

    The crop is resized to ``INPUT_SIZE`` x ``INPUT_SIZE``, converted to RGB
    and normalised to [-1, 1] before inference; the first model output is
    mapped back to an 8-bit BGR image.
    """

    # Side length (pixels) the crop is resized to before inference.
    INPUT_SIZE = 512

    def __init__(self, session):
        """Store the inference session and cache the name of its first input.

        Args:
            session: object exposing ``get_inputs()`` and ``run()``
                (presumably an ``onnxruntime.InferenceSession`` -- the
                caller constructs it).
        """
        self.session = session
        self.model_input = self.session.get_inputs()[0].name

    def _pre_process(self, image_array):
        """Convert a BGR image into a 1x3xHxW float32 RGB tensor in [-1, 1]."""
        resized = cv2.resize(image_array, (self.INPUT_SIZE, self.INPUT_SIZE))
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
        normalised = (rgb.astype('float32') / 255.0 - 0.5) / 0.5
        return np.expand_dims(normalised, axis=0).transpose(0, 3, 1, 2)

    def _post_process(self, result):
        """Convert a 3xHxW RGB tensor in [-1, 1] into an HxWx3 uint8 BGR image."""
        unit = (np.clip(result, -1, 1) + 1) / 2
        rgb = unit.transpose(1, 2, 0) * 255.0
        # Reverse the channel axis (RGB -> BGR) in NumPy so the transposed,
        # non-contiguous view never has to be handed to OpenCV.
        bgr = rgb[:, :, ::-1]
        # Round before casting: a bare astype(uint8) truncates toward zero.
        # Return a contiguous array so downstream OpenCV calls accept it.
        return np.ascontiguousarray(np.round(bgr).astype(np.uint8))

    def get(self, image_array):
        """Restore ``image_array`` and report the achieved scale factor.

        Args:
            image_array: HxWx3 BGR face crop (8-bit values expected).

        Returns:
            ``(restored, scale_factor)`` where ``restored`` is an HxWx3 uint8
            BGR array and ``scale_factor`` is the integer ratio of output
            width to input width.

        Raises:
            ValueError: if the input is wider than the model output, which
                would yield a scale factor of 0 and a degenerate transform
                in the caller.
        """
        input_width = image_array.shape[1]
        model_input = self._pre_process(image_array)
        # [0][0]: first model output, first (only) item of the batch.
        raw = self.session.run(None, {self.model_input: model_input})[0][0]
        restored = self._post_process(raw)
        scale_factor = restored.shape[1] // input_width
        if scale_factor < 1:
            raise ValueError(
                f"input width {input_width} exceeds model output width "
                f"{restored.shape[1]}; cannot derive an integer scale factor"
            )
        return restored, scale_factor
79 changes: 76 additions & 3 deletions refacer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
from insightface.utils.storage import ensure_available
import re
import subprocess
import numpy as np
from esrgan_onnx import ESRGAN
from gfpgan_onnx import GFPGAN

class RefacerMode(Enum):
CPU, CUDA, COREML, TENSORRT = range(1, 5)
Expand Down Expand Up @@ -93,6 +96,10 @@ def __init_apps(self):
model_path = 'inswapper_128.onnx'
sess_swap = rt.InferenceSession(model_path, self.sess_options, providers=self.providers)
self.face_swapper = INSwapper(model_path,sess_swap)
self.face_swapper_input_size = self.face_swapper.input_size[0]
#print("INSwapper resolution = ",self.face_swapper_input_size)



def prepare_faces(self, faces):
self.replacement_faces=[]
Expand Down Expand Up @@ -149,10 +156,58 @@ def __get_faces(self,frame,max_num=0):
ret.append(face)
return ret

def paste_upscale(self, bgr_fake, M, img):
    """Upscale a swapped face crop and blend it back into the full frame.

    Args:
        bgr_fake: swapped face crop, as returned by the face swapper when
            called with ``paste_back=False``.
        M: 2x3 affine matrix returned alongside ``bgr_fake``; presumably it
            maps frame coordinates to crop coordinates (verify against the
            swapper implementation).
        img: the frame the face was taken from.

    Returns:
        A uint8 image the size of ``img`` with the upscaled face blended in
        through a feathered mask, or ``img`` itself when the warped face
        does not cover any pixel of the frame.
    """
    upsk_face, self.scale_factor = self.face_upscaler_model.get(bgr_fake)
    # The upscaled crop is scale_factor times larger than the original crop,
    # so scale the transform before inverting it (crop -> frame).
    IM = cv2.invertAffineTransform(M * self.scale_factor)
    frame_size = (img.shape[1], img.shape[0])  # (width, height) for OpenCV

    # White rectangle the size of the upscaled face, warped into the frame.
    img_matte = np.full((upsk_face.shape[0], upsk_face.shape[1]), 255, dtype=np.uint8)
    img_matte = cv2.warpAffine(img_matte, IM, frame_size, flags=cv2.INTER_NEAREST, borderValue=0.0)
    # Blacken a 1-pixel frame border so the mask is not expanded along the
    # image edges by the erosion/blur below.
    img_matte[:1, :] = img_matte[-1:, :] = img_matte[:, :1] = img_matte[:, -1:] = 0

    # Locate the warped white area.
    mask_h_inds, mask_w_inds = np.where(img_matte == 255)
    if mask_h_inds.size == 0:
        # The face lies entirely outside the frame; np.max/np.min would
        # raise on the empty index arrays, so leave the frame untouched.
        return img

    # Characteristic size of the warped area (geometric mean of its extents).
    mask_h = np.max(mask_h_inds) - np.min(mask_h_inds)
    mask_w = np.max(mask_w_inds) - np.min(mask_w_inds)
    mask_size = int(np.sqrt(mask_h * mask_w))

    # Erode the mask (insightface's empirical guess was max(mask_size//10, 10)).
    k = max(mask_size // 12, 8)
    img_matte = cv2.erode(img_matte, np.ones((k, k), np.uint8), iterations=1)

    # Feather the mask edge (insightface's empirical guess was
    # max(mask_size//20, 5)); the Gaussian kernel side must be odd, hence 2*k+1.
    k = max(mask_size // 24, 4)
    blur_size = (2 * k + 1, 2 * k + 1)
    img_matte = cv2.GaussianBlur(img_matte, blur_size, 0)

    # Normalise the mask to [0, 1] and add a channel axis for broadcasting.
    img_matte = img_matte.astype(np.float32) / 255
    img_matte = np.reshape(img_matte, [img_matte.shape[0], img_matte.shape[1], 1])

    # Warp the upscaled face into frame coordinates.
    paste_face = cv2.warpAffine(upsk_face, IM, frame_size, borderMode=cv2.BORDER_REPLICATE)

    # Alpha-blend the face over the original frame.
    blended = img_matte * paste_face + (1 - img_matte) * img.astype(np.float32)
    return blended.astype(np.uint8)

def process_first_face(self,frame):
faces = self.__get_faces(frame,max_num=1)
if len(faces) != 0:
frame = self.face_swapper.get(frame, faces[0], self.replacement_faces[0][1], paste_back=True)
if not self.upscale_en:
#print('\nRun native paste_back')
frame = self.face_swapper.get(frame, faces[0], self.replacement_faces[0][1], paste_back=True)
else:
#print('\nRun upscale')
bgr_fake, M = self.face_swapper.get(frame, faces[0], self.replacement_faces[0][1], paste_back=False)
frame = self.paste_upscale(bgr_fake,M,frame)
return frame

def process_faces(self,frame):
Expand All @@ -161,7 +216,13 @@ def process_faces(self,frame):
for i in range(len(faces) - 1, -1, -1):
sim = self.rec_app.compute_sim(rep_face[0], faces[i].embedding)
if sim>=rep_face[2]:
frame = self.face_swapper.get(frame, faces[i], rep_face[1], paste_back=True)
if not self.upscale_en:
#print('\nRun native paste_back')
frame = self.face_swapper.get(frame, faces[i], rep_face[1], paste_back=True)
else:
#print('\nRun upscale')
bgr_fake, M = self.face_swapper.get(frame, faces[i], rep_face[1], paste_back=False)
frame = self.paste_upscale(bgr_fake,M,frame)
del faces[i]
break
return frame
Expand All @@ -182,7 +243,19 @@ def reface_group(self, faces, frames, output):
for result in results:
output.write(result)

def reface(self, video_path, faces):
def reface(self, video_path, faces, upscaler):
self.upscale_en = False
if upscaler != 'None':
self.upscale_en = True
model_path = osp.join('upscaler_models',upscaler)
sess_upsk = rt.InferenceSession(model_path, self.sess_options, providers=self.providers)
if 'GFPGAN' in str(upscaler):
self.face_upscaler_model = GFPGAN(sess_upsk)
#print('\nGFPGAN upscaling.')
else:
self.face_upscaler_model = ESRGAN(sess_upsk)
#print('\nESRGAN upscaling.')
#else: print('\nNot upscaling.')
self.__check_video_has_audio(video_path)
output_video_path = os.path.join('out',Path(video_path).name)
self.prepare_faces(faces)
Expand Down
2 changes: 2 additions & 0 deletions upscaler_models/Put ESRGAN and GFPGAN ONNX models here.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
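ESRGAN models can have any filename ending in .onnx, as long as it does not contain "GFPGAN".
GFPGAN models must have "GFPGAN" in the filename (e.g. GFPGANv1.4.onnx) and also end in .onnx.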
0