| import argparse |
| import glob |
| import os |
| from tqdm import tqdm |
| from collections import namedtuple |
|
|
| import numpy as np |
| import torch |
| import torchvision.transforms as transforms |
| from torchvision import models |
| from PIL import Image |
|
|
| from ldm.modules.evaluate.ssim import ssim |
|
|
|
|
# Shared PIL-image -> tensor conversion used by load_img. The pipeline holds
# only ToTensor: no resizing or mean/std normalization is applied here.
transform = transforms.Compose([transforms.ToTensor()])
|
|
def normalize_tensor(in_feat, eps=1e-10):
    """Scale a feature tensor to unit L2 norm along dimension 1.

    Args:
        in_feat: Tensor whose dimension 1 is the axis to normalize over.
            Any number of trailing dimensions is accepted.
        eps: Small constant added to the norm so that all-zero feature
            vectors do not cause a division by zero.

    Returns:
        Tensor with the same shape as ``in_feat``.
    """
    # keepdim=True leaves a singleton axis at dim 1, so the norm broadcasts
    # against in_feat without hard-coding a 4-D layout.
    norm_factor = torch.sqrt(torch.sum(in_feat ** 2, dim=1, keepdim=True))
    return in_feat / (norm_factor + eps)
|
|
|
|
def cos_sim(in0, in1):
    """Return the cosine similarity of two feature maps, averaged spatially.

    Both inputs are normalized along dim 1 with ``normalize_tensor``; their
    element-wise product is summed over dim 1 and the result is averaged over
    dims 2 and 3. All sizes are read from ``in0``, which is indexed as a
    4-D tensor.

    Returns:
        1-D tensor holding one similarity value per batch entry of ``in0``.
    """
    unit0 = normalize_tensor(in0)
    unit1 = normalize_tensor(in1)
    batch, extent_x, extent_y = in0.size(0), in0.size(2), in0.size(3)

    per_location = (unit0 * unit1).sum(dim=1).view(batch, extent_x, extent_y)
    # Reduce one axis at a time (dim 2 of the input first, then dim 3).
    averaged_x = per_location.mean(dim=1)
    return averaged_x.mean(dim=1).view(batch)
|
|
|
|
class squeezenet(torch.nn.Module):
    """SqueezeNet 1.1 feature stack cut into seven consecutive slices.

    ``forward`` runs the slices in order and returns the output of each one
    as a ``SqueezeOutputs`` namedtuple with fields ``relu1`` ... ``relu7``.
    """

    def __init__(self, requires_grad=False, pretrained=True):
        """Build the slices from ``models.squeezenet1_1(...).features``.

        Args:
            requires_grad: When false, every parameter is frozen.
            pretrained: Forwarded unchanged to the torchvision constructor.
        """
        super(squeezenet, self).__init__()
        features = models.squeezenet1_1(pretrained=pretrained).features

        # Half-open [start, stop) index ranges into `features`, one per slice.
        slice_bounds = (
            (0, 2),
            (2, 5),
            (5, 8),
            (8, 10),
            (10, 11),
            (11, 12),
            (12, 13),
        )
        for number, (start, stop) in enumerate(slice_bounds, start=1):
            block = torch.nn.Sequential()
            for idx in range(start, stop):
                # Each layer is registered under its index in `features`.
                block.add_module(str(idx), features[idx])
            setattr(self, "slice%d" % number, block)
        self.N_slices = len(slice_bounds)

        if not requires_grad:
            for weight in self.parameters():
                weight.requires_grad = False

    def forward(self, X):
        """Return the activation after each of the seven slices."""
        stages = (
            self.slice1,
            self.slice2,
            self.slice3,
            self.slice4,
            self.slice5,
            self.slice6,
            self.slice7,
        )
        taps = []
        current = X
        for stage in stages:
            current = stage(current)
            taps.append(current)

        result_type = namedtuple(
            "SqueezeOutputs",
            ["relu1", "relu2", "relu3", "relu4", "relu5", "relu6", "relu7"],
        )
        return result_type(*taps)
|
|
|
|
class alexnet(torch.nn.Module):
    """AlexNet feature stack cut into five consecutive slices.

    ``forward`` runs the slices in order and returns the output of each one
    as an ``AlexnetOutputs`` namedtuple with fields ``relu1`` ... ``relu5``.
    """

    def __init__(self, requires_grad=False, pretrained=True):
        """Build the slices from ``models.alexnet(...).features``.

        Args:
            requires_grad: When false, every parameter is frozen.
            pretrained: Forwarded unchanged to the torchvision constructor.
        """
        super(alexnet, self).__init__()
        features = models.alexnet(pretrained=pretrained).features

        # (attribute name, layer indices taken from `features`) per slice.
        layout = [
            ("slice1", range(0, 2)),
            ("slice2", range(2, 5)),
            ("slice3", range(5, 8)),
            ("slice4", range(8, 10)),
            ("slice5", range(10, 12)),
        ]
        for attr_name, layer_ids in layout:
            block = torch.nn.Sequential()
            for idx in layer_ids:
                # Each layer is registered under its index in `features`.
                block.add_module(str(idx), features[idx])
            setattr(self, attr_name, block)
        self.N_slices = len(layout)

        if not requires_grad:
            for weight in self.parameters():
                weight.requires_grad = False

    def forward(self, X):
        """Return the activation after each of the five slices."""
        taps = []
        current = X
        for attr_name in ("slice1", "slice2", "slice3", "slice4", "slice5"):
            current = getattr(self, attr_name)(current)
            taps.append(current)

        result_type = namedtuple(
            "AlexnetOutputs", ["relu1", "relu2", "relu3", "relu4", "relu5"]
        )
        return result_type(*taps)
|
|
|
|
class vgg16(torch.nn.Module):
    """VGG-16 feature stack cut into five consecutive slices.

    ``forward`` runs the slices in order and returns the output of each one
    as a ``VggOutputs`` namedtuple with fields ``relu1_2``, ``relu2_2``,
    ``relu3_3``, ``relu4_3`` and ``relu5_3``.
    """

    def __init__(self, requires_grad=False, pretrained=True):
        """Build the slices from ``models.vgg16(...).features``.

        Args:
            requires_grad: When false, every parameter is frozen.
            pretrained: Forwarded unchanged to the torchvision constructor.
        """
        super(vgg16, self).__init__()
        features = models.vgg16(pretrained=pretrained).features

        # Index in `features` at which each slice begins; the final entry is
        # the exclusive end of the last slice.
        cut_points = [0, 4, 9, 16, 23, 30]
        for number in range(1, len(cut_points)):
            block = torch.nn.Sequential()
            for idx in range(cut_points[number - 1], cut_points[number]):
                # Each layer is registered under its index in `features`.
                block.add_module(str(idx), features[idx])
            setattr(self, "slice%d" % number, block)
        self.N_slices = len(cut_points) - 1

        if not requires_grad:
            for weight in self.parameters():
                weight.requires_grad = False

    def forward(self, X):
        """Return the activation after each of the five slices."""
        stages = [self.slice1, self.slice2, self.slice3, self.slice4, self.slice5]
        taps = []
        current = X
        for stage in stages:
            current = stage(current)
            taps.append(current)

        result_type = namedtuple(
            "VggOutputs",
            ["relu1_2", "relu2_2", "relu3_3", "relu4_3", "relu5_3"],
        )
        return result_type(*taps)
|
|
|
|
class resnet(torch.nn.Module):
    """ResNet backbone exposing five intermediate activations.

    ``forward`` returns an ``Outputs`` namedtuple with fields ``relu1``
    (after conv1/bn1/relu) and ``conv2`` ... ``conv5`` (after ``layer1`` ...
    ``layer4`` of the torchvision model).
    """

    # Depths for which a matching ``models.resnet<depth>`` constructor is used.
    _SUPPORTED_DEPTHS = (18, 34, 50, 101, 152)

    def __init__(self, requires_grad=False, pretrained=True, num=18):
        """Instantiate the torchvision ResNet of the requested depth.

        Args:
            requires_grad: When false, every parameter is frozen, matching
                the behaviour of the other backbones in this file.
            pretrained: Forwarded unchanged to the torchvision constructor.
            num: Network depth; one of 18, 34, 50, 101 or 152.

        Raises:
            ValueError: If ``num`` is not a supported depth.
        """
        super(resnet, self).__init__()
        if num not in self._SUPPORTED_DEPTHS:
            # Fail early: without this check an unknown depth only surfaced
            # later as an AttributeError on the missing ``self.net``.
            raise ValueError(
                "Unsupported ResNet depth %r; expected one of %s"
                % (num, ", ".join(str(d) for d in self._SUPPORTED_DEPTHS))
            )
        # Resolve models.resnet18 / resnet34 / ... by name.
        self.net = getattr(models, "resnet%d" % num)(pretrained=pretrained)
        self.N_slices = 5

        self.conv1 = self.net.conv1
        self.bn1 = self.net.bn1
        self.relu = self.net.relu
        self.maxpool = self.net.maxpool
        self.layer1 = self.net.layer1
        self.layer2 = self.net.layer2
        self.layer3 = self.net.layer3
        self.layer4 = self.net.layer4

        # Previously the flag was accepted but never applied.
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, X):
        """Return the five tapped activations for input ``X``."""
        h = self.relu(self.bn1(self.conv1(X)))
        h_relu1 = h
        h = self.layer1(self.maxpool(h))
        h_conv2 = h
        h = self.layer2(h)
        h_conv3 = h
        h = self.layer3(h)
        h_conv4 = h
        h = self.layer4(h)
        h_conv5 = h

        outputs = namedtuple(
            "Outputs", ["relu1", "conv2", "conv3", "conv4", "conv5"]
        )
        return outputs(h_relu1, h_conv2, h_conv3, h_conv4, h_conv5)
|
|
| |
class PNet(torch.nn.Module):
    """Pre-trained network with all channels equally weighted by default.

    Wraps one of the backbones defined in this file and scores a pair of
    images as the sum over tapped layers of ``1 - cos_sim`` between their
    feature maps.
    """

    def __init__(self, pnet_type="vgg", pnet_rand=False, use_gpu=True):
        """Select and build the backbone.

        Args:
            pnet_type: ``"vgg"``/``"vgg16"``, ``"alex"``, ``"squeeze"`` or
                ``"resnet<depth>"`` (e.g. ``"resnet18"``, ``"resnet101"``).
            pnet_rand: When true the backbone is built with
                ``pretrained=False``.
            use_gpu: When true the backbone and the normalization constants
                are moved to CUDA.

        Raises:
            ValueError: If ``pnet_type`` is not recognised, or the
                ``resnet`` suffix is not an integer.
        """
        super(PNet, self).__init__()

        self.use_gpu = use_gpu

        self.pnet_type = pnet_type
        self.pnet_rand = pnet_rand

        # Per-channel constants applied to the inputs in forward() as
        # (x - shift) / scale. They are plain tensors, not registered
        # buffers, so Module.to()/cuda() does not move them.
        self.shift = torch.Tensor([-0.030, -0.088, -0.188]).view(1, 3, 1, 1)
        self.scale = torch.Tensor([0.458, 0.448, 0.450]).view(1, 3, 1, 1)

        pretrained = not self.pnet_rand
        if self.pnet_type in ["vgg", "vgg16"]:
            self.net = vgg16(pretrained=pretrained, requires_grad=False)
        elif self.pnet_type == "alex":
            self.net = alexnet(pretrained=pretrained, requires_grad=False)
        elif self.pnet_type.startswith("resnet"):
            # Parse everything after the prefix so three-digit depths
            # ("resnet101", "resnet152") are handled as well as two-digit ones.
            depth = int(self.pnet_type[len("resnet"):])
            self.net = resnet(
                pretrained=pretrained, requires_grad=False, num=depth
            )
        elif self.pnet_type == "squeeze":
            self.net = squeezenet(pretrained=pretrained, requires_grad=False)
        else:
            raise ValueError("Unknown pnet_type %r" % (self.pnet_type,))

        self.L = self.net.N_slices

        if use_gpu:
            self.net.cuda()
            self.shift = self.shift.cuda()
            self.scale = self.scale.cuda()

    def forward(self, in0, in1, retPerLayer=False):
        """Return the summed per-layer distance between ``in0`` and ``in1``.

        Args:
            in0: First image batch.
            in1: Second image batch.
            retPerLayer: When true, also return the list of per-layer scores.

        Returns:
            The summed score, or ``(summed score, per-layer scores)`` when
            ``retPerLayer`` is true.
        """
        # Both inputs are normalized with constants expanded to in0's shape.
        in0_sc = (in0 - self.shift.expand_as(in0)) / self.scale.expand_as(in0)
        in1_sc = (in1 - self.shift.expand_as(in0)) / self.scale.expand_as(in0)

        outs0 = self.net.forward(in0_sc)
        outs1 = self.net.forward(in1_sc)

        all_scores = []
        val = None
        for kk in range(len(outs0)):
            cur_score = 1.0 - cos_sim(outs0[kk], outs1[kk])
            val = 1.0 * cur_score if val is None else val + cur_score
            all_scores.append(cur_score)

        if retPerLayer:
            return (val, all_scores)
        return val
|
|
|
|
|
|
|
|
| |
def ssim_metric(img1, img2, mask=None):
    """Compute SSIM between ``img1`` and ``img2`` via the project ``ssim``.

    ``mask`` is forwarded unchanged and ``size_average`` is always disabled
    (presumably so that one value per batch entry is returned rather than a
    batch mean -- confirm against ``ldm.modules.evaluate.ssim``).
    """
    options = {"mask": mask, "size_average": False}
    return ssim(img1, img2, **options)
|
|
|
|
| |
def psnr(img1, img2, mask=None, reshape=False):
    """Return the per-sample PSNR between two image batches.

    The value is ``10 * log10(1 / mse)``, i.e. a peak signal value of 1 is
    used; identical inputs therefore yield ``inf``.

    Args:
        img1: First image batch; dim 0 is the batch axis.
        img2: Second image batch, same shape as ``img1``.
        mask: Optional weighting multiplied onto the squared error. The
            error sum is divided by ``3 * mask.sum()`` per sample, clamped
            to at least 1 before the factor of 3 (the 3 presumably is the
            RGB channel count for a single-channel mask -- confirm).
        reshape: Flatten with ``reshape`` instead of ``view``.

    Returns:
        1-D tensor with one PSNR value per batch entry.
    """
    batch = img1.size(0)

    def flatten(tensor):
        # Collapse everything but the batch axis, honouring `reshape`.
        if reshape:
            return tensor.reshape(batch, -1)
        return tensor.view(batch, -1)

    squared_error = (img1 - img2).pow(2)
    if mask is None:
        mse = flatten(squared_error).mean(dim=1)
    else:
        masked_total = flatten(squared_error * mask).sum(dim=1)
        mse = masked_total / (3 * flatten(mask).sum(dim=1).clamp(min=1))

    return 10 * (1 / mse).log10()
|
|
|
|
| |
def perceptual_sim(img1, img2, vgg16):
    """Score two images with the callable ``vgg16``.

    Each image is mapped through ``x * 2 - 1`` before being handed to
    ``vgg16``, whose return value is passed back unchanged.
    """

    def rescale(image):
        return image * 2 - 1

    return vgg16(rescale(img1), rescale(img2))
|
|
def load_img(img_name, size=None):
    """Load an image file, convert it with ``transform`` and move it to CUDA.

    Args:
        img_name: Path of the image file, passed to ``Image.open``.
        size: ``None`` to keep the original size, an ``int`` for a square
            resize, or a two-element sequence read as ``(size[0], size[1])``
            and handed to PIL's ``resize`` in swapped order.

    Returns:
        The transformed image on the CUDA device with a leading axis of
        size 1 added.

    Raises:
        Exception: Any error raised while opening, resizing or converting
            the image is reported on stdout and then re-raised unchanged.
    """
    try:
        # Convert inside the `with` block: PIL reads pixel data lazily, so
        # the file must still be open when transform() touches the image.
        with Image.open(img_name) as source:
            img = source
            if isinstance(size, int):
                img = img.resize((size, size))
            elif size is not None:
                img = img.resize((size[1], size[0]))
            tensor = transform(img).cuda()
    except Exception as e:
        # No placeholder tensor is allocated here: it was never returned,
        # and a failing .cuda() call would hide the original error.
        print("Failed at loading %s " % img_name)
        print(e)
        raise
    return tensor.unsqueeze(0)
|
|
|
|
def compute_perceptual_similarity(folder, pred_img, tgt_img, take_every_other):
    """Evaluate perceptual similarity, PSNR and SSIM over a folder tree.

    Every entry of ``folder`` is treated as one sample directory. Inside it,
    ``tgt_img`` must glob to exactly one target image and ``pred_img`` globs
    to the candidate predictions. Per sample the best candidate score is
    kept: the minimum perceptual distance and the maximum SSIM / PSNR.

    Args:
        folder: Directory whose entries are the per-sample directories.
        pred_img: Glob pattern (relative to a sample directory) of the
            predicted images.
        tgt_img: Glob pattern (relative to a sample directory) of the target
            image.
        take_every_other: When true, consecutive samples are merged pairwise
            by keeping the better score of each pair; an unpaired trailing
            sample is dropped.

    Returns:
        Dict mapping ``"Perceptual similarity"``, ``"PSNR"`` and ``"SSIM"``
        to ``(mean, std)`` tuples.
    """
    # PNet() already places the backbone on CUDA (use_gpu defaults to True).
    perc_model = PNet().to("cuda")
    perc_model.eval()

    values_percsim = []
    values_ssim = []
    values_psnr = []
    for sample_name in tqdm(sorted(os.listdir(folder))):
        # os.path.join works whether or not `folder` ends with a separator.
        sample_dir = os.path.join(folder, sample_name)
        pred_paths = glob.glob(os.path.join(sample_dir, pred_img))
        tgt_paths = glob.glob(os.path.join(sample_dir, tgt_img))
        assert len(tgt_paths) == 1

        # Sentinels that any real score replaces.
        perc_sim = 10000
        ssim_sim = -10
        psnr_sim = -10

        # The target is the same for every candidate, so load it once.
        t_img = load_img(tgt_paths[0])
        for pred_path in pred_paths:
            p_img = load_img(pred_path, size=t_img.shape[2:])
            t_perc_sim = perceptual_sim(p_img, t_img, perc_model).item()
            perc_sim = min(perc_sim, t_perc_sim)

            ssim_sim = max(ssim_sim, ssim_metric(p_img, t_img).item())
            psnr_sim = max(psnr_sim, psnr(p_img, t_img).item())

        values_percsim.append(perc_sim)
        values_ssim.append(ssim_sim)
        values_psnr.append(psnr_sim)

    if take_every_other:
        # zip() over the even/odd entries yields len // 2 pairs.
        values_percsim = [
            min(a, b) for a, b in zip(values_percsim[0::2], values_percsim[1::2])
        ]
        values_psnr = [
            max(a, b) for a, b in zip(values_psnr[0::2], values_psnr[1::2])
        ]
        values_ssim = [
            max(a, b) for a, b in zip(values_ssim[0::2], values_ssim[1::2])
        ]

    avg_percsim = np.mean(np.array(values_percsim))
    std_percsim = np.std(np.array(values_percsim))

    avg_psnr = np.mean(np.array(values_psnr))
    std_psnr = np.std(np.array(values_psnr))

    avg_ssim = np.mean(np.array(values_ssim))
    std_ssim = np.std(np.array(values_ssim))

    return {
        "Perceptual similarity": (avg_percsim, std_percsim),
        "PSNR": (avg_psnr, std_psnr),
        "SSIM": (avg_ssim, std_ssim),
    }
|
|
|
|
def compute_perceptual_similarity_from_list(pred_imgs_list, tgt_imgs_list,
                                            take_every_other,
                                            simple_format=True):
    """Evaluate perceptual similarity, PSNR and SSIM for listed image paths.

    ``pred_imgs_list[i]`` holds the prediction(s) for ``tgt_imgs_list[i]``.
    Per target the best candidate score is kept: the minimum perceptual
    distance and the maximum SSIM / PSNR. Samples whose best PSNR is
    infinite are reported on stdout and left out of the PSNR statistics.

    Args:
        pred_imgs_list: One entry per target; either a single image path or
            a non-empty list of image paths.
        tgt_imgs_list: Target image paths.
        take_every_other: When true, consecutive samples are merged pairwise
            by keeping the better score of each pair; an unpaired trailing
            sample is dropped. A pair whose better PSNR is infinite is
            excluded from the PSNR statistics.
        simple_format: When true the statistics are returned as lists of
            Python floats, otherwise as tuples of NumPy scalars.

    Returns:
        Dict mapping ``"Perceptual similarity"``, ``"PSNR"`` and ``"SSIM"``
        to their ``[mean, std]`` (or ``(mean, std)``) statistics.
    """
    # PNet() already places the backbone on CUDA (use_gpu defaults to True).
    perc_model = PNet().to("cuda")
    perc_model.eval()

    # np.float was removed from NumPy; the builtin float gives the same value.
    infinity = float("inf")

    values_percsim = []
    values_ssim = []
    values_psnr = []
    equal_count = 0
    ambig_count = 0
    for i, tgt_img in enumerate(tqdm(tgt_imgs_list)):
        pred_imgs = pred_imgs_list[i]
        if not isinstance(pred_imgs, list):
            pred_imgs = [pred_imgs]
        assert len(pred_imgs) > 0

        # Sentinels that any real score replaces.
        perc_sim = 10000
        ssim_sim = -10
        psnr_sim = -10

        # The target is the same for every candidate, so load it once.
        t_img = load_img(tgt_img)
        for pred_path in pred_imgs:
            p_img = load_img(pred_path, size=t_img.shape[2:])
            t_perc_sim = perceptual_sim(p_img, t_img, perc_model).item()
            perc_sim = min(perc_sim, t_perc_sim)

            ssim_sim = max(ssim_sim, ssim_metric(p_img, t_img).item())
            psnr_sim = max(psnr_sim, psnr(p_img, t_img).item())

        values_percsim.append(perc_sim)
        values_ssim.append(ssim_sim)
        # Infinite values stay in the list for now so that all three lists
        # remain index-aligned for the pairwise merge; they are filtered
        # out just before the statistics are computed.
        values_psnr.append(psnr_sim)
        if psnr_sim == infinity:
            # p_img is the last prediction examined for this target.
            if torch.allclose(p_img, t_img):
                equal_count += 1
                print("{} equal src and wrp images.".format(equal_count))
            else:
                ambig_count += 1
                print("{} ambiguous src and wrp images.".format(ambig_count))

    if take_every_other:
        # zip() over the even/odd entries yields len // 2 pairs.
        values_percsim = [
            min(a, b) for a, b in zip(values_percsim[0::2], values_percsim[1::2])
        ]
        values_psnr = [
            max(a, b) for a, b in zip(values_psnr[0::2], values_psnr[1::2])
        ]
        values_ssim = [
            max(a, b) for a, b in zip(values_ssim[0::2], values_ssim[1::2])
        ]

    values_psnr = [value for value in values_psnr if value != infinity]

    avg_percsim = np.mean(np.array(values_percsim))
    std_percsim = np.std(np.array(values_percsim))

    avg_psnr = np.mean(np.array(values_psnr))
    std_psnr = np.std(np.array(values_psnr))

    avg_ssim = np.mean(np.array(values_ssim))
    std_ssim = np.std(np.array(values_ssim))

    if simple_format:
        # Plain Python floats in lists.
        return {
            "Perceptual similarity": [float(avg_percsim), float(std_percsim)],
            "PSNR": [float(avg_psnr), float(std_psnr)],
            "SSIM": [float(avg_ssim), float(std_ssim)],
        }
    return {
        "Perceptual similarity": (avg_percsim, std_percsim),
        "PSNR": (avg_psnr, std_psnr),
        "SSIM": (avg_ssim, std_ssim),
    }
|
|
|
|
def compute_perceptual_similarity_from_list_topk(pred_imgs_list, tgt_imgs_list,
                                                 take_every_other, resize=False):
    """Evaluate listed predictions and keep every individual score.

    ``pred_imgs_list[i]`` is the list of predictions for
    ``tgt_imgs_list[i]``. Besides the statistics over the best score per
    target (minimum perceptual distance, maximum SSIM / PSNR), the score of
    every single prediction is returned.

    Args:
        pred_imgs_list: One list of prediction image paths per target.
        tgt_imgs_list: Target image paths.
        take_every_other: Must be false; pairwise merging is not implemented
            for this function.
        resize: When true the target is loaded at 256x256 (and each
            prediction is resized to match the loaded target).

    Returns:
        Dict with ``"avg_of_best"`` (``[mean, std]`` lists for
        ``"Perceptual similarity"``, ``"PSNR"``, ``"SSIM"``) and
        ``"individual"`` (NumPy arrays under ``"PSIM"``, ``"PSNR"``,
        ``"SSIM"`` built from the per-target score lists).

    Raises:
        AssertionError: If ``take_every_other`` is true or an entry of
            ``pred_imgs_list`` is not a list.
    """
    if take_every_other:
        # Rejected up front rather than after every image has been scored;
        # raised explicitly so the guard also holds under `python -O`.
        raise AssertionError(
            "Do this later, after specifying topk to get proper results"
        )

    # PNet() already places the backbone on CUDA (use_gpu defaults to True).
    perc_model = PNet().to("cuda")
    perc_model.eval()

    values_percsim = []
    values_ssim = []
    values_psnr = []
    individual_percsim = []
    individual_ssim = []
    individual_psnr = []
    for i, tgt_img in enumerate(tqdm(tgt_imgs_list)):
        pred_imgs = pred_imgs_list[i]
        if not isinstance(pred_imgs, list):
            raise AssertionError("each entry of pred_imgs_list must be a list")

        # Sentinels that any real score replaces.
        perc_sim = 10000
        ssim_sim = -10
        psnr_sim = -10
        sample_percsim = []
        sample_ssim = []
        sample_psnr = []

        # The target is the same for every candidate, so load it once.
        if resize:
            t_img = load_img(tgt_img, size=(256, 256))
        else:
            t_img = load_img(tgt_img)

        for pred_path in pred_imgs:
            p_img = load_img(pred_path, size=t_img.shape[2:])

            t_perc_sim = perceptual_sim(p_img, t_img, perc_model).item()
            sample_percsim.append(t_perc_sim)
            perc_sim = min(perc_sim, t_perc_sim)

            t_ssim = ssim_metric(p_img, t_img).item()
            sample_ssim.append(t_ssim)
            ssim_sim = max(ssim_sim, t_ssim)

            t_psnr = psnr(p_img, t_img).item()
            sample_psnr.append(t_psnr)
            psnr_sim = max(psnr_sim, t_psnr)

        values_percsim.append(perc_sim)
        values_ssim.append(ssim_sim)
        values_psnr.append(psnr_sim)
        individual_percsim.append(sample_percsim)
        individual_ssim.append(sample_ssim)
        individual_psnr.append(sample_psnr)

    avg_percsim = np.mean(np.array(values_percsim))
    std_percsim = np.std(np.array(values_percsim))

    avg_psnr = np.mean(np.array(values_psnr))
    std_psnr = np.std(np.array(values_psnr))

    avg_ssim = np.mean(np.array(values_ssim))
    std_ssim = np.std(np.array(values_ssim))

    individual_percsim = np.array(individual_percsim)
    individual_psnr = np.array(individual_psnr)
    individual_ssim = np.array(individual_ssim)

    return {
        "avg_of_best": {
            "Perceptual similarity": [float(avg_percsim), float(std_percsim)],
            "PSNR": [float(avg_psnr), float(std_psnr)],
            "SSIM": [float(avg_ssim), float(std_ssim)],
        },
        "individual": {
            "PSIM": individual_percsim,
            "PSNR": individual_psnr,
            "SSIM": individual_ssim,
        }
    }
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: evaluate one folder tree, print the metrics
    # and optionally write the same report to a text file.
    args = argparse.ArgumentParser()
    args.add_argument("--folder", type=str, default="")
    args.add_argument("--pred_image", type=str, default="")
    args.add_argument("--target_image", type=str, default="")
    args.add_argument("--take_every_other", action="store_true", default=False)
    args.add_argument("--output_file", type=str, default="")

    opts = args.parse_args()

    folder = opts.folder
    pred_img = opts.pred_image
    tgt_img = opts.target_image

    results = compute_perceptual_similarity(
        folder, pred_img, tgt_img, opts.take_every_other
    )

    # Build the report once so stdout and the file receive identical text.
    report_lines = []
    for key in results:
        report_lines.append("%s for %s: \n" % (key, opts.folder))
        report_lines.append(
            "\t {:0.4f} | {:0.4f} \n".format(results[key][0], results[key][1])
        )

    for line in report_lines:
        print(line)

    # With the default empty --output_file, open("") used to fail only after
    # the whole evaluation had run; now the file is written only when a path
    # was given, and the context manager closes it even if a write fails.
    if opts.output_file:
        with open(opts.output_file, 'w') as f:
            f.writelines(report_lines)
|
|