| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
"""
Fine-tune and evaluate a T5-style sequence-to-sequence model (loaded with
T5ForConditionalGeneration, e.g. a CodeT5+ checkpoint) on JSONL code-completion data.
Training minimises the model's seq2seq loss on (source, target) pairs; evaluation
decodes with beam search and reports exact-match rate and fuzzy edit similarity.
"""
| |
|
| | from __future__ import absolute_import |
| | import os |
| | import sys |
| | import pickle |
| | import torch |
| | import json |
| | import random |
| | import logging |
| | import argparse |
| | import numpy as np |
| | from io import open |
| | from itertools import cycle |
| | import torch.nn as nn |
| | from tqdm import tqdm, trange |
| | from torch.nn.utils.rnn import pad_sequence |
| | from torch.utils.data import DataLoader, Dataset, SequentialSampler, RandomSampler,TensorDataset |
| | from torch.utils.data.distributed import DistributedSampler |
| | from tqdm import tqdm |
| | from fuzzywuzzy import fuzz |
| | import re |
| | import multiprocessing |
| | from transformers import (WEIGHTS_NAME, AdamW, get_linear_schedule_with_warmup, T5ForConditionalGeneration, AutoTokenizer) |
| |
|
# Module-level constants; neither is referenced by the code visible in this file.
divide_number = 2
cpu_cont = 16

# Configure the root logger once at import time so every record carries a
# timestamp, level and logger name.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%m/%d/%Y %H:%M:%S',
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
)
logger = logging.getLogger(__name__)
| |
|
| | |
| |
|
| |
|
class Example(object):
    """One raw sample as read from a JSONL line.

    Attributes:
        idx: identifier of the sample (line index or the record's own id).
        source: whitespace-joined input token string.
        target: whitespace-joined reference output string.
        comp_type: value of the record's "Compiler_Type" field.
        tar_type: value of the record's "Target" field.
    """

    def __init__(self, idx, source, target, comp_type, tar_type):
        self.idx, self.source, self.target = idx, source, target
        self.comp_type, self.tar_type = comp_type, tar_type
| |
|
| |
|
def read_examples(filename):
    """Read examples from a JSON-lines file.

    Each non-blank line must be a JSON object with:
      * "Template_token": list of tokens; the first element is dropped and the
        rest are joined with single spaces to form the source text
        (presumably the first element is a marker token - TODO confirm).
      * "Compiler_Type" and "Target": copied onto the example unchanged.
      * "ground_truth" (optional): list of tokens joined into the target text;
        when absent the target falls back to the source text.
      * "Idx" (optional): overrides the line index as the example id.

    Args:
        filename: path to the UTF-8 encoded .jsonl file.

    Returns:
        list[Example] in file order.

    Raises:
        KeyError: if a record lacks "Template_token", "Compiler_Type" or "Target".
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    examples = []
    with open(filename, encoding="utf-8") as f:
        for idx, line in enumerate(f):
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of crashing inside json.loads.
            if not line.strip():
                continue
            js = json.loads(line)
            inputs = " ".join(js["Template_token"][1:])
            outputs = " ".join(js["ground_truth"]) if "ground_truth" in js else inputs
            examples.append(
                Example(
                    idx=js.get("Idx", idx),
                    source=inputs,
                    target=outputs,
                    comp_type=js["Compiler_Type"],
                    tar_type=js["Target"],
                )
            )
    return examples
| |
|
| |
|
class InputFeatures(object):
    """Tokenised form of one example.

    Attributes:
        example_id: identifier supplied by the caller for this example.
        source_ids / source_mask: encoded source ids and their attention mask.
        target_ids / target_mask: encoded target ids and their attention mask.
        comp_type / tar_type: compiler and target labels carried over from the example.
    """

    def __init__(self, example_id, source_ids, source_mask,
                 target_ids, target_mask, comp_type, tar_type):
        self.example_id = example_id
        self.source_ids, self.source_mask = source_ids, source_mask
        self.target_ids, self.target_mask = target_ids, target_mask
        self.comp_type, self.tar_type = comp_type, tar_type
| | |
def convert_examples_to_features(examples, tokenizer, args,stage=None):
    """Tokenise examples into unpadded id tensors with all-ones masks.

    Args:
        examples: iterable of objects exposing source, target, comp_type, tar_type.
        tokenizer: object whose ``encode`` returns a list of token ids.
        args: namespace providing max_source_length and max_target_length.
        stage: when equal to "test" the literal string "None" is encoded as the
            target instead of the example's real target.

    Returns:
        list[InputFeatures]; each feature's example_id is its position in ``examples``.
    """

    def _encode(text, limit):
        # Truncate to `limit` tokens; special tokens are added by the tokenizer.
        token_ids = tokenizer.encode(text, add_special_tokens=True,
                                     max_length=limit, truncation=True)
        return torch.LongTensor(token_ids)

    features = []
    for position, sample in enumerate(examples):
        src_ids = _encode(sample.source, args.max_source_length)
        tgt_text = "None" if stage == "test" else sample.target
        tgt_ids = _encode(tgt_text, args.max_target_length)
        features.append(
            InputFeatures(
                position,
                src_ids, torch.ones_like(src_ids),
                tgt_ids, torch.ones_like(tgt_ids),
                sample.comp_type, sample.tar_type,
            )
        )
    return features
| |
|
| |
|
| |
|
def set_seed(seed=20240124):
    """Seed Python, NumPy and PyTorch random generators for reproducibility.

    Args:
        seed: integer seed applied to every generator.

    Side effects:
        Sets the PYTHONHASHSEED environment variable (this affects child
        processes; the hash seed of the running interpreter is fixed at
        start-up) and switches cuDNN to deterministic algorithms.
    """
    random.seed(seed)
    # The variable name was previously misspelled, so it was never honoured.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Seed every visible GPU, not just the current one (multi-GPU runs);
    # this call is ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
| | |
| | |
def _build_arg_parser():
    """Create the command-line parser (option names and defaults are the script's CLI).

    Several options (config_name, tokenizer_name, test_org, do_lower_case,
    max_grad_norm, max_steps, eval_steps, train_steps, warmup_steps,
    local_rank) are accepted but not consumed by the code in this function's
    callers; they are kept so existing launch scripts keep working.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--model_name_or_path", default=None, type=str, required=True,
                        help="Path to pre-trained model: e.g. roberta-base" )
    parser.add_argument("--output_dir", default=None, type=str, required=True,
                        help="The output directory where the model predictions and checkpoints will be written.")
    parser.add_argument("--load_model_path", default=None, type=str,
                        help="Path to trained model: Should contain the .bin files" )

    parser.add_argument("--task", default=None, type=str, required=True,
                        help="Task Type: statement_level, next_statement" )

    parser.add_argument("--train_filename", default="../../Dataset/", type=str,
                        help="The train filename. Should contain the .jsonl files for this task.")
    parser.add_argument("--dev_filename", default="../../Dataset/", type=str,
                        help="The dev filename. Should contain the .jsonl files for this task.")
    parser.add_argument("--test_filename", default="../../Dataset/", type=str,
                        help="The test filename. Should contain the .jsonl files for this task.")

    parser.add_argument("--config_name", default="", type=str,
                        help="Pretrained config name or path if not the same as model_name")
    parser.add_argument("--tokenizer_name", default="", type=str,
                        help="Pretrained tokenizer name or path if not the same as model_name")

    parser.add_argument("--do_train", action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval", action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--do_test", action='store_true',
                        help="Whether to run eval on the test set.")
    parser.add_argument("--test_org", action='store_true',
                        help="Whether to run eval on org model.")
    parser.add_argument("--do_lower_case", action='store_true',
                        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--no_cuda", action='store_true',
                        help="Avoid using CUDA when available")
    parser.add_argument("--do_cpuonly", action='store_true',
                        help="Whether CPU only training.")
    parser.add_argument("--do_itr", action='store_true',
                        help="Whether to itr training.")
    parser.add_argument("--train_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--eval_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
                        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--beam_size", default=10, type=int,
                        help="beam size for beam search")
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight deay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs", default=3, type=int,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--max_steps", default=-1, type=int,
                        help="If > 0: set total number of training steps to perform. Override num_train_epochs.")
    parser.add_argument("--eval_steps", default=-1, type=int,
                        help="")
    parser.add_argument("--max_target_length", default=128, type=int,
                        help="")
    parser.add_argument("--max_source_length", default=512, type=int,
                        help="")
    parser.add_argument("--train_steps", default=-1, type=int,
                        help="")
    parser.add_argument("--warmup_steps", default=0, type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument("--local_rank", type=int, default=-1,
                        help="For distributed training: local_rank")
    parser.add_argument('--seed', type=int, default=20240124,
                        help="random seed for initialization")
    return parser


def _task_dir(args):
    """Return the per-task sub-directory name used for both data and checkpoints."""
    return "statement_level" if args.task == "statement_level" else "next_statement"


def _checkpoint_name(args):
    """Return the checkpoint path relative to the task directory for the active mode."""
    # --do_cpuonly takes precedence over --do_itr, as in the original flag handling.
    if args.do_cpuonly:
        return "New_Types/pytorch_model.bin"
    if args.do_itr:
        return "pytorch_model.bin"
    return "Existing_Types/pytorch_model.bin"


def _dataset_path(root, args, split):
    """Build the .jsonl path of `split` ("train", "valid" or "test") under `root`."""
    # For data files --do_itr takes precedence over --do_cpuonly.
    if args.do_itr:
        prefix, leaf = "Iterative_Expansion_Completion", split + ".jsonl"
    elif args.do_cpuonly:
        prefix, leaf = "New_Target_Completion", "New_Types/" + split + ".jsonl"
    else:
        prefix, leaf = "New_Target_Completion", "Existing_Types/" + split + ".jsonl"
    # The task directory follows --task for every split (train/dev previously
    # always read from statement_level, even for the next_statement task).
    return os.path.join(root, prefix, _task_dir(args), leaf)


def _unwrap(model):
    """Return the underlying model when wrapped in DataParallel, else the model itself."""
    return model.module if hasattr(model, 'module') else model


def _load_checkpoint(model, load_model_path, args):
    """Load the state dict for the current task/mode from `load_model_path` into `model`."""
    checkpoint = os.path.join(load_model_path, _task_dir(args), _checkpoint_name(args))
    logger.info("reload model from {}".format(checkpoint))
    # map_location="cpu" lets GPU-saved checkpoints load on CPU-only hosts;
    # load_state_dict copies the values onto whatever device the model uses.
    _unwrap(model).load_state_dict(torch.load(checkpoint, map_location="cpu"))


def _build_tensor_dataset(features, pad_token_id):
    """Pad per-example id/mask tensors to a common length and wrap them in a TensorDataset."""
    source_ids = pad_sequence([f.source_ids for f in features], batch_first=True, padding_value=pad_token_id)
    source_mask = pad_sequence([f.source_mask for f in features], batch_first=True, padding_value=0)
    target_ids = pad_sequence([f.target_ids for f in features], batch_first=True, padding_value=pad_token_id)
    target_mask = pad_sequence([f.target_mask for f in features], batch_first=True, padding_value=0)
    return TensorDataset(source_ids, source_mask, target_ids, target_mask)


def _generate_texts(model, tokenizer, source_ids, source_mask, args):
    """Beam-search decode one batch and return the decoded strings."""
    # generate() lives on the underlying model; `model.module` only exists
    # when DataParallel wraps it (more than one GPU).
    preds = _unwrap(model).generate(source_ids, attention_mask=source_mask, use_cache=True,
                                    num_beams=args.beam_size, max_new_tokens=args.max_target_length)
    return [tokenizer.decode(pred, skip_special_tokens=True, clean_up_tokenization_spaces=False)
            for pred in preds]


def _evaluate_dev(model, tokenizer, eval_examples, eval_data, args, epoch):
    """Run loss + generation over the dev set, log the metrics and return EM accuracy (percent)."""
    eval_dataloader = DataLoader(eval_data, sampler=SequentialSampler(eval_data),
                                 batch_size=args.eval_batch_size)
    logger.info("\n***** Running evaluation *****")
    logger.info(" Num examples = %d", len(eval_examples))
    logger.info(" Batch size = %d", args.eval_batch_size)

    model.eval()
    predictions = []
    eval_loss, num_batches = 0, 0
    for batch in eval_dataloader:
        source_ids, source_mask, target_ids, target_mask = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            loss = model(input_ids=source_ids, attention_mask=source_mask,
                         labels=target_ids, decoder_attention_mask=target_mask).loss
            predictions.extend(_generate_texts(model, tokenizer, source_ids, source_mask, args))
        if args.n_gpu > 1:
            loss = loss.mean()
        # No gradient-accumulation scaling here: it would distort the perplexity.
        eval_loss += loss.item()
        num_batches += 1
    model.train()

    eval_loss = eval_loss / num_batches
    result = {'eval_ppl': round(np.exp(eval_loss),5)}
    for key in sorted(result.keys()):
        logger.info(" %s = %s", key, str(result[key]))
    logger.info(" "+"*"*20)

    exact_matches = 0.0
    edit_sim = 0.0
    total = len(predictions)
    for text, gold in zip(predictions, eval_examples):
        pred = text.strip()
        gt = gold.target
        edit_sim += fuzz.ratio(pred, gt)
        # Exact match ignores whitespace differences between tokens.
        if pred.split() == gt.split():
            exact_matches += 1
    dev_acc = round(exact_matches/total*100, 2)

    logger.info(" %s = %s "%("Epoch",str(epoch)))
    logger.info(" %s = %s "%("EM Acc",str(dev_acc)))
    logger.info(" %s = %s "%("Edit Distance",str(round(edit_sim/total, 2))))
    logger.info(" "+"*"*20)
    return dev_acc


def _train(model, tokenizer, args):
    """Fine-tune `model`, optionally evaluating after each epoch and saving the best checkpoint."""
    train_examples = read_examples(_dataset_path(args.train_filename, args, "train"))
    train_features = convert_examples_to_features(train_examples, tokenizer, args, stage='train')
    train_data = _build_tensor_dataset(train_features, tokenizer.pad_token_id)
    # Each optimizer update consumes `gradient_accumulation_steps` micro-batches;
    # never let the micro-batch size collapse to zero.
    micro_batch_size = max(1, args.train_batch_size // args.gradient_accumulation_steps)
    train_dataloader = DataLoader(train_data, sampler=RandomSampler(train_data),
                                  batch_size=micro_batch_size)

    # NOTE(review): T5 modules appear to name their norms `layer_norm`, so the
    # 'LayerNorm.weight' pattern may never match - confirm before using a
    # non-zero --weight_decay.
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
         'weight_decay': args.weight_decay},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
    # The scheduler advances once per optimizer update, not once per micro-batch.
    total_updates = (len(train_dataloader) * args.num_train_epochs) // args.gradient_accumulation_steps
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=int(total_updates*0.1),
                                                num_training_steps=total_updates)

    logger.info("***** Running training *****")
    logger.info(" Num examples = %d", len(train_examples))
    logger.info(" Batch size = %d", args.train_batch_size * args.gradient_accumulation_steps)
    logger.info(" Num epoch = %d", args.num_train_epochs)

    model.train()
    patience, best_score, losses = 0, 0, []
    dev_cache = None  # (examples, dataset), built on first evaluation and reused
    for epoch in range(args.num_train_epochs):
        for batch in train_dataloader:
            source_ids, source_mask, target_ids, target_mask = tuple(t.to(args.device) for t in batch)

            # NOTE(review): padded label positions keep pad_token_id rather than
            # -100, so they presumably contribute to the loss - confirm intent.
            loss = model(input_ids=source_ids, attention_mask=source_mask.gt(0),
                         labels=target_ids, decoder_attention_mask=target_mask.gt(0)).loss

            if args.n_gpu > 1:
                loss = loss.mean()  # average the per-replica losses
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            losses.append(loss.item())
            loss.backward()
            if len(losses) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
                scheduler.step()
                if len(losses) // args.gradient_accumulation_steps % 100 == 0:
                    logger.info("epoch {} step {} loss {}".format(epoch,
                                len(losses)//args.gradient_accumulation_steps,
                                round(np.mean(losses[-100*args.gradient_accumulation_steps:]),4)))

        if args.do_eval:
            if dev_cache is None:
                eval_examples = read_examples(_dataset_path(args.dev_filename, args, "valid"))
                eval_features = convert_examples_to_features(eval_examples, tokenizer, args, stage='dev')
                dev_cache = (eval_examples, _build_tensor_dataset(eval_features, tokenizer.pad_token_id))
            eval_examples, eval_data = dev_cache

            dev_acc = _evaluate_dev(model, tokenizer, eval_examples, eval_data, args, epoch)

            if dev_acc > best_score:
                best_score = dev_acc
                # Save under the same relative name that _load_checkpoint reads,
                # creating the mode sub-directory (e.g. Existing_Types/) if needed.
                output_model_file = os.path.join(args.output_dir, _task_dir(args), _checkpoint_name(args))
                os.makedirs(os.path.dirname(output_model_file), exist_ok=True)
                torch.save(_unwrap(model).state_dict(), output_model_file)
                patience = 0
            else:
                patience += 1
                # Early stopping after three consecutive epochs without improvement.
                if patience == 3:
                    break
            logger.info(" Best score:%s",best_score)
            logger.info(" "+"*"*20)


def _test(model, tokenizer, args):
    """Generate predictions for the test split and log per-compiler / per-target metrics."""
    if args.load_model_path is not None:
        if args.do_itr and args.do_cpuonly:
            # This flag combination evaluates a fixed, previously saved model.
            args.load_model_path = "../../../../Saved_Models/CodeT5+/New_Target_Completion"
        _load_checkpoint(model, args.load_model_path, args)

    args.test_filename = _dataset_path(args.test_filename, args, "test")
    eval_examples = read_examples(args.test_filename)
    eval_features = convert_examples_to_features(eval_examples, tokenizer, args, stage='test')
    eval_data = _build_tensor_dataset(eval_features, tokenizer.pad_token_id)
    eval_dataloader = DataLoader(eval_data, sampler=SequentialSampler(eval_data),
                                 batch_size=args.eval_batch_size)

    model.eval()
    predictions = []
    for batch in tqdm(eval_dataloader,total=len(eval_dataloader)):
        source_ids, source_mask, _, _ = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            predictions.extend(_generate_texts(model, tokenizer, source_ids, source_mask, args))
    model.train()

    # Per target: [exact matches, summed fuzz.ratio, sample count].
    stats = {
        "GCC": {"riscv":[0,0,0], "nvptx":[0,0,0], "arc":[0,0,0]},
        "LLVM": {"RISCV":[0,0,0], "NVPTX":[0,0,0], "ARC":[0,0,0],"RI5CY":[0,0,0]},
    }
    for text, gold in zip(predictions, eval_examples):
        per_target = stats.get(gold.comp_type)
        if per_target is None:
            continue  # only GCC and LLVM samples are scored
        pred = text.strip()
        gt = gold.target
        # Unknown target names get their own bucket instead of raising KeyError.
        counts = per_target.setdefault(gold.tar_type, [0, 0, 0])
        counts[1] += fuzz.ratio(pred, gt)
        counts[2] += 1
        if pred.split() == gt.split():
            counts[0] += 1

    for compiler in ("GCC", "LLVM"):
        for k, (matches, sim_sum, count) in stats[compiler].items():
            if count > 0:
                dev_acc = round(1.0*sim_sum / count, 2)
                dev_em = round(100.0*matches / count, 4)
                logger.info(" "+"#"*20)
                logger.info("%s %s: %s = %s "%(compiler, k, "Edit Distance", str(dev_acc)))
                logger.info("%s %s: %s = %s "%(compiler, k, "Exact Match Rate", str(dev_em)))
                logger.info(" "+"*"*20)


def main():
    """Parse the CLI, load tokenizer and model, then run training and/or testing.

    Side effects: creates --output_dir, may write checkpoints beneath it,
    reads dataset files, and logs metrics. `args` is extended with `n_gpu`
    and `device`, and `args.test_filename` is rewritten to the resolved test
    file path when --do_test is set.
    """
    args = _build_arg_parser().parse_args()

    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',level=logging.INFO )

    # Honour --no_cuda: stay on CPU even when a GPU is present.
    use_cuda = torch.cuda.is_available() and not args.no_cuda
    device = torch.device("cuda" if use_cuda else "cpu")
    args.n_gpu = torch.cuda.device_count() if use_cuda else 0
    args.device = device
    logger.info("device: %s, n_gpu: %s",device, args.n_gpu)

    set_seed(args.seed)

    os.makedirs(args.output_dir, exist_ok=True)

    tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
    model = T5ForConditionalGeneration.from_pretrained(args.model_name_or_path)

    logger.info("Training/evaluation parameters %s", args)

    if args.load_model_path is not None:
        _load_checkpoint(model, args.load_model_path, args)

    model.to(args.device)

    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    if args.do_train:
        _train(model, tokenizer, args)

    if args.do_test:
        _test(model, tokenizer, args)
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
# Script entry point: run the pipeline only when executed directly, not on import.
if __name__ == "__main__":
    main()
| |
|
| |
|
| |
|
| |
|