Spaces:
Sleeping
Sleeping
File size: 4,756 Bytes
86a2bff 58773bb f9437fe 9cbc665 58773bb 66ddc02 58773bb 66ddc02 1d56a5d 8ef67fc 25d463a 2bce853 25d463a 5e7f50e 8ef67fc cecf2b8 1d56a5d f3fdb65 10ca2d7 a17ae36 66a9140 f3fdb65 c485ef9 885832d c485ef9 1d7fc0d a17ae36 1d7fc0d 1f9e351 4c715cf c485ef9 c1aaef4 5e7f50e f3fdb65 8c2cd7c 5e7f50e 2c26c26 f3fdb65 76d23b8 7ded1cc 76d23b8 f3fdb65 7ded1cc c485ef9 0ed5800 8c2cd7c 1d7fc0d 0586f3b 9f73464 1d7fc0d 7ded1cc b4f4d50 f3fdb65 293fb2f 32078b8 1718a54 679ce01 acbef04 679ce01 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import gradio as gr
import os
import re
import subprocess
import tempfile
from transformers import pipeline
# Model identifier handed to the transformers pipeline below.
MODEL_ID = "ejschwartz/oo-method-test-model-bylibrary"

# Built once at import time; run_model() calls this object for every request.
classifier = pipeline(task="text-classification", model=MODEL_ID)
def run_model(text):
    """Classify *text* and report every label's score plus the best label.

    Returns a dict with two keys:
      * ``"label"``: the label with the highest score, or ``"unknown"``
        when the classifier produced no entries.
      * ``"confidences"``: a list of ``{"label", "confidence"}`` dicts, one
        per classifier entry, in the order the classifier returned them.
    """
    raw = classifier(text, top_k=None, truncation=True)

    # Normalise the result shape: a lone dict becomes a one-element list,
    # and a list nested one level deep is unwrapped.
    if isinstance(raw, dict):
        raw = [raw]
    if raw and isinstance(raw[0], list):
        raw = raw[0]

    confidences = []
    for item in raw:
        confidences.append({"label": item["label"], "confidence": item["score"]})

    if not confidences:
        return {"label": "unknown", "confidences": confidences}

    winner = max(confidences, key=lambda scored: scored["confidence"])
    return {"label": winner["label"], "confidences": confidences}
def get_all_dis(bname, addrs=None):
    """Disassemble *bname* and return each function's listing keyed by address.

    ``bat-ana`` is run to write an analysis file, then ``bat-dis`` prints the
    disassembly, which is split on its ``;;; function 0x...`` header lines.

    Args:
        bname: Path of the binary to analyse.
        addrs: Optional iterable of addresses; each one is forwarded to
            ``bat-ana`` as a ``--function-at`` option.

    Returns:
        A dict mapping function address (``int``) to that function's
        disassembly (``bytes``). Runs of spaces are collapsed to one space
        and every line containing ``;;`` is dropped.

    Raises:
        subprocess.CalledProcessError: If either tool exits non-zero.
        FileNotFoundError: If either tool is not on ``PATH``.
    """
    function_at_args = []
    if addrs is not None:
        for addr in addrs:
            function_at_args += ["--function-at", str(addr)]

    # The commands are passed as argument lists (no shell), so a path with
    # spaces or shell metacharacters is handed to the tools verbatim instead
    # of being re-parsed by /bin/sh.  The context manager removes the
    # analysis file as soon as both tools are done with it.
    with tempfile.NamedTemporaryFile(
        prefix=os.path.basename(bname) + "_", suffix=".bat_ana"
    ) as anafile:
        ananame = anafile.name
        subprocess.check_output(
            ["bat-ana", *function_at_args, "--no-post-analysis", "-o", ananame, bname],
            stderr=subprocess.DEVNULL,
        )
        output = subprocess.check_output(
            [
                "bat-dis",
                "--no-insn-address",
                "--no-bb-cfg-arrows",
                "--color=off",
                ananame,
            ],
            stderr=subprocess.DEVNULL,
        )

    output = re.sub(b' +', b' ', output)

    func_dis = {}
    last_func = None
    current_output = []
    for line in output.splitlines():
        if line.startswith(b";;; function 0x"):
            # A new header closes the previous function's listing.
            # NOTE(review): a repeated address seen here overwrites the
            # earlier listing silently, while the final function (below) is
            # ignored with a warning instead -- confirm which is intended.
            if last_func is not None:
                func_dis[last_func] = b"\n".join(current_output)
            last_func = int(line.split()[2], 16)
            current_output = []

        # Lines containing ";;" (headers and annotations) are not kept.
        if b";;" not in line:
            current_output.append(line)

    if last_func is not None:
        if last_func in func_dis:
            print("Warning: Ignoring multiple functions at the same address")
        else:
            func_dis[last_func] = b"\n".join(current_output)

    return func_dis
def get_funs(f):
    """Return the address of every function found in *f*, one per line.

    *f* must expose a ``name`` attribute; that value is passed to
    get_all_dis() as the binary path.  Addresses are rendered as
    ``0x``-prefixed hexadecimal and joined with newlines.
    """
    disassembly_by_addr = get_all_dis(f.name)
    hex_addrs = ["%#x" % addr for addr in disassembly_by_addr]
    return "\n".join(hex_addrs)
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been flattened -- confirm it against the deployed layout.
with gr.Blocks() as demo:
    # Holds the {address: disassembly bytes} dict produced by
    # file_change_fn and read back by function_change_fn.
    all_dis_state = gr.State()

    gr.Markdown(
        """
# Function/Method Detector
First, upload a binary.
This model was only trained on 32-bit MSVC++ binaries. You can provide
other types of binaries, but the result will probably be gibberish.
"""
    )

    file_widget = gr.File(label="Binary file")

    # Created hidden; file_change_fn toggles its visibility.
    with gr.Column(visible=False) as col:
        gr.Markdown("""
Great, you selected an executable! Now pick the function you would like to analyze.
""")

        # The placeholder choice is replaced by file_change_fn.
        fun_dropdown = gr.Dropdown(label="Select a function", choices=["Woohoo!"], interactive=True)

        gr.Markdown("""
Below you can find the selected function's disassembly, and the model's
prediction of whether the function is an object-oriented method or a
regular function.
""")

        with gr.Row(visible=True) as result:
            disassembly = gr.Textbox(label="Disassembly", lines=20)
            with gr.Column():
                clazz = gr.Label()

    # Presumably placed outside the hidden column so the examples can be
    # picked before any file has been uploaded -- TODO confirm.
    example_widget = gr.Examples(
        examples=[f.path for f in os.scandir(os.path.join(os.path.dirname(__file__), "examples"))],
        inputs=file_widget,
        outputs=[all_dis_state, disassembly, clazz]
    )

    def file_change_fn(file, progress=gr.Progress()):
        """Disassemble the chosen file and fill the function dropdown.

        With no file, the function column is hidden and the stored
        disassembly is cleared.  Otherwise the column is shown, the dropdown
        lists every function address (hex) with the first one preselected,
        and the full disassembly dict is stored in ``all_dis_state``.
        """
        if file is None:
            return {col: gr.update(visible=False),
                    all_dis_state: None}

        progress(0, desc="Disassembling executable")
        fun_data = get_all_dis(file.name)
        addrs = ["%#x" % addr for addr in fun_data]
        default_addr = addrs[0] if addrs else None
        return {col: gr.update(visible=True),
                fun_dropdown: gr.update(choices=addrs, value=default_addr),
                all_dis_state: fun_data}

    def function_change_fn(selected_fun, fun_data):
        """Show the selected function's disassembly and the model's scores.

        ``selected_fun`` is the hex address string from the dropdown and
        ``fun_data`` the dict stored by file_change_fn.
        """
        # file_change_fn sets the dropdown to None when a binary yields no
        # functions, and stores None once the file is cleared.  Either way
        # there is nothing to classify, so blank the outputs instead of
        # raising on int(None, 16) / None[...].
        if selected_fun is None or not fun_data:
            return {disassembly: gr.update(value=""),
                    clazz: gr.update(value=None)}

        disassembly_str = fun_data[int(selected_fun, 16)].decode("utf-8")
        load_results = run_model(disassembly_str)
        # Pass the scores to the Label as a {label: confidence} mapping.
        top_k = {e['label']: e['confidence'] for e in load_results['confidences']}
        return {disassembly: gr.update(value=disassembly_str),
                clazz: gr.update(value=top_k)}

    file_widget.change(file_change_fn, file_widget, [col, fun_dropdown, all_dis_state])
    fun_dropdown.change(function_change_fn, [fun_dropdown, all_dis_state], [disassembly, clazz])
# Enable Gradio's request queue before the server is started.
demo.queue()

# Listen on all network interfaces at port 7860, with debug mode and
# in-browser error display switched on.
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
    # share=True,
    debug=True,
    show_error=True,
)
|