OCRonos-Vintage-GPU

Runtime error

App Files Files Community

Pclanglais committed on Aug 4, 2024

Commit

eed441d

verified ·

1 Parent(s): 8482186

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -5

app.py CHANGED Viewed

@@ -16,20 +16,102 @@ generator = ctranslate2.Generator(model_path, device=device)
 tokenizer = transformers.AutoTokenizer.from_pretrained("PleIAs/OCRonos-Vintage")
 # CSS for formatting (unchanged)
 css = """
 <style>
-... (your existing CSS)
 </style>
 """
 # Helper functions
 def generate_html_diff(old_text, new_text):
-    # (unchanged)
-    ...
 def preprocess_text(text):
-    # (unchanged)
-    ...
 def split_text(text, max_tokens=400):
     encoded = tokenizer.encode(text)

 tokenizer = transformers.AutoTokenizer.from_pretrained("PleIAs/OCRonos-Vintage")
 # CSS for formatting (unchanged)
+# CSS for formatting
 css = """
 <style>
+.generation {
+    margin-left: 2em;
+    margin-right: 2em;
+    font-size: 1.2em;
+}
+:target {
+    background-color: #CCF3DF;
+}
+.source {
+    float: left;
+    max-width: 17%;
+    margin-left: 2%;
+}
+.tooltip {
+    position: relative;
+    cursor: pointer;
+    font-variant-position: super;
+    color: #97999b;
+}
+.tooltip:hover::after {
+    content: attr(data-text);
+    position: absolute;
+    left: 0;
+    top: 120%;
+    white-space: pre-wrap;
+    width: 500px;
+    max-width: 500px;
+    z-index: 1;
+    background-color: #f9f9f9;
+    color: #000;
+    border: 1px solid #ddd;
+    border-radius: 5px;
+    padding: 5px;
+    display: block;
+    box-shadow: 0 4px 8px rgba(0,0,0,0.1);
+}
+.deleted {
+    background-color: #ffcccb;
+    text-decoration: line-through;
+}
+.inserted {
+    background-color: #90EE90;
+}
+.manuscript {
+    display: flex;
+    margin-bottom: 10px;
+    align-items: baseline;
+}
+.annotation {
+    width: 15%;
+    padding-right: 20px;
+    color: grey !important;
+    font-style: italic;
+    text-align: right;
+}
+.content {
+    width: 80%;
+}
+h2 {
+    margin: 0;
+    font-size: 1.5em;
+}
+.title-content h2 {
+    font-weight: bold;
+}
+.bibliography-content {
+    color: darkgreen !important;
+    margin-top: -5px;
+}
+.paratext-content {
+    color: #a4a4a4 !important;
+    margin-top: -5px;
+}
 </style>
 """
 # Helper functions
 def generate_html_diff(old_text, new_text):
+    d = difflib.Differ()
+    diff = list(d.compare(old_text.split(), new_text.split()))
+    html_diff = []
+    for word in diff:
+        if word.startswith(' '):
+            html_diff.append(word[2:])
+        elif word.startswith('+ '):
+            html_diff.append(f'<span style="background-color: #90EE90;">{word[2:]}</span>')
+    return ' '.join(html_diff)
 def preprocess_text(text):
+    text = re.sub(r'<[^>]+>', '', text)
+    text = re.sub(r'\n', ' ', text)
+    text = re.sub(r'\s+', ' ', text)
+    return text.strip()
 def split_text(text, max_tokens=400):
     encoded = tokenizer.encode(text)