madpeset/nikud.py

127 lines
3.9 KiB
Python
Raw Permalink Normal View History

2024-01-09 11:55:52 +02:00
# -*- coding: utf-8 -*-
2024-01-20 18:00:05 +02:00
import os
from dotenv import load_dotenv
2024-01-09 11:55:52 +02:00
import streamlit as st
import requests
2024-01-20 18:00:05 +02:00
from wand.image import Image
from wand.drawing import Drawing
from wand.color import Color
2024-01-14 00:03:03 +02:00
2024-01-09 11:55:52 +02:00
# Load environment variables from the .env file so the API key does not
# have to be hard-coded in this script.
load_dotenv()
# Key passed to get_nakdan_response(); os.getenv returns None when API_KEY is unset.
api_key = os.getenv("API_KEY")
2024-01-09 13:45:23 +02:00
# Specify the font size
2024-01-20 18:00:05 +02:00
font_size = 40
# Width of the rendered image, in pixels
img_width = 696
2024-01-09 12:39:15 +02:00
2024-01-09 11:57:06 +02:00
2024-01-14 00:03:03 +02:00
# Place the API key in the .env file,
# e.g. API_KEY=your_api_key
2024-01-09 11:55:52 +02:00
def get_nakdan_response(hebrew_text, api_key, timeout=30):
    """Send Hebrew text to the Dicta Nakdan endpoint and return its reply.

    Args:
        hebrew_text: Text placed in the ``data`` field of the request.
        api_key: Value placed in the ``apiKey`` field of the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response, or ``None`` when the request
        fails, times out, comes back with an HTTP error status, or the body
        is not valid JSON. Failures are printed rather than raised.
    """
    url = "https://nakdan-5-3.loadbalancer.dicta.org.il/addnikud"
    headers = {'Content-Type': 'text/plain;charset=utf-8'}
    params = {
        "task": "nakdan",
        "useTokenization": True,
        "genre": "modern",  # or "rabbinic" or "premodern" based on user's need
        "data": hebrew_text,
        "addmorph": True,
        "matchpartial": True,
        "keepmetagim": False,
        "keepqq": False,
        "apiKey": api_key,
    }
    try:
        # Without an explicit timeout requests waits indefinitely, which
        # would freeze the UI and make the Timeout handler below pointless.
        response = requests.post(
            url, headers=headers, json=params, timeout=timeout
        )
        response.raise_for_status()  # Raises stored HTTPError, if one occurred.
    except requests.exceptions.HTTPError as errh:
        print("Http Error:", errh)
    except requests.exceptions.ConnectionError as errc:
        print("Error Connecting:", errc)
    except requests.exceptions.Timeout as errt:
        print("Timeout Error:", errt)
    except requests.exceptions.RequestException as err:
        print("Something went wrong", err)
    else:
        try:
            return response.json()
        except ValueError as errj:
            # A 2xx reply with a non-JSON body must not crash the caller.
            print("Invalid JSON in response:", errj)
    return None
2024-01-09 11:55:52 +02:00
def _extract_words(response):
    """Collect the words to render (plus "\\n" line-break markers) from a reply.

    Reads ``response['data']`` as a list of dicts; items without a
    ``'nakdan'`` entry are ignored.
    """
    words = []
    for item in response.get('data', []):
        if 'nakdan' not in item:
            continue
        nakdan = item['nakdan']
        if nakdan.get('word') == "\n":  # Keep explicit line breaks
            words.append("\n")
        else:
            # NOTE(review): every entry in 'options' is rendered, not only
            # the first one -- confirm this is intended.
            words.extend(
                option['w'].replace("|", "")
                for option in nakdan.get('options', [])
            )
    return words


def _layout_words(draw, words, margin=20, spacing=10):
    """Queue right-aligned text operations on *draw* and return the last y.

    Words are placed from the right edge leftwards; a word that does not
    fit in the remaining width starts a new line instead of being dropped.
    """
    usable_width = img_width - margin
    y = font_size
    line_width = 0
    # One throw-away image is enough to measure every word.
    with Image(width=1, height=1) as probe:
        for word in words:
            if word == "\n":
                y += font_size
                line_width = 0
                continue
            metrics = draw.get_font_metrics(probe, word)
            word_width = int(metrics.text_width)
            word_height = int(metrics.text_height)
            # Wrap only when the line already holds something, so an
            # over-wide word still gets drawn on a line of its own.
            if line_width and line_width + word_width + spacing > usable_width:
                y += word_height
                line_width = 0
            line_width += word_width + spacing
            # Clamp at 0: an over-wide word would otherwise get a negative x.
            x = max(0, img_width - line_width - margin)
            draw.text(x, y, word)
    return y


def main():
    """Streamlit page: send the entered text to Nakdan and show it as a PNG.

    The words returned by the service are laid out right-to-left on a white
    canvas ``img_width`` pixels wide whose height is sized to the text.
    """
    st.title('Dicta Nakdan API Interface')
    # Input field for the user
    hebrew_text = st.text_area("Enter Hebrew Text:", "רסק שמופי טקסט")
    # Nothing to do until the user asks for processing
    if not st.button("Process Text"):
        return

    response = get_nakdan_response(hebrew_text, api_key)
    if not isinstance(response, dict):
        # The request failed or returned an unexpected payload; say so
        # instead of silently rendering an empty image.
        st.error("No usable response from the Nakdan service.")
        return

    words = _extract_words(response)
    spacing = 10
    with Drawing() as draw:
        # os.path.join keeps the font path valid on non-Windows systems too.
        draw.font = os.path.join("fonts", "DejaVuSans.ttf")
        draw.text_antialias = True
        draw.text_encoding = 'utf-8'
        draw.font_size = font_size
        last_y = _layout_words(draw, words, spacing=spacing)
        # Size the canvas from the laid-out text rather than cropping a
        # fixed 400px image, which fails once the text grows taller.
        with Image(width=img_width, height=last_y + spacing,
                   background=Color('#ffffff')) as canvas:
            draw(canvas)
            img_bytes = canvas.make_blob(format='png')
    st.image(img_bytes)
2024-01-09 11:55:52 +02:00
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()