mirror of
https://github.com/yann64/haikuports.git
synced 2026-04-09 05:10:05 +02:00
tesseract: add recipe for language data files (#1497)
This commit is contained in:
157
app-text/tesseract_data/tesseract_data-3.04.00.recipe
Normal file
157
app-text/tesseract_data/tesseract_data-3.04.00.recipe
Normal file
@@ -0,0 +1,157 @@
|
||||
SUMMARY="Language data files for Tesseract OCR engine"
|
||||
DESCRIPTION="Tesseract OCR can be fully trained to recognize new languages and scripts. \
|
||||
A set of files for, community made, trained languages are available as \
|
||||
separate packages per language."
|
||||
HOMEPAGE="https://github.com/tesseract-ocr/"
|
||||
LICENSE="Apache v2"
|
||||
COPYRIGHT="1985-1995 HP labs
|
||||
2012 Google Inc."
|
||||
REVISION="1"
|
||||
SOURCE_URI="https://github.com/tesseract-ocr/tessdata/archive/$portVersion.tar.gz"
|
||||
CHECKSUM_SHA256="5dcb37198336b6953843b461ee535df1401b41008d550fc9e43d0edabca7adb1"
|
||||
SOURCE_DIR="tessdata-$portVersion"
|
||||
DISABLE_SOURCE_PACKAGE=yes
|
||||
|
||||
ARCHITECTURES="any"
|
||||
|
||||
PROVIDES="
|
||||
$portName = $portVersion
|
||||
"
|
||||
BUILD_REQUIRES="
|
||||
"
|
||||
|
||||
declare -A languages
|
||||
# Special data files
|
||||
languages[osd]="orientation and script detection"
|
||||
languages[equ]="math / equation detection"
|
||||
|
||||
# languages data files
|
||||
languages[afr]="Afrikaans"
|
||||
languages[amh]="Amharic"
|
||||
languages[ara]="Arabic"
|
||||
languages[asm]="Assamese"
|
||||
languages[aze]="Azerbaijani"
|
||||
languages[aze_cyrl]="Azerbaijani - Cyrilic"
|
||||
languages[bel]="Belarusian"
|
||||
languages[ben]="Bengali"
|
||||
languages[bod]="Tibetan"
|
||||
languages[bos]="Bosnian"
|
||||
languages[bul]="Bulgarian"
|
||||
languages[cat]="Catalan; Valencian"
|
||||
languages[ceb]="Cebuano"
|
||||
languages[ces]="Czech"
|
||||
languages[chi_sim]="Chinese - Simplified"
|
||||
languages[chi_tra]="Chinese - Traditional"
|
||||
languages[chr]="Cherokee"
|
||||
languages[cym]="Welsh"
|
||||
languages[dan]="Danish"
|
||||
languages[dan_frak]="Danish - Fraktur script"
|
||||
languages[deu]="German"
|
||||
languages[deu_frak]="Germain - Fraktur script"
|
||||
languages[dzo]="Dzongkha"
|
||||
languages[ell]="Greek, Modern (1453-)"
|
||||
languages[eng]="English"
|
||||
languages[enm]="English, Middle (1100-1500)"
|
||||
languages[epo]="Esperanto"
|
||||
languages[est]="Estonian"
|
||||
languages[eus]="Basque"
|
||||
languages[fas]="Persian"
|
||||
languages[fin]="Finnish"
|
||||
languages[fra]="French"
|
||||
languages[frk]="Frankish"
|
||||
languages[frm]="French, Middle (ca. 1400-1600)"
|
||||
languages[gle]="Irish"
|
||||
languages[glg]="Galician"
|
||||
languages[grc]="Greek, Ancient (-1453)"
|
||||
languages[guj]="Gujarati"
|
||||
languages[hat]="Haitian; Haitian Creole"
|
||||
languages[heb]="Hebrew"
|
||||
languages[hin]="Hindi"
|
||||
languages[hrv]="Croatian"
|
||||
languages[hun]="Hungarian"
|
||||
languages[iku]="Inuktitut"
|
||||
languages[ind]="Indonesian"
|
||||
languages[isl]="Icelandic"
|
||||
languages[ita]="Italian"
|
||||
languages[ita_old]="Italian - Old"
|
||||
languages[jav]="Javanese"
|
||||
languages[jpn]="Japanese"
|
||||
languages[kan]="Kannada"
|
||||
languages[kat]="Georgian"
|
||||
languages[kat_old]="Georgian - Old"
|
||||
languages[kaz]="Kazakh"
|
||||
languages[khm]="Central Khmer"
|
||||
languages[kir]="Kirghiz; Kyrgyz"
|
||||
languages[kor]="Korean"
|
||||
languages[kur]="Kurdish"
|
||||
languages[lao]="Lao"
|
||||
languages[lat]="Latin"
|
||||
languages[lav]="Latvian"
|
||||
languages[lit]="Lithuanian"
|
||||
languages[mal]="Malayalam"
|
||||
languages[mar]="Marathi"
|
||||
languages[mkd]="Macedonian"
|
||||
languages[mlt]="Maltese"
|
||||
languages[msa]="Malay"
|
||||
languages[mya]="Burmese"
|
||||
languages[nep]="Nepali"
|
||||
languages[nld]="Dutch; Flemish"
|
||||
languages[nor]="Norvegian"
|
||||
languages[ori]="Oriya"
|
||||
languages[pan]="Panjabi; Punjabi"
|
||||
languages[pol]="Polish"
|
||||
languages[por]="Portuguese"
|
||||
languages[pus]="Pushto; Pastho"
|
||||
languages[ron]="Romanian; Moldavian; Moldovan"
|
||||
languages[rus]="Russian"
|
||||
languages[san]="Sanskrit"
|
||||
languages[sin]="Sinhala; Sinhalese"
|
||||
languages[slk]="Slovak"
|
||||
languages[slk_frak]="Slovak - Fraktur script"
|
||||
languages[slv]="Slovenian"
|
||||
languages[spa]="Spanish; Castilian"
|
||||
languages[spa_old]="Spanish; Castilian - Old"
|
||||
languages[sqi]="Albanian"
|
||||
languages[srp]="Serbian"
|
||||
languages[srp_latn]="Serbian - Latin"
|
||||
languages[swa]="Swahili"
|
||||
languages[swe]="Swedish"
|
||||
languages[syr]="Syriac"
|
||||
languages[tam]="Tamil"
|
||||
languages[tel]="Telugu"
|
||||
languages[tgk]="Tajik"
|
||||
languages[tgl]="Tagalog"
|
||||
languages[tha]="Thai"
|
||||
languages[tir]="Tigrinya"
|
||||
languages[tur]="Turkish"
|
||||
languages[uig]="Uighur; Uyghur"
|
||||
languages[ukr]="Ukrainian"
|
||||
languages[urd]="Urdu"
|
||||
languages[uzb]="Uzbek"
|
||||
languages[uzb_cyrl]="Uzbek - Cyrilic"
|
||||
languages[vie]="Vietnamese"
|
||||
languages[yid]="Yiddish"
|
||||
|
||||
for lang in "${!languages[@]}"; do
|
||||
desc=${languages[${lang}]}
|
||||
|
||||
eval "\
|
||||
SUMMARY_${lang}=\"Data files for ${desc}\";\
|
||||
PROVIDES_${lang}=\"\
|
||||
${portName}_${lang} = $portVersion\
|
||||
\"; \
|
||||
REQUIRES_${lang}=\"\
|
||||
haiku\n\
|
||||
tesseract >= 3\n\
|
||||
\""
|
||||
done
|
||||
|
||||
INSTALL()
|
||||
{
|
||||
mkdir -p $dataDir/tessdata
|
||||
for lang in "${!languages[@]}"; do
|
||||
cp $lang.* $dataDir/tessdata
|
||||
packageEntries $lang \
|
||||
$dataDir/tessdata/$lang.*
|
||||
done
|
||||
}
|
||||
Reference in New Issue
Block a user