Merge identify API with translate API

This commit is contained in:
Kristóf Tóth 2019-10-13 17:51:30 +02:00
parent f84de7bfff
commit a34e948a57
4 changed files with 115 additions and 144 deletions

View File

@ -1,47 +0,0 @@
package identify
import (
"os/exec"
"regexp"
)
// Indetification holds a word together with the function used to obtain the
// raw language-identification output for it.
type Indetification struct {
	// word is the text handed to the Identifier.
	word string

	// Identifier returns the raw identification output for the given
	// Indetification. New sets it to executeTransShell; callers may replace
	// it, for example with a stub.
	Identifier func(*Indetification) (string, error)
}
// New returns an Indetification for word whose Identifier is
// executeTransShell.
func New(word string) Indetification {
	var id Indetification
	id.word = word
	id.Identifier = executeTransShell
	return id
}
// executeTransShell runs `trans -no-ansi -id <word>` for the word stored in i
// and returns the command's standard output as a string, along with any error
// reported by the command.
func executeTransShell(i *Indetification) (string, error) {
	cmd := exec.Command("trans", "-no-ansi", "-id", i.word)
	stdout, err := cmd.Output()
	return string(stdout), err
}
// Identify calls the configured Identifier and returns the language code
// extracted from its output by parseTransIdentifyOutput.
//
// It panics if the Identifier returns an error.
func (i Indetification) Identify() string {
	raw, err := i.Identifier(&i)
	if err == nil {
		return parseTransIdentifyOutput(raw)
	}
	panic(err)
}
// transCodePattern matches the "Code" row of the identification output and
// captures the language code. The capture allows hyphens so that codes such as
// "zh-CN" are accepted, and trailing blanks or a carriage return before the
// end of the line are tolerated. It is compiled once at package scope instead
// of on every call.
var transCodePattern = regexp.MustCompile(`(?m)^Code\s+([\w-]+)[ \t\r]*$`)

// parseTransIdentifyOutput extracts the language code from the "Code" row of
// out. It returns the empty string when out contains no such row.
func parseTransIdentifyOutput(out string) string {
	if m := transCodePattern.FindStringSubmatch(out); m != nil {
		return m[1]
	}
	return ""
}

View File

@ -1,86 +0,0 @@
package identify_test
import (
"testing"
"."
"os"
)
// testData is one TestIdentify case. The field order matters because the
// cases are written as positional composite literals.
type testData struct {
	// word is the input passed to identify.New.
	word string
	// identifyOutput is the canned identifier output returned by the stub.
	identifyOutput string
	// expectedCode is the language code Identify must return.
	expectedCode string
}
// TestIdentify checks that Identify returns the expected language code for
// each case.
//
// Unless the NOMOCK environment variable is set, the Identifier is replaced
// with a stub that returns the case's canned output, so the default
// Identifier is not invoked. On failure the message reports the value that
// was actually returned alongside the expected one.
func TestIdentify(t *testing.T) {
	cases := []testData{
		{
			"macska",
			`Magyar
Name Hungarian
Family Uralic
Writing system Latin
Code hu
ISO 639-3 hun
SIL http://www-01.sil.org/iso639-3/documentation.asp?id=hun
Glottolog http://glottolog.org/resource/languoid/id/hung1274
Wikipedia http://en.wikipedia.org/wiki/Hungarian_language
`,
			"hu",
		},
		{
			"cat",
			`English
Name English
Family Indo-European
Writing system Latin
Code en
ISO 639-3 eng
SIL http://www-01.sil.org/iso639-3/documentation.asp?id=eng
Glottolog http://glottolog.org/resource/languoid/id/stan1293
Wikipedia http://en.wikipedia.org/wiki/English_language
`,
			"en",
		},
		{
			"szofisztikált",
			`Magyar
Name Hungarian
Family Uralic
Writing system Latin
Code hu
ISO 639-3 hun
SIL http://www-01.sil.org/iso639-3/documentation.asp?id=hun
Glottolog http://glottolog.org/resource/languoid/id/hung1274
Wikipedia http://en.wikipedia.org/wiki/Hungarian_language
`,
			"hu",
		},
		{
			"distribute",
			`English
Name English
Family Indo-European
Writing system Latin
Code en
ISO 639-3 eng
SIL http://www-01.sil.org/iso639-3/documentation.asp?id=eng
Glottolog http://glottolog.org/resource/languoid/id/stan1293
Wikipedia http://en.wikipedia.org/wiki/English_language
`,
			"en",
		},
	}

	// The environment does not change between cases, so look it up once.
	_, useRealIdentifier := os.LookupEnv("NOMOCK")

	for _, data := range cases {
		id := identify.New(data.word)
		if !useRealIdentifier {
			canned := data.identifyOutput
			id.Identifier = func(*identify.Indetification) (string, error) {
				return canned, nil
			}
		}
		if got := id.Identify(); got != data.expectedCode {
			t.Errorf("Identify() for word %q = %q, want %q", data.word, got, data.expectedCode)
		}
	}
}

View File

@ -12,6 +12,7 @@ type Translation struct {
Word string Word string
Language string Language string
Translator func(string, string, string) (string, error) Translator func(string, string, string) (string, error)
Identifier func(string) (string, error)
} }
func New(word string) Translation { func New(word string) Translation {
@ -19,6 +20,7 @@ func New(word string) Translation {
Word: word, Word: word,
Language: "", Language: "",
Translator: executeTrans, Translator: executeTrans,
Identifier: executeIdentify,
} }
} }
@ -38,10 +40,6 @@ func executeTrans(word, fromLang, toLang string) (string, error) {
} }
func (t Translation) Translate(toLang string) []string { func (t Translation) Translate(toLang string) []string {
if t.Language == "" {
// TODO: Identify
panic("noooooo")
}
output, err := t.Translator(t.Word, t.Language, toLang) output, err := t.Translator(t.Word, t.Language, toLang)
if err != nil { if err != nil {
panic(err) panic(err)
@ -75,3 +73,31 @@ func uniqueSlice(items []string) []string {
return uniqueSlice return uniqueSlice
} }
// executeIdentify runs `trans -no-ansi -id <word>` and returns the command's
// standard output as a string, along with any error reported by the command.
func executeIdentify(word string) (string, error) {
	cmd := exec.Command("trans", "-no-ansi", "-id", word)
	stdout, err := cmd.Output()
	return string(stdout), err
}
// Identify passes t.Word to the configured Identifier and returns the language
// code extracted from its output by parseTransIdentifyOutput.
//
// It panics if the Identifier returns an error.
func (t Translation) Identify() string {
	raw, err := t.Identifier(t.Word)
	if err == nil {
		return parseTransIdentifyOutput(raw)
	}
	panic(err)
}
// transCodePattern matches the "Code" row of the identification output and
// captures the language code. The capture allows hyphens so that codes such as
// "zh-CN" are accepted, and trailing blanks or a carriage return before the
// end of the line are tolerated. It is compiled once at package scope instead
// of on every call.
var transCodePattern = regexp.MustCompile(`(?m)^Code\s+([\w-]+)[ \t\r]*$`)

// parseTransIdentifyOutput extracts the language code from the "Code" row of
// out. It returns the empty string when out contains no such row.
func parseTransIdentifyOutput(out string) string {
	if m := transCodePattern.FindStringSubmatch(out); m != nil {
		return m[1]
	}
	return ""
}

View File

@ -8,7 +8,7 @@ import (
) )
type testData struct { type transTestData struct {
word string word string
fromLang string fromLang string
toLang string toLang string
@ -16,9 +16,9 @@ type testData struct {
expectedResults []string expectedResults []string
} }
func TestTranslation(t *testing.T) { func TestTranslate(t *testing.T) {
cases := []testData{ cases := []transTestData{
testData{ transTestData{
"actuator", "en", "hu", "actuator", "en", "hu",
`működtető `működtető
@ -31,7 +31,7 @@ actuator
`, `,
[]string{"működtető", "indítókar", "beavatkozó", "hajtómű", "aktuátor", "hajtás"}, []string{"működtető", "indítókar", "beavatkozó", "hajtómű", "aktuátor", "hajtás"},
}, },
testData{ transTestData{
"szaxofon", "hu", "en", "szaxofon", "hu", "en",
`saxophone `saxophone
@ -40,7 +40,7 @@ szaxofon
`, `,
[]string{"saxophone"}, []string{"saxophone"},
}, },
testData{ transTestData{
"cat", "en", "hu", "cat", "en", "hu",
`macska `macska
@ -59,7 +59,7 @@ cat
`, `,
[]string{"macska", "pletykás nő", "felvon", "cica"}, []string{"macska", "pletykás nő", "felvon", "cica"},
}, },
testData{ transTestData{
"méltányosság", "hu", "en", "méltányosság", "hu", "en",
`equity `equity
@ -92,3 +92,81 @@ méltányosság
} }
} }
} }
// idTestData is one TestIdentify case. The field order matters because the
// cases are written as positional composite literals.
type idTestData struct {
	// word is the input passed to trans.New.
	word string
	// identifyOutput is the canned identifier output returned by the stub.
	identifyOutput string
	// expectedCode is the language code Identify must return.
	expectedCode string
}
// TestIdentify checks that Translation.Identify returns the expected language
// code for each case.
//
// Unless the NOMOCK environment variable is set, the Identifier is replaced
// with a stub that returns the case's canned output, so the default
// Identifier is not invoked. On failure the message reports the value that
// was actually returned alongside the expected one.
func TestIdentify(t *testing.T) {
	cases := []idTestData{
		{
			"macska",
			`Magyar
Name Hungarian
Family Uralic
Writing system Latin
Code hu
ISO 639-3 hun
SIL http://www-01.sil.org/iso639-3/documentation.asp?id=hun
Glottolog http://glottolog.org/resource/languoid/id/hung1274
Wikipedia http://en.wikipedia.org/wiki/Hungarian_language
`,
			"hu",
		},
		{
			"cat",
			`English
Name English
Family Indo-European
Writing system Latin
Code en
ISO 639-3 eng
SIL http://www-01.sil.org/iso639-3/documentation.asp?id=eng
Glottolog http://glottolog.org/resource/languoid/id/stan1293
Wikipedia http://en.wikipedia.org/wiki/English_language
`,
			"en",
		},
		{
			"szofisztikált",
			`Magyar
Name Hungarian
Family Uralic
Writing system Latin
Code hu
ISO 639-3 hun
SIL http://www-01.sil.org/iso639-3/documentation.asp?id=hun
Glottolog http://glottolog.org/resource/languoid/id/hung1274
Wikipedia http://en.wikipedia.org/wiki/Hungarian_language
`,
			"hu",
		},
		{
			"distribute",
			`English
Name English
Family Indo-European
Writing system Latin
Code en
ISO 639-3 eng
SIL http://www-01.sil.org/iso639-3/documentation.asp?id=eng
Glottolog http://glottolog.org/resource/languoid/id/stan1293
Wikipedia http://en.wikipedia.org/wiki/English_language
`,
			"en",
		},
	}

	// The environment does not change between cases, so look it up once.
	_, useRealIdentifier := os.LookupEnv("NOMOCK")

	for _, data := range cases {
		id := trans.New(data.word)
		if !useRealIdentifier {
			canned := data.identifyOutput
			id.Identifier = func(string) (string, error) {
				return canned, nil
			}
		}
		if got := id.Identify(); got != data.expectedCode {
			t.Errorf("Identify() for word %q = %q, want %q", data.word, got, data.expectedCode)
		}
	}
}