package locale

import (
	"fmt"
	"sort"
	"strings"
	"unicode"

	"golang.org/x/text/transform"
	"golang.org/x/text/unicode/norm"
)

// LanguageFilterOptions defines filtering options for language search.
type LanguageFilterOptions struct {
	CommonOnly  bool   // Show only common languages
	SearchTerm  string // Search by language name or country code
	Limit       int    // Maximum number of results (0 = no limit)
	CountryCode string // Optional: filter by country
}

// LanguageOption holds language display data for UI selection.
type LanguageOption struct {
	Locale               string // Full locale string (e.g., "ja-JP")
	LanguageName         string // English language name (e.g., "Japanese")
	LanguageLocal        string // Native language name (e.g., "日本語")
	CountryCode          string // ISO country code (e.g., "JP")
	CountryName          string // Country name (e.g., "Japan")
	CountryLocal         string // Country name in native/local language (e.g., "日本")
	Flag                 string // Flag emoji for country (e.g., "🇯🇵")
	FullLocale           string // Full locale code for display (e.g., "ES-419"); empty for region-less options
	DisplayText          string // Formatted display text (e.g., "🇯🇵 JP - 日本語 (Japanese)")
	MajorRegionalVariant bool   // Major regional variant flag
	Population           int    // Population for sorting (values <= 0 are treated as unknown)
}

// commonLanguageCodes lists the language codes offered for quick selection.
var commonLanguageCodes = []string{
	"ar",  // Arabic
	"cs",  // Czech
	"da",  // Danish
	"de",  // German
	"el",  // Greek
	"en",  // English
	"es",  // Spanish
	"fi",  // Finnish
	"fr",  // French
	"he",  // Hebrew
	"hi",  // Hindi
	"hif", // Fijian Hindi
	"hu",  // Hungarian
	"id",  // Indonesian
	"it",  // Italian
	"ja",  // Japanese
	"ko",  // Korean
	"ms",  // Malay
	"nl",  // Dutch
	"no",  // Norwegian
	"pl",  // Polish
	"pt",  // Portuguese
	"ro",  // Romanian
	"ru",  // Russian
	"sk",  // Slovak
	"sv",  // Swedish
	"ta",  // Tamil
	"te",  // Telugu
	"th",  // Thai
	"tr",  // Turkish
	"vi",  // Vietnamese
	"zh",  // Chinese
	"fil", // Filipino
}

// commonLanguageSet indexes commonLanguageCodes for O(1) membership checks.
// It is built in the variable initializer, so it is populated before any
// other package-level initializer or init function can observe it.
var commonLanguageSet = func() map[string]bool {
	set := make(map[string]bool, len(commonLanguageCodes))
	for _, code := range commonLanguageCodes {
		set[code] = true
	}
	return set
}()

// removeAccents removes accent marks from a string (e.g., "Español" -> "Espanol").
//
// The input is decomposed with Unicode NFD normalization so that accents become
// separate combining runes, and every rune in category Mn (Mark, Nonspacing) is
// then dropped. If normalization reports an error, the un-normalized input is
// filtered instead of a partially transformed result.
func removeAccents(s string) string {
	decomposed, _, err := transform.String(transform.Chain(norm.NFD), s)
	if err != nil {
		decomposed = s
	}

	var b strings.Builder
	b.Grow(len(decomposed))
	for _, r := range decomposed {
		if !unicode.Is(unicode.Mn, r) {
			b.WriteRune(r)
		}
	}
	return b.String()
}

// foldForSearch returns s with accents removed and lower-cased, the form used
// for accent- and case-insensitive comparisons in this file.
func foldForSearch(s string) string {
	return strings.ToLower(removeAccents(s))
}

// knownPopulation maps non-positive populations to 0 so that every "unknown"
// population compares equal and sorts after any known one.
func knownPopulation(p int) int {
	if p < 0 {
		return 0
	}
	return p
}

// displayLocale returns the display form of a two-part locale with the language
// part upper-cased (e.g., "es-419" -> "ES-419"). Locales that do not consist of
// exactly two "-"-separated parts yield "".
func displayLocale(locale string) string {
	parts := strings.Split(locale, "-")
	if len(parts) != 2 {
		return ""
	}
	return strings.ToUpper(parts[0]) + "-" + parts[1]
}

// languageOptionForRegion builds the LanguageOption for one regional variant of
// entry. It is shared by every function that returns region-based options so
// they all populate the same fields (including FullLocale).
func languageOptionForRegion(entry *LocaleEntry, region *RegionEntry) LanguageOption {
	return LanguageOption{
		Locale:               region.Locale,
		LanguageName:         entry.LanguageName,
		LanguageLocal:        entry.LanguageLocal,
		CountryCode:          region.CountryCode,
		CountryName:          region.CountryName,
		CountryLocal:         region.CountryLocal,
		Flag:                 region.Flag,
		FullLocale:           displayLocale(region.Locale),
		DisplayText:          formatLanguageDisplayEntry(entry, region),
		MajorRegionalVariant: region.MajorRegionalVariant,
		Population:           region.Population,
	}
}

// languageOptionForLanguage builds the LanguageOption for a language without a
// region: the locale is the bare language code and all country fields are empty.
func languageOptionForLanguage(entry *LocaleEntry) LanguageOption {
	flagEmoji := GetFlagForLanguageCode(entry.LanguageCode)
	return LanguageOption{
		Locale:        entry.LanguageCode,
		LanguageName:  entry.LanguageName,
		LanguageLocal: entry.LanguageLocal,
		Flag:          flagEmoji,
		DisplayText:   formatLanguageDisplayBasic(entry.LanguageCode, entry, flagEmoji),
	}
}

// entryMatchesTerm reports whether the already-folded search term occurs in the
// entry's language name, native name or language code, or in the country code,
// country name or native country name of any of its regions. Names are compared
// accent-insensitively; codes are only lower-cased.
func entryMatchesTerm(entry *LocaleEntry, term string) bool {
	if strings.Contains(foldForSearch(entry.LanguageName), term) ||
		strings.Contains(foldForSearch(entry.LanguageLocal), term) ||
		strings.Contains(strings.ToLower(entry.LanguageCode), term) {
		return true
	}
	for _, region := range entry.Regions {
		if strings.Contains(strings.ToLower(region.CountryCode), term) ||
			strings.Contains(foldForSearch(region.CountryName), term) ||
			strings.Contains(foldForSearch(region.CountryLocal), term) {
			return true
		}
	}
	return false
}

// SearchLanguages searches and filters languages based on the provided options.
// This is the main entry point for language search and filtering.
//
// Filtering:
//   - opts.CommonOnly keeps only entries whose language code is a common language.
//   - opts.SearchTerm keeps entries where the term matches (case- and
//     accent-insensitively) a language or country name/code; all regions of a
//     matching entry are returned.
//   - opts.CountryCode keeps only regions with that country code
//     (case-insensitive); languages without regions are excluded in that case.
//   - Regions whose country name contains "antarctica" are always skipped.
//
// Ordering (each rule breaks ties of the previous one):
//  1. When searching: exact language-name matches first.
//  2. Major regional variants first.
//  3. Larger population first; unknown (non-positive) population last.
//  4. Language name, accent-insensitive, ascending.
//
// Remaining ties keep their input order. opts.Limit > 0 truncates the sorted
// result. A nil slice is returned when nothing matches.
func SearchLanguages(entries []LocaleEntry, opts LanguageFilterOptions) []LanguageOption {
	var results []LanguageOption

	searching := opts.SearchTerm != ""
	searchLower := foldForSearch(opts.SearchTerm)

	for i := range entries {
		entry := &entries[i]

		if opts.CommonOnly && !commonLanguageSet[entry.LanguageCode] {
			continue
		}
		if searching && !entryMatchesTerm(entry, searchLower) {
			continue
		}

		if len(entry.Regions) == 0 {
			// A language without any region cannot satisfy a country filter.
			if opts.CountryCode == "" {
				results = append(results, languageOptionForLanguage(entry))
			}
			continue
		}

		for _, region := range entry.Regions {
			if strings.Contains(strings.ToLower(region.CountryName), "antarctica") {
				continue
			}
			if opts.CountryCode != "" && !strings.EqualFold(region.CountryCode, opts.CountryCode) {
				continue
			}
			results = append(results, languageOptionForRegion(entry, &region))
		}
	}

	// Folding a name requires a Unicode normalization pass, so memoize it rather
	// than redoing it on every comparison.
	foldedNames := make(map[string]string)
	nameKey := func(name string) string {
		key, ok := foldedNames[name]
		if !ok {
			key = foldForSearch(name)
			foldedNames[name] = key
		}
		return key
	}

	// The comparator is a plain lexicographic comparison of
	// (exact, major, population, name). Every criterion is applied to every
	// pair, which keeps the ordering transitive as the sort package requires.
	sort.SliceStable(results, func(i, j int) bool {
		a, b := &results[i], &results[j]
		aName, bName := nameKey(a.LanguageName), nameKey(b.LanguageName)

		if searching {
			aExact, bExact := aName == searchLower, bName == searchLower
			if aExact != bExact {
				return aExact
			}
		}
		if a.MajorRegionalVariant != b.MajorRegionalVariant {
			return a.MajorRegionalVariant
		}
		aPop, bPop := knownPopulation(a.Population), knownPopulation(b.Population)
		if aPop != bPop {
			return aPop > bPop
		}
		return aName < bName
	})

	if opts.Limit > 0 && len(results) > opts.Limit {
		results = results[:opts.Limit]
	}
	return results
}

// GetCommonLanguages returns the options for all common languages, in
// SearchLanguages order, for quick selection.
func GetCommonLanguages(entries []LocaleEntry) []LanguageOption {
	return SearchLanguages(entries, LanguageFilterOptions{CommonOnly: true})
}

// GetMajorLanguages returns one entry per common language code (the first entry
// seen for each code), sorted case-insensitively by language name.
// This is used for step 1 of two-step language selection.
func GetMajorLanguages(entries []LocaleEntry) []LocaleEntry {
	var major []LocaleEntry
	seen := make(map[string]bool)

	for i := range entries {
		code := entries[i].LanguageCode
		if !commonLanguageSet[code] || seen[code] {
			continue
		}
		seen[code] = true
		major = append(major, entries[i])
	}

	sort.Slice(major, func(i, j int) bool {
		return strings.ToLower(major[i].LanguageName) < strings.ToLower(major[j].LanguageName)
	})
	return major
}

// GetAllLanguages returns the options for every language, in SearchLanguages
// order: major regional variants first, then by population (largest first),
// then by language name.
func GetAllLanguages(entries []LocaleEntry) []LanguageOption {
	return SearchLanguages(entries, LanguageFilterOptions{CommonOnly: false})
}

// GetLanguagesByCountry returns all languages available for a specific country.
// countryCode is matched case-insensitively against each region's country code.
// Results are sorted case-insensitively by language name; options with the same
// language name keep their input order.
func GetLanguagesByCountry(entries []LocaleEntry, countryCode string) []LanguageOption {
	var results []LanguageOption

	for i := range entries {
		entry := &entries[i]
		for _, region := range entry.Regions {
			if !strings.EqualFold(region.CountryCode, countryCode) {
				continue
			}
			results = append(results, languageOptionForRegion(entry, &region))
		}
	}

	sort.SliceStable(results, func(i, j int) bool {
		return strings.ToLower(results[i].LanguageName) < strings.ToLower(results[j].LanguageName)
	})
	return results
}

// FindLanguageByLocale finds a specific language option by locale string.
// It returns an error when FindByLocale yields no entry. When an entry is found
// without a region, the returned option has empty country fields.
func FindLanguageByLocale(entries []LocaleEntry, localeStr string) (*LanguageOption, error) {
	entry, region := FindByLocale(localeStr, entries)
	if entry == nil {
		return nil, fmt.Errorf("locale not found: %s", localeStr)
	}

	var opt LanguageOption
	if region == nil {
		opt = languageOptionForLanguage(entry)
	} else {
		opt = languageOptionForRegion(entry, region)
	}
	return &opt, nil
}

// SearchByTerm searches languages by a search term.
// This is a convenience wrapper around SearchLanguages.
func SearchByTerm(entries []LocaleEntry, term string) []LanguageOption {
	return SearchLanguages(entries, LanguageFilterOptions{SearchTerm: term})
}

// IsCommonLanguage reports whether a language code or locale belongs to the
// common languages list. Only the part before the first "-" is considered
// (e.g., "en" from "en-US"), compared case-insensitively. An empty input is
// never common.
func IsCommonLanguage(languageCode string) bool {
	if languageCode == "" {
		return false
	}
	langCode := languageCode
	if i := strings.Index(languageCode, "-"); i >= 0 {
		langCode = languageCode[:i]
	}
	return commonLanguageSet[strings.ToLower(langCode)]
}

// GetCommonLanguageCodes returns a copy of the common language codes, so
// callers cannot modify the package-level list.
func GetCommonLanguageCodes() []string {
	codes := make([]string, len(commonLanguageCodes))
	copy(codes, commonLanguageCodes)
	return codes
}

// GetMajorRegionalVariants returns the regions of entry with major regional
// variants first, ordered case-insensitively by country name, followed by all
// other regions ordered by population (largest first). Regions that tie keep
// their original relative order. A nil entry yields nil.
func GetMajorRegionalVariants(entry *LocaleEntry) []RegionEntry {
	if entry == nil {
		return nil
	}

	var major, other []RegionEntry
	for _, region := range entry.Regions {
		if region.MajorRegionalVariant {
			major = append(major, region)
		} else {
			other = append(other, region)
		}
	}

	sort.SliceStable(major, func(i, j int) bool {
		return strings.ToLower(major[i].CountryName) < strings.ToLower(major[j].CountryName)
	})
	sort.SliceStable(other, func(i, j int) bool {
		return other[i].Population > other[j].Population
	})

	return append(major, other...)
}

// formatLanguageDisplayEntry formats a language with its region as
// "🇯🇵 JP - 日本語 (Japanese)", or "🇯🇵 JP - Japanese" when the native name is
// empty or identical to the English name. The region's flag is used when set,
// otherwise the flag for the language code. A nil region falls back to
// formatLanguageDisplayBasic.
func formatLanguageDisplayEntry(entry *LocaleEntry, region *RegionEntry) string {
	if region == nil {
		return formatLanguageDisplayBasic(entry.LanguageCode, entry, "")
	}

	flagEmoji := region.Flag
	if flagEmoji == "" {
		flagEmoji = GetFlagForLanguageCode(entry.LanguageCode)
	}

	if entry.LanguageLocal != "" && entry.LanguageLocal != entry.LanguageName {
		return fmt.Sprintf("%s %s - %s (%s)", flagEmoji, region.CountryCode, entry.LanguageLocal, entry.LanguageName)
	}
	return fmt.Sprintf("%s %s - %s", flagEmoji, region.CountryCode, entry.LanguageName)
}

// formatLanguageDisplayBasic formats a language without a region as
// "🇯🇵 - 日本語 (Japanese)", or "🇯🇵 - Japanese" when the native name is empty or
// identical to the English name. An empty flagEmoji is replaced by the flag for
// entry.LanguageCode. langCode is part of the signature but is not read; the
// language code is always taken from entry.
func formatLanguageDisplayBasic(langCode string, entry *LocaleEntry, flagEmoji string) string {
	if flagEmoji == "" {
		flagEmoji = GetFlagForLanguageCode(entry.LanguageCode)
	}

	if entry.LanguageLocal != "" && entry.LanguageLocal != entry.LanguageName {
		return fmt.Sprintf("%s - %s (%s)", flagEmoji, entry.LanguageLocal, entry.LanguageName)
	}
	return fmt.Sprintf("%s - %s", flagEmoji, entry.LanguageName)
}