Add dictionaries transform with filtering and partitioning rules

This commit is contained in:
2026-04-17 03:13:18 +03:00
parent dc373553d2
commit e5b49acecf
2 changed files with 232 additions and 0 deletions
+96
View File
@@ -0,0 +1,96 @@
import { describe, it, expect } from "vitest";
import { transformDictionaries } from "./transform.js";
import type { IRawDictionaries } from "./types.js";
/**
 * Test helper: returns a raw dictionaries payload whose four collections are
 * empty unless replaced by the matching key in `overrides`.
 */
function makeRaw(overrides: Partial<IRawDictionaries> = {}): IRawDictionaries {
  // A fresh baseline per call, so tests never share array instances.
  const emptyPayload: IRawDictionaries = {
    regions: [],
    countries: [],
    cities: [],
    airports: [],
  };
  return Object.assign(emptyPayload, overrides);
}
describe("transformDictionaries — filtering", () => {
  type RawCity = IRawDictionaries["cities"][number];
  type RawAirport = IRawDictionaries["airports"][number];

  /** Raw city fixture; every city used in this suite has has_afl_flights set. */
  const city = (
    code: string,
    title: RawCity["title"],
    countryCode: string,
    lat: number,
    lon: number,
  ): RawCity => ({
    code,
    title,
    country_code: countryCode,
    has_afl_flights: true,
    location: { lat, lon },
  });

  /** Raw airport fixture attached to the city identified by `cityCode`. */
  const airport = (
    code: string,
    cityCode: string,
    title: RawAirport["title"],
    hasAflFlights: boolean,
    lat: number,
    lon: number,
  ): RawAirport => ({
    code,
    city_code: cityCode,
    title,
    has_afl_flights: hasAflFlights,
    location: { lat, lon },
  });

  // Factories (not shared constants) so each test gets fresh objects.
  const moscow = (): RawCity => city("MOW", { ru: "Москва" }, "RU", 55, 37);
  const sheremetyevo = (): RawAirport =>
    airport("SVO", "MOW", { ru: "Шереметьево" }, true, 55, 37);

  const codesOf = (items: ReadonlyArray<{ code: string }>): string[] =>
    items.map((item) => item.code);

  it("drops cities whose title.ru is purely ASCII (garbage-data guard)", () => {
    const { cities } = transformDictionaries(
      makeRaw({
        cities: [
          city("MOW", { ru: "Москва", en: "Moscow" }, "RU", 55, 37),
          city("BAD", { ru: "Moscow", en: "Moscow" }, "RU", 0, 0),
        ],
        airports: [
          airport("SVO", "MOW", { ru: "Шереметьево", en: "SVO" }, true, 55, 37),
        ],
      }),
      "ru",
    );

    expect(codesOf(cities)).toEqual(["MOW"]);
  });

  it("drops airports with has_afl_flights=false", () => {
    const { airports } = transformDictionaries(
      makeRaw({
        cities: [moscow()],
        airports: [
          sheremetyevo(),
          airport("DME", "MOW", { ru: "Домодедово" }, false, 55, 37),
        ],
      }),
      "ru",
    );

    expect(codesOf(airports)).toEqual(["SVO"]);
  });

  it("drops airports whose title.ru is purely ASCII", () => {
    const { airports } = transformDictionaries(
      makeRaw({
        cities: [moscow()],
        airports: [airport("SVO", "MOW", { ru: "SVO" }, true, 55, 37)],
      }),
      "ru",
    );

    expect(airports).toHaveLength(0);
  });

  it("drops cities that end up with no afl airports", () => {
    const { cities } = transformDictionaries(
      makeRaw({
        cities: [moscow(), city("NOW", { ru: "Никогорск" }, "RU", 0, 0)],
        airports: [sheremetyevo()],
      }),
      "ru",
    );

    expect(codesOf(cities)).toEqual(["MOW"]);
  });

  it("partitions city codes into ruCityCodes and otherCityCodes", () => {
    const { ruCityCodes, otherCityCodes } = transformDictionaries(
      makeRaw({
        cities: [moscow(), city("PAR", { ru: "Париж" }, "FR", 48, 2)],
        airports: [
          sheremetyevo(),
          airport("CDG", "PAR", { ru: "Шарль-де-Голль" }, true, 48, 2),
        ],
      }),
      "ru",
    );

    expect(Array.from(ruCityCodes)).toEqual(["MOW"]);
    expect(Array.from(otherCityCodes)).toEqual(["PAR"]);
  });
});
+136
View File
@@ -0,0 +1,136 @@
/**
* Pure transform from raw dictionary responses to consumer-facing shapes.
*
* Ports the rules from Angular's DictionariesService.handleLoading:
* - drop cities whose title.ru is purely ASCII (garbage-data guard)
* - drop airports without afl flights or with ASCII title.ru
* - attach airports to their city (sorted by localized title)
* - drop cities that end up with no afl airports
* - enrich city.name, city.countryName from localized titles
* - build lookup maps keyed by uppercase code
* - partition cities into RU vs non-RU sets
* - flatten regions (Russia first, Australia filtered out)
*/
import type {
IAirport,
ICity,
ICountry,
IDictionaries,
IRegion,
IRawAirport,
IRawDictionaries,
IRawRegion,
} from "./types.js";
/**
 * Matches a non-empty string made up solely of Latin letters, spaces and the
 * punctuation `. , : ;`. `transformDictionaries` applies it to `title.ru` as
 * the garbage-data guard. Digits, hyphens, apostrophes and other ASCII symbols
 * are not in the character class, so a title containing any of them does not
 * match; the empty string does not match either.
 */
const ASCII_ONLY = /^[a-zA-Z.,:; ]+$/;
/** `world_region_id` that `buildRegions` leaves out of its result. */
const AUSTRALIA_REGION_ID = 500373;
/** `world_region_id` that `buildRegions` moves to the front of its result. */
const RUSSIA_REGION_ID = 500374;
/**
 * Converts raw dictionary payloads into the consumer-facing `IDictionaries`.
 *
 * Rules applied:
 * - cities whose `title.ru` matches `ASCII_ONLY` are dropped;
 * - airports are kept only when `has_afl_flights === true` and their
 *   `title.ru` does not match `ASCII_ONLY`;
 * - each city receives its surviving airports, sorted by localized name;
 * - cities left without any airport are dropped;
 * - `cityByCode` / `airportByCode` are keyed by the upper-cased code;
 * - `ruCityCodes` collects cities with `country_code === "RU"` and
 *   `otherCityCodes` every other city (both keep the code's original casing);
 * - regions are produced by `buildRegions`.
 *
 * @param raw  Raw regions, countries, cities and airports.
 * @param lang Key used to pick localized titles. A missing title falls back to
 *             the entity code (or `""` for a city's country name).
 * @returns The filtered and enriched dictionaries plus their lookup structures.
 */
export function transformDictionaries(
  raw: IRawDictionaries,
  lang: string,
): IDictionaries {
  // A missing or empty title.ru is treated as "" which ASCII_ONLY does not
  // match (the pattern needs at least one character), so such records stay.
  const citiesAfterAsciiFilter = raw.cities.filter(
    (c) => !ASCII_ONLY.test(c.title["ru"] ?? ""),
  );

  // Group the surviving airports by city code in a single pass instead of
  // re-scanning the whole airport list for every city. Buckets preserve the
  // input order, so the per-city result is the same as a per-city filter.
  const rawAirportsByCity = new Map<IRawAirport["city_code"], IRawAirport[]>();
  for (const a of raw.airports) {
    if (a.has_afl_flights !== true || ASCII_ONLY.test(a.title["ru"] ?? "")) {
      continue;
    }
    const bucket = rawAirportsByCity.get(a.city_code);
    if (bucket) bucket.push(a);
    else rawAirportsByCity.set(a.city_code, [a]);
  }

  const countriesByCode = new Map(
    raw.countries.map((c) => [c.code, c]),
  );

  const citiesEnriched: ICity[] = citiesAfterAsciiFilter.map((c) => {
    // map() yields a fresh array, so the in-place sort never touches the bucket
    // and cities sharing a code still get independent airport objects.
    const airports = (rawAirportsByCity.get(c.code) ?? [])
      .map((a) => shapeAirport(a, lang))
      .sort((a, b) => a.name.localeCompare(b.name));
    const country = countriesByCode.get(c.country_code);
    return {
      code: c.code,
      name: c.title[lang] ?? c.code,
      location: c.location ?? { lat: 0, lon: 0 },
      country_code: c.country_code,
      countryName: country?.title[lang] ?? "",
      // Recomputed from the surviving airports; the raw flag is not trusted.
      has_afl_flights: airports.length > 0,
      airports,
    };
  });

  const cities = citiesEnriched.filter((c) => c.has_afl_flights);
  // Only airports that belong to a surviving city are exposed.
  const airports = cities.flatMap((c) => c.airports);

  const cityByCode = new Map<string, ICity>(
    cities.map((c) => [c.code.toUpperCase(), c]),
  );
  const airportByCode = new Map<string, IAirport>(
    airports.map((a) => [a.code.toUpperCase(), a]),
  );

  // Unlike the lookup maps above, the sets store codes in their original casing.
  const ruCityCodes = new Set<string>();
  const otherCityCodes = new Set<string>();
  for (const c of cities) {
    if (c.country_code === "RU") ruCityCodes.add(c.code);
    else otherCityCodes.add(c.code);
  }

  const countries: ICountry[] = raw.countries.map((c) => ({
    code: c.code,
    name: c.title[lang] ?? c.code,
    world_region_id: c.world_region_id,
  }));

  const regions = buildRegions(raw.regions, countries, lang);

  return {
    regions,
    countries,
    cities,
    airports,
    cityByCode,
    airportByCode,
    ruCityCodes,
    otherCityCodes,
  };
}
/**
 * Maps a raw airport record to the consumer-facing airport shape.
 *
 * @param a    Raw airport record.
 * @param lang Key of the localized title to use as the airport name.
 * @returns The airport with `name` taken from `title[lang]` (falling back to
 *          the airport code) and `location` defaulting to `{ lat: 0, lon: 0 }`.
 */
function shapeAirport(a: IRawAirport, lang: string): IAirport {
  const { code, city_code, has_afl_flights } = a;
  const localizedTitle = a.title[lang];
  const location = a.location ?? { lat: 0, lon: 0 };

  return {
    code,
    name: localizedTitle ?? code,
    city_code,
    location,
    has_afl_flights,
  };
}
/**
 * Builds the ordered region list with each region's countries attached.
 *
 * Regions whose id equals `AUSTRALIA_REGION_ID` are left out. The rest are
 * sorted by their localized title, after which the first region whose id
 * equals `RUSSIA_REGION_ID` is moved to the front.
 *
 * @param raw       Raw region records; the array itself is not modified.
 * @param countries Already-shaped countries, matched to regions by `world_region_id`.
 * @param lang      Key of the localized title; a missing title is treated as `""`.
 * @returns Regions in display order, each with its matching countries.
 */
function buildRegions(
  raw: IRawRegion[],
  countries: ICountry[],
  lang: string,
): IRegion[] {
  const titleOf = (region: IRawRegion): string => region.title[lang] ?? "";

  // filter() hands back a fresh array, so sorting it in place leaves `raw` untouched.
  const ordered = raw
    .filter((region) => region.world_region_id !== AUSTRALIA_REGION_ID)
    .sort((left, right) => titleOf(left).localeCompare(titleOf(right)));

  // Index 0 means the Russia region already leads; -1 means it is absent.
  const russiaAt = ordered.findIndex(
    (region) => region.world_region_id === RUSSIA_REGION_ID,
  );
  if (russiaAt > 0) {
    ordered.unshift(...ordered.splice(russiaAt, 1));
  }

  const result: IRegion[] = [];
  for (const region of ordered) {
    const regionId = region.world_region_id;
    result.push({
      id: regionId,
      name: titleOf(region),
      countries: countries.filter(
        (country) => country.world_region_id === regionId,
      ),
    });
  }
  return result;
}