Format rules for raw Places responses

I’ve noticed that hitting the REST API and using the PHP client both return data that is much more raw than what many of the demos suggest the Places search can provide. I kinda feel like I was demoed a car, but when I ordered it I got a thousand pieces to put together my own engine :expressionless:

My question is: is there a formula/algorithm for converting the raw data returned by the REST API or the PHP client into meaningful address suggestions?

Hi,

Welcome to the Algolia Community forum.

A lot of the code in the places.js library is actually about formatting the response in a meaningful way.
I don’t know much PHP, but I think you should be able to adapt the following code from JS to PHP without too much trouble:

/*
 * The Places API can return 2 formats, depending on whether you pass it a
 * language parameter or not.
 *
 * A hit whose `locale_names` carries a `default` property is reported as
 * 'generic'; any other hit is reported as 'localised'.
 */
function detectFormat(hit) {
  const hasDefaultNames = typeof hit.locale_names.default !== 'undefined';
  if (hasDefaultNames) {
    return 'generic';
  }
  return 'localised';
}

/*
 * Builds a `(obj, key)` reader so callers can access the same data
 * regardless of the responseType: 'localised' reads `obj[key]` directly,
 * anything else reads `obj[key].default`.
 */
const accessor = (responseType) => {
  const readLocalised = (obj, key) => obj[key];
  const readGeneric = (obj, key) => obj[key].default;
  return responseType === 'localised' ? readLocalised : readGeneric;
};

/*
 * Extracts the street, city, postcode, county, and country from a hit,
 * and leverages the highlighted form for the street, city and county.
 * The postcode and the country are inserted as plain text.
 *
 * The end result will be something like `<em>55</em> <em>rue d'Amsterdam</em>, <em>Paris</em> 8ème arrondissement, 75008, Paris, France`
 *
 * Parts that resolve to `null` or `undefined` are skipped, so they do not
 * leave empty `, ,` segments in the output.
 */
function formatHit(hit) {
  const responseType = detectFormat(hit);
  const get = accessor(responseType);

  // in the case of city records, their name is in the `locale_names` field
  // instead of the `city` field.
  const cityKey = hit.is_city ? 'locale_names' : 'city';

  const country = get(hit, 'country');
  const county = getBestHighlightedForm(get(hit._highlightResult, 'county'));
  // getBestPostcode returns `{ postcode, highlightedPostcode }`; only the plain
  // postcode string belongs in the address, otherwise the whole object would
  // be stringified as "[object Object]" by `join`.
  const { postcode } = getBestPostcode(get(hit, 'postcode'), get(hit._highlightResult, 'postcode'));
  const city = getBestHighlightedForm(get(hit._highlightResult, cityKey));
  // a city record has no street: its `locale_names` are already used as the city
  const streetName = hit.is_city ? null : getBestHighlightedForm(get(hit._highlightResult, 'locale_names'));

  const components = [streetName, city, postcode, county, country];

  // remove missing values and concatenate everything
  return components
    .filter((text) => text !== null && text !== undefined)
    .join(', ');
}

/* the Places API returns an array of possible names for each attribute.
 * For instance, the city of Munich will have as a default names the list
 * ["München", "Munich"]
 * A user may have typed one or the other, but the preferred name is the
 * first item in the list (here - München)
 *
 * What this method does is look at every value of the _highlightResult array
 * after the first one, keep those whose `matchLevel` is not 'none', and pick
 * the one with the most matched words (ties go to the earliest entry).
 *
 * - no value at all (missing or empty list): returns `null`
 * - no alternative name matched: returns the first (default) value as is
 * - otherwise: returns the best alternative followed by the default value
 *   in parentheses
 *
 * The end result will be something like this:
 * 'Paris' query: `<em>Paris</em>`
 * 'Munich' query: `<em>Munich</em> (München)`
 */
function getBestHighlightedForm(highlightedValues) {
  // the attribute may be absent from the hit altogether
  if (!highlightedValues || highlightedValues.length === 0) {
    return null;
  }

  const defaultValue = highlightedValues[0].value;
  // collect all other matches
  const bestAttributes = [];
  for (let i = 1; i < highlightedValues.length; ++i) {
    const candidate = highlightedValues[i];
    if (candidate.matchLevel !== 'none') {
      bestAttributes.push({
        index: i,
        // compare on the NUMBER of matched words: comparing the arrays
        // themselves would coerce them to strings and sort alphabetically
        wordCount: Array.isArray(candidate.matchedWords) ? candidate.matchedWords.length : 0,
      });
    }
  }
  // no matches in this attribute, retrieve first value
  if (bestAttributes.length === 0) {
    return defaultValue;
  }
  // sort the matches by `desc(wordCount), asc(index)`
  bestAttributes.sort((a, b) => {
    if (a.wordCount !== b.wordCount) {
      return b.wordCount - a.wordCount;
    }
    return a.index - b.index;
  });
  // show the best match first and keep the default value in parentheses
  const bestValue = highlightedValues[bestAttributes[0].index].value;
  return `${bestValue} (${defaultValue})`;
}

/* similar behaviour but with postcodes
 *
 * `postcodes` is the plain list of postcodes of the hit and
 * `highlightedPostcodes` the matching `_highlightResult` entries; both are
 * read at the same index.
 *
 * Returns `{ postcode, highlightedPostcode }`:
 * - when there is no highlighted postcode at all, both are `null`
 * - when no postcode after the first one matched, the first one is returned
 * - otherwise the matching postcode with the most matched words is returned
 *   (ties go to the earliest entry)
 */
function getBestPostcode(postcodes, highlightedPostcodes) {
  // a hit may come without any postcode
  if (!highlightedPostcodes || highlightedPostcodes.length === 0) {
    return { postcode: null, highlightedPostcode: null };
  }
  const plainPostcodes = Array.isArray(postcodes) ? postcodes : [];

  const defaultValue = highlightedPostcodes[0].value;
  // collect all other matches
  const bestAttributes = [];
  for (let i = 1; i < highlightedPostcodes.length; ++i) {
    const candidate = highlightedPostcodes[i];
    if (candidate.matchLevel !== 'none') {
      bestAttributes.push({
        index: i,
        // compare on the NUMBER of matched words: comparing the arrays
        // themselves would coerce them to strings and sort alphabetically
        wordCount: Array.isArray(candidate.matchedWords) ? candidate.matchedWords.length : 0,
      });
    }
  }
  // no matches in this attribute, retrieve first value
  if (bestAttributes.length === 0) {
    return { postcode: plainPostcodes[0], highlightedPostcode: defaultValue };
  }
  // sort the matches by `desc(wordCount), asc(index)`
  bestAttributes.sort((a, b) => {
    if (a.wordCount !== b.wordCount) {
      return b.wordCount - a.wordCount;
    }
    return a.index - b.index;
  });

  const bestIndex = bestAttributes[0].index;
  return {
    postcode: plainPostcodes[bestIndex],
    highlightedPostcode: highlightedPostcodes[bestIndex].value,
  };
}

Let me know if that helps you with creating meaningful addresses.

Gonna give it a try. This is what the structure for a single “Hit” looks like:

{
  "country": "United States of America",
  "is_country": false,
  "city": [
    "New York City",
    "New York",
    "NYC"
  ],
  "is_highway": true,
  "importance": 26,
  "_tags": [
    "highway",
    "highway/secondary",
    "country/us",
    "address",
    "highway/primary",
    "highway/tertiary",
    "highway/service",
    "highway/residential",
    "highway/unclassified",
    "source/osm"
  ],
  "postcode": [
    "11232-2400",
    "11227",
    "10037:10454",
    "10458",
    "11209",
    "11215",
    "10465",
    "11220",
    "11357",
    "10457",
    "11217",
    "10451",
    "1122O",
    "10456",
    "NY 10455-1201",
    "11252",
    "11232"
  ],
  "county": [
    "Kings County",
    "Brooklyn",
    "Bronx County",
    "The Bronx",
    "Queens County",
    "Queens"
  ],
  "population": 8550405,
  "country_code": "us",
  "is_city": false,
  "is_popular": false,
  "administrative": [
    "New York",
    "New York City"
  ],
  "admin_level": 15,
  "suburb": [
    "Brooklyn",
    "Bronx"
  ],
  "is_suburb": false,
  "locale_names": [
    "3rd Avenue"
  ],
  "_geoloc": {
    "lat": 40.8531,
    "lng": -73.8918
  },
  "objectID": "128594357_222265996",
  "_highlightResult": {
    "country": {
      "value": "United States of America",
      "matchLevel": "none",
      "matchedWords": []
    },
    "city": [
      {
        "value": "New York City",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "New York",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "NYC",
        "matchLevel": "none",
        "matchedWords": []
      }
    ],
    "postcode": [
      {
        "value": "11232-2400",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "11227",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "10037:10454",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "10458",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "11209",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "11215",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "10465",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "11220",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "11357",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "10457",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "11217",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "10451",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "1122O",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "10456",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "NY 10455-1201",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "11252",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "11232",
        "matchLevel": "none",
        "matchedWords": []
      }
    ],
    "county": [
      {
        "value": "Kings County",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "Brooklyn",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "Bronx County",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "The Bronx",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "Queens County",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "Queens",
        "matchLevel": "none",
        "matchedWords": []
      }
    ],
    "administrative": [
      {
        "value": "New York",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "New York City",
        "matchLevel": "none",
        "matchedWords": []
      }
    ],
    "suburb": [
      {
        "value": "Brooklyn",
        "matchLevel": "none",
        "matchedWords": []
      },
      {
        "value": "Bronx",
        "matchLevel": "none",
        "matchedWords": []
      }
    ],
    "locale_names": [
      {
        "value": "3rd Avenue",
        "matchLevel": "none",
        "matchedWords": []
      }
    ]
  }
}

Unfortunately, though the code was insightful, it does not help. The code assumes that certain properties exist which I simply do not see in the response coming from the REST API. It is as if the JS clients pass undocumented parameters in order to get back more data than what the docs describe.

I’m curious if anyone has figured this out or has everyone given up and used the JS clients offered by Algolia.

So the hit format that you presented has the localised format, which is a simplified version of the generic format.

Since it is localised, I assume the existence of the following attributes:

  • hit.country string
  • hit._highlightResult.county []
  • hit._highlightResult.city []
  • hit._highlightResult.locale_names []
  • hit.is_city bool
  • hit._highlightResult.postcode []
  • hit.postcode []

All of which are in the response hit you provided, and of the assumed type.
I assume that the _highlightResult[...] are arrays, which they are in the provided example,
and that they have the following attributes:

  • matchLevel none | partial | full
  • matchedWords []
  • value string

As such I am not sure I see which attribute you see missing for this to work. Would you mind sharing an example?

There is a typo in the code example though:

const postcode = getBestPostcode(get(hit, 'postcode'), get(hit._highlightResult, 'postcode'));

should be

const { postcode } = getBestPostcode(get(hit, 'postcode'), get(hit._highlightResult, 'postcode'));

since getBestPostcode returns an object with { postcode, highlightedPostcode }.

I might have mistaken the effect of the typo for missing fields; the exception that was thrown read that way. I am excited to try again. Thanks so much for looking into this!