Harfbuzz-ng中的語言shaper

時間 2019-11-19

標籤 harfbuzz 語言 shaper 简体版

原文原文鏈接

在harfbuzz-ng中，實際上存在兩種層面的shaper，一種是比較宏觀的字庫shaper，這類shaper通常都針對於特定的字庫文件類型來寫，好比graphite2 shaper，ot shaper等。還有一種就是語言的shaper，這種shaper是harfbuzz-ng所實現的ot shaper的一個子部分，這種shaper通常都針對於特定的語言或script來寫，好比印度語系的shaper，泰語/老撾語的shaper，阿拉伯語系的shaper等。harfbuzz-ng如何選擇一個shaper一文中有對字體shaper的選擇作過一個簡單的說明。那harfbuzz-ng又是在何處來決定要採用哪個語言shaper的呢？做出決定的依據是什麼？語言shaper的結構中又有些什麼內容呢？接下來，咱們就來嘗試解答這些問題。 api

在harfbuzz-ng如何選擇一個shaper一文中咱們有提過，harfbuzz-ng在建立shape_plan時，會經過一個函數建立一個shape_plan data，對於ot shaper而言，這個函數就是_hb_ot_shaper_shape_plan_data_create()，實際上，選擇語言shaper的動做也正是在這個部分完成的。咱們來看這個函數的定義： less

hb_ot_shaper_shape_plan_data_t *
_hb_ot_shaper_shape_plan_data_create (hb_shape_plan_t    *shape_plan,
				      const hb_feature_t *user_features,
				      unsigned int        num_user_features)
{
  hb_ot_shape_plan_t *plan = (hb_ot_shape_plan_t *) calloc (1, sizeof (hb_ot_shape_plan_t));
  if (unlikely (!plan))
    return NULL;

  hb_ot_shape_planner_t planner (shape_plan);

  planner.shaper = hb_ot_shape_complex_categorize (&planner);

  hb_ot_shape_collect_features (&planner, &shape_plan->props, user_features, num_user_features);

  planner.compile (*plan);

  if (plan->shaper->data_create) {
    plan->data = plan->shaper->data_create (plan);
    if (unlikely (!plan->data))
      return NULL;
  }

  return plan;
}

是在這個函數中，經過調用 hb_ot_shape_complex_categorize()函數來選定一個語言shaper的，該函數的定義以下：

static inline const hb_ot_complex_shaper_t *
hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner)
{
  switch ((hb_tag_t) planner->props.script)
  {
    default:
      return &_hb_ot_complex_shaper_default;


    /* Unicode-1.1 additions */
    case HB_SCRIPT_ARABIC:
    case HB_SCRIPT_MONGOLIAN:
    case HB_SCRIPT_SYRIAC:

    /* Unicode-5.0 additions */
    case HB_SCRIPT_NKO:
    case HB_SCRIPT_PHAGS_PA:

    /* Unicode-6.0 additions */
    case HB_SCRIPT_MANDAIC:

      /* For Arabic script, use the Arabic shaper even if no OT script tag was found.
       * This is because we do fallback shaping for Arabic script (and not others). */
      if (planner->map.chosen_script[0] != HB_OT_TAG_DEFAULT_SCRIPT ||
	  planner->props.script == HB_SCRIPT_ARABIC)
	return &_hb_ot_complex_shaper_arabic;
      else
	return &_hb_ot_complex_shaper_default;


    /* Unicode-1.1 additions */
    case HB_SCRIPT_THAI:
    case HB_SCRIPT_LAO:

      return &_hb_ot_complex_shaper_thai;



    /* ^--- Add new shapers here */


#if 0
    /* Note:
     *
     * These disabled scripts are listed in ucd/IndicSyllabicCategory.txt, but according
     * to Martin Hosken and Jonathan Kew do not require complex shaping.
     *
     * TODO We should automate figuring out which scripts do not need complex shaping
     *
     * TODO We currently keep data for these scripts in our indic table.  Need to fix the
     * generator to not do that.
     */


    /* Simple? */

    /* Unicode-3.2 additions */
    case HB_SCRIPT_BUHID:
    case HB_SCRIPT_HANUNOO:

    /* Unicode-5.1 additions */
    case HB_SCRIPT_SAURASHTRA:

    /* Unicode-6.0 additions */
    case HB_SCRIPT_BATAK:
    case HB_SCRIPT_BRAHMI:


    /* Simple */

    /* Unicode-1.1 additions */
    /* These have their own shaper now. */
    case HB_SCRIPT_LAO:
    case HB_SCRIPT_THAI:

    /* Unicode-2.0 additions */
    case HB_SCRIPT_TIBETAN:

    /* Unicode-3.2 additions */
    case HB_SCRIPT_TAGALOG:
    case HB_SCRIPT_TAGBANWA:

    /* Unicode-4.0 additions */
    case HB_SCRIPT_LIMBU:
    case HB_SCRIPT_TAI_LE:

    /* Unicode-4.1 additions */
    case HB_SCRIPT_KHAROSHTHI:
    case HB_SCRIPT_SYLOTI_NAGRI:

    /* Unicode-5.1 additions */
    case HB_SCRIPT_KAYAH_LI:

    /* Unicode-5.2 additions */
    case HB_SCRIPT_TAI_VIET:


#endif

    /* Unicode-1.1 additions */
    case HB_SCRIPT_BENGALI:
    case HB_SCRIPT_DEVANAGARI:
    case HB_SCRIPT_GUJARATI:
    case HB_SCRIPT_GURMUKHI:
    case HB_SCRIPT_KANNADA:
    case HB_SCRIPT_MALAYALAM:
    case HB_SCRIPT_ORIYA:
    case HB_SCRIPT_TAMIL:
    case HB_SCRIPT_TELUGU:

    /* Unicode-3.0 additions */
    case HB_SCRIPT_SINHALA:

    /* Unicode-4.1 additions */
    case HB_SCRIPT_BUGINESE:
    case HB_SCRIPT_NEW_TAI_LUE:

    /* Unicode-5.0 additions */
    case HB_SCRIPT_BALINESE:

    /* Unicode-5.1 additions */
    case HB_SCRIPT_CHAM:
    case HB_SCRIPT_LEPCHA:
    case HB_SCRIPT_REJANG:
    case HB_SCRIPT_SUNDANESE:

    /* Unicode-5.2 additions */
    case HB_SCRIPT_JAVANESE:
    case HB_SCRIPT_KAITHI:
    case HB_SCRIPT_MEETEI_MAYEK:
    case HB_SCRIPT_TAI_THAM:


    /* Unicode-6.1 additions */
    case HB_SCRIPT_CHAKMA:
    case HB_SCRIPT_SHARADA:
    case HB_SCRIPT_TAKRI:

      /* Only use Indic shaper if the font has Indic tables. */
      if (planner->map.found_script[0])
	return &_hb_ot_complex_shaper_indic;
      else
	return &_hb_ot_complex_shaper_default;

    case HB_SCRIPT_KHMER:
      /* A number of Khmer fonts in the wild don't have a 'pref' feature,
       * and as such won't shape properly via the Indic shaper;
       * however, they typically have 'liga' / 'clig' features that implement
       * the necessary "reordering" by means of ligature substitutions.
       * So we send such pref-less fonts through the generic shaper instead. */
      if (planner->map.found_script[0] &&
	  hb_ot_layout_language_find_feature (planner->face, HB_OT_TAG_GSUB,
					      planner->map.script_index[0],
					      planner->map.language_index[0],
					      HB_TAG ('p','r','e','f'),
					      NULL))
	return &_hb_ot_complex_shaper_indic;
      else
	return &_hb_ot_complex_shaper_default;

    case HB_SCRIPT_MYANMAR:
      /* For Myanmar, we only want to use the Indic shaper if the "new" script
       * tag is found.  For "old" script tag we want to use the default shaper. */
      if (planner->map.chosen_script[0] == HB_TAG ('m','y','m','2'))
	return &_hb_ot_complex_shaper_indic;
      else
	return &_hb_ot_complex_shaper_default;
  }
}

能夠看到，這個函數選擇語言shaper的依據就只有一個，那就是輸入字串的script，這個函數用一個switch-case結構來選擇一個語言shaper。在harfbuzz-0.9.12中，主要有以下的幾種語言shaper：

_hb_ot_complex_shaper_default
_hb_ot_complex_shaper_arabic
_hb_ot_complex_shaper_thai
_hb_ot_complex_shaper_indic

在最新版本的harfbuzz中，有又添加一些新的語言shaper。 ide

那語言shaper又是一個什麼樣的東西呢？咱們來看hb_ot_complex_shaper_t結構的定義：函數

struct hb_ot_complex_shaper_t
{
  char name[8];

  /* collect_features()
   * Called during shape_plan().
   * Shapers should use plan->map to add their features and callbacks.
   * May be NULL.
   */
  void (*collect_features) (hb_ot_shape_planner_t *plan);

  /* override_features()
   * Called during shape_plan().
   * Shapers should use plan->map to override features and add callbacks after
   * common features are added.
   * May be NULL.
   */
  void (*override_features) (hb_ot_shape_planner_t *plan);


  /* data_create()
   * Called at the end of shape_plan().
   * Whatever shapers return will be accessible through plan->data later.
   * If NULL is returned, means a plan failure.
   */
  void *(*data_create) (const hb_ot_shape_plan_t *plan);

  /* data_destroy()
   * Called when the shape_plan is being destroyed.
   * plan->data is passed here for destruction.
   * If NULL is returned, means a plan failure.
   * May be NULL.
   */
  void (*data_destroy) (void *data);


  /* preprocess_text()
   * Called during shape().
   * Shapers can use to modify text before shaping starts.
   * May be NULL.
   */
  void (*preprocess_text) (const hb_ot_shape_plan_t *plan,
			   hb_buffer_t              *buffer,
			   hb_font_t                *font);


  /* normalization_preference()
   * Called during shape().
   * May be NULL.
   */
  hb_ot_shape_normalization_mode_t
  (*normalization_preference) (const hb_segment_properties_t *props);

  /* decompose()
   * Called during shape()'s normalization.
   * May be NULL.
   */
  bool (*decompose) (const hb_ot_shape_normalize_context_t *c,
		     hb_codepoint_t  ab,
		     hb_codepoint_t *a,
		     hb_codepoint_t *b);

  /* compose()
   * Called during shape()'s normalization.
   * May be NULL.
   */
  bool (*compose) (const hb_ot_shape_normalize_context_t *c,
		   hb_codepoint_t  a,
		   hb_codepoint_t  b,
		   hb_codepoint_t *ab);

  /* setup_masks()
   * Called during shape().
   * Shapers should use map to get feature masks and set on buffer.
   * Shapers may NOT modify characters.
   * May be NULL.
   */
  void (*setup_masks) (const hb_ot_shape_plan_t *plan,
		       hb_buffer_t              *buffer,
		       hb_font_t                *font);

  bool zero_width_attached_marks;
  bool fallback_position;
};

能夠看到，這個結構裏面主要就是提供了一些callback，以方便shape等過程在須要的時候來調用。這個結構的註釋仍是提供了比較多的信息，各個callback的做用都有說明，此處再也不羅嗦。字體

Done. ui

相關標籤/搜索

每日一句

每一个你不满意的现在，都有一个你没有努力的曾经。