Source code for hail.expr.expressions.typed_expressions

from typing import Dict, Mapping, Sequence, Union

import numpy as np
from deprecated import deprecated

import hail as hl
from hail import ir
from hail.expr.types import (
    HailType,
    is_numeric,
    tarray,
    tbool,
    tcall,
    tdict,
    tfloat32,
    tfloat64,
    tint32,
    tint64,
    tinterval,
    tlocus,
    tndarray,
    tset,
    tstr,
    tstream,
    tstruct,
    ttuple,
)
from hail.typecheck import (
    anyfunc,
    dictof,
    func_spec,
    identity,
    nullable,
    oneof,
    sliceof,
    tupleof,
    typecheck,
    typecheck_method,
)
from hail.utils.java import Env, warning
from hail.utils.linkedlist import LinkedList
from hail.utils.misc import get_nice_attr_error, get_nice_field_error, wrap_to_list, wrap_to_tuple

from .base_expression import Expression, ExpressionException, to_expr, unify_all, unify_types
from .expression_typecheck import (
    coercer_from_dtype,
    expr_any,
    expr_array,
    expr_bool,
    expr_dict,
    expr_int32,
    expr_int64,
    expr_interval,
    expr_ndarray,
    expr_numeric,
    expr_oneof,
    expr_set,
    expr_str,
    expr_tuple,
)
from .indices import Aggregation, Indices


[docs]class CollectionExpression(Expression):
    """Expression of type :class:`.tarray` or :class:`.tset`

    >>> a = hl.literal([1, 2, 3, 4, 5])

    >>> s3 = hl.literal({'Alice', 'Bob', 'Charlie'})
    """

    def _filter_missing_method(self, filter_missing: bool, name: str, ret_type: HailType, *args):
        """Call the method `name` on this collection, optionally dropping missing elements first.

        Parameters
        ----------
        filter_missing : bool
            If true, the method is invoked on ``self.filter(hl.is_defined)``
            instead of on `self`.
        name : str
            Name forwarded to ``_method``.
        ret_type : HailType
            Return type forwarded to ``_method``.
        *args
            Extra arguments forwarded to ``_method``.

        Returns
        -------
        Whatever ``_method`` returns for the chosen collection.
        """
        target = self.filter(hl.is_defined) if filter_missing else self
        return target._method(name, ret_type, *args)

[docs]    @typecheck_method(f=func_spec(1, expr_bool))
    def any(self, f):
        """Test whether `f` is ``True`` for at least one element.

        Examples
        --------

        >>> hl.eval(a.any(lambda x: x % 2 == 0))
        True

        >>> hl.eval(s3.any(lambda x: x[0] == 'D'))
        False

        Notes
        -----
        For an empty collection the result is ``False``: the fold below starts
        from ``False`` and never sees an element.

        Parameters
        ----------
        f : function ( (arg) -> :class:`.BooleanExpression`)
            Predicate applied to every element of the collection. Must return a
            :class:`.BooleanExpression`.

        Returns
        -------
        :class:`.BooleanExpression`.
            ``True`` if `f` returns ``True`` for any element, ``False`` otherwise.
        """

        def either(accum, elt):
            # Logical OR of the running result with the predicate on this element.
            return accum | f(elt)

        return hl.array(self).fold(either, False)

[docs]    @typecheck_method(f=func_spec(1, expr_bool))
    def filter(self, f):
        """Keep only the elements for which `f` returns ``True``.

        Examples
        --------

        >>> hl.eval(a.filter(lambda x: x % 2 == 0))
        [2, 4]

        >>> hl.eval(s3.filter(lambda x: ~(x[-1] == 'e')))  # doctest: +SKIP_OUTPUT_CHECK
        {'Bob'}

        Notes
        -----
        The result has the same collection kind as the callee: filtering a
        :class:`.SetExpression` gives a :class:`.SetExpression`, and filtering
        an :class:`.ArrayExpression` gives an :class:`.ArrayExpression`.

        Parameters
        ----------
        f : function ( (arg) -> :class:`.BooleanExpression`)
            Predicate applied to every element of the collection. Must return a
            :class:`.BooleanExpression`.

        Returns
        -------
        :class:`.CollectionExpression`
            Expression of the same type as the callee.

        Raises
        ------
        TypeError
            If `f` does not return an expression of type ``bool``.
        """
        # FIXME: enable doctest
        element_type = self.dtype.element_type

        def check_predicate_type(t):
            # The lambda body must be boolean; the filtered result is always
            # built as an array first (and converted back to a set below).
            if t != tbool:
                raise TypeError("'filter' expects 'f' to return an expression of type 'bool', found '{}'".format(t))
            return hl.tarray(self._type.element_type)

        def build_ir(array, name, body):
            kept = ir.StreamFilter(ir.toStream(array), name, body)
            return ir.toArray(kept)

        filtered = hl.array(self)._ir_lambda_method(build_ir, f, element_type, check_predicate_type)

        if isinstance(self.dtype, tset):
            return hl.set(filtered)
        assert isinstance(self.dtype, tarray), self.dtype
        return filtered

[docs]    @typecheck_method(f=func_spec(1, expr_bool))
    def find(self, f):
        """Return the first element for which `f` returns ``True``.

        Examples
        --------

        >>> hl.eval(a.find(lambda x: x ** 2 > 20))
        5

        >>> hl.eval(s3.find(lambda x: x[0] == 'D'))
        None

        Notes
        -----
        The result is missing when `f` returns ``False`` for every element.

        Parameters
        ----------
        f : function ( (arg) -> :class:`.BooleanExpression`)
            Predicate applied to every element of the collection. Must return a
            :class:`.BooleanExpression`.

        Returns
        -------
        :class:`.Expression`
            Expression whose type is the element type of the collection.
        """
        # FIXME this should short-circuit
        not_found = hl.missing(self._type.element_type)

        def keep_first_match(accum, x):
            # Only replace the accumulator while nothing has been found yet.
            return hl.if_else(hl.is_missing(accum) & f(x), x, accum)

        return self.fold(keep_first_match, not_found)

[docs]    @typecheck_method(f=func_spec(1, expr_any))
    def flatmap(self, f):
        """Apply `f` to each element and concatenate the resulting collections.

        Examples
        --------

        >>> hl.eval(a.flatmap(lambda x: hl.range(0, x)))
        [0, 0, 1, 0, 1, 2, 0, 1, 2, 3, 0, 1, 2, 3, 4]

        >>> hl.eval(s3.flatmap(lambda x: hl.set(hl.range(0, x.length()).map(lambda i: x[i]))))  # doctest: +SKIP_OUTPUT_CHECK
        {'A', 'B', 'C', 'a', 'b', 'c', 'e', 'h', 'i', 'l', 'o', 'r'}

        Parameters
        ----------
        f : function ( (arg) -> :class:`.CollectionExpression`)
            Function from the element type of the collection to the kind of
            the collection. For instance, `flatmap` on a ``set<str>`` should
            take a ``str`` and return a ``set``.

        Returns
        -------
        :class:`.CollectionExpression`

        Raises
        ------
        TypeError
            If `f` returns an array when called on a set, or anything other
            than an array when called on an array.
        """
        if isinstance(self._type, tarray):
            expected_type, s = tarray, 'array'
        else:
            expected_type, s = tset, 'set'

        # Probe `f` with a throwaway variable just to learn its return type.
        probe = construct_variable(Env.get_uid(), self.dtype.element_type)
        value_type = f(probe).dtype

        if not isinstance(value_type, expected_type):
            raise TypeError(
                "'flatmap' expects 'f' to return an expression of type '{}', found '{}'".format(s, value_type)
            )

        returns_set = isinstance(value_type, tset)

        def as_array(x):
            # The stream machinery works on arrays, so set results are converted.
            result = f(x)
            return hl.array(result) if returns_set else result

        def build_ir(array, name, body):
            flattened = ir.StreamFlatMap(ir.toStream(array), name, ir.ToStream(body))
            return ir.toArray(flattened)

        array_flatmap = hl.array(self)._ir_lambda_method(build_ir, as_array, self.dtype.element_type, identity)

        if not isinstance(self.dtype, tset):
            assert isinstance(self.dtype, tarray), self.dtype
            return array_flatmap
        return hl.set(array_flatmap)

[docs]    @typecheck_method(f=func_spec(2, expr_any), zero=expr_any)
    def fold(self, f, zero):
        """Reduce the collection to a single value with `f`, starting from `zero`.

        Examples
        --------
        >>> a = [0, 1, 2]

        >>> hl.eval(hl.fold(lambda i, j: i + j, 0, a))
        3

        Parameters
        ----------
        f : function ( (:class:`.Expression`, :class:`.Expression`) -> :class:`.Expression`)
            Function which takes the cumulative value and the next element, and
            returns a new value.
        zero : :class:`.Expression`
            Initial value to pass in as left argument of `f`.

        Returns
        -------
        :class:`.Expression`.
        """
        # Only arrays expose `_to_stream`, so anything else is converted first.
        as_array = self if isinstance(self, ArrayExpression) else hl.array(self)
        stream = as_array._to_stream()
        return stream.fold(lambda x, y: f(x, y), zero)

[docs]    @typecheck_method(f=func_spec(1, expr_bool))
    def all(self, f):
        """Test whether `f` is ``True`` for every element.

        Examples
        --------

        >>> hl.eval(a.all(lambda x: x < 10))
        True

        Notes
        -----
        For an empty collection the result is ``True``: the fold below starts
        from ``True`` and never sees an element.

        Parameters
        ----------
        f : function ( (arg) -> :class:`.BooleanExpression`)
            Predicate applied to every element of the collection. Must return a
            :class:`.BooleanExpression`.

        Returns
        -------
        :class:`.BooleanExpression`.
            ``True`` if `f` returns ``True`` for every element, ``False`` otherwise.
        """

        def both(accum, elt):
            # Logical AND of the running result with the predicate on this element.
            return accum & f(elt)

        return hl.array(self).fold(both, True)

[docs]    @typecheck_method(f=func_spec(1, expr_any))
    def group_by(self, f):
        """Build a dictionary that groups the elements by the key `f` computes for them.

        Examples
        --------

        >>> hl.eval(a.group_by(lambda x: x % 2 == 0))  # doctest: +SKIP_OUTPUT_CHECK
        {False: [1, 3, 5], True: [2, 4]}

        >>> hl.eval(s3.group_by(lambda x: x.length()))  # doctest: +SKIP_OUTPUT_CHECK
        {3: {'Bob'}, 5: {'Alice'}, 7: {'Charlie'}}

        Parameters
        ----------
        f : function ( (arg) -> :class:`.Expression`)
            Function to evaluate for each element of the collection to produce a key for the
            resulting dictionary.

        Returns
        -------
        :class:`.DictExpression`.
            Dictionary keyed by results of `f`.
        """

        def key_and_element(x):
            return hl.tuple([f(x), x])

        # Pair every element with its key, then let GroupByKey collect the pairs.
        keyed = hl.array(self).map(key_and_element)
        key_type, element_type = keyed.dtype.element_type.types
        # NOTE(review): the value type is declared as an array even when `self`
        # is a set, while the second example above shows set values -- confirm.
        result_type = tdict(key_type, tarray(element_type))
        grouped_ir = ir.GroupByKey(ir.toStream(keyed._ir))
        return construct_expr(grouped_ir, result_type, keyed._indices, keyed._aggregations)

[docs]    @typecheck_method(f=func_spec(1, expr_any))
    def map(self, f):
        """Apply `f` to every element of the collection.

        Examples
        --------

        >>> hl.eval(a.map(lambda x: x ** 3))
        [1.0, 8.0, 27.0, 64.0, 125.0]

        >>> hl.eval(s3.map(lambda x: x.length()))
        {3, 5, 7}

        Parameters
        ----------
        f : function ( (arg) -> :class:`.Expression`)
            Function to transform each element of the collection.

        Returns
        -------
        :class:`.CollectionExpression`.
            Collection where each element has been transformed according to `f`.
        """

        def build_ir(array, name, body):
            mapped = ir.toArray(ir.StreamMap(ir.toStream(array), name, body))
            # For a set, the mapped array is turned back into a set in the IR.
            return ir.ToSet(ir.toStream(mapped)) if isinstance(self.dtype, tset) else mapped

        def result_type(t):
            # Same collection kind as the callee, with the new element type.
            return self._type.__class__(t)

        array_map = hl.array(self)._ir_lambda_method(build_ir, f, self._type.element_type, result_type)

        if not isinstance(self._type, tset):
            assert isinstance(self._type, tarray)
            return array_map
        return hl.set(array_map)

[docs]    @typecheck_method(f=anyfunc)
    def starmap(self, f):
        r"""Apply `f` to every element of a collection of tuples, unpacking each tuple into arguments.

        Examples
        --------

        >>> hl.eval(hl.array([(1, 2), (2, 3)]).starmap(lambda x, y: x+y))
        [3, 5]

        Parameters
        ----------
        f : function ( (\*args) -> :class:`.Expression`)
            Function to transform each element of the collection.

        Returns
        -------
        :class:`.CollectionExpression`.
            Collection where each element has been transformed according to `f`.
        """

        def unpack_and_apply(e):
            # Each element is splatted into positional arguments of `f`.
            return f(*e)

        return self.map(unpack_and_apply)

[docs]    def length(self):
        """Returns the size of a collection.

        Examples
        --------

        >>> hl.eval(a.length())
        5

        >>> hl.eval(s3.length())
        3

        Returns
        -------
        :class:`.Expression` of type :py:data:`.tint32`
            The number of elements in the collection.
        """
        return self.size()

[docs]    def size(self):
        """Returns the size of a collection.

        Examples
        --------

        >>> hl.eval(a.size())
        5

        >>> hl.eval(s3.size())
        3

        Returns
        -------
        :class:`.Expression` of type :py:data:`.tint32`
            The number of elements in the collection.
        """
        return apply_expr(lambda x: ir.ArrayLen(ir.CastToArray(x)), tint32, hl.array(self))

    def _extra_summary_fields(self, agg_result):
        return {
            'Min Size': agg_result[0],
            'Max Size': agg_result[1],
            'Mean Size': agg_result[2],
        }

    def _nested_summary(self, agg_result, top):
        """Summarize the elements of the collection from the fourth entry of `agg_result`.

        A fresh variable of the element type (sharing this expression's
        indices) stands in for "an element"; its ``_summarize`` result is
        returned under the ``'[<elements>]'`` key. `top` is not used here.
        """
        element_type = self.dtype.element_type
        elt = construct_variable(Env.get_uid(), element_type, indices=self._indices)
        element_summary = elt._summarize(agg_result[3])
        return {'[<elements>]': element_summary}

    def _summary_aggs(self):
        """Build the aggregations used to summarize this collection.

        Returns a tuple expression of (min length, max length, mean length,
        per-element summary aggregations), in that order.
        """
        length = hl.len(self)
        min_size = hl.agg.min(length)
        max_size = hl.agg.max(length)
        mean_size = hl.agg.mean(length)
        # Element-level summaries are aggregated over the exploded collection.
        element_aggs = hl.agg.explode(lambda elt: elt._all_summary_aggs(), self)
        return hl.tuple((min_size, max_size, mean_size, element_aggs))

    def __contains__(self, element):
        class_name = type(self).__name__
        raise TypeError(
            f"Cannot use `in` operator on hail `{class_name}`s. Use the `contains` method instead."
            "`names.contains('Charlie')` instead of `'Charlie' in names`"
        )


[docs]class ArrayExpression(CollectionExpression):
    """Expression of type :class:`.tarray`.

    >>> names = hl.literal(['Alice', 'Bob', 'Charlie'])

    See Also
    --------
    :class:`.CollectionExpression`
    """

[docs]    def __getitem__(self, item):
        """Index into or slice the array.

        Examples
        --------

        Index with a single integer:

        >>> hl.eval(names[1])
        'Bob'

        >>> hl.eval(names[-1])
        'Charlie'

        Slicing is also supported:

        >>> hl.eval(names[1:])
        ['Bob', 'Charlie']

        Parameters
        ----------
        item : slice or :class:`.Expression` of type :py:data:`.tint32`
            Index or slice.

        Returns
        -------
        :class:`.Expression`
            Element or array slice.
        """
        if isinstance(item, slice):
            return self._slice(item.start, item.stop, item.step)
        item = to_expr(item)
        if not item.dtype == tint32:
            raise TypeError(
                "array expects key to be type 'slice' or expression of type 'int32', "
                "found expression of type '{}'".format(item._type)
            )
        else:
            return self._method("indexArray", self.dtype.element_type, item)

    @typecheck_method(start=nullable(expr_int32), stop=nullable(expr_int32), step=nullable(expr_int32))
    def _slice(self, start=None, stop=None, step=None):
        """Build the array-slice expression behind ``self[start:stop:step]``.

        Defaults: a missing `step` becomes ``1``; a missing `start` becomes
        ``0`` for a non-negative step and ``-1`` otherwise; a missing `stop`
        is passed to the IR node as ``None``.
        """
        supplied = [bound for bound in (start, stop, step) if bound is not None]
        indices, aggregations = unify_all(self, *supplied)

        if step is None:
            step = hl.int(1)
        if start is None:
            start = hl.if_else(step >= 0, 0, -1)

        stop_ir = None if stop is None else stop._ir
        slice_ir = ir.ArraySlice(self._ir, start._ir, stop_ir, step._ir)
        return construct_expr(slice_ir, self.dtype, indices, aggregations)

[docs]    @typecheck_method(f=func_spec(1, expr_any))
    def aggregate(self, f):
        """Compute a summary of this array using the aggregator library.

        Use this to reach functionality that exists only as an aggregator and
        not in the basic expression library, for instance :func:`.call_stats`.

        Parameters
        ----------
        f
            Aggregation function

        Returns
        -------
        :class:`.Expression`
        """
        summary = hl.agg._aggregate_local_array(self, f)
        return summary

[docs]    @typecheck_method(item=expr_any)
    def contains(self, item):
        """Test whether `item` occurs in the array.

        Examples
        --------

        >>> hl.eval(names.contains('Charlie'))
        True

        >>> hl.eval(names.contains('Helen'))
        False

        Parameters
        ----------
        item : :class:`.Expression`
            Item for inclusion test.

        Warning
        -------
        This method takes time proportional to the length of the array. If a
        pipeline uses this method on the same array several times, it may be
        more efficient to convert the array to a set first early in the script
        (:func:`~hail.expr.functions.set`).

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if the element is found in the array, ``False`` otherwise.
        """
        is_member = self._method("contains", tbool, item)
        return is_member

[docs]    @deprecated(version="0.2.58", reason="Replaced by first")
    def head(self):
        """Deprecated in favor of :meth:`~.ArrayExpression.first`.

        Delegates to :meth:`~.ArrayExpression.first`: returns the first element
        of the array, or missing if empty.

        Returns
        -------
        :class:`.Expression`
            Element.

        Examples
        --------
        >>> hl.eval(names.head())
        'Alice'

        If the array has no elements, then the result is missing:

        >>> hl.eval(names.filter(lambda x: x.startswith('D')).head())
        None
        """
        first_element = self.first()
        return first_element

[docs]    def first(self):
        """Returns the first element of the array, or missing if empty.

        Returns
        -------
        :class:`.Expression`
            Element.

        Examples
        --------
        >>> hl.eval(names.first())
        'Alice'

        If the array has no elements, then the result is missing:
        >>> hl.eval(names.filter(lambda x: x.startswith('D')).first())
        None
        """
        # FIXME: this should generate short-circuiting IR when that is possible
        return hl.fold(lambda acc, elt: hl.coalesce(acc, elt), hl.missing(self.dtype.element_type), self)

[docs]    def last(self):
        """Returns the last element of the array, or missing if empty.

        Returns
        -------
        :class:`.Expression`
            Element.

        Examples
        --------
        >>> hl.eval(names.last())
        'Charlie'

        If the array has no elements, then the result is missing:
        >>> hl.eval(names.filter(lambda x: x.startswith('D')).last())
        None
        """
        return hl.rbind(self, hl.len(self), lambda x, n: hl.or_missing(n > 0, x[n - 1]))

[docs]    @typecheck_method(x=oneof(func_spec(1, expr_any), expr_any))
    def index(self, x):
        """Returns the first index of `x`, or missing.

        Parameters
        ----------
        x : :class:`.Expression` or :obj:`typing.Callable`
            Value to find, or function from element to Boolean expression.

        Returns
        -------
        :class:`.Int32Expression`

        Examples
        --------
        >>> hl.eval(names.index('Bob'))
        1

        >>> hl.eval(names.index('Beth'))
        None

        >>> hl.eval(names.index(lambda x: x.endswith('e')))
        0

        >>> hl.eval(names.index(lambda x: x.endswith('h')))
        None
        """
        if callable(x):

            def matches(elt):
                return x(elt)

        else:

            def matches(elt):
                return elt == x

        def first_matching_position(a):
            # Scan the index range and keep the first position whose element matches.
            positions = hl.range(0, a.length())
            return positions.filter(lambda i: matches(a[i])).first()

        return hl.bind(first_matching_position, self)

[docs]    @typecheck_method(item=expr_any)
    def append(self, item):
        """Append an element to the array and return the result.

        Examples
        --------

        >>> hl.eval(names.append('Dan'))
        ['Alice', 'Bob', 'Charlie', 'Dan']

        Note
        ----
        This method does not mutate the caller, but instead returns a new
        array by copying the caller and adding `item`.

        Parameters
        ----------
        item : :class:`.Expression`
            Element to append, same type as the array element type.

        Returns
        -------
        :class:`.ArrayExpression`

        Raises
        ------
        TypeError
            If the type of `item` differs from the array's element type.
        """
        element_type = self._type.element_type
        if item._type != element_type:
            # Report the element type through the public `element_type`
            # accessor, the same one used for the comparison above, rather
            # than reaching into the private `_element_type` attribute.
            raise TypeError(
                "'ArrayExpression.append' expects 'item' to be the same type as its elements\n"
                "    array element type: '{}'\n"
                "    type of arg 'item': '{}'".format(element_type, item._type)
            )
        return self._method("append", self._type, item)

[docs]    @typecheck_method(a=expr_array())
    def extend(self, a):
        """Concatenate this array with `a` and return the result.

        Examples
        --------

        >>> hl.eval(names.extend(['Dan', 'Edith']))
        ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith']

        Parameters
        ----------
        a : :class:`.ArrayExpression`
            Array to concatenate, same type as the callee.

        Returns
        -------
        :class:`.ArrayExpression`

        Raises
        ------
        TypeError
            If the type of `a` differs from the type of the callee.
        """
        if a._type == self._type:
            return self._method("extend", self._type, a)

        raise TypeError(
            "'ArrayExpression.extend' expects 'a' to be the same type as the caller\n"
            "    caller type: '{}'\n"
            "    type of 'a': '{}'".format(self._type, a._type)
        )

[docs]    @typecheck_method(f=func_spec(2, expr_any), zero=expr_any)
    def scan(self, f, zero):
        """Produce the array of running values of `f` over the elements, starting from `zero`.

        Examples
        --------
        >>> a = [0, 1, 2]

        >>> hl.eval(hl.array_scan(lambda i, j: i + j, 0, a))
        [0, 0, 1, 3]

        Parameters
        ----------
        f : function ( (:class:`.Expression`, :class:`.Expression`) -> :class:`.Expression`)
            Function which takes the cumulative value and the next element, and
            returns a new value.
        zero : :class:`.Expression`
            Initial value to pass in as left argument of `f`.

        Returns
        -------
        :class:`.ArrayExpression`.
        """
        stream = self._to_stream()
        scanned = stream.scan(lambda x, y: f(x, y), zero)
        return scanned.to_array()

[docs]    @typecheck_method(group_size=expr_int32)
    def grouped(self, group_size):
        """Split the array into consecutive subarrays of `group_size` elements.

        Examples
        --------
        >>> a = hl.array([0, 1, 2, 3, 4])

        >>> hl.eval(a.grouped(2))
        [[0, 1], [2, 3], [4]]

        Parameters
        ----------
        group_size : :class:`.Int32Expression`
            The number of elements per group.

        Returns
        -------
        :class:`.ArrayExpression`.
        """
        indices, aggregations = unify_all(self, group_size)
        element_type = self._type.element_type

        grouped_stream = ir.StreamGrouped(ir.toStream(self._ir), group_size._ir)

        # Each group is referenced as a stream and converted to an array so
        # that the final result is an array of arrays.
        group_name = Env.get_uid("stream_grouped_map_to_arrays")
        group_ref = ir.Ref(group_name, tstream(element_type))
        groups_as_arrays = ir.StreamMap(grouped_stream, group_name, ir.toArray(group_ref))

        result_type = tarray(self._type)
        return construct_expr(ir.toArray(groups_as_arrays), result_type, indices, aggregations)

    def _to_stream(self):
        """Wrap this array's IR as a stream expression with the same element type.

        The indices and aggregations of this expression are carried over
        unchanged.
        """
        stream_ir = ir.toStream(self._ir)
        stream_type = tstream(self.dtype.element_type)
        return construct_expr(stream_ir, stream_type, self._indices, self._aggregations)


class ArrayStructExpression(ArrayExpression):
    """Expression of type :class:`.tarray` that eventually contains structs.

    >>> people = hl.literal([hl.struct(name='Alice', age=57),
    ...                      hl.struct(name='Bob', age=12),
    ...                      hl.struct(name='Charlie', age=34)])

    Nested collections that contain structs are also
    :class:`.ArrayStructExpression` instances:

    >>> people = hl.literal([[hl.struct(name='Alice', age=57), hl.struct(name='Bob', age=12)],
    ...                      [hl.struct(name='Charlie', age=34)]])

    See Also
    --------
    :class:`.ArrayExpression`, :class:`.CollectionExpression`, :class:`.SetStructExpression`
    """

    def __getattr__(self, item):
        """Attribute-style access to a struct field, e.g. ``x.a`` for ``x['a']``.

        Delegates to :meth:`__getitem__` and converts a ``KeyError`` into an
        ``AttributeError`` with a descriptive message.
        """
        try:
            # Call this class's __getitem__ explicitly rather than `self[item]`.
            return ArrayStructExpression.__getitem__(self, item)
        except KeyError as e:
            # Walk through nested element types until the struct type is reached.
            dt = self.dtype.element_type
            while not isinstance(dt, tstruct):
                dt = dt.element_type
            # NOTE(review): `_fields` is set just before building the message;
            # presumably get_nice_attr_error reads it -- confirm.
            self._fields = dt
            raise AttributeError(get_nice_attr_error(self, item)) from e

    def __getitem__(self, item):
        """If a string, get a field from each struct in this array. If an integer, get
        the item at that index.

        Examples
        --------

        >>> x = hl.array([hl.struct(a='foo', b=3), hl.struct(a='bar', b=4)])
        >>> hl.eval(x.a)
        ['foo', 'bar']

        >>> a = hl.array([hl.struct(b=[hl.struct(inner=1),
        ...                            hl.struct(inner=2)]),
        ...               hl.struct(b=[hl.struct(inner=3)])])
        >>> hl.eval(a.b)
        [[Struct(inner=1), Struct(inner=2)], [Struct(inner=3)]]
        >>> hl.eval(a.b.inner)
        [[1, 2], [3]]
        >>> hl.eval(hl.flatten(a.b).inner)
        [1, 2, 3]
        >>> hl.eval(hl.flatten(a.b.inner))
        [1, 2, 3]

        Parameters
        ----------
        item : :class:`str`
            Field name. Any non-string `item` is passed on to
            :meth:`.ArrayExpression.__getitem__` as an index or slice.

        Returns
        -------
        :class:`.ArrayExpression`
            An array formed by getting the given field for each struct in
            this array

        See Also
        --------
        :meth:`.ArrayExpression.__getitem__`
        """

        if isinstance(item, str):
            # Field access is mapped over the elements.
            return self.map(lambda x: x[item])
        return super().__getitem__(item)


[docs]class ArrayNumericExpression(ArrayExpression):
    """Expression of type :class:`.tarray` with a numeric type.

    Numeric arrays support arithmetic both with scalar values and other arrays.
    Arithmetic between two numeric arrays requires that the length of each array
    is identical, and will apply the operation positionally (``a1 * a2`` will
    multiply the first element of ``a1`` by the first element of ``a2``, the
    second element of ``a1`` by the second element of ``a2``, and so on).
    Arithmetic with a scalar will apply the operation to each element of the
    array.

    >>> a1 = hl.literal([0, 1, 2, 3, 4, 5])

    >>> a2 = hl.literal([1, -1, 1, -1, 1, -1])

    """

[docs]    def __neg__(self):
        """Negate elements of the array.

        Examples
        --------

        >>> hl.eval(-a1)
        [0, -1, -2, -3, -4, -5]

        Returns
        -------
        :class:`.ArrayNumericExpression`
            Array expression of the same type.
        """
        # Negation is expressed as multiplication by the scalar -1.
        return self * -1

[docs]    def __add__(self, other):
        """Positionally add an array or a scalar.

        Examples
        --------

        >>> hl.eval(a1 + 5)
        [5, 6, 7, 8, 9, 10]

        >>> hl.eval(a1 + a2)
        [1, 0, 3, 2, 5, 4]

        Parameters
        ----------
        other : :class:`.NumericExpression` or :class:`.ArrayNumericExpression`
            Value or array to add.

        Returns
        -------
        :class:`.ArrayNumericExpression`
            Array of positional sums.
        """
        return self._bin_op_numeric("+", other)

    def __radd__(self, other):
        """Reflected addition (``other + self``); see :meth:`__add__`."""
        return self._bin_op_numeric_reverse("+", other)

[docs]    def __sub__(self, other):
        """Positionally subtract an array or a scalar.

        Examples
        --------

        >>> hl.eval(a2 - 1)
        [0, -2, 0, -2, 0, -2]

        >>> hl.eval(a1 - a2)
        [-1, 2, 1, 4, 3, 6]

        Parameters
        ----------
        other : :class:`.NumericExpression` or :class:`.ArrayNumericExpression`
            Value or array to subtract.

        Returns
        -------
        :class:`.ArrayNumericExpression`
            Array of positional differences.
        """
        return self._bin_op_numeric("-", other)

    def __rsub__(self, other):
        """Reflected subtraction (``other - self``); see :meth:`__sub__`."""
        return self._bin_op_numeric_reverse("-", other)

[docs]    def __mul__(self, other):
        """Positionally multiply by an array or a scalar.

        Examples
        --------

        >>> hl.eval(a2 * 5)
        [5, -5, 5, -5, 5, -5]

        >>> hl.eval(a1 * a2)
        [0, -1, 2, -3, 4, -5]

        Parameters
        ----------
        other : :class:`.NumericExpression` or :class:`.ArrayNumericExpression`
            Value or array to multiply by.

        Returns
        -------
        :class:`.ArrayNumericExpression`
            Array of positional products.
        """
        return self._bin_op_numeric("*", other)

    def __rmul__(self, other):
        """Reflected multiplication (``other * self``); see :meth:`__mul__`."""
        return self._bin_op_numeric_reverse("*", other)

[docs]    def __truediv__(self, other):
        """Positionally divide by an array or a scalar.

        Examples
        --------

        >>> hl.eval(a1 / 10)  # doctest: +SKIP_OUTPUT_CHECK
        [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]

        >>> hl.eval(a2 / a1)  # doctest: +SKIP_OUTPUT_CHECK
        [inf, -1.0, 0.5, -0.3333333333333333, 0.25, -0.2]

        Parameters
        ----------
        other : :class:`.NumericExpression` or :class:`.ArrayNumericExpression`
            Value or array to divide by.

        Returns
        -------
        :class:`.ArrayNumericExpression`
            Array of positional quotients.
        """
        # `_div_ret_type_f` is defined outside this class; presumably it picks
        # the result type for true division -- confirm against its definition.
        return self._bin_op_numeric("/", other, self._div_ret_type_f)

    def __rtruediv__(self, other):
        """Reflected true division (``other / self``); see :meth:`__truediv__`."""
        return self._bin_op_numeric_reverse("/", other, self._div_ret_type_f)

[docs]    def __floordiv__(self, other):
        """Positionally divide by an array or a scalar using floor division.

        Examples
        --------

        >>> hl.eval(a1 // 2)
        [0, 0, 1, 1, 2, 2]

        Parameters
        ----------
        other : :class:`.NumericExpression` or :class:`.ArrayNumericExpression`
            Value or array to divide by.

        Returns
        -------
        :class:`.ArrayNumericExpression`
        """
        return self._bin_op_numeric('//', other)

    def __rfloordiv__(self, other):
        """Reflected floor division (``other // self``); see :meth:`__floordiv__`."""
        return self._bin_op_numeric_reverse('//', other)

[docs]    def __mod__(self, other):
        """Positionally compute the left modulo the right.

        Examples
        --------

        >>> hl.eval(a1 % 2)
        [0, 1, 0, 1, 0, 1]

        Parameters
        ----------
        other : :class:`.NumericExpression` or :class:`.ArrayNumericExpression`
            Value or array to use as the modulus.

        Returns
        -------
        :class:`.ArrayNumericExpression`
        """
        return self._bin_op_numeric('%', other)

    def __rmod__(self, other):
        """Reflected modulo (``other % self``); see :meth:`__mod__`."""
        return self._bin_op_numeric_reverse('%', other)

[docs]    def __pow__(self, other):
        """Positionally raise to the power of an array or a scalar.

        Examples
        --------

        >>> hl.eval(a1 ** 2)
        [0.0, 1.0, 4.0, 9.0, 16.0, 25.0]

        >>> hl.eval(a1 ** a2)
        [0.0, 1.0, 2.0, 0.3333333333333333, 4.0, 0.2]

        Parameters
        ----------
        other : :class:`.NumericExpression` or :class:`.ArrayNumericExpression`
            Exponent, or array of exponents.

        Returns
        -------
        :class:`.ArrayNumericExpression`
        """
        # The third argument ignores its input and always yields tfloat64,
        # matching the float results in the examples above.
        return self._bin_op_numeric('**', other, lambda _: tfloat64)

    def __rpow__(self, other):
        """Reflected power (``other ** self``); see :meth:`__pow__`."""
        return self._bin_op_numeric_reverse('**', other, lambda _: tfloat64)


[docs]class SetExpression(CollectionExpression):
    """Expression of type :class:`.tset`.

    >>> s1 = hl.literal({1, 2, 3})
    >>> s2 = hl.literal({1, 3, 5})

    See Also
    --------
    :class:`.CollectionExpression`
    """

    @typecheck_method(x=ir.IR, type=HailType, indices=Indices, aggregations=LinkedList)
    def __init__(self, x, type, indices=Indices(), aggregations=LinkedList(Aggregation)):
        """Create a set expression and cache a coercer for its element type.

        Parameters
        ----------
        x : ir.IR
            IR node backing the expression.
        type : HailType
            Type of the expression; must be a :class:`.tset`.
        indices : Indices
            Forwarded to the base-class initializer.
        aggregations : LinkedList
            Forwarded to the base-class initializer.
        """
        super().__init__(x, type, indices, aggregations)
        assert isinstance(type, tset)
        # `_ec` is used by `add` and `remove` to validate and coerce items.
        element_coercer = coercer_from_dtype(type.element_type)
        self._ec = element_coercer

[docs]    @typecheck_method(item=expr_any)
    def add(self, item):
        """Return a new set that also contains `item`.

        Examples
        --------

        >>> hl.eval(s1.add(10))  # doctest: +SKIP_OUTPUT_CHECK
        {1, 2, 3, 10}

        Parameters
        ----------
        item : :class:`.Expression`
            Value to add.

        Returns
        -------
        :class:`.SetExpression`
            Set with `item` added.

        Raises
        ------
        TypeError
            If the type of `item` cannot be coerced to the set's element type.
        """
        coercer = self._ec
        if coercer.can_coerce(item.dtype):
            return self._method("add", self.dtype, coercer.coerce(item))

        raise TypeError(
            "'SetExpression.add' expects 'item' to be the same type as its elements\n"
            "    set element type:   '{}'\n"
            "    type of arg 'item': '{}'".format(self.dtype.element_type, item.dtype)
        )

[docs]    @typecheck_method(item=expr_any)
    def remove(self, item):
        """Returns a new set excluding `item`.

        Examples
        --------

        >>> hl.eval(s1.remove(1))
        {2, 3}

        Parameters
        ----------
        item : :class:`.Expression`
            Value to remove.

        Returns
        -------
        :class:`.SetExpression`
            Set with `item` removed.
        """
        if not self._ec.can_coerce(item.dtype):
            raise TypeError(
                "'SetExpression.remove' expects 'item' to be the same type as its elements\n"
                "    set element type:   '{}'\n"
                "    type of arg 'item': '{}'".format(self.dtype.element_type, item.dtype)
            )
        return self._method("remove", self._type, self._ec.coerce(item))

[docs]    @typecheck_method(item=expr_any)
    def contains(self, item):
        """Returns ``True`` if `item` is in the set.

        Examples
        --------

        >>> hl.eval(s1.contains(1))
        True

        >>> hl.eval(s1.contains(10))
        False

        Parameters
        ----------
        item : :class:`.Expression`
            Value for inclusion test.

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if `item` is in the set.
        """
        if not self._ec.can_coerce(item.dtype):
            raise TypeError(
                "'SetExpression.contains' expects 'item' to be the same type as its elements\n"
                "    set element type:   '{}'\n"
                "    type of arg 'item': '{}'".format(self.dtype.element_type, item.dtype)
            )
        return self._method("contains", tbool, self._ec.coerce(item))

[docs]    @typecheck_method(s=expr_set())
    def difference(self, s):
        """Return the set of elements in the set that are not present in set `s`.

        Examples
        --------

        >>> hl.eval(s1.difference(s2))
        {2}

        >>> hl.eval(s2.difference(s1))
        {5}

        Parameters
        ----------
        s : :class:`.SetExpression`
            Set expression of the same type.

        Returns
        -------
        :class:`.SetExpression`
            Set of elements not in `s`.
        """
        if not s._type.element_type == self._type.element_type:
            raise TypeError(
                "'SetExpression.difference' expects 's' to be the same type\n"
                "    set type:    '{}'\n"
                "    type of 's': '{}'".format(self._type, s._type)
            )
        return self._method("difference", self._type, s)

[docs]    @typecheck_method(s=expr_set())
    def intersection(self, s):
        """Return the intersection of the set and set `s`.

        Examples
        --------

        >>> hl.eval(s1.intersection(s2))
        {1, 3}

        Parameters
        ----------
        s : :class:`.SetExpression`
            Set expression of the same type.

        Returns
        -------
        :class:`.SetExpression`
            Set of elements present in `s`.
        """
        if not s._type.element_type == self._type.element_type:
            raise TypeError(
                "'SetExpression.intersection' expects 's' to be the same type\n"
                "    set type:    '{}'\n"
                "    type of 's': '{}'".format(self._type, s._type)
            )
        return self._method("intersection", self._type, s)

[docs]    @typecheck_method(s=expr_set())
    def is_subset(self, s):
        """Returns ``True`` if every element is contained in set `s`.

        Examples
        --------

        >>> hl.eval(s1.is_subset(s2))
        False

        >>> hl.eval(s1.remove(2).is_subset(s2))
        True

        Parameters
        ----------
        s : :class:`.SetExpression`
            Set expression of the same type.

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if every element is contained in set `s`.
        """
        if not s._type.element_type == self._type.element_type:
            raise TypeError(
                "'SetExpression.is_subset' expects 's' to be the same type\n"
                "    set type:    '{}'\n"
                "    type of 's': '{}'".format(self._type, s._type)
            )
        return self._method("isSubset", tbool, s)

[docs]    @typecheck_method(s=expr_set())
    def union(self, s):
        """Return the union of the set and set `s`.

        Examples
        --------

        >>> hl.eval(s1.union(s2))
        {1, 2, 3, 5}

        Parameters
        ----------
        s : :class:`.SetExpression`
            Set expression of the same type.

        Returns
        -------
        :class:`.SetExpression`
            Set of elements present in either set.
        """
        if not s._type.element_type == self._type.element_type:
            raise TypeError(
                "'SetExpression.union' expects 's' to be the same type\n"
                "    set type:    '{}'\n"
                "    type of 's': '{}'".format(self._type, s._type)
            )
        return self._method("union", self._type, s)

[docs]    def __le__(self, other):
        """Test whether every element in the set is in `other`.

        Parameters
        ----------
        other : :class:`.SetExpression`
            Set expression of the same type.

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if every element in the set is in `other`. ``False`` otherwise.
        """
        other = to_expr(other)
        if isinstance(other.dtype, hl.tset):
            return self.is_subset(other)

        return NotImplemented

[docs]    def __lt__(self, other):
        """Test whether the set is a proper subset of `other` (``set <= other and set != other``).

        Parameters
        ----------
        other : :class:`.SetExpression`
            Set expression of the same type.

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if the set is a proper subset of `other`. ``False`` otherwise.
        """
        other = to_expr(other)
        if isinstance(other.dtype, hl.tset):
            return self.is_subset(other) & (self != other)

        return NotImplemented

[docs]    def __ge__(self, other):
        """Test whether every element in `other` is in the set.

        Parameters
        ----------
        other : :class:`.SetExpression`
            Set expression of the same type.

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if every element in `other` is in the set. ``False`` otherwise.
        """
        other = to_expr(other)
        if isinstance(other.dtype, hl.tset):
            return other.is_subset(self)

        return NotImplemented

[docs]    def __gt__(self, other):
        """Test whether `other` is a proper subset of the set (``other <= set and other != set``).

        Parameters
        ----------
        other : :class:`.SetExpression`
            Set expression of the same type.

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if `other` is a proper subset of the set. ``False`` otherwise.
        """
        other = to_expr(other)
        if isinstance(other.dtype, hl.tset):
            return other.is_subset(self) & (self != other)

        return NotImplemented

[docs]    def __sub__(self, other):
        """Return the difference of the set and `other`.

        Examples
        --------

        >>> hl.eval(s1 - s2)
        {2}

        >>> hl.eval(s2 - s1)
        {5}

        Parameters
        ----------
        other : :class:`.SetExpression`
            Set expression of the same type.

        Returns
        -------
        :class:`.SetExpression`
            Set of elements in the set that are not in `other`.
        """
        other = to_expr(other)
        if isinstance(other.dtype, hl.tset):
            return self.difference(other)

        return NotImplemented

    def __rsub__(self, other):
        other = to_expr(other)
        if isinstance(other.dtype, hl.tset):
            return other.difference(self)

        return NotImplemented

[docs]    def __and__(self, other):
        """Return the intersection of the set and `other`.

        Examples
        --------

        >>> hl.eval(s1 & s2)
        {1, 3}

        Parameters
        ----------
        other : :class:`.SetExpression`
            Set expression of the same type.

        Returns
        -------
        :class:`.SetExpression`
            Set of elements present in both the set and `other`.
        """
        other = to_expr(other)
        if isinstance(other.dtype, hl.tset):
            return self.intersection(other)

        return NotImplemented

    def __rand__(self, other):
        return self.__and__(other)

[docs]    def __or__(self, other):
        """Return the union of the set and `other`.

        Examples
        --------

        >>> hl.eval(s1 | s2)
        {1, 2, 3, 5}

        Parameters
        ----------
        other : :class:`.SetExpression`
            Set expression of the same type.

        Returns
        -------
        :class:`.SetExpression`
            Set of elements present in either set.
        """
        other = to_expr(other)
        if isinstance(other.dtype, hl.tset):
            return self.union(other)

        return NotImplemented

    def __ror__(self, other):
        return self.__or__(other)

[docs]    def __xor__(self, other):
        """Return the symmetric difference of the set and `other`.

        Examples
        --------

        >>> hl.eval(s1 ^ s2)
        {2, 5}

        Parameters
        ----------
        other : :class:`.SetExpression`
            Set expression of the same type.

        Returns
        -------
        :class:`.SetExpression`
            Set of elements present in either the set or `other` but not both.
        """
        other = to_expr(other)
        if isinstance(other.dtype, hl.tset):
            return self.union(other).difference(self.intersection(other))

        return NotImplemented

    def __rxor__(self, other):
        return self.__xor__(other)


class SetStructExpression(SetExpression):
    """Expression of type :class:`.tset` that eventually contains structs.

    >>> people = hl.literal({hl.struct(name='Alice', age=57),
    ...                      hl.struct(name='Bob', age=12),
    ...                      hl.struct(name='Charlie', age=34)})

    Nested collections that contain structs are also instances of
    :class:`.SetStructExpression`:

    >>> people = hl.set([hl.set([hl.struct(name='Alice', age=57), hl.struct(name='Bob', age=12)]),
    ...                  hl.set([hl.struct(name='Charlie', age=34)])])

    See Also
    --------
    :class:`.SetExpression`, :class:`.CollectionExpression`, :class:`.SetStructExpression`
    """

    def __getattr__(self, item):
        """Attribute-style field access; falls back to :meth:`__getitem__`.

        A :class:`KeyError` from the field lookup is re-raised as an
        :class:`AttributeError` chained to the original error.
        """
        try:
            return SetStructExpression.__getitem__(self, item)
        except KeyError as err:
            # Walk through the nested element types until the struct type is reached.
            struct_type = self.dtype.element_type
            while not isinstance(struct_type, tstruct):
                struct_type = struct_type.element_type
            # `_fields` is set before building the message below; presumably
            # `get_nice_attr_error` reads it -- confirm against that helper.
            self._fields = struct_type
            raise AttributeError(get_nice_attr_error(self, item)) from err

    @typecheck_method(item=oneof(str))
    def __getitem__(self, item):
        """Get a field from each struct in this set.

        Examples
        --------

        >>> x = hl.set({hl.struct(a='foo', b=3), hl.struct(a='bar', b=4)})
        >>> hl.eval(x.a) == {'foo', 'bar'}
        True

        >>> a = hl.set({hl.struct(b={hl.struct(inner=1),
        ...                          hl.struct(inner=2)}),
        ...             hl.struct(b={hl.struct(inner=3)})})
        >>> hl.eval(hl.flatten(a.b).inner) == {1, 2, 3}
        True
        >>> hl.eval(hl.flatten(a.b.inner)) == {1, 2, 3}
        True

        Parameters
        ----------
        item : :class:`str`
            Field name

        Returns
        -------
        :class:`.SetExpression`
            A set formed by getting the given field for each struct in
            this set
        """

        def pick_field(element):
            return element[item]

        return self.map(pick_field)


class DictExpression(Expression):
    """Expression of type :class:`.tdict`.

    >>> d = hl.literal({'Alice': 43, 'Bob': 33, 'Charles': 44})
    """

    @typecheck_method(x=ir.IR, type=HailType, indices=Indices, aggregations=LinkedList)
    def __init__(self, x, type, indices=Indices(), aggregations=LinkedList(Aggregation)):
        super(DictExpression, self).__init__(x, type, indices, aggregations)
        assert isinstance(type, tdict)
        # Coercers used to validate and convert key / value arguments.
        self._kc = coercer_from_dtype(type.key_type)
        self._vc = coercer_from_dtype(type.value_type)

    def _coerce_key(self, caller, item):
        """Coerce `item` to the dictionary's key type.

        Shared by :meth:`__getitem__`, :meth:`contains` and :meth:`get`.

        Parameters
        ----------
        caller : :class:`str`
            Text that starts the error message and identifies the caller.
        item : :class:`.Expression`
            Key expression to coerce.

        Returns
        -------
        :class:`.Expression`
            `item` coerced by the key coercer.

        Raises
        ------
        TypeError
            If the key coercer cannot coerce `item`'s type.
        """
        if not self._kc.can_coerce(item.dtype):
            raise TypeError(
                f"{caller} encountered an invalid key type\n"
                f"    dict key type:  '{self.dtype.key_type}'\n"
                f"    type of 'item': '{item.dtype}'"
            )
        return self._kc.coerce(item)

    @typecheck_method(item=expr_any)
    def __getitem__(self, item):
        """Get the value associated with key `item`.

        Examples
        --------

        >>> hl.eval(d['Alice'])
        43

        Notes
        -----
        Raises an error if `item` is not a key of the dictionary. Use
        :meth:`.DictExpression.get` to return missing instead of an error.

        Parameters
        ----------
        item : :class:`.Expression`
            Key expression.

        Returns
        -------
        :class:`.Expression`
            Value associated with key `item`.
        """
        return self._index(self.dtype.value_type, self._coerce_key("dict", item))

    @typecheck_method(item=expr_any)
    def contains(self, item):
        """Returns whether a given key is present in the dictionary.

        Examples
        --------

        >>> hl.eval(d.contains('Alice'))
        True

        >>> hl.eval(d.contains('Anne'))
        False

        Parameters
        ----------
        item : :class:`.Expression`
            Key to test for inclusion.

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if `item` is a key of the dictionary, ``False`` otherwise.
        """
        return self._method("contains", tbool, self._coerce_key("'DictExpression.contains'", item))

    @typecheck_method(item=expr_any, default=nullable(expr_any))
    def get(self, item, default=None):
        """Returns the value associated with key `item` or a default value if that key is not present.

        Examples
        --------

        >>> hl.eval(d.get('Alice'))
        43

        >>> hl.eval(d.get('Anne'))
        None

        >>> hl.eval(d.get('Anne', 0))
        0

        Parameters
        ----------
        item : :class:`.Expression`
            Key.
        default : :class:`.Expression`
            Default value. Must be same type as dictionary values.

        Returns
        -------
        :class:`.Expression`
            The value associated with `item`, or `default`.
        """
        key = self._coerce_key("'DictExpression.get'", item)
        value_type = self.dtype.value_type

        if default is None:
            return self._method("get", value_type, key)

        if not self._vc.can_coerce(default.dtype):
            raise TypeError(
                "'get' expects parameter 'default' to have the same type "
                "as the dictionary value type, expected '{}' and found '{}'".format(value_type, default.dtype)
            )
        return self._method("get", value_type, key, self._vc.coerce(default))

    def key_set(self):
        """Returns the set of keys in the dictionary.

        Examples
        --------

        >>> hl.eval(d.key_set())  # doctest: +SKIP_OUTPUT_CHECK
        {'Alice', 'Bob', 'Charles'}

        Returns
        -------
        :class:`.SetExpression`
            Set of all keys.
        """
        return self._method("keySet", tset(self.dtype.key_type))

    def keys(self):
        """Returns an array with all keys in the dictionary.

        Examples
        --------

        >>> hl.eval(d.keys())  # doctest: +SKIP_OUTPUT_CHECK
        ['Bob', 'Charles', 'Alice']

        Returns
        -------
        :class:`.ArrayExpression`
            Array of all keys.
        """
        return self._method("keys", tarray(self.dtype.key_type))

    @typecheck_method(f=func_spec(1, expr_any))
    def map_values(self, f):
        """Transform values of the dictionary according to a function.

        Examples
        --------

        >>> hl.eval(d.map_values(lambda x: x * 10))  # doctest: +SKIP_OUTPUT_CHECK
        {'Alice': 430, 'Bob': 330, 'Charles': 440}

        Parameters
        ----------
        f : function ( (arg) -> :class:`.Expression`)
            Function to apply to each value.

        Returns
        -------
        :class:`.DictExpression`
            Dictionary with transformed values.
        """
        # Each array element is indexed as (key, value); keep the key and map the value.
        return hl.dict(hl.array(self).map(lambda elt: hl.tuple([elt[0], f(elt[1])])))

    def size(self):
        """Returns the size of the dictionary.

        Examples
        --------

        >>> hl.eval(d.size())
        3

        Returns
        -------
        :class:`.Expression` of type :py:data:`.tint32`
            Size of the dictionary.
        """
        return apply_expr(lambda x: ir.ArrayLen(ir.CastToArray(x)), tint32, self)

    def values(self):
        """Returns an array with all values in the dictionary.

        Examples
        --------

        >>> hl.eval(d.values())  # doctest: +SKIP_OUTPUT_CHECK
        [33, 44, 43]

        Returns
        -------
        :class:`.ArrayExpression`
            All values in the dictionary.
        """
        return self._method("values", tarray(self.dtype.value_type))

    def items(self):
        """Returns an array of tuples containing key/value pairs in the dictionary.

        Examples
        --------

        >>> hl.eval(d.items())  # doctest: +SKIP_OUTPUT_CHECK
        [('Alice', 43), ('Bob', 33), ('Charles', 44)]

        Returns
        -------
        :class:`.ArrayExpression`
            All key/value pairs in the dictionary.
        """
        return hl.array(self)

    def _extra_summary_fields(self, agg_result):
        # Positions 0-2 match the first three aggregations built in `_summary_aggs`.
        return {
            'Min Size': agg_result[0],
            'Max Size': agg_result[1],
            'Mean Size': agg_result[2],
        }

    def _nested_summary(self, agg_result, top):
        # Placeholder variables of the key and value types, used only to
        # summarise the nested results stored at position 3 of `agg_result`.
        k = construct_variable(Env.get_uid(), self.dtype.key_type, indices=self._indices)
        v = construct_variable(Env.get_uid(), self.dtype.value_type, indices=self._indices)
        return {
            '[<keys>]': k._summarize(agg_result[3][0]),
            '[<values>]': v._summarize(agg_result[3][1]),
        }

    def _summary_aggs(self):
        # Tuple layout: (min size, max size, mean size, (key aggs, value aggs)).
        length = hl.len(self)
        return hl.tuple((
            hl.agg.min(length),
            hl.agg.max(length),
            hl.agg.mean(length),
            hl.agg.explode(
                lambda elt: hl.tuple((elt[0]._all_summary_aggs(), elt[1]._all_summary_aggs())), hl.array(self)
            ),
        ))


[docs]class StructExpression(Mapping[Union[str, int], Expression], Expression):
    """Expression of type :class:`.tstruct`.

    >>> struct = hl.struct(a=5, b='Foo')

    Struct fields are accessible as attributes and keys. It is therefore
    possible to access field `a` of struct `s` with dot syntax:

    >>> hl.eval(struct.a)
    5

    However, it is recommended to use square brackets to select fields:

    >>> hl.eval(struct['a'])
    5

    The latter syntax is safer, because fields that share their name with
    an existing attribute of :class:`.StructExpression` (`keys`, `values`,
    `annotate`, `drop`, etc.) will only be accessible using the
    :meth:`.StructExpression.__getitem__` syntax. This is also the only way
    to access fields that are not valid Python identifiers, like fields with
    spaces or symbols.
    """

    @classmethod
    def _from_fields(cls, fields: 'Dict[str, Expression]'):
        t = tstruct(**{k: v.dtype for k, v in fields.items()})
        x = ir.MakeStruct([(n, expr._ir) for (n, expr) in fields.items()])
        indices, aggregations = unify_all(*fields.values())
        s = StructExpression.__new__(cls)
        super(StructExpression, s).__init__(x, t, indices, aggregations)
        s._warn_on_shadowed_name = set()
        s._fields = {}
        for k, v in fields.items():
            s._set_field(k, v)
        return s

    @typecheck_method(x=ir.IR, type=HailType, indices=Indices, aggregations=LinkedList)
    def __init__(self, x, type, indices=Indices(), aggregations=LinkedList(Aggregation)):
        super(StructExpression, self).__init__(x, type, indices, aggregations)
        self._fields: Dict[str, Expression] = {}
        self._warn_on_shadowed_name = set()

        for i, (f, t) in enumerate(self.dtype.items()):
            if isinstance(self._ir, ir.MakeStruct):
                expr = construct_expr(self._ir.fields[i][1], t, self._indices, self._aggregations)
            elif isinstance(self._ir, ir.SelectedTopLevelReference):
                expr = construct_expr(
                    ir.ProjectedTopLevelReference(self._ir.ref.name, f, t), t, self._indices, self._aggregations
                )
            elif isinstance(self._ir, ir.SelectFields):
                expr = construct_expr(ir.GetField(self._ir.old, f), t, self._indices, self._aggregations)
            else:
                expr = construct_expr(ir.GetField(self._ir, f), t, self._indices, self._aggregations)
            self._set_field(f, expr)

    def _set_field(self, key, value):
        if key not in self._fields:
            # Avoid using hasattr on self. Each new field added will fall through to __getattr__,
            # which has to build a nice error message.
            if key in self.__dict__ or hasattr(super(), key):
                self._warn_on_shadowed_name.add(key)
            else:
                self.__dict__[key] = value
            self._fields[key] = value

    def _get_field(self, item):
        if item in self._fields:
            return self._fields[item]
        else:
            raise KeyError(get_nice_field_error(self, item))

    def __getattribute__(self, item):
        if item in super().__getattribute__('_warn_on_shadowed_name'):
            warning(
                f'Field {item} is shadowed by another method or attribute. '
                f'Use ["{item}"] syntax to access the field.'
            )
            self._warn_on_shadowed_name.remove(item)
        return super().__getattribute__(item)

    def __getattr__(self, item):
        raise AttributeError(get_nice_attr_error(self, item))

    def __len__(self):
        return len(self._fields)

    def __bool__(self):
        return bool(len(self))

[docs]    @typecheck_method(item=oneof(str, int, slice))
    def __getitem__(self, item):
        """Access a field of the struct by name or index.

        Examples
        --------

        >>> hl.eval(struct['a'])
        5

        >>> hl.eval(struct[1])
        'Foo'

        Parameters
        ----------
        item : :class:`str`
            Field name.

        Returns
        -------
        :class:`.Expression`
            Struct field.
        """
        if isinstance(item, str):
            return self._get_field(item)
        if isinstance(item, int):
            return self._get_field(self.dtype.fields[item])
        else:
            assert item.start is None or isinstance(item.start, int)
            assert item.stop is None or isinstance(item.stop, int)
            assert item.step is None or isinstance(item.step, int)
            return self.select(*self.dtype.fields[item.start : item.stop : item.step])

    def __iter__(self):
        return iter(self._fields)

    def __contains__(self, item):
        return item in self._fields

    def __hash__(self):
        return object.__hash__(self)

[docs]    def __eq__(self, other):
        """Check each field for equality.

        Parameters
        ----------
        other : :class:`.Expression`
            An expression of the same type.
        """
        return Expression.__eq__(self, other)

[docs]    def __ne__(self, other):
        return Expression.__ne__(self, other)

    def __nonzero__(self):
        return Expression.__nonzero__(self)

    def _annotate_ordered(self, insertions_dict, field_order):
        def get_type(field):
            e = insertions_dict.get(field)
            if e is None:
                e = self._fields[field]
            return e.dtype

        new_type = hl.tstruct(**{f: get_type(f) for f in field_order})
        indices, aggregations = unify_all(self, *insertions_dict.values())
        return construct_expr(
            ir.InsertFields.construct_with_deduplication(
                self._ir, [(field, expr._ir) for field, expr in insertions_dict.items()], field_order
            ),
            new_type,
            indices,
            aggregations,
        )

[docs]    @typecheck_method(named_exprs=expr_any)
    def annotate(self, **named_exprs):
        """Add new fields or recompute existing fields.

        Examples
        --------

        >>> hl.eval(struct.annotate(a=10, c=2*2*2))
        Struct(a=10, b='Foo', c=8)

        Notes
        -----
        If an expression in `named_exprs` shares a name with a field of the
        struct, then that field will be replaced but keep its position in
        the struct. New fields will be appended to the end of the struct.

        Parameters
        ----------
        named_exprs : keyword args of :class:`.Expression`
            Fields to add.

        Returns
        -------
        :class:`.StructExpression`
            Struct with new or updated fields.
        """
        new_types = {n: t for (n, t) in self.dtype.items()}

        for f, e in named_exprs.items():
            new_types[f] = e.dtype

        result_type = tstruct(**new_types)
        indices, aggregations = unify_all(self, *[x for (f, x) in named_exprs.items()])

        return construct_expr(
            ir.InsertFields.construct_with_deduplication(
                self._ir, list(map(lambda x: (x[0], x[1]._ir), named_exprs.items())), None
            ),
            result_type,
            indices,
            aggregations,
        )

[docs]    @typecheck_method(fields=str, named_exprs=expr_any)
    def select(self, *fields, **named_exprs):
        """Select existing fields and compute new ones.

        Examples
        --------

        >>> hl.eval(struct.select('a', c=['bar', 'baz']))
        Struct(a=5, c=['bar', 'baz'])

        Notes
        -----
        The `fields` argument is a list of field names to keep. These fields
        will appear in the resulting struct in the order they appear in
        `fields`.

        The `named_exprs` arguments are new field expressions.

        Parameters
        ----------
        fields : varargs of :class:`str`
            Field names to keep.
        named_exprs : keyword args of :class:`.Expression`
            New field expressions.

        Returns
        -------
        :class:`.StructExpression`
            Struct containing specified existing fields and computed fields.
        """

        name_set = set()
        for a in fields:
            if a not in self._fields:
                raise KeyError(
                    "Struct has no field '{}'\n" "    Fields: [ {} ]".format(
                        a, ', '.join("'{}'".format(x) for x in self._fields)
                    )
                )
            if a in name_set:
                raise ExpressionException(
                    "'StructExpression.select' does not support duplicate identifiers.\n"
                    "    Identifier '{}' appeared more than once".format(a)
                )
            name_set.add(a)
        for n, _ in named_exprs.items():
            if n in name_set:
                raise ExpressionException("Cannot select and assign '{}' in the same 'select' call".format(n))

        selected_type = tstruct(**{f: self.dtype[f] for f in fields})
        selected_expr = construct_expr(
            ir.SelectFields(self._ir, fields), selected_type, self._indices, self._aggregations
        )

        if len(named_exprs) == 0:
            return selected_expr
        else:
            return selected_expr.annotate(**named_exprs)

[docs]    @typecheck_method(mapping=dictof(str, str))
    def rename(self, mapping):
        """Rename fields of the struct.

        Examples
        --------
        >>> s = hl.struct(x='hello', y='goodbye', a='dogs')
        >>> s.rename({'x' : 'y', 'y' : 'z'}).show()
        +----------+----------+-----------+
        | <expr>.a | <expr>.y | <expr>.z  |
        +----------+----------+-----------+
        | str      | str      | str       |
        +----------+----------+-----------+
        | "dogs"   | "hello"  | "goodbye" |
        +----------+----------+-----------+

        Parameters
        ----------
        mapping : :obj:`dict` of :class:`str`, :obj:`str`
            Mapping from old field names to new field names.

        Notes
        -----
        Any field that does not appear as a key in `mapping` will not be
        renamed.

        Returns
        -------
        :class:`.StructExpression`
            Struct with renamed fields.
        """
        old_fields = set(self._fields)
        new_to_old = dict()
        for old, new in mapping.items():
            if old not in old_fields:
                raise ValueError(f'{old} is not a field of this struct: {self.dtype}.')
            if new in old_fields and new not in mapping:
                raise ValueError(f'{old} is renamed to {new} but {new} is already in the ' f'struct: {self.dtype}.')
            if new in new_to_old:
                raise ValueError(f'{new} is the new name of both {old} and {new_to_old[new]}.')
            new_to_old[new] = old

        return self.select(
            *list(set(self._fields) - set(mapping)), **{new: self._get_field(old) for old, new in mapping.items()}
        )

[docs]    @typecheck_method(fields=str)
    def drop(self, *fields):
        """Drop fields from the struct.

        Examples
        --------

        >>> hl.eval(struct.drop('b'))
        Struct(a=5)

        Parameters
        ----------
        fields: varargs of :class:`str`
            Fields to drop.

        Returns
        -------
        :class:`.StructExpression`
            Struct without certain fields.
        """
        to_drop = set()
        for a in fields:
            if a not in self._fields:
                raise KeyError(
                    "Struct has no field '{}'\n" "    Fields: [ {} ]".format(
                        a, ', '.join("'{}'".format(x) for x in self._fields)
                    )
                )
            if a in to_drop:
                warning("Found duplicate field name in 'StructExpression.drop': '{}'".format(a))
            to_drop.add(a)

        to_keep = [f for f in self.dtype.keys() if f not in to_drop]
        return self.select(*to_keep)

[docs]    def flatten(self):
        """Recursively eliminate struct fields by adding their fields to this struct."""

        def _flatten(prefix, s):
            if isinstance(s, StructExpression):
                return [(k, v) for (f, e) in s.items() for (k, v) in _flatten(prefix + '.' + f, e)]
            else:
                return [(prefix, s)]

        return self.select(**{k: v for (f, e) in self.items() for (k, v) in _flatten(f, e)})

    def _nested_summary(self, agg_result, top):
        sep = '' if top else '.'
        return {f'{sep}{k}': f._summarize(agg_result[k]) for k, f in self.items()}

    def _summary_aggs(self):
        # Collect each field's summary aggregations into a struct keyed by
        # field name.
        per_field = {}
        for name, field in self.items():
            per_field[name] = field._all_summary_aggs()
        return hl.struct(**per_field)

[docs]    def get(self, k, default=None):
        """See :meth:`StructExpression.__getitem__`"""
        return super().get(k, default)

[docs]    def items(self):
        """A list of pairs of field name and expression for said field."""
        return super().items()

[docs]    def keys(self):
        """The list of field names."""
        return super().keys()

[docs]    def values(self):
        """A list of expressions for each field."""
        return super().values()


class TupleExpression(Expression, Sequence):
    """Expression of type :class:`.ttuple`.

    >>> tup = hl.literal(("a", 1, [1, 2, 3]))
    """

    @typecheck_method(item=oneof(int, slice))
    def __getitem__(self, item):
        """Index into the tuple.

        Examples
        --------

        >>> hl.eval(tup[1])
        1

        Parameters
        ----------
        item : :obj:`int` or :obj:`slice`
            Element index, or a slice whose start, stop and step are each
            either omitted or a Python :obj:`int`.

        Returns
        -------
        :class:`.Expression`
            The element at `item`. For a slice, a tuple expression of the
            selected elements, which is missing if this tuple is missing.

        Raises
        ------
        IndexError
            If `item` is an integer outside ``[0, len(self))``. Negative
            integer indices are not supported.
        """
        if isinstance(item, slice):
            assert item.start is None or isinstance(item.start, int)
            assert item.stop is None or isinstance(item.stop, int)
            assert item.step is None or isinstance(item.step, int)
            # Slicing a Python range resolves the slice into concrete,
            # non-negative element positions.
            return hl.or_missing(
                hl.is_defined(self), hl.tuple([self[i] for i in range(len(self))[item.start : item.stop : item.step]])
            )
        if not 0 <= item < len(self):
            raise IndexError("Out of bounds index, {}. Tuple length is {}.".format(item, len(self)))
        return construct_expr(ir.GetTupleElement(self._ir, item), self.dtype.types[item], self._indices)

    def __len__(self):
        """Returns the length of the tuple.

        The length is taken from the tuple's type, so it is a plain Python
        integer rather than an expression.

        Examples
        --------

        >>> len(tup)
        3

        Returns
        -------
        :obj:`int`
        """
        return len(self.dtype.types)

    def __bool__(self):
        # Truthiness follows the Python convention for sequences: non-empty.
        return bool(len(self))

    def __iter__(self):
        # Yield one element expression per position of the tuple type.
        for i in range(len(self)):
            yield self[i]

    def _nested_summary(self, agg_result, top):
        # Elements are labelled by their position, e.g. '[0]'.
        return {f'[{i}]': self[i]._summarize(agg_result[i]) for i in range(len(self))}

    def _summary_aggs(self):
        # One summary aggregation per element, gathered into a tuple.
        return hl.tuple([self[i]._all_summary_aggs() for i in range(len(self))])

    def count(self, value):
        """Do not use this method.

        This only exists for compatibility with the Python Sequence abstract
        base class.
        """
        # Forward the argument; calling the mixin without it always raised
        # TypeError.
        return super().count(value)

    def index(self, value, start=0, stop=None):
        """Do not use this method.

        This only exists for compatibility with the Python Sequence abstract
        base class.
        """
        # Forward the arguments; calling the mixin without them always raised
        # TypeError.
        return super().index(value, start, stop)


class NumericExpression(Expression):
    """Expression of numeric type.

    >>> x = hl.literal(3)

    >>> y = hl.literal(4.5)
    """

    @typecheck_method(other=expr_numeric)
    def __lt__(self, other):
        """Less-than comparison.

        Examples
        --------

        >>> hl.eval(x < 5)
        True

        Parameters
        ----------
        other : :class:`.NumericExpression`
            Right side for comparison.

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if the left side is smaller than the right side.
        """
        return self._bin_op_numeric("<", other, lambda _: tbool)

    @typecheck_method(other=expr_numeric)
    def __le__(self, other):
        """Less-than-or-equals comparison.

        Examples
        --------

        >>> hl.eval(x <= 3)
        True

        Parameters
        ----------
        other : :class:`.NumericExpression`
            Right side for comparison.

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if the left side is smaller than or equal to the right side.
        """
        return self._bin_op_numeric("<=", other, lambda _: tbool)

    @typecheck_method(other=expr_numeric)
    def __gt__(self, other):
        """Greater-than comparison.

        Examples
        --------

        >>> hl.eval(y > 4)
        True

        Parameters
        ----------
        other : :class:`.NumericExpression`
            Right side for comparison.

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if the left side is greater than the right side.
        """
        return self._bin_op_numeric(">", other, lambda _: tbool)

    @typecheck_method(other=expr_numeric)
    def __ge__(self, other):
        """Greater-than-or-equals comparison.

        Examples
        --------

        >>> hl.eval(y >= 4)
        True

        Parameters
        ----------
        other : :class:`.NumericExpression`
            Right side for comparison.

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if the left side is greater than or equal to the right side.
        """
        return self._bin_op_numeric(">=", other, lambda _: tbool)

    def __pos__(self):
        # Unary plus is the identity.
        return self

    def __neg__(self):
        """Negate the number (multiply by -1).

        Examples
        --------

        >>> hl.eval(-x)
        -3

        Returns
        -------
        :class:`.NumericExpression`
            Negated number.
        """

        return expr_numeric.coerce(self)._unary_op("-")

    def __add__(self, other):
        """Add two numbers.

        Examples
        --------

        >>> hl.eval(x + 2)
        5

        >>> hl.eval(x + y)
        7.5

        Parameters
        ----------
        other : :class:`.NumericExpression`
            Number to add.

        Returns
        -------
        :class:`.NumericExpression`
            Sum of the two numbers.
        """
        return self._bin_op_numeric("+", other)

    def __radd__(self, other):
        # Reflected form: computes ``other + self``.
        return self._bin_op_numeric_reverse("+", other)

    def __sub__(self, other):
        """Subtract the right number from the left.

        Examples
        --------

        >>> hl.eval(x - 2)
        1

        >>> hl.eval(x - y)
        -1.5

        Parameters
        ----------
        other : :class:`.NumericExpression`
            Number to subtract.

        Returns
        -------
        :class:`.NumericExpression`
            Difference of the two numbers.
        """
        return self._bin_op_numeric("-", other)

    def __rsub__(self, other):
        # Reflected form: computes ``other - self``.
        return self._bin_op_numeric_reverse("-", other)

    def __mul__(self, other):
        """Multiply two numbers.

        Examples
        --------

        >>> hl.eval(x * 2)
        6

        >>> hl.eval(x * y)
        13.5

        Parameters
        ----------
        other : :class:`.NumericExpression`
            Number to multiply.

        Returns
        -------
        :class:`.NumericExpression`
            Product of the two numbers.
        """
        return self._bin_op_numeric("*", other)

    def __rmul__(self, other):
        # Reflected form: computes ``other * self``.
        return self._bin_op_numeric_reverse("*", other)

    def __truediv__(self, other):
        """Divide two numbers.

        Examples
        --------

        >>> hl.eval(x / 2)
        1.5

        >>> hl.eval(y / 0.1)
        45.0

        Parameters
        ----------
        other : :class:`.NumericExpression`
            Divisor.

        Returns
        -------
        :class:`.NumericExpression`
            The left number divided by the right.
        """
        return self._bin_op_numeric("/", other, self._div_ret_type_f)

    def __rtruediv__(self, other):
        # Reflected form: computes ``other / self``.
        return self._bin_op_numeric_reverse("/", other, self._div_ret_type_f)

    def __floordiv__(self, other):
        """Divide two numbers with floor division.

        Examples
        --------

        >>> hl.eval(x // 2)
        1

        >>> hl.eval(y // 2)
        2.0

        Parameters
        ----------
        other : :class:`.NumericExpression`
            Divisor.

        Returns
        -------
        :class:`.NumericExpression`
            The floor of the left number divided by the right.
        """
        return self._bin_op_numeric('//', other)

    def __rfloordiv__(self, other):
        # Reflected form: computes ``other // self``.
        return self._bin_op_numeric_reverse('//', other)

    def __mod__(self, other):
        """Compute the left modulo the right number.

        Examples
        --------

        >>> hl.eval(32 % x)
        2

        >>> hl.eval(7 % y)
        2.5

        Parameters
        ----------
        other : :class:`.NumericExpression`
            Divisor.

        Returns
        -------
        :class:`.NumericExpression`
            Remainder after dividing the left by the right.
        """
        return self._bin_op_numeric('%', other)

    def __rmod__(self, other):
        # Reflected form: computes ``other % self``.
        return self._bin_op_numeric_reverse('%', other)

    def __pow__(self, power, modulo=None):
        """Raise the left to the right power.

        Examples
        --------

        >>> hl.eval(x ** 2)
        9.0

        >>> hl.eval(x ** -2)
        0.1111111111111111

        >>> hl.eval(y ** 1.5)
        9.545941546018392

        Parameters
        ----------
        power : :class:`.NumericExpression`
        modulo
            Unsupported argument. Must be left as ``None``.

        Returns
        -------
        :class:`.Expression` of type :py:data:`.tfloat64`
            Result of raising left to the right power.

        Raises
        ------
        NotImplementedError
            If `modulo` is given; three-argument ``pow`` is not supported.
        """
        # Fail loudly rather than silently returning the un-reduced power.
        if modulo is not None:
            raise NotImplementedError("The 'modulo' argument of 'pow' is not supported for Hail expressions")
        return self._bin_op_numeric('**', power, lambda _: tfloat64)

    def __rpow__(self, other):
        # Reflected form: computes ``other ** self``.
        return self._bin_op_numeric_reverse('**', other, lambda _: tfloat64)


class BooleanExpression(NumericExpression):
    """Expression of type :py:data:`.tbool`.

    >>> t = hl.literal(True)
    >>> f = hl.literal(False)
    >>> na = hl.missing(hl.tbool)

    >>> hl.eval(t)
    True

    >>> hl.eval(f)
    False

    >>> hl.eval(na)
    None

    """

    @typecheck_method(other=expr_bool)
    def __rand__(self, other):
        # Reflected ``&`` reuses the forward implementation with the same
        # argument.
        conjunction = self.__and__(other)
        return conjunction

    @typecheck_method(other=expr_bool)
    def __ror__(self, other):
        # Reflected ``|`` reuses the forward implementation with the same
        # argument.
        disjunction = self.__or__(other)
        return disjunction

    @typecheck_method(other=expr_bool)
    def __and__(self, other):
        """Logical conjunction of the left and right arguments.

        Examples
        --------

        >>> hl.eval(t & f)
        False

        >>> hl.eval(t & na)
        None

        >>> hl.eval(f & na)
        False

        Because ``&`` and ``|`` bind more tightly than comparison operators
        such as ``==``, ``<``, or ``>``, comparisons usually need to be
        wrapped in parentheses:

        >>> x = hl.literal(5)

        >>> hl.eval((x < 10) & (x > 2))
        True

        Parameters
        ----------
        other : :class:`.BooleanExpression`
            Right-side operand.

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if both left and right are ``True``.
        """
        conjunction = self._method("land", tbool, other)
        return conjunction

    @typecheck_method(other=expr_bool)
    def __or__(self, other):
        """Logical disjunction of the left and right arguments.

        Examples
        --------

        >>> hl.eval(t | f)
        True

        >>> hl.eval(t | na)
        True

        >>> hl.eval(f | na)
        None

        Because ``&`` and ``|`` bind more tightly than comparison operators
        such as ``==``, ``<``, or ``>``, comparisons usually need to be
        wrapped in parentheses:

        >>> x = hl.literal(5)

        >>> hl.eval((x < 10) | (x > 20))
        True

        Parameters
        ----------
        other : :class:`.BooleanExpression`
            Right-side operand.

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if either left or right is ``True``.
        """
        disjunction = self._method("lor", tbool, other)
        return disjunction

    def __invert__(self):
        """Logical negation of this boolean.

        Examples
        --------

        >>> hl.eval(~t)
        False

        >>> hl.eval(~f)
        True

        >>> hl.eval(~na)
        None

        Returns
        -------
        :class:`.BooleanExpression`
            Boolean negation.
        """
        negated = self._unary_op("!")
        return negated

    def _extra_summary_fields(self, agg_result):
        # The aggregation result is reported as-is under a single label.
        return dict(Counts=agg_result)

    def _summary_aggs(self):
        # Count occurrences of each value, restricted to defined records.
        is_present = hl.is_defined(self)
        value_counts = hl.agg.counter(self)
        return hl.agg.filter(is_present, value_counts)


class Float64Expression(NumericExpression):
    """Expression of type :py:data:`.tfloat64`."""

    def _extra_summary_fields(self, agg_result):
        # Map display labels to the matching fields of the stats result.
        labelled_keys = (('Minimum', 'min'), ('Maximum', 'max'), ('Mean', 'mean'), ('Std Dev', 'stdev'))
        return {label: agg_result[key] for label, key in labelled_keys}

    def _summary_aggs(self):
        # Summaries are built from the standard stats aggregator.
        stats = hl.agg.stats(self)
        return stats


class Float32Expression(NumericExpression):
    """Expression of type :py:data:`.tfloat32`."""

    def _extra_summary_fields(self, agg_result):
        # Map display labels to the matching fields of the stats result.
        labelled_keys = (('Minimum', 'min'), ('Maximum', 'max'), ('Mean', 'mean'), ('Std Dev', 'stdev'))
        return {label: agg_result[key] for label, key in labelled_keys}

    def _summary_aggs(self):
        # Summaries are built from the standard stats aggregator.
        stats = hl.agg.stats(self)
        return stats


class Int32Expression(NumericExpression):
    """Expression of type :py:data:`.tint32`."""

    def _extra_summary_fields(self, agg_result):
        # Minimum and maximum are converted to Python ints for display; mean
        # and standard deviation are reported as returned by the aggregator.
        return {
            'Minimum': int(agg_result['min']),
            'Maximum': int(agg_result['max']),
            'Mean': agg_result['mean'],
            'Std Dev': agg_result['stdev'],
        }

    def _summary_aggs(self):
        return hl.agg.stats(self)

    def __mul__(self, other):
        """Multiply by a number, or hand off to a string operand.

        Parameters
        ----------
        other
            Value convertible to an expression. If it is of type
            :py:data:`.tstr`, the product is computed by the string
            operand's own ``*``; otherwise numeric multiplication is used.

        Returns
        -------
        :class:`.Expression`
        """
        other = to_expr(other)
        if other.dtype == tstr:
            return other * self
        return NumericExpression.__mul__(self, other)

    def __rmul__(self, other):
        # Reflected form: computes ``other * self``.
        other = to_expr(other)
        if other.dtype == tstr:
            return other * self
        # Use the reflected numeric operator so operand order is preserved,
        # consistent with the other reflected operators.
        return NumericExpression.__rmul__(self, other)


class Int64Expression(NumericExpression):
    """Expression of type :py:data:`.tint64`."""

    def _extra_summary_fields(self, agg_result):
        # Minimum and maximum are converted to Python ints for display.
        lowest = int(agg_result['min'])
        highest = int(agg_result['max'])
        return {
            'Minimum': lowest,
            'Maximum': highest,
            'Mean': agg_result['mean'],
            'Std Dev': agg_result['stdev'],
        }

    def _summary_aggs(self):
        # Summaries are built from the standard stats aggregator.
        stats = hl.agg.stats(self)
        return stats


class StringExpression(Expression):
    """Expression of type :py:data:`.tstr`.

    >>> s = hl.literal('The quick brown fox')
    """

    def __getitem__(self, item):
        """Slice or index into the string.

        Examples
        --------

        >>> hl.eval(s[:15])
        'The quick brown'

        >>> hl.eval(s[0])
        'T'

        Parameters
        ----------
        item : slice or :class:`.Expression` of type :py:data:`.tint32`
            Slice or character index.

        Returns
        -------
        :class:`.StringExpression`
            Substring or character at index `item`.

        Raises
        ------
        TypeError
            If `item` is not a slice and is not of type ``int32``.
        NotImplementedError
            If `item` is a slice with a step.
        """
        if isinstance(item, slice):
            return self._slice(item.start, item.stop, item.step)
        else:
            item = to_expr(item)
            if not item.dtype == tint32:
                raise TypeError(
                    "String expects index to be type 'slice' or expression of type 'int32', "
                    "found expression of type '{}'".format(item.dtype)
                )
            return self._index(tstr, item)

    def __contains__(self, item):
        # Python's ``in`` would force the result to a bool, so it is
        # rejected in favour of the explicit `contains` method.
        raise TypeError(
            "Cannot use `in` operator on hail `StringExpression`s. Use the `contains` method instead."
            "`my_string.contains('cat')` instead of `'cat' in my_string`"
        )

    def __add__(self, other):
        """Concatenate strings.

        Examples
        --------

        >>> hl.eval(s + ' jumped over the lazy dog')
        'The quick brown fox jumped over the lazy dog'

        Parameters
        ----------
        other : :class:`.StringExpression`
            String to concatenate.

        Returns
        -------
        :class:`.StringExpression`
            Concatenated string.

        Raises
        ------
        NotImplementedError
            If `other` is not of type :py:data:`.tstr`.
        """
        other = to_expr(other)
        if not other.dtype == tstr:
            raise NotImplementedError("'{}' + '{}'".format(self.dtype, other.dtype))
        return self._bin_op("concat", other, self.dtype)

    def __radd__(self, other):
        # Reflected form: computes ``other + self``.
        other = to_expr(other)
        if not other.dtype == tstr:
            raise NotImplementedError("'{}' + '{}'".format(other.dtype, self.dtype))
        return self._bin_op_reverse("concat", other, self.dtype)

    def __mul__(self, other):
        # Repeat the string `other` times; only int32 counts are accepted.
        other = to_expr(other)
        if not other.dtype == tint32:
            # Report the operator actually used ('*', not '+').
            raise NotImplementedError("'{}' * '{}'".format(self.dtype, other.dtype))
        return hl.delimit(hl.range(other).map(lambda x: self), delimiter='')

    def __rmul__(self, other):
        # Reflected form: let the (converted) left operand handle ``*``.
        other = to_expr(other)
        return other * self

    def _slice(self, start=None, stop=None, step=None):
        # Pick the slice method that matches which bounds were supplied.
        if step is not None:
            raise NotImplementedError('Variable slice step size is not currently supported for strings')

        if start is not None:
            start = to_expr(start)
            if stop is not None:
                stop = to_expr(stop)
                return self._method('slice', tstr, start, stop)
            else:
                return self._method('sliceRight', tstr, start)
        elif stop is not None:
            stop = to_expr(stop)
            return self._method('sliceLeft', tstr, stop)
        else:
            # No bounds at all: the slice is the whole string.
            return self

    def length(self):
        """Returns the length of the string.

        Examples
        --------

        >>> hl.eval(s.length())
        19

        Returns
        -------
        :class:`.Expression` of type :py:data:`.tint32`
            Length of the string.
        """
        return apply_expr(lambda x: ir.Apply("length", tint32, x), tint32, self)

    @typecheck_method(pattern1=expr_str, pattern2=expr_str)
    def replace(self, pattern1, pattern2):
        """Replace substrings matching `pattern1` with `pattern2` using regex.

        Examples
        --------

        Replace spaces with underscores in a Hail string:

        >>> hl.eval(hl.str("The quick  brown fox").replace(' ', '_'))
        'The_quick__brown_fox'

        Remove the leading zero in contigs in variant strings in a table:

        >>> t = hl.import_table('data/leading-zero-variants.txt')
        >>> t.show()
        +----------------+
        | variant        |
        +----------------+
        | str            |
        +----------------+
        | "01:1000:A:T"  |
        | "01:10001:T:G" |
        | "02:99:A:C"    |
        | "02:893:G:C"   |
        | "22:100:A:T"   |
        | "X:10:C:A"     |
        +----------------+
        <BLANKLINE>
        >>> t = t.annotate(variant = t.variant.replace("^0([0-9])", "$1"))
        >>> t.show()
        +---------------+
        | variant       |
        +---------------+
        | str           |
        +---------------+
        | "1:1000:A:T"  |
        | "1:10001:T:G" |
        | "2:99:A:C"    |
        | "2:893:G:C"   |
        | "22:100:A:T"  |
        | "X:10:C:A"    |
        +---------------+
        <BLANKLINE>

        Notes
        -----

        The regex expressions used should follow `Java regex syntax
        <https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html>`_. In
        the Java regular expression syntax, a dollar sign, ``$1``, refers to the
        first group, not the canonical ``\\1``.

        Parameters
        ----------
        pattern1 : str or :class:`.StringExpression`
            Regex to search for.
        pattern2 : str or :class:`.StringExpression`
            Replacement for each match.

        Returns
        -------
        :class:`.StringExpression`
            String with the replacements applied.
        """
        return self._method("replace", tstr, pattern1, pattern2)

    @typecheck_method(delim=expr_str, n=nullable(expr_int32))
    def split(self, delim, n=None):
        """Returns an array of strings generated by splitting the string at `delim`.

        Examples
        --------

        >>> hl.eval(s.split('\\s+'))
        ['The', 'quick', 'brown', 'fox']

        >>> hl.eval(s.split('\\s+', 2))
        ['The', 'quick brown fox']

        Notes
        -----
        The delimiter is a regex using the
        `Java regex syntax <https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html>`_
        delimiter. To split on special characters, escape them with double
        backslash (``\\\\``).

        Parameters
        ----------
        delim : str or :class:`.StringExpression`
            Delimiter regex.
        n : :class:`.Expression` of type :py:data:`.tint32`, optional
            Maximum number of splits.

        Returns
        -------
        :class:`.ArrayExpression`
            Array of split strings.
        """
        if n is None:
            return self._method("split", tarray(tstr), delim)
        else:
            return self._method("split", tarray(tstr), delim, n)

    @typecheck_method(delim=expr_str, missing=expr_array(), quote=nullable(expr_str), regex=bool)
    def _split_line(self, delim, missing, quote, regex):
        # The backend method name encodes both whether the delimiter is a
        # regex or a plain character and whether quoting is in effect.
        regex_str = 'Regex' if regex else 'Char'
        if quote is None:
            return self._method(f"split{regex_str}", tarray(tstr), delim, missing)
        else:
            return self._method(f"splitQuoted{regex_str}", tarray(tstr), delim, missing, quote)

    def lower(self):
        """Returns a copy of the string, but with upper case letters converted
        to lower case.

        Examples
        --------

        >>> hl.eval(s.lower())
        'the quick brown fox'

        Returns
        -------
        :class:`.StringExpression`
        """
        return self._method("lower", tstr)

    def upper(self):
        """Returns a copy of the string, but with lower case letters converted
        to upper case.

        Examples
        --------

        >>> hl.eval(s.upper())
        'THE QUICK BROWN FOX'

        Returns
        -------
        :class:`.StringExpression`
        """
        return self._method("upper", tstr)

    def strip(self):
        r"""Returns a copy of the string with whitespace removed from the start
        and end.

        Examples
        --------

        >>> s2 = hl.str('  once upon a time\n')
        >>> hl.eval(s2.strip())
        'once upon a time'

        Returns
        -------
        :class:`.StringExpression`
        """
        return self._method("strip", tstr)

    @typecheck_method(substr=expr_str)
    def contains(self, substr):
        """Returns whether `substr` is contained in the string.

        Examples
        --------

        >>> hl.eval(s.contains('fox'))
        True

        >>> hl.eval(s.contains('dog'))
        False

        Note
        ----
        This method is case-sensitive.

        Parameters
        ----------
        substr : :class:`.StringExpression`

        Returns
        -------
        :class:`.BooleanExpression`
        """
        return self._method("contains", tbool, substr)

    @typecheck_method(substr=expr_str)
    def startswith(self, substr):
        """Returns whether `substr` is a prefix of the string.

        Examples
        --------

        >>> hl.eval(s.startswith('The'))
        True

        >>> hl.eval(s.startswith('the'))
        False

        Note
        ----
        This method is case-sensitive.

        Parameters
        ----------
        substr : :class:`.StringExpression`

        Returns
        -------
        :class:`.BooleanExpression`
        """
        return self._method('startswith', tbool, substr)

    @typecheck_method(substr=expr_str)
    def endswith(self, substr):
        """Returns whether `substr` is a suffix of the string.

        Examples
        --------

        >>> hl.eval(s.endswith('fox'))
        True

        Note
        ----
        This method is case-sensitive.

        Parameters
        ----------
        substr : :class:`.StringExpression`

        Returns
        -------
        :class:`.BooleanExpression`
        """
        return self._method('endswith', tbool, substr)

    @typecheck_method(regex=str)
    def first_match_in(self, regex):
        """Returns an array containing the capture groups of the first match of
        `regex` in the given character sequence.

        Examples
        --------

        >>> hl.eval(s.first_match_in("The quick (\\w+) fox"))
        ['brown']

        >>> hl.eval(s.first_match_in("The (\\w+) (\\w+) (\\w+)"))
        ['quick', 'brown', 'fox']

        >>> hl.eval(s.first_match_in("(\\w+) (\\w+)"))
        ['The', 'quick']

        Parameters
        ----------
        regex : :class:`str`
            Pattern to search for. Must be a Python string.

        Returns
        -------
        :class:`.ArrayExpression` with element type :py:data:`.tstr`
        """
        return self._method('firstMatchIn', tarray(tstr), regex)

    @typecheck_method(mapping=expr_dict(expr_str, expr_str))
    def translate(self, mapping):
        """Translates characters of the string using `mapping`.

        Examples
        --------
        >>> string = hl.literal('ATTTGCA')
        >>> hl.eval(string.translate({'T': 'U'}))
        'AUUUGCA'

        Parameters
        ----------
        mapping : :class:`.DictExpression`
            Dictionary of character-character translations.

        Returns
        -------
        :class:`.StringExpression`

        See Also
        --------
        :meth:`.replace`
        """
        return self._method('translate', tstr, mapping)

    @typecheck_method(regex=expr_str, full_match=nullable(bool))
    def matches(self, regex, full_match=False):
        """Returns ``True`` if the string contains any match for the given regex if
        `full_match` is false. Returns ``True`` if the whole string matches the
        given regex if `full_match` is true.

        Examples
        --------

        The `regex` parameter does not need to match the entire string if `full_match` is ``False``:

        >>> string = hl.literal('NA12878')
        >>> hl.eval(string.matches('12'))
        True

        The `regex` parameter needs to match the entire string if `full_match` is ``True``:

        >>> string = hl.literal('NA12878')
        >>> hl.eval(string.matches('12', True))
        False

        Regex motifs can be used to match sequences of characters:

        >>> string = hl.literal('NA12878')
        >>> hl.eval(string.matches(r'NA\\d+'))
        True

        >>> string = hl.literal('3412878')
        >>> hl.eval(string.matches('^[0-9]*$'))
        True

        Notes
        -----
        The `regex` argument is a
        `regular expression <https://en.wikipedia.org/wiki/Regular_expression>`__,
        and uses
        `Java regex syntax <https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html>`__.

        Parameters
        ----------
        regex: :class:`.StringExpression`
            Pattern to match.
        full_match: :obj:`bool`
            If ``True``, the function considers whether the whole string matches the regex.
            If ``False`` or ``None``, the function considers whether the string has a partial
            match for that regex.

        Returns
        -------
        :class:`.BooleanExpression`
            If `full_match` is ``False``, ``True`` if the string contains any match for the regex, otherwise ``False``.
            If `full_match` is ``True``, ``True`` if the whole string matches the regex, otherwise ``False``.
        """
        # Only an explicit True selects full matching; None (permitted by the
        # typecheck) behaves like the default False.
        method = "regexFullMatch" if full_match else "regexMatch"
        return regex._method(method, tbool, self)

    def reverse(self):
        """Returns the reversed value.

        Examples
        --------

        >>> string = hl.literal('ATGCC')
        >>> hl.eval(string.reverse())
        'CCGTA'

        Returns
        -------
        :class:`.StringExpression`
        """
        return self._method('reverse', tstr)

    @typecheck_method(collection=expr_oneof(expr_array(), expr_set()))
    def join(self, collection):
        """Returns a string which is the concatenation of the strings in `collection`
        separated by the string providing this method. Raises :class:`TypeError` if
        the element type of `collection` is not :data:`.tstr`.

        Examples
        --------

        >>> a = ['Bob', 'Charlie', 'Alice', 'Bob', 'Bob']

        >>> hl.eval(hl.str(',').join(a))
        'Bob,Charlie,Alice,Bob,Bob'

        Parameters
        ----------
        collection : :class:`.ArrayExpression` or :class:`.SetExpression`
            Collection.

        Returns
        -------
        :class:`.StringExpression`
            Joined string expression.
        """
        if collection.dtype.element_type != tstr:
            raise TypeError(f"Expected str collection, {collection.dtype.element_type} found")

        return hl.delimit(collection, self)

    def _extra_summary_fields(self, agg_result):
        # Positions match the tuple built in `_summary_aggs`.
        return {
            'Min Size': agg_result[0],
            'Max Size': agg_result[1],
            'Mean Size': agg_result[2],
            'Sample Values': agg_result[3],
        }

    def _summary_aggs(self):
        # Length statistics plus up to five defined sample values.
        length = hl.len(self)
        return hl.tuple((
            hl.agg.min(length),
            hl.agg.max(length),
            hl.agg.mean(length),
            hl.agg.filter(hl.is_defined(self), hl.agg.take(self, 5)),
        ))


class CallExpression(Expression):
    """Expression of type :py:data:`.tcall`.

    >>> call = hl.call(0, 1, phased=False)
    """

    def __getitem__(self, item):
        """Look up the allele stored at a given position of the call.

        Examples
        --------

        Index with a single integer:

        >>> hl.eval(call[0])
        0

        >>> hl.eval(call[1])
        1

        Parameters
        ----------
        item : int or :class:`.Expression` of type :py:data:`.tint32`
            Allele index.

        Returns
        -------
        :class:`.Expression` of type :py:data:`.tint32`

        Raises
        ------
        NotImplementedError
            If `item` is a :obj:`slice`.
        TypeError
            If `item` converts to an expression whose type is not ``int32``.
        """
        # Slices are rejected outright; anything else must convert to int32.
        if isinstance(item, slice):
            raise NotImplementedError("CallExpression does not support indexing with a slice.")

        index = to_expr(item)
        if index.dtype == tint32:
            return self._index(tint32, index)

        raise TypeError(
            "Call expects allele index to be an expression of type 'int32', "
            "found expression of type '{}'".format(index.dtype)
        )

    def unphase(self):
        """Produce the unphased counterpart of this call.

        Returns
        -------
        :class:`.CallExpression`
        """
        return self._method("unphase", tcall)

    def contains_allele(self, allele):
        """Test whether at least one called allele has the given index.

        >>> c = hl.call(0, 3)

        >>> hl.eval(c.contains_allele(3))
        True

        >>> hl.eval(c.contains_allele(1))
        False

        Parameters
        ----------
        allele
            Allele index to look for.

        Returns
        -------
        :class:`.BooleanExpression`
        """
        return self._method("containsAllele", tbool, allele)

    @property
    def ploidy(self):
        """The number of alleles in this call.

        Examples
        --------

        >>> hl.eval(call.ploidy)
        2

        Notes
        -----
        Currently only ploidy 1 and 2 are supported.

        Returns
        -------
        :class:`.Expression` of type :py:data:`.tint32`
        """
        return self._method("ploidy", tint32)

    @property
    def phased(self):
        """Whether this call is phased.

        Examples
        --------

        >>> hl.eval(call.phased)
        False

        Returns
        -------
        :class:`.BooleanExpression`
        """
        return self._method("isPhased", tbool)

    def is_haploid(self):
        """Test whether the ploidy of this call is 1.

        Examples
        --------

        >>> hl.eval(call.is_haploid())
        False

        Returns
        -------
        :class:`.BooleanExpression`
        """
        haploid_ploidy = 1
        return self.ploidy == haploid_ploidy

    def is_diploid(self):
        """Test whether the ploidy of this call is 2.

        Examples
        --------

        >>> hl.eval(call.is_diploid())
        True

        Returns
        -------
        :class:`.BooleanExpression`
        """
        diploid_ploidy = 2
        return self.ploidy == diploid_ploidy

    def is_non_ref(self):
        """Test whether the call carries one or more non-reference alleles.

        Examples
        --------

        >>> hl.eval(call.is_non_ref())
        True

        Notes
        -----
        For diploid biallelic calls, ``0/0`` gives ``False`` while ``0/1``
        and ``1/1`` give ``True``.

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if at least one allele is non-reference, ``False`` otherwise.
        """
        return self._method("isNonRef", tbool)

    def is_het(self):
        """Test whether the call is made up of two different alleles.

        Examples
        --------

        >>> hl.eval(call.is_het())
        True

        Notes
        -----
        For diploid biallelic calls, ``0/1`` gives ``True`` while ``0/0``
        and ``1/1`` give ``False``.

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if the two alleles are different, ``False`` if they are the same.
        """
        return self._method("isHet", tbool)

    def is_het_non_ref(self):
        """Test whether the call has two different alleles, neither being reference.

        Examples
        --------

        >>> hl.eval(call.is_het_non_ref())
        False

        Notes
        -----
        A biallelic variant can never have a het-non-ref call. Calls such as
        ``1/2`` and ``2/4`` are het-non-ref.

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if the call includes two different alternate alleles, ``False`` otherwise.
        """
        return self._method("isHetNonRef", tbool)

    def is_het_ref(self):
        """Test whether the call has two different alleles, one being reference.

        Examples
        --------

        >>> hl.eval(call.is_het_ref())
        True

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if the call includes one reference and one alternate allele, ``False`` otherwise.
        """
        return self._method("isHetRef", tbool)

    def is_hom_ref(self):
        """Test whether the call is made up of two reference alleles.

        Examples
        --------

        >>> hl.eval(call.is_hom_ref())
        False

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if the call includes two reference alleles, ``False`` otherwise.
        """
        return self._method("isHomRef", tbool)

    def is_hom_var(self):
        """Test whether the call is made up of two identical alternate alleles.

        Examples
        --------

        >>> hl.eval(call.is_hom_var())
        False

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if the call includes two identical alternate alleles, ``False`` otherwise.
        """
        return self._method("isHomVar", tbool)

    def n_alt_alleles(self):
        """Count the non-reference alleles in this call.

        Examples
        --------

        >>> hl.eval(call.n_alt_alleles())
        1

        Notes
        -----
        For diploid biallelic calls the result equals the alternate allele
        dosage: ``0/0`` gives ``0``, ``0/1`` gives ``1``, and ``1/1`` gives
        ``2``.

        Returns
        -------
        :class:`.Expression` of type :py:data:`.tint32`
            The number of non-reference alleles.
        """
        return self._method("nNonRefAlleles", tint32)

    @typecheck_method(alleles=oneof(expr_array(expr_str), expr_int32))
    def one_hot_alleles(self, alleles):
        """Returns an array containing the summed one-hot encoding of the
        alleles.

        Examples
        --------
        Compute one-hot encoding when number of total alleles is 2.

        >>> hl.eval(call.one_hot_alleles(2))
        [1, 1]

        **DEPRECATED**: Compute one-hot encoding based on length of list of alleles.

        >>> hl.eval(call.one_hot_alleles(['A', 'T']))
        [1, 1]

        The result is the position-wise sum of the one-hot vectors of the
        called alleles. For a biallelic variant, a reference allele encodes
        as ``[1, 0]`` and an alternate allele as ``[0, 1]``, so diploid calls
        give ``[2, 0]`` for homozygous reference, ``[1, 1]`` for heterozygous,
        and ``[0, 2]`` for homozygous alternate.

        Parameters
        ----------
        alleles: :class:`.Int32Expression` or :class:`.ArrayExpression` of :obj:`.tstr`.
            Number of total alleles, including the reference, or array of variant alleles.

        Returns
        -------
        :class:`.ArrayExpression` of :obj:`.tint32`
            An array of summed one-hot encodings of allele indices.
        """
        # An allele array only contributes its length; an int32 is used directly.
        n_alleles = alleles if isinstance(alleles, Int32Expression) else hl.len(alleles)
        return self._method("oneHotAlleles", tarray(tint32), n_alleles)

    def unphased_diploid_gt_index(self):
        """Return the genotype index for unphased, diploid calls.

        Examples
        --------

        >>> hl.eval(call.unphased_diploid_gt_index())
        1

        Returns
        -------
        :class:`.Expression` of type :py:data:`.tint32`
        """
        return self._method("unphasedDiploidGtIndex", tint32)

    def _extra_summary_fields(self, agg_result):
        """Attach display labels to the positional results of :meth:`_summary_aggs`."""
        labels = ('Homozygous Reference', 'Heterozygous', 'Homozygous Variant', 'Ploidy', 'Phased')
        return {label: agg_result[position] for position, label in enumerate(labels)}

    def _summary_aggs(self):
        """Build the summary aggregations for a call field.

        The tuple holds, in order: counts of hom-ref, het and hom-var calls,
        then counters of ploidy and of phasedness restricted to defined calls.
        """
        n_hom_ref = hl.agg.count_where(self.is_hom_ref())
        n_het = hl.agg.count_where(self.is_het())
        n_hom_var = hl.agg.count_where(self.is_hom_var())
        ploidy_counts = hl.agg.filter(hl.is_defined(self), hl.agg.counter(self.ploidy))
        phased_counts = hl.agg.filter(hl.is_defined(self), hl.agg.counter(self.phased))
        return hl.tuple((n_hom_ref, n_het, n_hom_var, ploidy_counts, phased_counts))


class LocusExpression(Expression):
    """Expression of type :class:`.tlocus`.

    >>> locus = hl.locus('1', 1034245)
    """

    @typecheck_method(other=expr_int32)
    def __add__(self, other):
        """Offset this locus by `other` via the ``add_on_contig`` function.

        Parameters
        ----------
        other : :class:`.Expression` of type :py:data:`.tint32`
            Offset to apply.

        Returns
        -------
        :class:`.LocusExpression`
            Expression with the same locus type as this one.
        """
        return self._method("add_on_contig", self.dtype, other)

    @typecheck_method(other=expr_int32)
    def __sub__(self, other):
        """Offset this locus by the negation of `other`.

        Implemented as ``self + (-other)``.

        Parameters
        ----------
        other : :class:`.Expression` of type :py:data:`.tint32`
            Offset to subtract.

        Returns
        -------
        :class:`.LocusExpression`
        """
        return self + (-other)

    @property
    def contig(self):
        """Returns the chromosome.

        Examples
        --------

        >>> hl.eval(locus.contig)
        '1'

        Returns
        -------
        :class:`.StringExpression`
            The chromosome for this locus.
        """
        return self._method("contig", tstr)

    @property
    def contig_idx(self):
        """Returns the index of the chromosome.

        Examples
        --------

        >>> hl.eval(locus.contig_idx)
        0

        Returns
        -------
        :class:`.Expression` of type :py:data:`.tint32`
            The index of the chromosome for this locus.
        """
        return self._method("contig_idx", tint32)

    @property
    def position(self):
        """Returns the position along the chromosome.

        Examples
        --------

        >>> hl.eval(locus.position)
        1034245

        Returns
        -------
        :class:`.Expression` of type :py:data:`.tint32`
            This locus's position along its chromosome.
        """
        return self._method("position", tint32)

    def global_position(self):
        """Returns a zero-indexed absolute position along the reference genome.

        The global position is computed as :py:attr:`~position` - 1 plus the sum
        of the lengths of all the contigs that precede this locus's :py:attr:`~contig`
        in the reference genome's ordering of contigs.

        See also :func:`.locus_from_global_position`.

        Examples
        --------
        A locus with position 1 along chromosome 1 will have a global position of 0 along
        the reference genome GRCh37.

        >>> hl.eval(hl.locus('1', 1).global_position())
        0

        A locus with position 1 along chromosome 2 will have a global position of (1-1) + 249250621,
        where 249250621 is the length of chromosome 1 on GRCh37.

        >>> hl.eval(hl.locus('2', 1).global_position())
        249250621

        A different reference genome than the default results in a different global position.

        >>> hl.eval(hl.locus('chr2', 1, 'GRCh38').global_position())
        248956422

        Returns
        -------
        :class:`.Expression` of type :py:data:`.tint64`
            Global base position of locus along the reference genome.
        """
        return self._method('locusToGlobalPos', tint64)

    def in_x_nonpar(self):
        """Returns ``True`` if the locus is in a non-pseudoautosomal
        region of chromosome X.

        Examples
        --------

        >>> hl.eval(locus.in_x_nonpar())
        False

        Returns
        -------
        :class:`.BooleanExpression`
        """
        return self._method("inXNonPar", tbool)

    def in_x_par(self):
        """Returns ``True`` if the locus is in a pseudoautosomal region
        of chromosome X.

        Examples
        --------

        >>> hl.eval(locus.in_x_par())
        False

        Returns
        -------
        :class:`.BooleanExpression`
        """
        return self._method("inXPar", tbool)

    def in_y_nonpar(self):
        """Returns ``True`` if the locus is in a non-pseudoautosomal
        region of chromosome Y.

        Examples
        --------

        >>> hl.eval(locus.in_y_nonpar())
        False

        Note
        ----
        Many variant callers only generate variants on chromosome X for the
        pseudoautosomal region. In this case, all loci mapped to chromosome
        Y are non-pseudoautosomal.

        Returns
        -------
        :class:`.BooleanExpression`
        """
        return self._method("inYNonPar", tbool)

    def in_y_par(self):
        """Returns ``True`` if the locus is in a pseudoautosomal region
        of chromosome Y.

        Examples
        --------

        >>> hl.eval(locus.in_y_par())
        False

        Note
        ----
        Many variant callers only generate variants on chromosome X for the
        pseudoautosomal region. In this case, all loci mapped to chromosome
        Y are non-pseudoautosomal.

        Returns
        -------
        :class:`.BooleanExpression`
        """
        return self._method("inYPar", tbool)

    def in_autosome(self):
        """Returns ``True`` if the locus is on an autosome.

        Notes
        -----
        All contigs are considered autosomal except those
        designated as X, Y, or MT by :class:`.ReferenceGenome`.

        Examples
        --------

        >>> hl.eval(locus.in_autosome())
        True

        Returns
        -------
        :class:`.BooleanExpression`
        """
        return self._method("isAutosomal", tbool)

    def in_autosome_or_par(self):
        """Returns ``True`` if the locus is on an autosome or
        a pseudoautosomal region of chromosome X or Y.

        Examples
        --------

        >>> hl.eval(locus.in_autosome_or_par())
        True

        Returns
        -------
        :class:`.BooleanExpression`
        """
        return self._method("isAutosomalOrPseudoAutosomal", tbool)

    def in_mito(self):
        """Returns ``True`` if the locus is on mitochondrial DNA.

        Examples
        --------

        >>> hl.eval(locus.in_mito())
        False

        Returns
        -------
        :class:`.BooleanExpression`
        """
        return self._method("isMitochondrial", tbool)

    @typecheck_method(before=expr_int32, after=expr_int32)
    def sequence_context(self, before=0, after=0):
        """Return the reference genome sequence at the locus.

        Examples
        --------

        Get the reference allele at a locus:

        >>> hl.eval(locus.sequence_context()) # doctest: +SKIP
        "G"

        Get the reference sequence at a locus including the previous 5 bases:

        >>> hl.eval(locus.sequence_context(before=5)) # doctest: +SKIP
        "ACTCGG"

        Notes
        -----
        This function requires that this locus' reference genome has an attached
        reference sequence. Use :meth:`.ReferenceGenome.add_sequence` to
        load and attach a reference sequence to a reference genome.

        Parameters
        ----------
        before : :class:`.Expression` of type :py:data:`.tint32`, optional
            Number of bases to include before the locus. Truncates at
            contig boundary.
        after : :class:`.Expression` of type :py:data:`.tint32`, optional
            Number of bases to include after the locus. Truncates at
            contig boundary.

        Returns
        -------
        :class:`.StringExpression`

        Raises
        ------
        TypeError
            If the locus's reference genome has no sequence loaded.
        """

        rg = self.dtype.reference_genome
        # Fail early, before building the expression, when no sequence is attached.
        if not rg.has_sequence():
            raise TypeError(
                "Reference genome '{}' does not have a sequence loaded. Use 'add_sequence' to load the sequence from a FASTA file.".format(
                    rg.name
                )
            )
        return hl.get_sequence(self.contig, self.position, before, after, rg)

    @typecheck_method(before=expr_int32, after=expr_int32)
    def window(self, before, after):
        """Returns an interval of a specified number of bases around the locus.

        Examples
        --------
        Create a window of two megabases centered at a locus:

        >>> locus = hl.locus('16', 29_500_000)
        >>> window = locus.window(1_000_000, 1_000_000)
        >>> hl.eval(window)
        Interval(start=Locus(contig=16, position=28500000, reference_genome=GRCh37), end=Locus(contig=16, position=30500000, reference_genome=GRCh37), includes_start=True, includes_end=True)

        Notes
        -----
        The returned interval is inclusive of both the `start` and `end`
        endpoints.

        Parameters
        ----------
        before : :class:`.Expression` of type :py:data:`.tint32`
            Number of bases to include before the locus. Truncates at 1.
        after : :class:`.Expression` of type :py:data:`.tint32`
            Number of bases to include after the locus. Truncates at
            contig length.

        Returns
        -------
        :class:`.IntervalExpression`
        """
        # Clamp the start to position 1 and the end to the contig's length.
        start_pos = hl.max(1, self.position - before)
        rg = self.dtype.reference_genome
        end_pos = hl.min(hl.contig_length(self.contig, rg), self.position + after)
        return hl.interval(
            start=hl.locus(self.contig, start_pos, reference_genome=rg),
            end=hl.locus(self.contig, end_pos, reference_genome=rg),
            includes_start=True,
            includes_end=True,
        )

    def _extra_summary_fields(self, agg_result):
        """Label the result of :meth:`_summary_aggs` as ``'Contig Counts'``."""
        return {'Contig Counts': agg_result}

    def _summary_aggs(self):
        """Build a counter of contigs over the rows where this locus is defined."""
        return hl.agg.filter(hl.is_defined(self), hl.agg.counter(self.contig))


class IntervalExpression(Expression):
    """Expression of type :class:`.tinterval`.

    >>> interval = hl.interval(3, 11)
    >>> locus_interval = hl.parse_locus_interval("1:53242-90543")
    """

    @typecheck_method(value=expr_any)
    def contains(self, value):
        """Check whether the interval contains a value.

        Examples
        --------

        >>> hl.eval(interval.contains(3))
        True

        >>> hl.eval(interval.contains(11))
        False

        Parameters
        ----------
        value :
            Object with type matching the interval point type.

        Returns
        -------
        :class:`.BooleanExpression`
            ``True`` if `value` is contained in the interval, ``False`` otherwise.

        Raises
        ------
        TypeError
            If the type of `value` differs from the interval's point type.
        """
        point_type = self.dtype.point_type
        value_type = value.dtype
        if point_type != value_type:
            raise TypeError("expected '{}', found: '{}'".format(point_type, value_type))
        return self._method("contains", tbool, value)

    @typecheck_method(interval=expr_interval(expr_any))
    def overlaps(self, interval):
        """True if the supplied interval has any value in common with this one.

        Examples
        --------

        >>> hl.eval(interval.overlaps(hl.interval(5, 9)))
        True

        >>> hl.eval(interval.overlaps(hl.interval(11, 20)))
        False

        Parameters
        ----------
        interval : :class:`.Expression` with type :class:`.tinterval`
            Interval object with the same point type.

        Returns
        -------
        :class:`.BooleanExpression`

        Raises
        ------
        TypeError
            If the two intervals have different point types.
        """
        own_point_type = self.dtype.point_type
        other_point_type = interval.dtype.point_type
        if own_point_type != other_point_type:
            raise TypeError("expected '{}', found: '{}'".format(own_point_type, other_point_type))
        return self._method("overlaps", tbool, interval)

    @property
    def end(self):
        """The end point of the interval.

        Examples
        --------

        >>> hl.eval(interval.end)
        11

        Returns
        -------
        :class:`.Expression`
        """
        point_type = self.dtype.point_type
        return self._method("end", point_type)

    @property
    def start(self):
        """The start point of the interval.

        Examples
        --------

        >>> hl.eval(interval.start)
        3

        Returns
        -------
        :class:`.Expression`
        """
        point_type = self.dtype.point_type
        return self._method("start", point_type)

    @property
    def includes_start(self):
        """Whether the start point belongs to the interval.

        Examples
        --------

        >>> hl.eval(interval.includes_start)
        True

        Returns
        -------
        :class:`.BooleanExpression`
        """
        return self._method("includesStart", tbool)

    @property
    def includes_end(self):
        """Whether the end point belongs to the interval.

        Examples
        --------

        >>> hl.eval(interval.includes_end)
        False

        Returns
        -------
        :class:`.BooleanExpression`
        """
        return self._method("includesEnd", tbool)


[docs]class NDArrayExpression(Expression):
    """Expression of type :class:`.tndarray`.

    >>> nd = hl.nd.array([[1, 2], [3, 4]])
    """

    def _data_array(self):
        shape = self.shape
        ndims = self.ndim

        def f(i, *vars):
            if i == ndims:
                return self[vars]
            else:
                return hl.range(0, hl.int32(shape[i])).map(lambda idx: f(i + 1, *vars, idx))

        return f(0)

    @property
    def ndim(self):
        """The number of dimensions of this ndarray.

        Examples
        --------

        >>> nd.ndim
        2

        Returns
        -------
        :obj:`int`
        """
        return self._type.ndim

    @property
    def T(self):
        """Reverse the dimensions of this ndarray. For an n-dimensional array `a`,
        a[i_0, ..., i_n-1, i_n] = a.T[i_n, i_n-1, ..., i_0].
        Same as `self.transpose()`.

        See also :meth:`.transpose`.

        Returns
        -------
        :class:`.NDArrayExpression`.
        """
        return self.transpose()

[docs]    @typecheck_method(axes=nullable(tupleof(int)))
    def transpose(self, axes=None):
        """
        Permute the dimensions of this ndarray according to the ordering of axes. Axis j in the ith index of
        axes maps the jth dimension of the ndarray to the ith dimension of the output ndarray.

        Parameters
        ----------
        axes : :obj:`tuple` of :obj:`int`, optional
            The new ordering of the ndarray's dimensions.

        Notes
        -----
        Does nothing on ndarrays of dimensionality 0 or 1.

        Returns
        -------
        :class:`.NDArrayExpression`.
        """
        if axes is None:
            axes = list(reversed(range(self.ndim)))
        else:
            if len(axes) != self.ndim:
                raise ValueError(
                    f'Must specify a complete permutation of the dimensions. '
                    f'Expected {self.ndim} axes, got {len(axes)}'
                )

            if len(set(axes)) != len(axes):
                raise ValueError(f'Axes cannot contain duplicates: {axes}')

            for axis in axes:
                if not 0 <= axis < self.ndim:
                    raise ValueError(f'Invalid axis: {axis}')

        if self.ndim < 2:
            return self

        return construct_expr(ir.NDArrayReindex(self._ir, axes), self._type, self._indices, self._aggregations)

    @property
    def shape(self):
        """The shape of this ndarray.

        Examples
        --------
        >>> hl.eval(nd.shape)
        (2, 2)

        Returns
        -------
        :class:`.TupleExpression`
        """
        shape_type = ttuple(*[tint64 for _ in range(self.ndim)])
        return construct_expr(ir.NDArrayShape(self._ir), shape_type, self._indices, self._aggregations)

    _opt_long_slice = sliceof(nullable(expr_int64), nullable(expr_int64), nullable(expr_int64))

    @typecheck_method(
        item=nullable(
            oneof(
                expr_int64, type(...), _opt_long_slice, tupleof(nullable(oneof(expr_int64, type(...), _opt_long_slice)))
            )
        )
    )
    def __getitem__(self, item):
        if not isinstance(item, tuple):
            item = (item,)

        num_ellipses = len([e for e in item if isinstance(e, type(...))])
        if num_ellipses > 1:
            raise IndexError("an index can only have a single ellipsis ('...')")

        num_nones = len([x for x in item if x is None])
        list_item = list(item)

        if num_ellipses == 1:
            list_types = [type(e) for e in list_item]
            ellipsis_location = list_types.index(type(...))
            num_slices_to_add = self.ndim - (len(item) - num_nones) + 1
            no_ellipses = (
                list_item[:ellipsis_location] + [slice(None)] * num_slices_to_add + list_item[ellipsis_location + 1 :]
            )
        else:
            no_ellipses = list_item

        no_nums = [x for x in no_ellipses if ((x is None) or (isinstance(x, slice)))]
        indices_nones = [i for i, x in enumerate(no_nums) if x is None]
        formatted_item = [x for x in no_ellipses if x is not None]

        if len(formatted_item) > self.ndim:
            raise IndexError(
                f'too many indices for array: array is ' f'{self.ndim}-dimensional, but {len(item)} were indexed'
            )
        if len(formatted_item) < self.ndim:
            formatted_item += [slice(None, None, None)] * (self.ndim - len(formatted_item))

        n_sliced_dims = len([s for s in formatted_item if isinstance(s, slice)])

        if n_sliced_dims > 0:
            slices = []
            for i, s in enumerate(formatted_item):
                dlen = self.shape[i]
                if isinstance(s, slice):
                    if s.step is not None:
                        step = hl.case().when(s.step != 0, s.step).or_error("Slice step cannot be zero")
                    else:
                        step = to_expr(1, tint64)

                    max_bound = hl.if_else(step > 0, dlen, dlen - 1)
                    min_bound = hl.if_else(step > 0, to_expr(0, tint64), to_expr(-1, tint64))

                    if s.start is not None:
                        # python treats start < -dlen as None when step < 0: [0,1][-3:0:-1]
                        # and 0 otherwise: [0,1][-3::1] == [0,1][0::1]
                        start = (
                            hl.case()
                            .when(s.start >= dlen, max_bound)
                            .when(s.start >= 0, s.start)
                            .when((s.start + dlen) >= 0, dlen + s.start)
                            .default(min_bound)
                        )
                    else:
                        start = hl.if_else(step >= 0, to_expr(0, tint64), dlen - 1)

                    if s.stop is not None:
                        # python treats stop < -dlen as None when step < 0: [0,1][0:-3:-1] == [0,1][0::-1]
                        # and 0 otherwise: [0,1][:-3:1] == [0,1][:0:1]
                        stop = (
                            hl.case()
                            .when(s.stop >= dlen, max_bound)
                            .when(s.stop >= 0, s.stop)
                            .when((s.stop + dlen) >= 0, dlen + s.stop)
                            .default(min_bound)
                        )
                    else:
                        stop = hl.if_else(step > 0, dlen, to_expr(-1, tint64))

                    slices.append(hl.tuple((start, stop, step)))
                else:
                    adjusted_index = hl.if_else(s < 0, s + dlen, s)
                    checked_int = (
                        hl.case()
                        .when((adjusted_index < dlen) & (adjusted_index >= 0), adjusted_index)
                        .or_error(
                            hl.str("Index ")
                            + hl.str(s)
                            + hl.str(f" is out of bounds for axis {i} with size ")
                            + hl.str(dlen)
                        )
                    )
                    slices.append(checked_int)
            indices, aggregations = unify_all(self, *slices)
            product = construct_expr(
                ir.NDArraySlice(self._ir, hl.tuple(slices)._ir),
                tndarray(self._type.element_type, n_sliced_dims),
                indices,
                aggregations,
            )

            if len(indices_nones) > 0:
                reshape_arg = []
                index_non_nones = 0
                for i in range(n_sliced_dims + num_nones):
                    if i in indices_nones:
                        reshape_arg.append(1)
                    else:
                        reshape_arg.append(product.shape[index_non_nones])
                        index_non_nones += 1
                product = product.reshape(tuple(reshape_arg))

        else:
            indices, aggregations = unify_all(self, *formatted_item)
            product = construct_expr(
                ir.NDArrayRef(self._ir, [idx._ir for idx in formatted_item]),
                self._type.element_type,
                indices,
                aggregations,
            )

            if len(indices_nones) > 0:
                reshape_arg = []
                for i in indices_nones:
                    reshape_arg.append(1)
                product = hl.nd.array(product).reshape(tuple(reshape_arg))

        return product

[docs]    @typecheck_method(shape=oneof(expr_int64, tupleof(expr_int64), expr_tuple()))
    def reshape(self, *shape):
        """Reshape this ndarray to a new shape.

        Parameters
        ----------
        shape : :class:`.Expression` of type :py:data:`.tint64` or
                :obj: `tuple` of :class:`.Expression` of type :py:data:`.tint64`

        Examples
        --------

        >>> v = hl.nd.array([1, 2, 3, 4]) # doctest: +SKIP
        >>> m = v.reshape((2, 2)) # doctest: +SKIP

        Returns
        -------
        :class:`.NDArrayExpression`.
        """

        # varargs with many ints works, but can't be a mix of ints and tuples.
        if len(shape) > 1:
            for i, arg in enumerate(shape):
                if not isinstance(arg, Int64Expression):
                    raise TypeError(f"Argument {i} of reshape needs to be of type tint64.")
        else:
            shape = shape[0]

        if isinstance(shape, tuple):
            indices, aggregations = unify_all(self, *shape)
        else:
            indices, aggregations = unify_all(self, shape)

        if isinstance(shape, TupleExpression):
            for i, tuple_field_type in enumerate(shape.dtype.types):
                if tuple_field_type not in [hl.tint32, hl.tint64]:
                    raise TypeError(f"Argument {i} of reshape needs to be an integer, got {tuple_field_type}.")
            shape_ir = hl.or_missing(hl.is_defined(shape), hl.tuple([hl.int64(i) for i in shape]))._ir
            ndim = len(shape)
        else:
            wrapped_shape = wrap_to_list(shape)
            ndim = len(wrapped_shape)
            shape_ir = hl.tuple(wrapped_shape)._ir

        return construct_expr(
            ir.NDArrayReshape(self._ir, shape_ir), tndarray(self._type.element_type, ndim), indices, aggregations
        )

[docs]    @typecheck_method(f=func_spec(1, expr_any))
    def map(self, f):
        """Applies an element-wise operation on an NDArray.

        Parameters
        ----------
        f : function ( (arg) -> :class:`.Expression`)
            Function to transform each element of the NDArray.

        Returns
        -------
        :class:`.NDArrayExpression`.
            NDArray where each element has been transformed according to `f`.
        """

        element_type = self._type.element_type
        ndarray_map = self._ir_lambda_method(ir.NDArrayMap, f, element_type, lambda t: tndarray(t, self.ndim))

        assert isinstance(self._type, tndarray)
        return ndarray_map

[docs]    @typecheck_method(other=oneof(expr_ndarray(), list), f=func_spec(2, expr_any))
    def map2(self, other, f):
        """Combine this NDArray with another, element by element, using `f`.

        Parameters
        ----------
        other : :class:`.NDArrayExpression`, numpy ndarray, or python list
            Second operand. Python lists and numpy arrays are converted with
            ``hl.nd.array``. Both NDArrays must be the same shape or
            broadcastable into a common shape.
        f : function ((arg1, arg2)-> :class:`.Expression`)
            Function applied to each pair of corresponding elements.

        Returns
        -------
        :class:`.NDArrayExpression`.
            Element-wise result of applying `f` to each index in the NDArrays.
        """

        if isinstance(other, (list, np.ndarray)):
            other = hl.nd.array(other)

        # Pad the operand with fewer dimensions so both have the same ndim.
        lhs, rhs = self._broadcast_to_same_ndim(other)

        def result_type(combined_element_type):
            return tndarray(combined_element_type, lhs.ndim)

        zipped = lhs._ir_lambda_method2(
            rhs, ir.NDArrayMap2, f, lhs._type.element_type, rhs._type.element_type, result_type
        )

        assert isinstance(self._type, tndarray)
        return zipped

    def _broadcast_to_same_ndim(self, other):
        """Return ``(self, other)`` with the lower-dimensional ndarray padded.

        When `other` is an :class:`.NDArrayExpression` with a different number
        of dimensions, the operand with fewer dimensions is passed through
        ``_broadcast`` so both end up with the same ``ndim``. Otherwise both
        values are returned unchanged.
        """
        if not isinstance(other, NDArrayExpression):
            return self, other

        own_ndim, other_ndim = self.ndim, other.ndim
        if own_ndim < other_ndim:
            return self._broadcast(other_ndim), other
        if own_ndim > other_ndim:
            return self, other._broadcast(own_ndim)
        return self, other

    def _broadcast(self, n_output_dims):
        """Reindex this ndarray so that it has `n_output_dims` dimensions.

        Requires ``self.ndim < n_output_dims``. The result is built with
        ``ir.NDArrayReindex`` and keeps this expression's element type,
        indices and aggregations.
        """
        assert self.ndim < n_output_dims

        # Following numpy broadcasting, an array is treated as if it had an
        # unlimited number of length-1 dimensions in front of it. The existing
        # dimensions therefore stay right-aligned and the new ones are
        # prepended on the left: e.g. [0, 1] -> [3, 2, 0, 1].
        idx_mapping = [*reversed(range(self.ndim, n_output_dims)), *range(self.ndim)]
        broadcast_type = tndarray(self._type.element_type, n_output_dims)

        return construct_expr(
            ir.NDArrayReindex(self._ir, idx_mapping), broadcast_type, self._indices, self._aggregations
        )


class NDArrayNumericExpression(NDArrayExpression):
    """Expression of type :class:`.tndarray` with a numeric element type.

    Numeric ndarrays can be combined arithmetically with scalar values and
    with other ndarrays. When both operands are ndarrays, their shapes must be
    identical or compatible for broadcasting, and the operation is applied
    positionally (``nd1 * nd2`` multiplies the first element of ``nd1`` by the
    first element of ``nd2``, the second by the second, and so on). When one
    operand is a scalar, the operation is applied to every element of the
    ndarray.
    """

    def _bin_op_numeric(self, name, other, ret_type_f=None):
        # Python lists and numpy arrays are lifted to ndarray expressions.
        if isinstance(other, (list, np.ndarray)):
            other = hl.nd.array(other)

        lhs, rhs = self._broadcast_to_same_ndim(other)
        # Dispatch on `lhs` (possibly a broadcast copy of self) rather than self.
        return super(NDArrayNumericExpression, lhs)._bin_op_numeric(name, rhs, ret_type_f)

    def _bin_op_numeric_reverse(self, name, other, ret_type_f=None):
        # Python lists and numpy arrays are lifted to ndarray expressions.
        if isinstance(other, (list, np.ndarray)):
            other = hl.nd.array(other)

        lhs, rhs = self._broadcast_to_same_ndim(other)
        # Dispatch on `lhs` (possibly a broadcast copy of self) rather than self.
        return super(NDArrayNumericExpression, lhs)._bin_op_numeric_reverse(name, rhs, ret_type_f)

    def __neg__(self):
        """Negate every element of the ndarray.

        Returns
        -------
        :class:`.NDArrayNumericExpression`
            NDArray expression of the same type.
        """
        return self * -1

    def __add__(self, other):
        """Add an ndarray or a scalar, position by position.

        Parameters
        ----------
        other : :class:`.NumericExpression` or :class:`.NDArrayNumericExpression`
            Value or ndarray to add.

        Returns
        -------
        :class:`.NDArrayNumericExpression`
            NDArray of positional sums.
        """
        return self._bin_op_numeric("+", other)

    def __radd__(self, other):
        return self._bin_op_numeric_reverse("+", other)

    def __sub__(self, other):
        """Subtract an ndarray or a scalar, position by position.

        Parameters
        ----------
        other : :class:`.NumericExpression` or :class:`.NDArrayNumericExpression`
            Value or ndarray to subtract.

        Returns
        -------
        :class:`.NDArrayNumericExpression`
            NDArray of positional differences.
        """
        return self._bin_op_numeric("-", other)

    def __rsub__(self, other):
        return self._bin_op_numeric_reverse("-", other)

    def __mul__(self, other):
        """Multiply by an ndarray or a scalar, position by position.

        Parameters
        ----------
        other : :class:`.NumericExpression` or :class:`.NDArrayNumericExpression`
            Value or ndarray to multiply by.

        Returns
        -------
        :class:`.NDArrayNumericExpression`
            NDArray of positional products.
        """
        return self._bin_op_numeric("*", other)

    def __rmul__(self, other):
        return self._bin_op_numeric_reverse("*", other)

    def __truediv__(self, other):
        """Divide by an ndarray or a scalar, position by position.

        Parameters
        ----------
        other : :class:`.NumericExpression` or :class:`.NDArrayNumericExpression`
            Value or ndarray to divide by.

        Returns
        -------
        :class:`.NDArrayNumericExpression`
            NDArray of positional quotients.
        """
        return self._bin_op_numeric("/", other, self._div_ret_type_f)

    def __rtruediv__(self, other):
        return self._bin_op_numeric_reverse("/", other, self._div_ret_type_f)

    def __floordiv__(self, other):
        """Floor-divide by an ndarray or a scalar, position by position.

        Parameters
        ----------
        other : :class:`.NumericExpression` or :class:`.NDArrayNumericExpression`
            Value or ndarray to divide by.

        Returns
        -------
        :class:`.NDArrayNumericExpression`
        """
        return self._bin_op_numeric('//', other)

    def __rfloordiv__(self, other):
        return self._bin_op_numeric_reverse('//', other)

    def __rmatmul__(self, other):
        left_operand = other if isinstance(other, NDArrayNumericExpression) else hl.nd.array(other)
        return left_operand.__matmul__(self)

    def __matmul__(self, other):
        """Matrix multiplication ``a @ b``, semantically equivalent to `NumPy` matmul.

        If `a` and `b` are vectors, the vector dot product is performed and a
        `NumericExpression` is returned. If both are 2-dimensional matrices,
        ordinary matrix multiplication is performed. If they have more than 2
        dimensions, they are treated as multi-dimensional stacks of
        2-dimensional matrices and multiplied element-wise across the higher
        dimensions. E.g. if `a` has shape `(3, 4, 5)` and `b` has shape
        `(3, 5, 6)`, `a` is a stack of three `(4, 5)` matrices, `b` a stack of
        three `(5, 6)` matrices, and `a @ b` has shape `(3, 4, 6)`.

        Notes
        -----
        The last dimension of `a` and the second to last dimension of `b` (or
        its only dimension if `b` is a vector) must have the same length. The
        dimensions to the left of the last two dimensions of `a` and `b` (for
        NDArrays of dimensionality > 2) must be equal or be compatible for
        broadcasting. Both NDArrays must have at least 1 dimension.

        Parameters
        ----------
        other : :class:`numpy.ndarray` :class:`.NDArrayNumericExpression`

        Returns
        -------
        :class:`.NDArrayNumericExpression` or :class:`.NumericExpression`

        Raises
        ------
        ValueError
            If either operand has 0 dimensions.
        """
        rhs = other if isinstance(other, NDArrayNumericExpression) else hl.nd.array(other)

        indices, aggregations = unify_all(self, rhs)

        if self.ndim == 0 or rhs.ndim == 0:
            raise ValueError('MatMul must be between objects of 1 dimension or more. Try * instead')

        # Operands are padded to a common ndim only when both are at least 2-dimensional.
        left, right = self, rhs
        if self.ndim > 1 and rhs.ndim > 1:
            left, right = self._broadcast_to_same_ndim(rhs)

        from hail.linalg.utils.misc import _ndarray_matmul_ndim

        out_ndim = _ndarray_matmul_ndim(left.ndim, right.ndim)
        out_type = tndarray(unify_types(self._type.element_type, rhs._type.element_type), out_ndim)
        left = left._promote_numeric(out_type)
        right = right._promote_numeric(out_type)

        product = construct_expr(ir.NDArrayMatMul(left._ir, right._ir), out_type, indices, aggregations)

        # A 0-dimensional result is unwrapped by indexing with the empty tuple.
        if out_ndim > 0:
            return product
        return product[()]

    @typecheck_method(axis=nullable(oneof(int, tupleof(int))))
    def sum(self, axis=None):
        """Sum out one or more axes of an ndarray.

        Parameters
        ----------
        axis : :class:`int` :class:`tuple`
            The axis or axes to sum out. When omitted, every axis is summed.

        Returns
        -------
        :class:`.NDArrayNumericExpression` or :class:`.NumericExpression`

        Raises
        ------
        ValueError
            If `axis` contains a duplicate, or an axis that is negative or not
            smaller than the number of dimensions.
        """
        if axis is None:
            axis = tuple(range(self.ndim))

        # Boolean elements are converted with hl.int and summed as integers.
        if self._type.element_type is hl.tbool:
            return self.map(lambda x: hl.int(x)).sum(axis)

        axis = wrap_to_tuple(axis)
        res_ir = ir.NDArrayAgg(self._ir, axis)

        distinct_axes = set(axis)
        if len(distinct_axes) < len(axis):
            raise ValueError("duplicate value in 'axis'")
        for ax in distinct_axes:
            if not 0 <= ax < self.ndim:
                raise ValueError(f"axis {ax} is out of bounds for ndarray of dimension {self.ndim}")

        remaining_ndim = self.ndim - len(distinct_axes)
        summed = construct_expr(
            res_ir, tndarray(self._type.element_type, remaining_ndim), self._indices, self._aggregations
        )

        # A 0-dimensional result is unwrapped by indexing with the empty tuple.
        return summed[()] if remaining_ndim == 0 else summed


class StreamExpression(Expression):
    """Expression of type :class:`.tstream`.

    Each method wraps this expression's IR in the matching ``ir.Stream*`` node
    and returns a new expression carrying the unified indices and
    aggregations of its inputs.
    """

    @typecheck_method(f=func_spec(1, expr_bool))
    def filter(self, f):
        """Build an ``ir.StreamFilter`` over this stream using predicate `f`.

        Parameters
        ----------
        f : function ( (arg) -> :class:`.BooleanExpression`)
            Predicate applied to each element.

        Returns
        -------
        :class:`.StreamExpression`
            Stream with the same element type as this one.

        Raises
        ------
        TypeError
            If `f` does not return an expression of type ``bool``.
        """

        def unify_ret(t):
            if t != tbool:
                raise TypeError(f"'filter' expects 'f' to return an expression of type 'bool', found '{t}'")
            return tstream(self._type.element_type)

        def transform_ir(stream, name, body):
            return ir.StreamFilter(stream, name, body)

        return self._ir_lambda_method(transform_ir, f, self.dtype.element_type, unify_ret)

    @typecheck_method(n=expr_int32)
    def take(self, n):
        """Build an ``ir.StreamTake`` of this stream and `n`.

        Parameters
        ----------
        n : :class:`.Expression` of type :py:data:`.tint32`

        Returns
        -------
        :class:`.StreamExpression`
            Stream of the same type as this one.
        """
        indices, aggregations = unify_all(self, n)
        take_ir = ir.StreamTake(self._ir, n._ir)
        return construct_expr(take_ir, self.dtype, indices, aggregations)

    @typecheck_method(f=func_spec(1, expr_any))
    def map(self, f):
        """Build an ``ir.StreamMap`` applying `f` to each element.

        Parameters
        ----------
        f : function ( (arg) -> :class:`.Expression`)

        Returns
        -------
        :class:`.StreamExpression`
            Stream whose element type is the type returned by `f`.
        """

        def transform_ir(stream, name, body):
            return ir.StreamMap(stream, name, body)

        return self._ir_lambda_method(transform_ir, f, self.dtype.element_type, lambda t: tstream(t))

    @typecheck_method(f=func_spec(1, expr_any))
    def flatmap(self, f):
        """Build an ``ir.StreamFlatMap`` applying the stream-valued `f`.

        Parameters
        ----------
        f : function ( (arg) -> :class:`.StreamExpression`)

        Returns
        -------
        :class:`.StreamExpression`
            Expression of the type returned by `f`.

        Raises
        ------
        TypeError
            If `f` does not return an expression of type ``tstream``.
        """
        # `f` is applied once to a throwaway variable purely to inspect its return type.
        value_type = f(construct_variable(Env.get_uid(), self.dtype.element_type)).dtype

        if not isinstance(value_type, tstream):
            raise TypeError(f"'flatmap' expects 'f' to return an expression of type tstream, found '{value_type}'")

        def transform_ir(stream, name, body):
            return ir.StreamFlatMap(stream, name, body)

        return self._ir_lambda_method(transform_ir, f, self.dtype.element_type, identity)

    def _stream_accumulator_parts(self, f, zero, method_name):
        """Build the accumulator lambda shared by :meth:`fold` and :meth:`scan`.

        Applies `f` to fresh accumulator and element variables, then reconciles
        the body type with the type of `zero`: the body is coerced to the zero
        type when possible; otherwise `zero` is coerced to the body type and
        `f` is re-applied with an accumulator of that type.

        Returns
        -------
        tuple
            ``(accum_name, elt_name, zero, body, indices, aggregations)`` where
            `zero` and `body` are the (possibly coerced) expressions.

        Raises
        ------
        ExpressionException
            If the body and zero types still differ after coercion.
        """
        indices, aggregations = unify_all(self, zero)
        accum_name = Env.get_uid()
        elt_name = Env.get_uid()

        accum_ref = construct_variable(accum_name, zero.dtype, indices, aggregations)
        elt_ref = construct_variable(elt_name, self.dtype.element_type, self._indices, self._aggregations)
        body = to_expr(f(accum_ref, elt_ref))

        if body.dtype != zero.dtype:
            zero_coercer = coercer_from_dtype(zero.dtype)
            if zero_coercer.can_coerce(body.dtype):
                body = zero_coercer.coerce(body)
            else:
                body_coercer = coercer_from_dtype(body.dtype)
                if body_coercer.can_coerce(zero.dtype):
                    zero_coerced = body_coercer.coerce(zero)
                    # Re-apply `f` with an accumulator of the widened type.
                    accum_ref = construct_variable(accum_name, zero_coerced.dtype, indices, aggregations)
                    new_body = to_expr(f(accum_ref, elt_ref))
                    if body_coercer.can_coerce(new_body.dtype):
                        body = body_coercer.coerce(new_body)
                        zero = zero_coerced

        if body.dtype != zero.dtype:
            raise ExpressionException(
                f"'StreamExpression.{method_name}' must take function returning "
                "same expression type as zero value: \n"
                f"    zero.dtype: {zero.dtype}\n"
                f"    f.dtype: {body.dtype}"
            )

        indices, aggregations = unify_all(self, zero, body)
        return accum_name, elt_name, zero, body, indices, aggregations

    @typecheck_method(f=func_spec(2, expr_any), zero=expr_any)
    def fold(self, f, zero):
        """Build an ``ir.StreamFold`` with accumulator function `f` and initial value `zero`.

        Parameters
        ----------
        f : function ( (accumulator, element) -> :class:`.Expression`)
        zero : :class:`.Expression`
            Initial accumulator value.

        Returns
        -------
        :class:`.Expression`
            Expression of the (reconciled) type returned by `f`.

        Raises
        ------
        ExpressionException
            If the type returned by `f` cannot be reconciled with the type of `zero`.
        """
        accum_name, elt_name, zero, body, indices, aggregations = self._stream_accumulator_parts(f, zero, 'fold')
        fold_ir = ir.StreamFold(self._ir, zero._ir, accum_name, elt_name, body._ir)
        return construct_expr(fold_ir, body.dtype, indices, aggregations)

    @typecheck_method(f=func_spec(2, expr_any), zero=expr_any)
    def scan(self, f, zero):
        """Build an ``ir.StreamScan`` with accumulator function `f` and initial value `zero`.

        Parameters
        ----------
        f : function ( (accumulator, element) -> :class:`.Expression`)
        zero : :class:`.Expression`
            Initial accumulator value.

        Returns
        -------
        :class:`.StreamExpression`
            Stream whose element type is the (reconciled) type returned by `f`.

        Raises
        ------
        ExpressionException
            If the type returned by `f` cannot be reconciled with the type of `zero`.
        """
        accum_name, elt_name, zero, body, indices, aggregations = self._stream_accumulator_parts(f, zero, 'scan')
        scan_ir = ir.StreamScan(self._ir, zero._ir, accum_name, elt_name, body._ir)
        return construct_expr(scan_ir, tstream(body.dtype), indices, aggregations)

    @typecheck_method(f=func_spec(1, expr_any))
    def aggregate(self, f):
        """Delegate to ``hl.agg._aggregate_local_array`` with this stream and `f`."""
        return hl.agg._aggregate_local_array(self, f)

    def to_array(self):
        """Wrap this stream in ``ir.toArray``.

        Returns
        -------
        :class:`.ArrayExpression`
            Array expression with the same element type as this stream.
        """
        return construct_expr(ir.toArray(self._ir), tarray(self.dtype.element_type), self._indices, self._aggregations)

    @typecheck_method(start=expr_int32, index_first=bool)
    def zip_with_index(self, start, index_first=True):
        """Zip this stream with an ``ir.StreamIota`` index stream beginning at `start`.

        Parameters
        ----------
        start : :class:`.Expression` of type :py:data:`.tint32`
            First argument passed to ``ir.StreamIota``.
        index_first : :obj:`bool`
            If ``True``, each tuple is ``(index, element)``; otherwise
            ``(element, index)``.

        Returns
        -------
        :class:`.StreamExpression`
            Stream of 2-tuples pairing an ``int32`` index with an element.
        """
        indices, aggs = unify_all(self, start)
        elt = Env.get_uid()
        idx = Env.get_uid()
        elt_type = self.dtype.element_type

        elt_ref = ir.Ref(elt, elt_type)
        idx_ref = ir.Ref(idx, tint32)
        if index_first:
            pair_ir = ir.MakeTuple([idx_ref, elt_ref])
            pair_type = ttuple(tint32, elt_type)
        else:
            pair_ir = ir.MakeTuple([elt_ref, idx_ref])
            pair_type = ttuple(elt_type, tint32)

        return construct_expr(
            ir.StreamZip([self._ir, ir.StreamIota(start._ir, ir.I32(1))], [elt, idx], pair_ir, 'TakeMinLength'),
            tstream(pair_type),
            indices,
            aggs,
        )

    @typecheck_method(group_size=expr_int32)
    def grouped(self, group_size):
        """Build an ``ir.StreamGrouped`` and convert each group to an array.

        Parameters
        ----------
        group_size : :class:`.Expression` of type :py:data:`.tint32`

        Returns
        -------
        :class:`.StreamExpression`
            Stream of arrays of this stream's element type.
        """
        indices, aggregations = unify_all(self, group_size)
        elt_type = self._type.element_type
        stream_ir = ir.StreamGrouped(self._ir, group_size._ir)
        mapping_identifier = Env.get_uid("stream_grouped_map_to_arrays")
        # Each group is referenced as a stream and converted with toArray.
        mapped_to_arrays = ir.StreamMap(
            stream_ir, mapping_identifier, ir.toArray(ir.Ref(mapping_identifier, tstream(elt_type)))
        )
        return construct_expr(mapped_to_arrays, tstream(tarray(elt_type)), indices, aggregations)


# Lookup table from scalar Hail types to the Expression subclass that wraps
# them. ``construct_expr`` consults it with the type value itself as the key
# (``type in scalars``).
scalars = {
    tbool: BooleanExpression,
    tint32: Int32Expression,
    tint64: Int64Expression,
    tfloat32: Float32Expression,
    tfloat64: Float64Expression,
    tstr: StringExpression,
    tcall: CallExpression,
}

# Lookup table from Hail types to the Expression subclass that wraps them.
# ``construct_expr`` consults it with ``type.__class__`` as the key, after the
# ``scalars`` table has been checked.
# NOTE(review): ``tcall`` is also a key of ``scalars``, which is checked first;
# presumably this ``tcall`` entry is never reached - confirm before relying on it.
typ_to_expr = {
    tlocus: LocusExpression,
    tinterval: IntervalExpression,
    tcall: CallExpression,
    tdict: DictExpression,
    tarray: ArrayExpression,
    tstream: StreamExpression,
    tset: SetExpression,
    tstruct: StructExpression,
    ttuple: TupleExpression,
    tndarray: NDArrayExpression,
}


def apply_expr(f, result_type, *args):
    """Call `f` on the IRs of `args` and wrap the result as an expression.

    The indices and aggregations of all `args` are unified and attached to the
    returned expression, which is given type `result_type`.
    """
    indices, aggregations = unify_all(*args)
    arg_irs = [arg._ir for arg in args]
    return construct_expr(f(*arg_irs), result_type, indices, aggregations)


@typecheck(x=ir.IR, type=nullable(HailType), indices=Indices, aggregations=LinkedList)
def construct_expr(
    x: ir.IR, type: HailType, indices: Indices = Indices(), aggregations: LinkedList = LinkedList(Aggregation)
):
    """Wrap the IR node `x` in the Expression subclass that matches `type`.

    When `type` is ``None`` a plain :class:`.Expression` is returned and no
    type is assigned to `x`. Otherwise `type` is assigned to `x` and the
    wrapper class is chosen in this order:

    - arrays with a numeric element type: ``ArrayNumericExpression``;
    - other arrays and sets: ``ArrayStructExpression`` / ``SetStructExpression``
      when the element type, after stripping any nested array or set layers,
      is a struct, else the ``typ_to_expr`` entry for the type's class;
    - ndarrays with a numeric element type: ``NDArrayNumericExpression``;
    - types found in ``scalars``;
    - types whose class is found in ``typ_to_expr``.

    Raises
    ------
    NotImplementedError
        If no wrapper class matches `type`.
    """
    if type is None:
        return Expression(x, None, indices, aggregations)
    x.assign_type(type)

    if isinstance(type, (tarray, tset)):
        is_array = isinstance(type, tarray)
        if is_array and is_numeric(type.element_type):
            return ArrayNumericExpression(x, type, indices, aggregations)

        # Strip nested array/set layers to reach the innermost element type.
        innermost = type.element_type
        while isinstance(innermost, (hl.tarray, hl.tset)):
            innermost = innermost.element_type

        if isinstance(innermost, hl.tstruct):
            struct_cls = ArrayStructExpression if is_array else SetStructExpression
            return struct_cls(x, type, indices, aggregations)
        return typ_to_expr[type.__class__](x, type, indices, aggregations)

    if isinstance(type, tndarray) and is_numeric(type.element_type):
        return NDArrayNumericExpression(x, type, indices, aggregations)
    if type in scalars:
        return scalars[type](x, type, indices, aggregations)
    if type.__class__ in typ_to_expr:
        return typ_to_expr[type.__class__](x, type, indices, aggregations)
    raise NotImplementedError(type)


@typecheck(name=str, type=HailType, indices=Indices)
def construct_reference(name, type, indices):
    """Build an expression around ``ir.SelectedTopLevelReference(name, type)``.

    `type` must be a struct type; the expression is created with `indices`.
    """
    assert isinstance(type, hl.tstruct)
    return construct_expr(ir.SelectedTopLevelReference(name, type), type, indices)


@typecheck(name=str, type=HailType, indices=Indices, aggregations=LinkedList)
def construct_variable(name, type, indices: Indices = Indices(), aggregations: LinkedList = LinkedList(Aggregation)):
    """Build an expression of `type` around the reference ``ir.Ref(name, type)``."""
    ref_ir = ir.Ref(name, type)
    return construct_expr(ref_ir, type, indices, aggregations)