# Copyright 2018 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Number-related questions, e.g., "write seventy-two as a number".""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import functools import math import random # Dependency imports from mathematics_dataset import example from mathematics_dataset.sample import number from mathematics_dataset.util import composition from mathematics_dataset.util import display import numpy as np import six from six.moves import range import sympy _ENTROPY_TRAIN = (3, 10) _ENTROPY_INTERPOLATE = (8, 8) _ENTROPY_EXTRAPOLATE = (12, 12) # Number of module compositions appearing in train/test, and extrapolation data. _NUM_MODULES_COMPOSED = [2, 4] def _make_modules(entropy, num_modules_composed): """Returns modules given "difficulty" parameters.""" fns = { 'gcd': gcd, 'lcm': lcm, 'div_remainder': div_remainder, 'is_prime': is_prime, 'is_factor': is_factor, 'round_number': round_number, 'place_value': place_value, 'list_prime_factors': list_prime_factors, } # These modules don't have both pure and composed. modules = { 'base_conversion': functools.partial(base_conversion, *entropy), } sample_args_pure = composition.PreSampleArgs(1, 1, *entropy) sample_args_composed = composition.PreSampleArgs( num_modules_composed[0], num_modules_composed[1], *entropy) for name, module in six.iteritems(fns): modules[name] = functools.partial(module, None, sample_args_pure) modules[name + '_composed'] = functools.partial( module, None, sample_args_composed) return modules def train(entropy_fn): """Returns dict of training modules.""" return _make_modules( entropy=entropy_fn(_ENTROPY_TRAIN), num_modules_composed=_NUM_MODULES_COMPOSED) def test(): """Returns dict of testing modules.""" return _make_modules( entropy=_ENTROPY_INTERPOLATE, num_modules_composed=_NUM_MODULES_COMPOSED) def test_extra(): """Returns dict of extrapolation testing modules.""" sample_args_pure = composition.PreSampleArgs(1, 1, *_ENTROPY_EXTRAPOLATE) return { 'round_number_big': functools.partial( round_number, None, sample_args_pure), 'place_value_big': functools.partial(place_value, None, sample_args_pure), } def place_value(value, sample_args, context=None): """E.g., "Q: What is the tens digit of 31859? A: 5.""" del value # unused for now if context is None: context = composition.Context() entropy, sample_args = sample_args.peel() integer = number.integer(entropy, signed=False, min_abs=1) (entity,) = context.sample(sample_args, [integer]) integer_as_string = str(integer) num_digits = len(integer_as_string) firsts = ['', 'ten ', 'hundred '] seconds = [ 'thousands', 'millions', 'billions', 'trillions', 'quadrillions', 'quintillions', 'sextillions', 'septillions', 'octillions', 'nonillions', 'decillions', ] place_names = ['units', 'tens', 'hundreds'] for second in seconds: for first in firsts: place_names.append(first + second) place = random.randint(1, num_digits) # 1 = units, 2 = tens, etc. place_name = place_names[place - 1] answer = sympy.Integer(integer_as_string[num_digits - place]) return example.Problem( question=example.question( context, 'What is the {place_name} digit of {integer}?', place_name=place_name, integer=entity.expression_else_handle), answer=answer) # TODO(b/124040078): add to composition system? def round_number(value, sample_args, context=None): """Question for rounding integers and decimals.""" del value # unused for now if context is None: context = composition.Context() entropy, sample_args = sample_args.peel() # This is the power of 10 to round to. E.g., power == 0 corresponds to # rounding to the nearest integer; power == -2 corresponds to rounding to two # decimal places, and power == 3 corresponds to rounding to the nearest 1000. power = random.randint(-7, 6) answer_entropy = 1 + random.uniform(0, entropy / 2) entropy = max(1, entropy - answer_entropy) value_integer = number.integer(answer_entropy, signed=True) remainder_divisor = 10 ** int(math.ceil(entropy)) remainder_range_lower = -remainder_divisor / 2 remainder_range_upper = remainder_divisor / 2 if value_integer <= 0: remainder_range_lower += 1 if value_integer >= 0: remainder_range_upper -= 1 remainder = random.randint(remainder_range_lower, remainder_range_upper) input_ = value_integer + sympy.Rational(remainder, remainder_divisor) scale = 10**power if power >= 0 else sympy.Rational(1, 10**(-power)) input_ = input_ * scale value = value_integer * scale if not number.is_integer(input_): input_ = display.Decimal(input_) if not number.is_integer(value): value = display.Decimal(value) (input_,) = context.sample(sample_args, [input_]) if power > 0: # Rounding to a power of ten. round_to = 10**power if random.choice([False, True]): # Write the rounding value as a word instead. round_to = display.StringNumber(round_to, join_number_words_with_hyphens=False) description = 'the nearest {round_to}'.format(round_to=round_to) elif power == 0 and random.choice([False, True]): # Round to nearest integer. description = 'the nearest integer' else: # Round to decimal places. description = random.choice(['{dps} decimal place', '{dps} dp']) if power != -1: # Plural description += 's' dps = -power if random.choice([False, True]): dps = display.StringNumber(dps) description = description.format(dps=dps) template = random.choice([ 'Round {input} to {description}.', 'What is {input} rounded to {description}?', ]) return example.Problem( question=example.question( context, template, input=input_, description=description), answer=value) def _semi_prime(entropy): """Generates a semi-prime with the given entropy.""" # Add on extra entropy to account for the sparsity of the primes; we don't # actually use the integers sampled, but rather a random prime close to them; # thus some entropy is lost, which we must account for entropy += math.log10(max(1, entropy * math.log(10))) # We intentionally uniformy sample the "entropy" (i.e., approx number digits) # of the two factors. entropy_1, entropy_2 = entropy * np.random.dirichlet([1, 1]) # Need >= 2 for randprime to always work (Betrand's postulate). approx_1 = number.integer(entropy_1, signed=False, min_abs=2) approx_2 = number.integer(entropy_2, signed=False, min_abs=2) factor_1 = sympy.ntheory.generate.randprime(approx_1 / 2, approx_1 * 2) factor_2 = sympy.ntheory.generate.randprime(approx_2 / 2, approx_2 * 2) return factor_1 * factor_2 def is_prime(value, sample_args, context=None): """Questions asking about primality.""" del value # unused for now if context is None: context = composition.Context() entropy, sample_args = sample_args.peel() composite = _semi_prime(entropy) if random.choice([False, True]): # Use the composite integer = composite is_prime_ = False else: # Take the next prime after the composite, to ensure the same distribution # as composites. Do "composite - 4" so we occasionally see "2" as a prime. integer = sympy.ntheory.generate.nextprime(composite - 4) is_prime_ = True (integer_entity,) = context.sample(sample_args, [integer]) if random.choice([False, True]) and integer != 1: answer = not is_prime_ attribute_name = random.choice(['composite', 'a composite number']) else: answer = is_prime_ attribute_name = random.choice(['prime', 'a prime number']) return example.Problem( question=example.question( context, 'Is {integer} {attribute}?', integer=integer_entity.expression_else_handle, attribute=attribute_name), answer=answer) def is_factor(value, sample_args, context=None): """E.g., "Is 5 a factor of 48?".""" del value # unused if context is None: context = composition.Context() entropy, sample_args = sample_args.peel() entropy_factor = 1 + random.uniform(0, entropy/3) entropy = max(0, entropy - entropy_factor) maybe_factor = number.integer(entropy_factor, False, min_abs=2) integer = maybe_factor * number.integer(entropy, False, min_abs=1) # Produce balanced classes. if random.choice([False, True]): # The following makes it not a factor. integer += random.randint(1, maybe_factor - 1) (entity,) = context.sample(sample_args, [integer]) templates = [ 'Is {maybe_factor} a factor of {value}?', 'Is {value} a multiple of {maybe_factor}?', 'Does {maybe_factor} divide {value}?', ] if maybe_factor == 2: templates += [ 'Is {value} even?', ] template = random.choice(templates) answer = integer % maybe_factor == 0 return example.Problem( question=example.question( context, template, maybe_factor=maybe_factor, value=entity.expression_else_handle), answer=answer) def list_prime_factors(value, sample_args, context=None): """E.g., "What are the prime factors of 36?".""" del value # unused for now if context is None: context = composition.Context() entropy, sample_args = sample_args.peel() entropy = max(1, entropy) integer = number.integer(entropy, signed=False, min_abs=2) (entity,) = context.sample(sample_args, [integer]) prime_factors = sorted(sympy.factorint(integer).keys()) template = random.choice([ 'What are the prime factors of {integer}?', 'List the prime factors of {integer}.', ]) return example.Problem( question=example.question( context, template, integer=entity.expression_else_handle), answer=display.NumberList(prime_factors)) def _pair_with_large_hidden_factor(entropy): """Returns pair of numbers with possibly large common factor hidden.""" entropy_p, entropy_q, _ = entropy * np.random.dirichlet([1, 1, 1]) # Min entropy on p and q to minimize trivial solutions. entropy_p = max(1, entropy_p) entropy_q = max(1, entropy_q) entropy_mult = max(0, entropy - entropy_p - entropy_q) p = number.integer(entropy_p, False, min_abs=1) q = number.integer(entropy_q, False, min_abs=1) mult = number.integer(entropy_mult, False, min_abs=1) p *= mult q *= mult return p, q def lcm(value, sample_args, context=None): """Question for least common multiple of p and q.""" del value # unused if context is None: context = composition.Context() entropy, sample_args = sample_args.peel() p, q = _pair_with_large_hidden_factor(entropy) answer = sympy.lcm(p, q) if random.choice([False, True]): p, q = context.sample(sample_args, [p, q]) # Ask the question directly. adjective = random.choice(['least', 'lowest', 'smallest']) template = random.choice([ 'Calculate the {adjective} common multiple of {p} and {q}.', 'What is the {adjective} common multiple of {p} and {q}?', ]) return example.Problem( question=example.question( context, template, adjective=adjective, p=p.expression_else_handle, q=q.expression_else_handle), answer=answer) else: # Phrase the question as finding the common denominator of two fractions. p = number.integer(2, signed=True, coprime_to=p) / p q = number.integer(2, signed=True, coprime_to=q) / q p, q = context.sample(sample_args, [p, q]) template = random.choice([ 'What is the common denominator of {p} and {q}?', 'Find the common denominator of {p} and {q}.', 'Calculate the common denominator of {p} and {q}.', ]) return example.Problem( question=example.question( context, template, p=p.expression_else_handle, q=q.expression_else_handle), answer=answer) def _random_coprime_pair(entropy): """Returns a pair of random coprime integers.""" coprime_product = number.integer(entropy, False, min_abs=1) factors = sympy.factorint(coprime_product) def take(): prime = random.choice(list(factors.keys())) power = factors[prime] del factors[prime] return prime ** power if random.random() < 0.8 and len(factors) >= 2: # Disallow trivial factoring where possible. count_left = random.randint(1, len(factors) - 1) count_right = len(factors) - count_left else: count_left = random.randint(0, len(factors)) count_right = len(factors) - count_left left = sympy.prod([take() for _ in range(count_left)]) right = sympy.prod([take() for _ in range(count_right)]) assert left * right == coprime_product return left, right # @composition.module(number.is_positive_integer) def gcd(value, sample_args, context=None): """Question for greatest common divisor of p and q.""" is_question = context is None if context is None: context = composition.Context() entropy, sample_args = sample_args.peel() if value is None: value_entropy = 1 + random.uniform(0, entropy/3) entropy = max(1, entropy - value_entropy) value = number.integer(value_entropy, False, min_abs=1) p_mult, q_mult = _random_coprime_pair(entropy) p = value * p_mult q = value * q_mult assert sympy.gcd(p, q) == value p, q = context.sample(sample_args, [p, q]) adjective = (random.choice(['greatest', 'highest']) + ' common ' + random.choice(['divisor', 'factor'])) if is_question: template = random.choice([ 'Calculate the {adjective} of {p} and {q}.', 'What is the {adjective} of {p} and {q}?', ]) return example.Problem( question=example.question( context, template, adjective=adjective, p=p, q=q), answer=value) else: return composition.Entity( context=context, value=value, description='Let {self} be the {adjective} of {p} and {q}.', adjective=adjective, p=p, q=q) # @composition.module(number.is_positive_integer) def div_remainder(value, sample_args, context=None): """E.g., "What is the remainder when 27 is divided by 5?".""" is_question = context is None if context is None: context = composition.Context() entropy, sample_args = sample_args.peel() if value is None: entropy_value = 1 + random.uniform(0, entropy/3) entropy = max(0, entropy - entropy_value) value = number.integer(entropy_value, signed=False) entropy_a, entropy_q = entropy * np.random.dirichlet([1, 1]) a = number.integer(entropy_a, signed=False, min_abs=1) q = value + number.integer(entropy_q, signed=False, min_abs=1) p = a * q + value assert p % q == value p, q = context.sample(sample_args, [p, q]) if is_question: template = random.choice([ 'Calculate the remainder when {p} is divided by {q}.', 'What is the remainder when {p} is divided by {q}?', ]) return example.Problem( question=example.question( context, template, p=p.expression_else_handle, q=q.expression_else_handle), answer=value) else: return composition.Entity( context=context, value=value, description='Let {self} be the remainder when {p} is divided by {q}.', p=p, q=q) def base_conversion(min_entropy, max_entropy): """E.g., "What is 17 base 8 in base 10?".""" context = composition.Context() from_base = random.randint(2, 16) while True: to_base = random.randint(2, 16) if to_base != from_base: break # Entropy used up in selecting bases. entropy_used = math.log10(16 * 15) entropy = random.uniform( min_entropy - entropy_used, max_entropy - entropy_used) value = number.integer(entropy, signed=True) template = random.choice([ '{from_str} (base {from_base}) to base {to_base}', 'Convert {from_str} (base {from_base}) to base {to_base}.', 'What is {from_str} (base {from_base}) in base {to_base}?', ]) return example.Problem( question=example.question( context, template, from_str=display.NumberInBase(value, from_base), from_base=from_base, to_base=to_base), answer=display.NumberInBase(value, to_base))