Python google.cloud.bigquery.SchemaField() Examples

The following are 28 code examples of google.cloud.bigquery.SchemaField(), collected from open-source projects. The source file, project, and license for each example are noted above it.
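Before the project examples, here is a minimal sketch of constructing SchemaField objects directly; the column names below are illustrative and not taken from any of the projects:

from google.cloud import bigquery

# A simple nullable string column.
name_field = bigquery.SchemaField("full_name", "STRING", mode="NULLABLE")

# A nested RECORD column built from child SchemaField objects.
addresses_field = bigquery.SchemaField(
    "addresses",
    "RECORD",
    mode="REPEATED",
    fields=[
        bigquery.SchemaField("city", "STRING"),
        bigquery.SchemaField("zip_code", "STRING"),
    ],
)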
Example #1
Source File: system.py    From python-bigquery with Apache License 2.0
def test_update_table_schema(self):
        dataset = self.temp_dataset(_make_dataset_id("update_table"))

        TABLE_NAME = "test_table"
        table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA)
        self.assertFalse(_table_exists(table_arg))
        table = retry_403(Config.CLIENT.create_table)(table_arg)
        self.to_delete.insert(0, table)
        self.assertTrue(_table_exists(table))
        voter = bigquery.SchemaField("voter", "BOOLEAN", mode="NULLABLE")
        schema = table.schema
        schema.append(voter)
        table.schema = schema

        updated_table = Config.CLIENT.update_table(table, ["schema"])

        self.assertEqual(len(updated_table.schema), len(schema))
        for found, expected in zip(updated_table.schema, schema):
            self.assertEqual(found.name, expected.name)
            self.assertEqual(found.field_type, expected.field_type)
            self.assertEqual(found.mode, expected.mode) 
Example #2
Source File: create_table.py    From python-bigquery with Apache License 2.0
def create_table(table_id):

    # [START bigquery_create_table]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    schema = [
        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]

    table = bigquery.Table(table_id, schema=schema)
    table = client.create_table(table)  # Make an API request.
    print(
        "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id)
    )
    # [END bigquery_create_table] 
Example #3
Source File: test_update_table_require_partition_filter.py    From python-bigquery with Apache License 2.0
def test_update_table_require_partition_filter(capsys, random_table_id, client):

    # Make a partitioned table.
    schema = [bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")]
    table = bigquery.Table(random_table_id, schema=schema)
    table.time_partitioning = bigquery.TimePartitioning(field="transaction_timestamp")
    table = client.create_table(table)

    update_table_require_partition_filter.update_table_require_partition_filter(
        random_table_id
    )
    out, _ = capsys.readouterr()
    assert (
        "Updated table '{}' with require_partition_filter=True".format(random_table_id)
        in out
    ) 
Example #4
Source File: server.py    From healthcare-deid with Apache License 2.0
def manage_tables(dataset_id, table_id):
  """Create tables in datasets in BigQuery."""
  try:
    get_bq_dataset(dataset_id)
  except exceptions.NotFound as e:
    return flask.jsonify(error=e.code, text=e.message), e.code

  table_ref = bq_client.dataset(dataset_id).table(table_id)
  try:
    jsonschema.validate(flask.request.json, bq_table_schema)
  except jsonschema.ValidationError:
    error_msg = 'unable to validate provided payload.'
    return flask.jsonify(error=400, text=error_msg), 400

  schema = [bigquery.SchemaField(field['name'], field['type'],
                                 field.get('mode') or 'NULLABLE')
            for field in flask.request.json]
  table = bigquery.Table(table_ref, schema=schema)
  try:
    table = bq_client.create_table(table)
  except exceptions.GoogleAPIError as e:
    return flask.jsonify(error=e.message), 400

  return flask.jsonify(result='success'), 200 
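The handler above iterates over flask.request.json as a list of field definitions validated against bq_table_schema; a hypothetical payload (field names are illustrative) would be equivalent to:

request_payload = [
    {"name": "patient_id", "type": "STRING", "mode": "REQUIRED"},
    {"name": "note", "type": "STRING"},  # mode falls back to NULLABLE in the handler
]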
Example #5
Source File: system.py    From python-bigquery with Apache License 2.0
def test_list_rows_max_results_w_bqstorage(self):
        table_ref = DatasetReference("bigquery-public-data", "utility_us").table(
            "country_code_iso"
        )
        bqstorage_client = bigquery_storage_v1.BigQueryReadClient(
            credentials=Config.CLIENT._credentials
        )

        row_iterator = Config.CLIENT.list_rows(
            table_ref,
            selected_fields=[bigquery.SchemaField("country_name", "STRING")],
            max_results=100,
        )
        dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client)

        self.assertEqual(len(dataframe.index), 100) 
Example #6
Source File: main_test.py    From python-docs-samples with Apache License 2.0
def test_table_to_dataframe(capsys, clients):
    from google.cloud import bigquery

    bqclient, bqstorageclient = clients

    # [START bigquerystorage_pandas_tutorial_all]
    # [START bigquerystorage_pandas_tutorial_read_table]
    # Download a table.
    table = bigquery.TableReference.from_string(
        "bigquery-public-data.utility_us.country_code_iso"
    )
    rows = bqclient.list_rows(
        table,
        selected_fields=[
            bigquery.SchemaField("country_name", "STRING"),
            bigquery.SchemaField("fips_code", "STRING"),
        ],
    )
    dataframe = rows.to_dataframe(bqstorage_client=bqstorageclient)
    print(dataframe.head())
    # [END bigquerystorage_pandas_tutorial_read_table]
    # [END bigquerystorage_pandas_tutorial_all]

    out, _ = capsys.readouterr()
    assert "country_name" in out 
Example #7
Source File: bigquery_utils.py    From ethereum-etl-airflow with MIT License
def read_bigquery_schema_from_json_recursive(json_schema):
    """
    CAUTION: Recursive function
    This method can generate BQ schemas for nested records
    """
    result = []
    for field in json_schema:
        if field.get('type').lower() == 'record' and field.get('fields'):
            schema = bigquery.SchemaField(
                name=field.get('name'),
                field_type=field.get('type', 'STRING'),
                mode=field.get('mode', 'NULLABLE'),
                description=field.get('description'),
                fields=read_bigquery_schema_from_json_recursive(field.get('fields'))
            )
        else:
            schema = bigquery.SchemaField(
                name=field.get('name'),
                field_type=field.get('type', 'STRING'),
                mode=field.get('mode', 'NULLABLE'),
                description=field.get('description')
            )
        result.append(schema)
    return result 
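A minimal usage sketch for the recursive helper above, assuming a parsed JSON schema with one nested RECORD field (the field names are illustrative):

json_schema = [
    {'name': 'block_number', 'type': 'INTEGER', 'mode': 'REQUIRED'},
    {
        'name': 'receipt',
        'type': 'RECORD',
        'fields': [
            {'name': 'status', 'type': 'INTEGER'},
            {'name': 'gas_used', 'type': 'INTEGER'},
        ],
    },
]

# The nested 'receipt' entry becomes a SchemaField whose .fields attribute
# holds the two child SchemaField objects.
bq_schema = read_bigquery_schema_from_json_recursive(json_schema)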
Example #8
Source File: test_table_util.py    From professional-services with Apache License 2.0
def test_get_bq_translated_schema(self):
        """Tests TableUtil.get_bq_translated_schema().

        Tests TableUtil's ability to translate a json schema to a BigQuery
        schema in List[google.cloud.bigquery.schema.SchemaField] format.

        Returns:
            True if test passes, else False.
        """
        expected_bq_schema = [
            bigquery.SchemaField('string1', 'STRING', 'REQUIRED',
                                 'description1'),
            bigquery.SchemaField('numeric1', 'NUMERIC', 'REQUIRED',
                                 'description2')
        ]
        bq_schema = self.table_util.get_bq_translated_schema()

        assert expected_bq_schema == bq_schema 
Example #9
Source File: helper_function.py    From professional-services with Apache License 2.0
def csv_in_gcs_to_table(bucket_name: str, object_name: str, dataset_id: str,
                        table_id: str,
                        schema: List[bigquery.SchemaField]) -> None:
    """Upload CSV to BigQuery table.
        If the table already exists, it overwrites the table data.

    Args:
        bucket_name: Bucket name for holding the object
        object_name: Name of object to be uploaded
        dataset_id: Dataset id where the table is located.
        table_id: String holding the ID of the table.
        schema: Schema of the table_id
    """
    client = bigquery.Client()
    dataset_ref = client.dataset(dataset_id)
    job_config = bigquery.LoadJobConfig()
    job_config.schema = schema
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.write_disposition = bigquery.WriteDisposition().WRITE_TRUNCATE
    uri = "gs://{}/{}".format(bucket_name, object_name)
    load_job = client.load_table_from_uri(uri,
                                          dataset_ref.table(table_id),
                                          job_config=job_config)
    load_job.result() 
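A hypothetical call to the helper above; the bucket, object, dataset, and table names are placeholders rather than values from the original project:

schema = [
    bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
    bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
]
csv_in_gcs_to_table(
    bucket_name="my-bucket",
    object_name="data/people.csv",
    dataset_id="my_dataset",
    table_id="people",
    schema=schema,
)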
Example #10
Source File: system.py    From python-bigquery with Apache License 2.0
def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self):
        """Test that a DataFrame containing column with None-type values and int64 datatype
        can be uploaded without specifying a schema.

        https://github.com/googleapis/python-bigquery/issues/22
        """

        dataset_id = _make_dataset_id("bq_load_test")
        self.temp_dataset(dataset_id)
        table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format(
            Config.CLIENT.project, dataset_id
        )
        df_data = collections.OrderedDict(
            [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))]
        )
        dataframe = pandas.DataFrame(df_data, columns=df_data.keys())
        load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id)
        load_job.result()
        table = Config.CLIENT.get_table(table_id)
        self.assertEqual(tuple(table.schema), (bigquery.SchemaField("x", "INTEGER"),))
        self.assertEqual(table.num_rows, 4) 
Example #11
Source File: import_pipeline.py    From professional-services with Apache License 2.0
def to_bigquery_schema(self, fields):
        """Convert list of dicts into `bigquery.SchemaFields`."""
        for field in fields:
            if 'fields' in field:
                field['fields'] = self.to_bigquery_schema(field['fields'])
        return [bigquery.SchemaField(**field) for field in fields] 
Example #12
Source File: helper_function.py    From professional-services with Apache License 2.0
def convert_to_schema(schema: List[Dict[str, str]]) -> List[bigquery.SchemaField]:
    """Reads the schema as parsed JSON (a list of dicts) and reformats it as an array of SchemaField objects.

    Args:
        schema: list of dicts to convert to list of SchemaField

    Returns:
        List of bigquery.SchemaField objects holding the schema.
    """
    # Body reconstructed from the docstring: each dict becomes a SchemaField,
    # with the mode defaulting to NULLABLE when it is not supplied.
    return [
        bigquery.SchemaField(field["name"], field["type"],
                             field.get("mode", "NULLABLE"))
        for field in schema
    ]
Example #13
Source File: test_parquet_util.py    From professional-services with Apache License 2.0
def setUp(self):
        """Sets up resources for tests.
        """
        bq_schema = [
            bigquery.SchemaField('string1', 'STRING', 'REQUIRED'),
            bigquery.SchemaField('numeric1', 'NUMERIC', 'REQUIRED')
        ]
        self.parquet_util = parquet_util.ParquetUtil(bq_schema=bq_schema) 
Example #14
Source File: executor_test.py    From tfx with Apache License 2.0
def setUp(self):
    # Mock BigQuery result schema.
    self._schema = [
        bigquery.SchemaField('i', 'INTEGER', mode='REQUIRED'),
        bigquery.SchemaField('i2', 'INTEGER', mode='REPEATED'),
        bigquery.SchemaField('b', 'BOOLEAN', mode='REQUIRED'),
        bigquery.SchemaField('f', 'FLOAT', mode='REQUIRED'),
        bigquery.SchemaField('f2', 'FLOAT', mode='REPEATED'),
        bigquery.SchemaField('s', 'STRING', mode='REQUIRED'),
        bigquery.SchemaField('s2', 'STRING', mode='REPEATED'),
    ]
    super(ExecutorTest, self).setUp() 
Example #15
Source File: load_table_uri_truncate_csv.py    From python-bigquery with Apache License 2.0
def load_table_uri_truncate_csv(table_id):

    # [START bigquery_load_table_gcs_csv_truncate]
    import six

    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
    )

    body = six.BytesIO(b"Washington,WA")
    client.load_table_from_file(body, table_id, job_config=job_config).result()
    previous_rows = client.get_table(table_id).num_rows
    assert previous_rows > 0

    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
        source_format=bigquery.SourceFormat.CSV,
        skip_leading_rows=1,
    )

    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_csv_truncate] 
Example #16
Source File: test_avro_util.py    From professional-services with Apache License 2.0
def setUp(self):
        """Sets up resources for tests.
        """
        bq_schema = [
            bigquery.SchemaField('string1', 'STRING', 'REQUIRED'),
            bigquery.SchemaField('numeric1', 'NUMERIC', 'REQUIRED')
        ]
        self.schema_name = 'test_schema'
        self.avro_util = avro_util.AvroUtil(bq_schema=bq_schema,
                                            schema_name=self.schema_name) 
Example #17
Source File: load_table_uri_truncate_parquet.py    From python-bigquery with Apache License 2.0
def load_table_uri_truncate_parquet(table_id):

    # [START bigquery_load_table_gcs_parquet_truncate]
    import six

    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
    )

    body = six.BytesIO(b"Washington,WA")
    client.load_table_from_file(body, table_id, job_config=job_config).result()
    previous_rows = client.get_table(table_id).num_rows
    assert previous_rows > 0

    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
        source_format=bigquery.SourceFormat.PARQUET,
    )

    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet"
    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_parquet_truncate] 
Example #18
Source File: conftest.py    From python-bigquery with Apache License 2.0
def table_with_schema_id(client, dataset_id):
    now = datetime.datetime.now()
    table_id = "python_table_with_schema_{}_{}".format(
        now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8]
    )
    schema = [
        bigquery.SchemaField("full_name", "STRING"),
        bigquery.SchemaField("age", "INTEGER"),
    ]
    table = bigquery.Table("{}.{}".format(dataset_id, table_id), schema=schema)
    table = client.create_table(table)
    yield "{}.{}.{}".format(table.project, table.dataset_id, table.table_id)
    client.delete_table(table, not_found_ok=True) 
Example #19
Source File: load_table_uri_truncate_orc.py    From python-bigquery with Apache License 2.0
def load_table_uri_truncate_orc(table_id):

    # [START bigquery_load_table_gcs_orc_truncate]
    import six

    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
    )

    body = six.BytesIO(b"Washington,WA")
    client.load_table_from_file(body, table_id, job_config=job_config).result()
    previous_rows = client.get_table(table_id).num_rows
    assert previous_rows > 0

    job_config = bigquery.LoadJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
        source_format=bigquery.SourceFormat.ORC,
    )

    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc"
    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_orc_truncate] 
Example #20
Source File: load_table_uri_csv.py    From python-bigquery with Apache License 2.0
def load_table_uri_csv(table_id):

    # [START bigquery_load_table_gcs_csv]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("post_abbr", "STRING"),
        ],
        skip_leading_rows=1,
        # The source format defaults to CSV, so the line below is optional.
        source_format=bigquery.SourceFormat.CSV,
    )
    uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"

    load_job = client.load_table_from_uri(
        uri, table_id, job_config=job_config
    )  # Make an API request.

    load_job.result()  # Waits for the job to complete.

    destination_table = client.get_table(table_id)  # Make an API request.
    print("Loaded {} rows.".format(destination_table.num_rows))
    # [END bigquery_load_table_gcs_csv] 
Example #21
Source File: create_table_range_partitioned.py    From python-bigquery with Apache License 2.0
def create_table_range_partitioned(table_id):

    # [START bigquery_create_table_range_partitioned]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table to create.
    # table_id = "your-project.your_dataset.your_table_name"

    schema = [
        bigquery.SchemaField("full_name", "STRING"),
        bigquery.SchemaField("city", "STRING"),
        bigquery.SchemaField("zipcode", "INTEGER"),
    ]

    table = bigquery.Table(table_id, schema=schema)
    table.range_partitioning = bigquery.RangePartitioning(
        # To use integer range partitioning, select a top-level REQUIRED /
        # NULLABLE column with INTEGER / INT64 data type.
        field="zipcode",
        range_=bigquery.PartitionRange(start=0, end=100000, interval=10),
    )
    table = client.create_table(table)  # Make an API request.
    print(
        "Created table {}.{}.{}".format(table.project, table.dataset_id, table.table_id)
    )
    # [END bigquery_create_table_range_partitioned]
    return table 
Example #22
Source File: add_empty_column.py    From python-bigquery with Apache License 2.0
def add_empty_column(table_id):

    # [START bigquery_add_empty_column]
    from google.cloud import bigquery

    # Construct a BigQuery client object.
    client = bigquery.Client()

    # TODO(developer): Set table_id to the ID of the table
    #                  to add an empty column.
    # table_id = "your-project.your_dataset.your_table_name"

    table = client.get_table(table_id)  # Make an API request.

    original_schema = table.schema
    new_schema = original_schema[:]  # Creates a copy of the schema.
    new_schema.append(bigquery.SchemaField("phone", "STRING"))

    table.schema = new_schema
    table = client.update_table(table, ["schema"])  # Make an API request.

    if len(table.schema) == len(original_schema) + 1 == len(new_schema):
        print("A new column has been added.")
    else:
        print("The column has not been added.")
    # [END bigquery_add_empty_column] 
Example #23
Source File: system.py    From python-bigquery with Apache License 2.0
def test_load_table_from_dataframe_w_required(self):
        """Test that a DataFrame with required columns can be uploaded if a
        BigQuery schema is specified.

        See: https://github.com/googleapis/google-cloud-python/issues/8093
        """
        table_schema = (
            bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
            bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
        )

        records = [{"name": "Chip", "age": 2}, {"name": "Dale", "age": 3}]
        dataframe = pandas.DataFrame(records, columns=["name", "age"])
        job_config = bigquery.LoadJobConfig(schema=table_schema)
        dataset_id = _make_dataset_id("bq_load_test")
        self.temp_dataset(dataset_id)
        table_id = "{}.{}.load_table_from_dataframe_w_required".format(
            Config.CLIENT.project, dataset_id
        )

        # Create the table before loading so that schema mismatch errors are
        # identified.
        table = retry_403(Config.CLIENT.create_table)(
            Table(table_id, schema=table_schema)
        )
        self.to_delete.insert(0, table)

        job_config = bigquery.LoadJobConfig(schema=table_schema)
        load_job = Config.CLIENT.load_table_from_dataframe(
            dataframe, table_id, job_config=job_config
        )
        load_job.result()

        table = Config.CLIENT.get_table(table)
        self.assertEqual(tuple(table.schema), table_schema)
        self.assertEqual(table.num_rows, 2) 
Example #24
Source File: system.py    From python-bigquery with Apache License 2.0
def test_load_table_from_json_basic_use(self):
        table_schema = (
            bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
            bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
            bigquery.SchemaField("birthday", "DATE", mode="REQUIRED"),
            bigquery.SchemaField("is_awesome", "BOOLEAN", mode="REQUIRED"),
        )

        json_rows = [
            {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False},
            {"name": "Chuck", "age": 79, "birthday": "1940-03-10", "is_awesome": True},
        ]

        dataset_id = _make_dataset_id("bq_system_test")
        self.temp_dataset(dataset_id)
        table_id = "{}.{}.load_table_from_json_basic_use".format(
            Config.CLIENT.project, dataset_id
        )

        # Create the table before loading so that schema mismatch errors are
        # identified.
        table = retry_403(Config.CLIENT.create_table)(
            Table(table_id, schema=table_schema)
        )
        self.to_delete.insert(0, table)

        job_config = bigquery.LoadJobConfig(schema=table_schema)
        load_job = Config.CLIENT.load_table_from_json(
            json_rows, table_id, job_config=job_config
        )
        load_job.result()

        table = Config.CLIENT.get_table(table)
        self.assertEqual(tuple(table.schema), table_schema)
        self.assertEqual(table.num_rows, 2) 
Example #25
Source File: system.py    From python-bigquery with Apache License 2.0
def test_load_table_from_json_schema_autodetect(self):
        json_rows = [
            {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False},
            {"name": "Chuck", "age": 79, "birthday": "1940-03-10", "is_awesome": True},
        ]

        dataset_id = _make_dataset_id("bq_system_test")
        self.temp_dataset(dataset_id)
        table_id = "{}.{}.load_table_from_json_basic_use".format(
            Config.CLIENT.project, dataset_id
        )

        # Use schema with NULLABLE fields, because schema autodetection
        # defaults to field mode NULLABLE.
        table_schema = (
            bigquery.SchemaField("name", "STRING", mode="NULLABLE"),
            bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
            bigquery.SchemaField("birthday", "DATE", mode="NULLABLE"),
            bigquery.SchemaField("is_awesome", "BOOLEAN", mode="NULLABLE"),
        )
        # create the table before loading so that the column order is predictable
        table = retry_403(Config.CLIENT.create_table)(
            Table(table_id, schema=table_schema)
        )
        self.to_delete.insert(0, table)

        # do not pass an explicit job config to trigger automatic schema detection
        load_job = Config.CLIENT.load_table_from_json(json_rows, table_id)
        load_job.result()

        table = Config.CLIENT.get_table(table)
        self.assertEqual(tuple(table.schema), table_schema)
        self.assertEqual(table.num_rows, 2) 
Example #26
Source File: system.py    From python-bigquery with Apache License 2.0
def test_list_rows_page_size(self):
        from google.cloud.bigquery.job import SourceFormat
        from google.cloud.bigquery.job import WriteDisposition

        num_items = 7
        page_size = 3
        num_pages, num_last_page = divmod(num_items, page_size)

        SF = bigquery.SchemaField
        schema = [SF("string_col", "STRING", mode="NULLABLE")]
        to_insert = [{"string_col": "item%d" % i} for i in range(num_items)]
        rows = [json.dumps(row) for row in to_insert]
        body = six.BytesIO("{}\n".format("\n".join(rows)).encode("ascii"))

        table_id = "test_table"
        dataset = self.temp_dataset(_make_dataset_id("nested_df"))
        table = dataset.table(table_id)
        self.to_delete.insert(0, table)
        job_config = bigquery.LoadJobConfig()
        job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
        job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON
        job_config.schema = schema
        # Load a table using a local JSON file from memory.
        Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result()

        df = Config.CLIENT.list_rows(table, selected_fields=schema, page_size=page_size)
        pages = df.pages

        for i in range(num_pages):
            page = next(pages)
            self.assertEqual(page.num_items, page_size)
        page = next(pages)
        self.assertEqual(page.num_items, num_last_page) 
Example #27
Source File: system.py    From python-bigquery with Apache License 2.0
def test_insert_rows_nested_nested_dictionary(self):
        # See #2951
        SF = bigquery.SchemaField
        schema = [
            SF("string_col", "STRING", mode="NULLABLE"),
            SF(
                "record_col",
                "RECORD",
                mode="NULLABLE",
                fields=[
                    SF("nested_string", "STRING", mode="NULLABLE"),
                    SF("nested_repeated", "INTEGER", mode="REPEATED"),
                    SF(
                        "nested_record",
                        "RECORD",
                        mode="NULLABLE",
                        fields=[SF("nested_nested_string", "STRING", mode="NULLABLE")],
                    ),
                ],
            ),
        ]
        record = {
            "nested_string": "another string value",
            "nested_repeated": [0, 1, 2],
            "nested_record": {"nested_nested_string": "some deep insight"},
        }
        to_insert = [{"string_col": "Some value", "record_col": record}]
        table_id = "test_table"
        dataset = self.temp_dataset(_make_dataset_id("issue_2951"))
        table_arg = Table(dataset.table(table_id), schema=schema)
        table = retry_403(Config.CLIENT.create_table)(table_arg)
        self.to_delete.insert(0, table)

        Config.CLIENT.insert_rows(table, to_insert)

        retry = RetryResult(_has_rows, max_tries=8)
        rows = retry(self._fetch_single_page)(table)
        row_tuples = [r.values() for r in rows]
        expected_rows = [("Some value", record)]
        self.assertEqual(row_tuples, expected_rows) 
Example #28
Source File: system.py    From python-bigquery with Apache License 2.0
def _load_table_for_dml(self, rows, dataset_id, table_id):
        from google.cloud._testing import _NamedTemporaryFile
        from google.cloud.bigquery.job import CreateDisposition
        from google.cloud.bigquery.job import SourceFormat
        from google.cloud.bigquery.job import WriteDisposition

        dataset = self.temp_dataset(dataset_id)
        greeting = bigquery.SchemaField("greeting", "STRING", mode="NULLABLE")
        table_ref = dataset.table(table_id)
        table_arg = Table(table_ref, schema=[greeting])
        table = retry_403(Config.CLIENT.create_table)(table_arg)
        self.to_delete.insert(0, table)

        with _NamedTemporaryFile() as temp:
            with open(temp.name, "w") as csv_write:
                writer = csv.writer(csv_write)
                writer.writerow(("Greeting",))
                writer.writerows(rows)

            with open(temp.name, "rb") as csv_read:
                config = bigquery.LoadJobConfig()
                config.source_format = SourceFormat.CSV
                config.skip_leading_rows = 1
                config.create_disposition = CreateDisposition.CREATE_NEVER
                config.write_disposition = WriteDisposition.WRITE_EMPTY
                job = Config.CLIENT.load_table_from_file(
                    csv_read, table_ref, job_config=config
                )

        # Retry until done.
        job.result(timeout=JOB_TIMEOUT)
        self._fetch_single_page(table)