Airbnb Dataset – Querying the Raw Listings

My initial ASP.NET Core Minimal AP exploration uses the Inside Airbnb London dataset which has 87946 listings. The data is pretty “nasty” with lots of nullable and wide columns so it took several attempts to import.

CREATE TABLE [dbo].[listingsRaw](
	[id] [bigint] NOT NULL,
	[listing_url] [nvarchar](50) NOT NULL,
	[scrape_id] [datetime2](7) NOT NULL,
	[last_scraped] [date] NOT NULL,
	[source] [nvarchar](50) NOT NULL,
	[name] [nvarchar](max) NOT NULL,
	[description] [nvarchar](max) NULL,
	[neighborhood_overview] [nvarchar](1050) NULL,
	[picture_url] [nvarchar](150) NULL,
	[host_id] [int] NOT NULL,
	[host_url] [nvarchar](50) NOT NULL,
	[host_name] [nvarchar](50) NULL,
	[host_since] [date] NULL,
	[host_location] [nvarchar](100) NULL,
	[host_about] [nvarchar](max) NULL,
	[host_response_time] [nvarchar](50) NULL,
	[host_response_rate] [nvarchar](50) NULL,
	[host_acceptance_rate] [nvarchar](50) NULL,
	[host_is_superhost] [bit] NULL,
	[host_thumbnail_url] [nvarchar](150) NULL,
	[host_picture_url] [nvarchar](150) NULL,
	[host_neighbourhood] [nvarchar](50) NULL,
	[host_listings_count] [int] NULL,
	[host_total_listings_count] [int] NULL,
	[host_verifications] [nvarchar](50) NOT NULL,
	[host_has_profile_pic] [bit] NULL,
	[host_identity_verified] [bit] NULL,
	[neighbourhood] [nvarchar](100) NULL,
	[neighbourhood_cleansed] [nvarchar](50) NOT NULL,
	[neighbourhood_group_cleansed] [nvarchar](1) NULL,
	[latitude] [float] NOT NULL,
	[longitude] [float] NOT NULL,
	[property_type] [nvarchar](50) NOT NULL,
	[room_type] [nvarchar](50) NOT NULL,
	[accommodates] [tinyint] NOT NULL,
	[bathrooms] [nvarchar](1) NULL,
	[bathrooms_text] [nvarchar](50) NULL,
	[bedrooms] [tinyint] NULL,
	[beds] [tinyint] NULL,
	[amenities] [nvarchar](max) NOT NULL,
	[price] [money] NOT NULL,
	[minimum_nights] [smallint] NOT NULL,
	[maximum_nights] [int] NOT NULL,
	[minimum_minimum_nights] [smallint] NULL,
	[maximum_minimum_nights] [int] NULL,
	[minimum_maximum_nights] [int] NULL,
	[maximum_maximum_nights] [int] NULL,
	[minimum_nights_avg_ntm] [float] NULL,
	[maximum_nights_avg_ntm] [float] NULL,
	[calendar_updated] [nvarchar](1) NULL,
	[has_availability] [bit] NOT NULL,
	[availability_30] [tinyint] NOT NULL,
	[availability_60] [tinyint] NOT NULL,
	[availability_90] [tinyint] NOT NULL,
	[availability_365] [smallint] NOT NULL,
	[calendar_last_scraped] [date] NOT NULL,
	[number_of_reviews] [smallint] NOT NULL,
	[number_of_reviews_ltm] [int] NOT NULL,
	[number_of_reviews_l30d] [tinyint] NOT NULL,
	[first_review] [date] NULL,
	[last_review] [date] NULL,
	[review_scores_rating] [float] NULL,
	[review_scores_accuracy] [float] NULL,
	[review_scores_cleanliness] [float] NULL,
	[review_scores_checkin] [float] NULL,
	[review_scores_communication] [float] NULL,
	[review_scores_location] [float] NULL,
	[review_scores_value] [float] NULL,
	[license] [nvarchar](max) NULL,
	[instant_bookable] [bit] NOT NULL,
	[calculated_host_listings_count] [int] NULL,
	[calculated_host_listings_count_entire_homes] [int] NOT NULL,
	[calculated_host_listings_count_private_rooms] [int] NOT NULL,
	[calculated_host_listings_count_shared_rooms] [int] NOT NULL,
	[reviews_per_month] [float] NULL
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]

There are other data quality issues e.g. the host information is duplicated in each of their Listings e.g. host_id, host_name, host_since, host_* etc. which will need to be tidied up.

Swagger user interface for Raw Listings search functionality.

I have implemented basic (“incomplete”) OpenAPI support for functionality and stress testing.

Swagger user interface parameterised search functionality.

The search results are paginated and individual listings can be retrieved using the Airbnb listing “id”.

const string SearchPaginatedSql = @"SELECT Uid,Id,[Name], neighbourhood
                     FROM listings
                     WHERE[Name] LIKE N'%' + @SearchText + N'%'
                     ORDER By[Name] 
                     OFFSET @PageSize *(@PageNumber - 1) ROWS FETCH NEXT @PageSize ROWS ONLY";

public record ListingListDto
{
   public long Id { get; set; }
   public string? Name { get; set; }
   public string? Neighbourhood { get; set; }
};
Swagger user interface search functionality with untyped response.

The first HTTP GET implementation returns an untyped result-set which was not very helpful.

app.MapGet("/Listing/Search", async (string searchText, int pageNumber, int pageSize, [FromServices] IDapperContext dappperContext) =>
{
   using (var connection = dappperContext.ConnectionCreate())
   {
      return await connection.QueryWithRetryAsync(SearchPaginatedSql, new { searchText, pageNumber, pageSize });
   }
})
.WithOpenApi();
Swagger user interface search functionality with typed response

The second HTTP GET implementation returns a typed result-set which improved the “usability” of clients generated from the OpenAPI definition file.

app.MapGet("/Listing/Search/Typed", async (string searchText, int pageNumber, int pageSize, [FromServices] IDapperContext dappperContext) =>
{
   using (var connection = dappperContext.ConnectionCreate())
   {
      return await connection.QueryWithRetryAsync<ListingListDto>(SearchPaginatedSql, new { searchText, pageNumber, pageSize });
   }
})
.Produces<IList<ListingListDto>>(StatusCodes.Status200OK)
.WithOpenApi();

The third HTTP GET method uses the Listing id specified in a routing parameter to lookup a Listing

string LookupByIdSql = @"SELECT Id,[Name], neighbourhood
FROM ListingsHosts
WHERE id = @Id";

public record ListingLookupDto
{
public long Id { get; set; }
public string? Name { get; set; }
public string? Neighbourhood { get; set; }
};
Swagger user interface Listing lookup functionality
app.MapGet("/Listing/{id:long}", async (long id, IDapperContext dappperContext) =>
{
   using (var connection = dappperContext.ConnectionCreate())
   {
      ListingLookupDto result = await connection.QuerySingleOrDefaultWithRetryAsync<ListingLookupDto>(LookupByIdSql, new { id });
      if (result is null)
      {
         return Results.Problem($"Listing {id} not found", statusCode: StatusCodes.Status404NotFound);
      }

      return Results.Ok(result);
   }
})
.Produces<ListingLookupDto>(StatusCodes.Status200OK)
.Produces<ProblemDetails>(StatusCodes.Status404NotFound)
.WithOpenApi();

The lack of validation of the SearchText, PageSize and PageNumber parameters allow uses to enter invalid values which caused searches to fail.

Swagger user interface search functionality with an invalid page number

My initial approach was to decorate the parameters of the ValidatedQuery method with DataAnnotations to ensure only valid values were accepted.

const byte SearchTextMinimumLength = 3;
const byte SearchTextMaximumLength = 20;
const byte PageNumberMinimum = 1;
const byte PageNumberMaximum = 100;
const byte PageSizeMinimum = 5;
const byte PageSizeMaximum = 50;

app.MapGet("/Listing/Search/ValidatedQuery", async (
   [FromQuery,Required, MinLength(SearchTextMinimumLength, ErrorMessage = "SearchTextMaximumLength"), MaxLength(SearchTextMaximumLength, ErrorMessage = "SearchTextMaximumLegth")]
   string searchText,
   [FromQuery, Range(PageNumberMinimum, PageNumberMaximum, ErrorMessage = "PageNumberMinimum PageNumberMaximum")]
   int pageNumber,
   [FromQuery, Range(PageSizeMinimum, PageSizeMaximum, ErrorMessage = "PageSizeMinimum PageSizeMaximum")]
   int pageSize,
   [FromServices] IDapperContext dappperContext) =>
{
   using (var connection = dappperContext.ConnectionCreate())
   {
      return await connection.QueryWithRetryAsync<ListingListDto>(SearchPaginatedSql, new { searchText, pageNumber, pageSize });
   }
})
.Produces<IList<ListingListDto>>(StatusCodes.Status200OK)
.Produces<ProblemDetails>(StatusCodes.Status400BadRequest)
.WithOpenApi();

This wasn’t a great solution because the validation of the parameters was declared as part of the user interface and would have to be repeated everywhere listing search functionality was provided.

Swagger user interface search functionality with parameter validation

The final HTTP GET method uses DataAnnotations on the SearchParameter(DTO) and AsParameters to bind the query string values.

app.MapGet("/Listing/Search/Parameters", async ([AsParameters] SearchParameters searchParameters,
   [FromServices] IDapperContext dappperContext) =>
{
   using (var connection = dappperContext.ConnectionCreate())
   {
      return await connection.QueryWithRetryAsync<ListingListDto>(SearchPaginatedSql, new { searchText = searchParameters.SearchText, searchParameters.PageNumber, searchParameters.PageSize });
   }
})
.Produces<IList<ListingListDto>>(StatusCodes.Status200OK)
.Produces<ProblemDetails>(StatusCodes.Status400BadRequest)
.WithOpenApi();

public record SearchParameters
{
   // https://github.com/domaindrivendev/Swashbuckle.AspNetCore/issues/2658 possibly related?
   public const byte SearchTextMinimumLength = 3;
   public const byte SearchTextMaximumLength = 15;
   public const int PageNumberMinimum = 1;
   public const int PageNumberMaximum = 100;
   public const byte PageSizeMinimum = 5;
   public const byte PageSizeMaximum = 50;

   //[FromQuery, Required, MinLength(SearchTextMinimumLength, ErrorMessage = "SearchTextMinimumLegth"), MaxLength(SearchTextMaximumLength, ErrorMessage = "SearchTextMaximumLegth")]
   //[Required, MinLength(SearchTextMinimumLength, ErrorMessage = "SearchTextMinimumLegth"), MaxLength(SearchTextMaximumLength, ErrorMessage = "SearchTextMaximumLegth")]
   [MinLength(SearchTextMinimumLength, ErrorMessage = "SearchTextMinimumLegth"), MaxLength(SearchTextMaximumLength, ErrorMessage = "SearchTextMaximumLegth")]
   public string SearchText { get; set; }

   //[FromQuery, Range(PageNumberMinimum, PageNumberMaximum, ErrorMessage = "PageNumberMinimum PageNumberMaximum")]
   //[Required, Range(PageNumberMinimum, PageNumberMaximum, ErrorMessage = "PageNumberMinimum PageNumberMaximum")]
   [Range(PageNumberMinimum, PageNumberMaximum, ErrorMessage = "PageNumberMinimum PageNumberMaximum")]
   public int PageNumber { get; set; }

   [Range(PageSizeMinimum, PageSizeMaximum, ErrorMessage = "PageSizeMinimum PageSizeMaximum")]
   public int PageSize { get; set; }
}
Swagger user interface search functionality with parameter validation

This last two implementations worked though the error messages I had embedded in the code were not displayed I think this is related to this Swashbuckle Issue.

There is also an issue looking up some listings with larger listing ids which I will need some investigation.

.NET Core web API + Dapper – HTTP Patch

The application I’m currently working on has some tables with many columns and these were proving painful to update with HTTP PUT methods. Over the last couple of releases, I have been extending the customer facing API with PATCH methods so the client can specify only the values to changed.

{ 
    "op": "replace", 
    "path": "/name", 
    "value": "USB missile launcher (Green)." 
},
{ 
    "op": "replace", 
    "path": "/UnitPrice", 
    "value": 25
},
{ 
    "op": "replace", 
    "path": "/recommendedRetailPrice", 
    "value": 37.41
}

The JSON Patch is a format for specifying updates to be applied to a resource.

Stock Items list before HTTP Patch operation

A JSON Patch document has an array of operations which identify a particular type of change.

Using Telerik Fiddler Composer functionality to apply an HTTP PATCH
Stock Items list after HTTP Patch operation

The StockItemPatchDtoV1 class is decorated with DataAnnotations to ensure the contents are valid.

public class StockItemPatchDtoV1
{
    [Required]
    [StringLength(100, MinimumLength = 1, ErrorMessage = "The name text must be at least {2} and no more than {1} characters long")]  // These would be constants in a real application
    public string Name { get; set; }

    [Required]
    [Range(0.0, 100.0)] // These would be constants in a real application
    public decimal UnitPrice { get; set; }

    [Required]
    [Range(0.0, 1000000.0)] // These would be constants in a real application
    public decimal RecommendedRetailPrice { get; set; }
}

The StockItemsController [HttpPatch(“{id}”)] method retrieves the stock item to be updated, then uses ApplyTo method and TryValidateModel to update only the specified fields.

[HttpPatch("{id}")]
public async Task<ActionResult<Model.StockItemGetDtoV1>> Patch([FromBody] JsonPatchDocument<Model.StockItemPatchDtoV1> stockItemPatch, int id)
{
    Model.StockItemGetDtoV1 stockItem;

    using (IDbConnection db = dapperContext.ConnectionCreate())
    {
        stockItem = await db.QuerySingleOrDefaultWithRetryAsync<Model.StockItemGetDtoV1>(sql: "[Warehouse].[StockItemsStockItemLookupV1]", param: new { stockItemId = id }, commandType: CommandType.StoredProcedure);

        if (stockItem == default)
        {
            logger.LogInformation("StockItem:{id} not found", id);

            return this.NotFound($"StockItem:{id} not found");
        }

        Model.StockItemPatchDtoV1 stockItemPatchDto = mapper.Map<Model.StockItemPatchDtoV1>(stockItem);

        stockItemPatch.ApplyTo(stockItemPatchDto, ModelState);

        if (!ModelState.IsValid || !TryValidateModel(stockItemPatchDto))
        {
            logger.LogInformation("stockItemPatchDto invalid {0}", string.Join(Environment.NewLine, ModelState.Values.SelectMany(v => v.Errors).Select(v => v.ErrorMessage + " " + v.Exception))); // would extract this out into shared module

            return BadRequest(ModelState);
        }

        mapper.Map(stockItemPatchDto, stockItem);

        await db.ExecuteWithRetryAsync(sql: "UPDATE Warehouse.StockItems SET StockItemName=@Name, UnitPrice=@UnitPrice, RecommendedRetailPrice=@RecommendedRetailPrice WHERE StockItemId=@Id", param: stockItem, commandType: CommandType.Text);
    }

    return this.Ok();
}

Initially the HTTP Patch method returned this error message.

HTTP/1.1 400 Bad Request
Content-Type: application/problem+json; charset=utf-8
Date: Tue, 27 Jun 2023 09:20:30 GMT
Server: Kestrel
Transfer-Encoding: chunked

1d7
{"type":"https://tools.ietf.org/html/rfc7231#section-6.5.1","title":"One or more validation errors occurred.","status":400,"traceId":"00-665a6ee9ed1105a105237c421793af5d-1719bda40c0b7d5d-00","errors":{"$":["The JSON value could not be converted to Microsoft.AspNetCore.JsonPatch.JsonPatchDocument`1[devMobile.WebAPIDapper.HttpPatch.Model.StockItemPatchDtoV1]. Path: $ | LineNumber: 0 | BytePositionInLine: 1."],"stockItemPatch":["The stockItemPatch field is required."]}}
0

After some research I worked out that I had forgotten to wire up the Newtonsoft JSON support with builder.Services.AddControllers().AddNewtonsoftJson();