Random wanderings through Microsoft Azure esp. PaaS plumbing, the IoT bits, AI on Micro controllers, AI on Edge Devices, .NET nanoFramework, .NET Core on *nix and ML.NET+ONNX
The Inside AirbnbLondon dataset has 87946 listings and the id column (which is the primary key) has a minimum value of 13913 and maximum of 973895808066047620 in the database.
Back in the early 90’s I used to live next to the Ealing Lawn Tennis Club in London
I used “Ealing” as the SearchText for my initial testing and tried different page numbers and sizes
Testing the search functionality with SwaggerUI
The listings search results JSON looked good but I missed one important detail…
string LookupByIdSql = @"SELECT Id, [Name], Listing_URL AS ListingURL
FROM ListingsHosts
WHERE id = @Id";
public record ListingLookupDto
{
public long Id { get; set; }
public string? Name { get; set; }
public string? ListingURL { get; set; }
};
//...
app.MapGet("/Listing/Results/{id:long}", async (long id, IDapperContext dappperContext) =>
{
using (var connection = dappperContext.ConnectionCreate())
{
ListingLookupDto result = await connection.QuerySingleOrDefaultWithRetryAsync<ListingLookupDto>(LookupByIdSql, new { id });
if (result is null)
{
return Results.Problem($"Listing {id} not found", statusCode: StatusCodes.Status404NotFound);
}
return Results.Ok(result);
}
})
.Produces<ListingLookupDto>(StatusCodes.Status200OK)
.Produces<ProblemDetails>(StatusCodes.Status404NotFound)
.WithOpenApi();
The id values in the search response and lookup DTOs were correct
Visual Studio 2022 Debugger inspecting listing id value
I had missed the clue in the search response JSON the listing id and the listingURL id didn’t match.
This specification allows implementations to set limits on the range
and precision of numbers accepted. Since software that implements
IEEE 754-2008 binary64 (double precision) numbers [IEEE754] is
generally available and widely used, good interoperability can be
achieved by implementations that expect no more precision or range
than these provide, in the sense that implementations will
approximate JSON numbers within the expected precision.
My initial ASP.NET Core Minimal AP exploration uses the Inside AirbnbLondon dataset which has 87946 listings. The data is pretty “nasty” with lots of nullable and wide columns so it took several attempts to import.
CREATE TABLE [dbo].[listingsRaw](
[id] [bigint] NOT NULL,
[listing_url] [nvarchar](50) NOT NULL,
[scrape_id] [datetime2](7) NOT NULL,
[last_scraped] [date] NOT NULL,
[source] [nvarchar](50) NOT NULL,
[name] [nvarchar](max) NOT NULL,
[description] [nvarchar](max) NULL,
[neighborhood_overview] [nvarchar](1050) NULL,
[picture_url] [nvarchar](150) NULL,
[host_id] [int] NOT NULL,
[host_url] [nvarchar](50) NOT NULL,
[host_name] [nvarchar](50) NULL,
[host_since] [date] NULL,
[host_location] [nvarchar](100) NULL,
[host_about] [nvarchar](max) NULL,
[host_response_time] [nvarchar](50) NULL,
[host_response_rate] [nvarchar](50) NULL,
[host_acceptance_rate] [nvarchar](50) NULL,
[host_is_superhost] [bit] NULL,
[host_thumbnail_url] [nvarchar](150) NULL,
[host_picture_url] [nvarchar](150) NULL,
[host_neighbourhood] [nvarchar](50) NULL,
[host_listings_count] [int] NULL,
[host_total_listings_count] [int] NULL,
[host_verifications] [nvarchar](50) NOT NULL,
[host_has_profile_pic] [bit] NULL,
[host_identity_verified] [bit] NULL,
[neighbourhood] [nvarchar](100) NULL,
[neighbourhood_cleansed] [nvarchar](50) NOT NULL,
[neighbourhood_group_cleansed] [nvarchar](1) NULL,
[latitude] [float] NOT NULL,
[longitude] [float] NOT NULL,
[property_type] [nvarchar](50) NOT NULL,
[room_type] [nvarchar](50) NOT NULL,
[accommodates] [tinyint] NOT NULL,
[bathrooms] [nvarchar](1) NULL,
[bathrooms_text] [nvarchar](50) NULL,
[bedrooms] [tinyint] NULL,
[beds] [tinyint] NULL,
[amenities] [nvarchar](max) NOT NULL,
[price] [money] NOT NULL,
[minimum_nights] [smallint] NOT NULL,
[maximum_nights] [int] NOT NULL,
[minimum_minimum_nights] [smallint] NULL,
[maximum_minimum_nights] [int] NULL,
[minimum_maximum_nights] [int] NULL,
[maximum_maximum_nights] [int] NULL,
[minimum_nights_avg_ntm] [float] NULL,
[maximum_nights_avg_ntm] [float] NULL,
[calendar_updated] [nvarchar](1) NULL,
[has_availability] [bit] NOT NULL,
[availability_30] [tinyint] NOT NULL,
[availability_60] [tinyint] NOT NULL,
[availability_90] [tinyint] NOT NULL,
[availability_365] [smallint] NOT NULL,
[calendar_last_scraped] [date] NOT NULL,
[number_of_reviews] [smallint] NOT NULL,
[number_of_reviews_ltm] [int] NOT NULL,
[number_of_reviews_l30d] [tinyint] NOT NULL,
[first_review] [date] NULL,
[last_review] [date] NULL,
[review_scores_rating] [float] NULL,
[review_scores_accuracy] [float] NULL,
[review_scores_cleanliness] [float] NULL,
[review_scores_checkin] [float] NULL,
[review_scores_communication] [float] NULL,
[review_scores_location] [float] NULL,
[review_scores_value] [float] NULL,
[license] [nvarchar](max) NULL,
[instant_bookable] [bit] NOT NULL,
[calculated_host_listings_count] [int] NULL,
[calculated_host_listings_count_entire_homes] [int] NOT NULL,
[calculated_host_listings_count_private_rooms] [int] NOT NULL,
[calculated_host_listings_count_shared_rooms] [int] NOT NULL,
[reviews_per_month] [float] NULL
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
There are other data quality issues e.g. the host information is duplicated in each of their Listings e.g. host_id, host_name, host_since, host_* etc. which will need to be tidied up.
Swagger user interface for Raw Listings search functionality.
I have implemented basic (“incomplete”) OpenAPI support for functionality and stress testing.
Swagger user interface parameterised search functionality.
The search results are paginated and individual listings can be retrieved using the Airbnb listing “id”.
const string SearchPaginatedSql = @"SELECT Uid,Id,[Name], neighbourhood
FROM listings
WHERE[Name] LIKE N'%' + @SearchText + N'%'
ORDER By[Name]
OFFSET @PageSize *(@PageNumber - 1) ROWS FETCH NEXT @PageSize ROWS ONLY";
public record ListingListDto
{
public long Id { get; set; }
public string? Name { get; set; }
public string? Neighbourhood { get; set; }
};
Swagger user interface search functionality with untyped response.
The first HTTP GET implementation returns an untyped result-set which was not very helpful.
app.MapGet("/Listing/Search", async (string searchText, int pageNumber, int pageSize, [FromServices] IDapperContext dappperContext) =>
{
using (var connection = dappperContext.ConnectionCreate())
{
return await connection.QueryWithRetryAsync(SearchPaginatedSql, new { searchText, pageNumber, pageSize });
}
})
.WithOpenApi();
Swagger user interface search functionality with typed response
The second HTTP GET implementation returns a typed result-set which improved the “usability” of clients generated from the OpenAPI definition file.
app.MapGet("/Listing/Search/Typed", async (string searchText, int pageNumber, int pageSize, [FromServices] IDapperContext dappperContext) =>
{
using (var connection = dappperContext.ConnectionCreate())
{
return await connection.QueryWithRetryAsync<ListingListDto>(SearchPaginatedSql, new { searchText, pageNumber, pageSize });
}
})
.Produces<IList<ListingListDto>>(StatusCodes.Status200OK)
.WithOpenApi();
string LookupByIdSql = @"SELECT Id,[Name], neighbourhood
FROM ListingsHosts
WHERE id = @Id";
public record ListingLookupDto
{
public long Id { get; set; }
public string? Name { get; set; }
public string? Neighbourhood { get; set; }
};
Swagger user interface Listing lookup functionality
app.MapGet("/Listing/{id:long}", async (long id, IDapperContext dappperContext) =>
{
using (var connection = dappperContext.ConnectionCreate())
{
ListingLookupDto result = await connection.QuerySingleOrDefaultWithRetryAsync<ListingLookupDto>(LookupByIdSql, new { id });
if (result is null)
{
return Results.Problem($"Listing {id} not found", statusCode: StatusCodes.Status404NotFound);
}
return Results.Ok(result);
}
})
.Produces<ListingLookupDto>(StatusCodes.Status200OK)
.Produces<ProblemDetails>(StatusCodes.Status404NotFound)
.WithOpenApi();
The lack of validation of the SearchText, PageSize and PageNumber parameters allow uses to enter invalid values which caused searches to fail.
Swagger user interface search functionality with an invalid page number
My initial approach was to decorate the parameters of the ValidatedQuery method with DataAnnotations to ensure only valid values were accepted.
This wasn’t a great solution because the validation of the parameters was declared as part of the user interface and would have to be repeated everywhere listing search functionality was provided.
Swagger user interface search functionality with parameter validation
app.MapGet("/Listing/Search/Parameters", async ([AsParameters] SearchParameters searchParameters,
[FromServices] IDapperContext dappperContext) =>
{
using (var connection = dappperContext.ConnectionCreate())
{
return await connection.QueryWithRetryAsync<ListingListDto>(SearchPaginatedSql, new { searchText = searchParameters.SearchText, searchParameters.PageNumber, searchParameters.PageSize });
}
})
.Produces<IList<ListingListDto>>(StatusCodes.Status200OK)
.Produces<ProblemDetails>(StatusCodes.Status400BadRequest)
.WithOpenApi();
public record SearchParameters
{
// https://github.com/domaindrivendev/Swashbuckle.AspNetCore/issues/2658 possibly related?
public const byte SearchTextMinimumLength = 3;
public const byte SearchTextMaximumLength = 15;
public const int PageNumberMinimum = 1;
public const int PageNumberMaximum = 100;
public const byte PageSizeMinimum = 5;
public const byte PageSizeMaximum = 50;
//[FromQuery, Required, MinLength(SearchTextMinimumLength, ErrorMessage = "SearchTextMinimumLegth"), MaxLength(SearchTextMaximumLength, ErrorMessage = "SearchTextMaximumLegth")]
//[Required, MinLength(SearchTextMinimumLength, ErrorMessage = "SearchTextMinimumLegth"), MaxLength(SearchTextMaximumLength, ErrorMessage = "SearchTextMaximumLegth")]
[MinLength(SearchTextMinimumLength, ErrorMessage = "SearchTextMinimumLegth"), MaxLength(SearchTextMaximumLength, ErrorMessage = "SearchTextMaximumLegth")]
public string SearchText { get; set; }
//[FromQuery, Range(PageNumberMinimum, PageNumberMaximum, ErrorMessage = "PageNumberMinimum PageNumberMaximum")]
//[Required, Range(PageNumberMinimum, PageNumberMaximum, ErrorMessage = "PageNumberMinimum PageNumberMaximum")]
[Range(PageNumberMinimum, PageNumberMaximum, ErrorMessage = "PageNumberMinimum PageNumberMaximum")]
public int PageNumber { get; set; }
[Range(PageSizeMinimum, PageSizeMaximum, ErrorMessage = "PageSizeMinimum PageSizeMaximum")]
public int PageSize { get; set; }
}
Swagger user interface search functionality with parameter validation
This last two implementations worked though the error messages I had embedded in the code were not displayed I think this is related to this Swashbuckle Issue.
There is also an issue looking up some listings with larger listing ids which I will need some investigation.